Merge remote-tracking branch 'origin/master' into redesign

This commit is contained in:
Luna D. 2024-06-06 22:55:50 +02:00
commit 34ff8abb36
No known key found for this signature in database
GPG key ID: 4B1C63448394F688
174 changed files with 3053 additions and 1525 deletions

View file

@ -22,7 +22,7 @@ Once the application has started, navigate to http://localhost:8080 and login wi
If you are running Docker on Windows and the application crashes immediately upon startup, please ensure that `autocrlf` is set to `false` in your Git config, and then re-clone the repository. Additionally, it is recommended that you allocate at least 4GB of RAM to your Docker VM.
If you run into an Elasticsearch bootstrap error, you may need to increase your `max_map_count` on the host as follows:
If you run into an OpenSearch bootstrap error, you may need to increase your `max_map_count` on the host as follows:
```
sudo sysctl -w vm.max_map_count=262144
```

View file

@ -4,9 +4,18 @@
import { LocalAutocompleter } from './utils/local-autocompleter';
import { handleError } from './utils/requests';
import { getTermContexts } from './match_query';
import store from './utils/store';
const cache = {};
let inputField, originalTerm;
/** @type {HTMLInputElement} */
let inputField,
/** @type {string} */
originalTerm,
/** @type {string} */
originalQuery,
/** @type {TermContext} */
selectedTerm;
function removeParent() {
const parent = document.querySelector('.autocomplete');
@ -18,13 +27,37 @@ function removeSelected() {
if (selected) selected.classList.remove('autocomplete__item--selected');
}
/**
 * Reports whether the input currently tracked by the autocompleter is marked
 * with `data-ac-mode="search"`.
 *
 * When no input is tracked yet, the falsy `inputField` value itself is returned.
 */
function isSearchField() {
  if (!inputField) return inputField;

  return inputField.dataset.acMode === 'search';
}
/**
 * Puts the remembered text back into the tracked input: the whole original
 * query for search fields, the original term for every other field.
 */
function restoreOriginalValue() {
  if (isSearchField()) {
    inputField.value = originalQuery;
  }
  else {
    inputField.value = originalTerm;
  }
}
/**
 * Writes a chosen suggestion into the tracked input.
 *
 * Plain fields are overwritten entirely. In search fields only the range of
 * the currently selected term inside the original query is replaced, the caret
 * is placed right after the inserted text and the field is focused. Nothing
 * happens in a search field when no term is selected.
 *
 * @param {string} selection - Suggestion value to insert.
 */
function applySelectedValue(selection) {
  if (isSearchField()) {
    if (selectedTerm) {
      const [termStart, termEnd] = selectedTerm[0];
      const head = originalQuery.slice(0, termStart);
      const tail = originalQuery.slice(termEnd);
      const caret = termStart + selection.length;

      inputField.value = head + selection + tail;
      inputField.setSelectionRange(caret, caret);
      inputField.focus();
    }

    return;
  }

  inputField.value = selection;
}
function changeSelected(firstOrLast, current, sibling) {
if (current && sibling) { // if the currently selected item has a sibling, move selection to it
current.classList.remove('autocomplete__item--selected');
sibling.classList.add('autocomplete__item--selected');
}
else if (current) { // if the next keypress will take the user outside the list, restore the unautocompleted term
inputField.value = originalTerm;
restoreOriginalValue();
removeSelected();
}
else if (firstOrLast) { // if no item in the list is selected, select the first or last
@ -32,17 +65,36 @@ function changeSelected(firstOrLast, current, sibling) {
}
}
/**
 * Checks whether the text cursor of the tracked input lies outside the range
 * of the currently selected term.
 *
 * The earlier of `selectionStart`/`selectionEnd` is compared against the
 * term's `[start, end]` range; both boundaries count as "inside".
 *
 * @returns {boolean} `true` when the cursor is outside the term, or when there
 *   is no tracked input or no selected term at all.
 */
function isSelectionOutsideCurrentTerm() {
  // Not every caller checks `selectedTerm` first (the document click handler
  // does not), so a missing term must not blow up on `selectedTerm[0]`.
  // With no term there is nothing for the cursor to be inside of.
  if (!inputField || !selectedTerm) return true;

  const selectionIndex = Math.min(inputField.selectionStart, inputField.selectionEnd);
  const [startIndex, endIndex] = selectedTerm[0];

  return startIndex > selectionIndex || endIndex < selectionIndex;
}
function keydownHandler(event) {
const selected = document.querySelector('.autocomplete__item--selected'),
firstItem = document.querySelector('.autocomplete__item:first-of-type'),
lastItem = document.querySelector('.autocomplete__item:last-of-type');
if (isSearchField()) {
// Prevent submission of the search field when Enter was hit
if (selected && event.keyCode === 13) event.preventDefault(); // Enter
// Close autocompletion popup when text cursor is outside current tag
if (selectedTerm && firstItem && (event.keyCode === 37 || event.keyCode === 39)) { // ArrowLeft || ArrowRight
requestAnimationFrame(() => {
if (isSelectionOutsideCurrentTerm()) removeParent();
});
}
}
if (event.keyCode === 38) changeSelected(lastItem, selected, selected && selected.previousSibling); // ArrowUp
if (event.keyCode === 40) changeSelected(firstItem, selected, selected && selected.nextSibling); // ArrowDown
if (event.keyCode === 13 || event.keyCode === 27 || event.keyCode === 188) removeParent(); // Enter || Esc || Comma
if (event.keyCode === 38 || event.keyCode === 40) { // ArrowUp || ArrowDown
const newSelected = document.querySelector('.autocomplete__item--selected');
if (newSelected) inputField.value = newSelected.dataset.value;
if (newSelected) applySelectedValue(newSelected.dataset.value);
event.preventDefault();
}
}
@ -64,7 +116,7 @@ function createItem(list, suggestion) {
});
item.addEventListener('click', () => {
inputField.value = item.dataset.value;
applySelectedValue(item.dataset.value);
inputField.dispatchEvent(
new CustomEvent('autocomplete', {
detail: {
@ -119,9 +171,36 @@ function showAutocomplete(suggestions, fetchedTerm, targetInput) {
}
/**
 * Requests suggestions for `term` from the URL prefix stored in the tracked
 * input's `data-ac-source` attribute.
 *
 * @param {string} term - Text appended verbatim to the source URL.
 * @returns A promise of the parsed JSON response, or a plain empty array (not
 *   a promise) when the input declares no source URL.
 */
function getSuggestions(term) {
  const { acSource } = inputField.dataset;

  // Without a source URL there is nothing to request.
  if (!acSource) return [];

  return fetch(`${acSource}${term}`).then(response => response.json());
}
/**
 * Finds the term of the original query that the text cursor currently touches.
 *
 * A term matches when its start lies strictly before the cursor and its end
 * lies at or after it.
 *
 * @returns The matching term context, `undefined` when no term matches, or
 *   `null` when there is no tracked input or no original query.
 */
function getSelectedTerm() {
  if (inputField && originalQuery) {
    const caret = Math.min(inputField.selectionStart, inputField.selectionEnd);

    return getTermContexts(originalQuery).find(([[from, to]]) => from < caret && to >= caret);
  }

  return null;
}
/**
 * Applies the stored `enable_search_ac` setting to every search-mode input.
 *
 * When the setting is truthy the browser's own autocomplete is switched off.
 * Otherwise the `data-ac` attribute is removed and the browser's autocomplete
 * is switched back on.
 */
function toggleSearchAutocomplete() {
  const enabled = store.get('enable_search_ac');

  document.querySelectorAll('input[data-ac-mode=search]').forEach(field => {
    if (!enabled) field.removeAttribute('data-ac');

    field.autocomplete = enabled ? 'off' : 'on';
  });
}
function listenAutocomplete() {
let timeout;
@ -138,9 +217,25 @@ function listenAutocomplete() {
if (localAc !== null && 'ac' in event.target.dataset) {
inputField = event.target;
originalTerm = `${inputField.value}`.toLowerCase();
let suggestionsCount = 5;
const suggestions = localAc.topK(originalTerm, 5).map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
if (isSearchField()) {
originalQuery = inputField.value;
selectedTerm = getSelectedTerm();
suggestionsCount = 10;
// We don't need to run auto-completion if user is not selecting tag at all
if (!selectedTerm) {
return;
}
originalTerm = selectedTerm[1].toLowerCase();
}
else {
originalTerm = `${inputField.value}`.toLowerCase();
}
const suggestions = localAc.topK(originalTerm, suggestionsCount).map(({ name, imageCount }) => ({ label: `${name} (${imageCount})`, value: name }));
if (suggestions.length) {
return showAutocomplete(suggestions, originalTerm, event.target);
@ -153,9 +248,9 @@ function listenAutocomplete() {
originalTerm = inputField.value;
const fetchedTerm = inputField.value;
const {ac, acMinLength} = inputField.dataset;
const {ac, acMinLength, acSource} = inputField.dataset;
if (ac && (fetchedTerm.length >= acMinLength)) {
if (ac && acSource && (fetchedTerm.length >= acMinLength)) {
if (cache[fetchedTerm]) {
showAutocomplete(cache[fetchedTerm], fetchedTerm, event.target);
}
@ -174,6 +269,7 @@ function listenAutocomplete() {
// If there's a click outside the inputField, remove autocomplete
document.addEventListener('click', event => {
if (event.target && event.target !== inputField) removeParent();
if (event.target === inputField && isSearchField() && isSelectionOutsideCurrentTerm()) removeParent();
});
function fetchLocalAutocomplete(event) {
@ -189,6 +285,8 @@ function listenAutocomplete() {
.then(buf => localAc = new LocalAutocompleter(buf));
}
}
toggleSearchAutocomplete();
}
export { listenAutocomplete };

View file

@ -1,5 +1,5 @@
import { $ } from './utils/dom';
import parseSearch from './match_query';
import { parseSearch } from './match_query';
import store from './utils/store';
/**

View file

@ -1,5 +1,5 @@
import { defaultMatcher } from './query/matcher';
import { generateLexArray } from './query/lex';
import { generateLexArray, generateLexResult } from './query/lex';
import { parseTokens } from './query/parse';
import { getAstMatcherForTerm } from './query/term';
@ -7,9 +7,11 @@ function parseWithDefaultMatcher(term: string, fuzz: number) {
return getAstMatcherForTerm(term, fuzz, defaultMatcher);
}
function parseSearch(query: string) {
export function parseSearch(query: string) {
const tokens = generateLexArray(query, parseWithDefaultMatcher);
return parseTokens(tokens);
}
export default parseSearch;
/**
 * Lexes `query` with the default matcher and returns only the term contexts
 * collected by the lexer.
 */
export function getTermContexts(query: string) {
  const { termContexts } = generateLexResult(query, parseWithDefaultMatcher);

  return termContexts;
}

View file

@ -170,8 +170,8 @@ describe('Lexical analysis', () => {
expect(array).toEqual([noMatch, noMatch, 'or_op', noMatch, 'or_op', noMatch, 'or_op']);
});
it('should throw exception on mismatched parentheses', () => {
it('should mark error on mismatched parentheses', () => {
expect(() => generateLexArray('(safe OR solo AND fluttershy', parseTerm)).toThrow('Mismatched parentheses.');
// expect(() => generateLexArray(')bad', parseTerm)).toThrow('Mismatched parentheses.');
// expect(() => generateLexArray(')bad', parseTerm).error).toThrow('Mismatched parentheses.');
});
});

View file

@ -22,10 +22,18 @@ const tokenList: Token[] = [
export type ParseTerm = (term: string, fuzz: number, boost: number) => AstMatcher;
export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
export type Range = [number, number];
export type TermContext = [Range, string];
export interface LexResult {
tokenList: TokenList,
termContexts: TermContext[],
error: ParseError | null
}
export function generateLexResult(searchStr: string, parseTerm: ParseTerm): LexResult {
const opQueue: string[] = [],
groupNegate: boolean[] = [],
tokenStack: TokenList = [];
groupNegate: boolean[] = [];
let searchTerm: string | null = null;
let boostFuzzStr = '';
@ -35,10 +43,25 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
let fuzz = 0;
let lparenCtr = 0;
const pushTerm = () => {
let termIndex = 0;
let index = 0;
const ret: LexResult = {
tokenList: [],
termContexts: [],
error: null
};
const beginTerm = (token: string) => {
searchTerm = token;
termIndex = index;
};
const endTerm = () => {
if (searchTerm !== null) {
// Push to stack.
tokenStack.push(parseTerm(searchTerm, fuzz, boost));
ret.tokenList.push(parseTerm(searchTerm, fuzz, boost));
ret.termContexts.push([[termIndex, termIndex + searchTerm.length], searchTerm]);
// Reset term and options data.
boost = 1;
fuzz = 0;
@ -48,7 +71,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
}
if (negate) {
tokenStack.push('not_op');
ret.tokenList.push('not_op');
negate = false;
}
};
@ -64,19 +87,19 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
const token = match[0];
if (searchTerm !== null && (['and_op', 'or_op'].indexOf(tokenName) !== -1 || tokenName === 'rparen' && lparenCtr === 0)) {
pushTerm();
endTerm();
}
switch (tokenName) {
case 'and_op':
while (opQueue[0] === 'and_op') {
tokenStack.push(assertNotUndefined(opQueue.shift()));
ret.tokenList.push(assertNotUndefined(opQueue.shift()));
}
opQueue.unshift('and_op');
break;
case 'or_op':
while (opQueue[0] === 'and_op' || opQueue[0] === 'or_op') {
tokenStack.push(assertNotUndefined(opQueue.shift()));
ret.tokenList.push(assertNotUndefined(opQueue.shift()));
}
opQueue.unshift('or_op');
break;
@ -113,10 +136,10 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
if (op === 'lparen') {
break;
}
tokenStack.push(op);
ret.tokenList.push(op);
}
if (groupNegate.length > 0 && groupNegate.pop()) {
tokenStack.push('not_op');
ret.tokenList.push('not_op');
}
}
break;
@ -128,7 +151,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
boostFuzzStr += token;
}
else {
searchTerm = token;
beginTerm(token);
}
break;
case 'boost':
@ -137,7 +160,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
boostFuzzStr += token;
}
else {
searchTerm = token;
beginTerm(token);
}
break;
case 'quoted_lit':
@ -145,7 +168,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
searchTerm += token;
}
else {
searchTerm = token;
beginTerm(token);
}
break;
case 'word':
@ -159,7 +182,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
searchTerm += token;
}
else {
searchTerm = token;
beginTerm(token);
}
break;
default:
@ -171,6 +194,7 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
// Truncate string and restart the token tests.
localSearchStr = localSearchStr.substring(token.length);
index += token.length;
// Break since we have found a match.
break;
@ -178,14 +202,24 @@ export function generateLexArray(searchStr: string, parseTerm: ParseTerm): Token
}
// Append final tokens to the stack.
pushTerm();
endTerm();
if (opQueue.indexOf('rparen') !== -1 || opQueue.indexOf('lparen') !== -1) {
throw new ParseError('Mismatched parentheses.');
ret.error = new ParseError('Mismatched parentheses.');
}
// Concatenatte remaining operators to the token stack.
tokenStack.push(...opQueue);
// Concatenate remaining operators to the token stack.
ret.tokenList.push(...opQueue);
return tokenStack;
return ret;
}
/**
 * Throwing wrapper around `generateLexResult`.
 *
 * Returns just the token list, and throws the error recorded in the lex
 * result when there is one.
 */
export function generateLexArray(searchStr: string, parseTerm: ParseTerm): TokenList {
  const { error, tokenList } = generateLexResult(searchStr, parseTerm);

  if (error) throw error;

  return tokenList;
}

View file

@ -4,9 +4,11 @@ import { AstMatcher, ParseError, TokenList } from './types';
export function parseTokens(lexicalArray: TokenList): AstMatcher {
const operandStack: AstMatcher[] = [];
lexicalArray.forEach((token, i) => {
for (let i = 0; i < lexicalArray.length; i += 1) {
const token = lexicalArray[i];
if (token === 'not_op') {
return;
continue;
}
let intermediate: AstMatcher;
@ -36,7 +38,7 @@ export function parseTokens(lexicalArray: TokenList): AstMatcher {
else {
operandStack.push(intermediate);
}
});
}
if (operandStack.length > 1) {
throw new ParseError('Missing operator.');

View file

@ -1,7 +1,7 @@
import { displayTags, getHiddenTags, getSpoileredTags, imageHitsComplex, imageHitsTags, TagData } from '../tag';
import { mockStorage } from '../../../test/mock-storage';
import { getRandomArrayItem } from '../../../test/randomness';
import parseSearch from '../../match_query';
import { parseSearch } from '../../match_query';
import { SpoilerType } from '../../../types/booru-object';
describe('Tag utilities', () => {

View file

@ -0,0 +1,10 @@
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<ShortName>Derpibooru</ShortName>
<Description>Derpibooru image search</Description>
<InputEncoding>UTF-8</InputEncoding>
<Image width="16" height="16" type="image/x-icon">https://derpibooru.org/favicon.ico</Image>
<Image width="64" height="64" type="image/svg+xml">https://derpibooru.org/favicon.svg</Image>
<Url type="text/html" method="get" template="https://derpibooru.org/search">
<Param name="q" value="{searchTerms}"/>
</Url>
</OpenSearchDescription>

View file

@ -58,7 +58,6 @@ config :logger, :console,
# Use Jason for JSON parsing in Phoenix
config :phoenix, :json_library, Jason
config :bamboo, :json_library, Jason
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.

View file

@ -6,7 +6,6 @@ import Config
# by calling `mix release`.
#
# See `mix help release` for more information.
{:ok, _} = Application.ensure_all_started(:tls_certificate_check)
config :bcrypt_elixir,
log_rounds: String.to_integer(System.get_env("BCRYPT_ROUNDS", "12"))
@ -117,17 +116,14 @@ end
if config_env() == :prod do
# Production mailer config
config :philomena, Philomena.Mailer,
adapter: Bamboo.SMTPAdapter,
server: System.fetch_env!("SMTP_RELAY"),
hostname: System.fetch_env!("SMTP_DOMAIN"),
port: System.get_env("SMTP_PORT") || 587,
adapter: Swoosh.Adapters.Mua,
relay: System.fetch_env!("SMTP_RELAY"),
port: String.to_integer(System.get_env("SMTP_PORT", "587")),
auth: [
username: System.fetch_env!("SMTP_USERNAME"),
password: System.fetch_env!("SMTP_PASSWORD"),
tls: :always,
auth: :always,
tls_options:
[middlebox_comp_mode: false] ++
:tls_certificate_check.options(System.fetch_env!("SMTP_RELAY"))
password: System.fetch_env!("SMTP_PASSWORD")
],
ssl: [middlebox_comp_mode: false]
# Production endpoint config
{:ok, ip} = :inet.parse_address(System.get_env("APP_IP", "127.0.0.1") |> String.to_charlist())
@ -139,7 +135,7 @@ if config_env() == :prod do
server: not is_nil(System.get_env("START_ENDPOINT"))
else
# Don't send email in development
config :philomena, Philomena.Mailer, adapter: Bamboo.LocalAdapter
config :philomena, Philomena.Mailer, adapter: Swoosh.Adapters.Local
# Use this to debug slime templates
# config :slime, :keep_lines, true

View file

@ -5,9 +5,9 @@ export MIX_ENV=test
# Always install mix dependencies
(cd /srv/philomena && mix deps.get)
# Sleep to allow Elasticsearch to finish initializing
# Sleep to allow OpenSearch to finish initializing
# if it's not done doing whatever it does yet
echo -n "Waiting for Elasticsearch"
echo -n "Waiting for OpenSearch"
until wget -qO - opensearch:9200; do
echo -n "."

View file

@ -1,8 +0,0 @@
defmodule Camo.Image do
  @doc """
  Rewrites `input`, an external image URL that may be untrusted, into a
  trusted URL that is loaded through a gocamo proxy (specified by the
  environment).

  The work is handed off to `Philomena.Native.camo_image_url/1`.
  """
  @spec image_url(String.t()) :: String.t()
  defdelegate image_url(input), to: Philomena.Native, as: :camo_image_url
end

View file

@ -1,233 +0,0 @@
defmodule Mix.Tasks.ConvertToVerifiedRoutes do
  @moduledoc """
  Replaces routes with verified routes.

  Forked from
  https://gist.github.com/andreaseriksson/e454b9244a734310d4ab74d8595f98cd
  https://gist.github.com/jiegillet/e6357c82e36a848ad59295eb3d5a1135

  This requires all routes to consistently be aliased with

      alias PhilomenaWeb.Router.Helpers, as: Routes

  Run with

      mix convert_to_verified_routes
  """

  use Mix.Task

  @regex ~r/(Routes\.)([a-zA-Z0-9_]+)(path|url)\(/
  @web_module PhilomenaWeb

  @doc """
  Rewrites, in place, every source and template file under `test/` and `lib/`
  that mentions `Routes.`. This task's own file is skipped.
  """
  @impl Mix.Task
  def run(_) do
    Path.wildcard("test/**/*.ex*")
    |> Enum.concat(Path.wildcard("lib/**/*.ex*"))
    |> Enum.concat(Path.wildcard("lib/**/*.eex*"))
    |> Enum.concat(Path.wildcard("lib/**/*.slime"))
    |> Enum.sort()
    |> Enum.reject(&String.contains?(&1, "convert_to_verified_routes.ex"))
    |> Enum.filter(&(&1 |> File.read!() |> String.contains?("Routes.")))
    |> Enum.each(&format_file/1)

    :ok
  end

  @doc """
  Prints `filename`, converts its contents with `format_string/1` and writes
  the result back to the same path.
  """
  @spec format_file(Path.t()) :: :ok
  def format_file(filename) do
    Mix.shell().info(filename)

    formatted_content =
      filename
      |> File.read!()
      |> format_string()

    File.write!(filename, [formatted_content])
  end

  @doc """
  Replaces route helper calls in `source` with their verified-route form, one
  match at a time, until the route pattern no longer matches.
  """
  @spec format_string(String.t()) :: String.t()
  def format_string(source) do
    case Regex.run(@regex, source, capture: :first, return: :index) do
      [{index, length}] ->
        # `return: :index` reports BYTE offsets, so all slicing below has to be
        # byte-based as well; grapheme-based slicing misaligns as soon as a
        # multi-byte character precedes the match.

        # Compute full length of expression
        length = nibble_expression(source, index, length)

        # Convert to verified route format
        route = format_route(binary_part(source, index, length))

        # Split string around expression
        prefix = binary_part(source, 0, index)
        suffix = binary_part(source, index + length, byte_size(source) - index - length)

        # Insert verified route and rerun
        format_string("#{prefix}#{route}#{suffix}")

      _ ->
        source
    end
  end

  # Grows the byte length of the candidate expression starting at `index` until
  # it parses as Elixir code. Raises once the candidate would run past the end
  # of `source`.
  defp nibble_expression(source, index, length) do
    if index + length > byte_size(source) do
      raise "Failed to match route expression"
    end

    candidate = binary_part(source, index, length)

    # Growing byte by byte may cut a multi-byte character in half; such a
    # candidate is not handed to the parser at all.
    parses? = String.valid?(candidate) and match?({:ok, _}, Code.string_to_quoted(candidate))

    if parses? do
      length
    else
      nibble_expression(source, index, length + 1)
    end
  end

  # Parses one route helper call, rewrites it and renders it back to source text.
  defp format_route(route) do
    ast =
      Code.string_to_quoted!(route,
        literal_encoder: &{:ok, {:__block__, &2, [&1]}},
        unescape: false,
        token_metadata: true
      )

    ast
    |> Macro.prewalk(&replace_route/1)
    |> Code.quoted_to_algebra(escape: false)
    |> Inspect.Algebra.format(:infinity)
  end

  # Unwraps a literal, whether bare or wrapped in a `:__block__` node by the
  # literal encoder used in `format_route/1`.
  defp decode_literal(literal) when is_binary(literal) or is_integer(literal) do
    {:ok, literal}
  end

  defp decode_literal({:__block__, _, [literal]}) do
    {:ok, literal}
  end

  defp decode_literal(node), do: {:error, node}

  defp encode_literal(literal) do
    {:__block__, [], [literal]}
  end

  # Routes.url(MyAppWeb.Endpoint)
  defp replace_route({{:., _, [{:__aliases__, _, [:Routes]}, :url]}, _, [_conn_or_endpoint]}) do
    {:url, [], [{:sigil_p, [delimiter: "\""], [{:<<>>, [], ["/"]}, []]}]}
  end

  # Routes.static_path(conn, "/images/favicon.ico")
  defp replace_route({{:., _, [{:__aliases__, _, [:Routes]}, :static_path]}, _, args}) do
    [_conn_or_endpoint, path] = args

    case decode_literal(path) do
      {:ok, path} -> {:sigil_p, [delimiter: "\""], [{:<<>>, [], [path]}, []]}
      _ -> {:sigil_p, [delimiter: "\""], [path, []]}
    end
  end

  # Routes.static_url(conn, "/images/favicon.ico")
  defp replace_route({{:., _, [{:__aliases__, _, [:Routes]}, :static_url]}, _, args}) do
    [_conn_or_endpoint, path] = args

    sigil =
      case decode_literal(path) do
        {:ok, path} -> {:sigil_p, [delimiter: "\""], [{:<<>>, [], [path]}, []]}
        _ -> {:sigil_p, [delimiter: "\""], [path, []]}
      end

    {:url, [], [sigil]}
  end

  # Routes.some_path(conn, :action, "en", query_params)
  defp replace_route(
         {{:., _, [{:__aliases__, _, [:Routes]}, path_name]}, _, [_ | _] = args} = node
       ) do
    [_conn_or_endpoint, action | params] = args

    action =
      case decode_literal(action) do
        {:ok, action} -> action
        _ -> action
      end

    path_name = "#{path_name}"

    case find_verified_route(path_name, action, params) do
      # No matching route: keep the original call untouched.
      :error -> node
      route -> route
    end
  end

  defp replace_route(node), do: node

  # Looks up the router entry for the helper/action pair and builds the
  # matching `~p` sigil (wrapped in `url/1` for `_url` helpers). Logs an error
  # and returns `:error` when the router has no such route.
  defp find_verified_route(path_name, action, arguments) do
    # Strip exactly one trailing "_path" or "_url", so that a helper such as
    # Routes.product_url_path(conn, :index) still resolves to "product_url".
    trimmed_path = String.replace(path_name, ~r/_(?:path|url)\z/, "")

    route =
      Phoenix.Router.routes(@web_module.Router)
      |> Enum.find(fn %{helper: helper, plug_opts: plug_opts} ->
        plug_opts == action && is_binary(helper) && trimmed_path == helper
      end)

    case route do
      %{path: path} ->
        {path_bits, query_params} =
          path
          |> String.split("/", trim: true)
          |> replace_path_variables(arguments, [])

        path_bits =
          path_bits
          |> Enum.flat_map(fn bit -> ["/", bit] end)
          |> format_for_sigil_binary_args(query_params)

        sigil = {:sigil_p, [delimiter: "\""], [{:<<>>, [], path_bits}, []]}

        if String.ends_with?(path_name, "_url") do
          {:url, [], [sigil]}
        else
          sigil
        end

      _ ->
        Mix.shell().error(
          "Could not find route #{path_name}, with action #{inspect(action)} and arguments #{inspect(arguments)}"
        )

        :error
    end
  end

  # Pairs ":variable" path segments with the call's arguments, in order.
  # Returns `{path_segments, leftover_arguments}`.
  defp replace_path_variables([], arguments, path_bits) do
    {Enum.reverse(path_bits), arguments}
  end

  defp replace_path_variables(path, [], path_bits) do
    {Enum.reverse(path_bits) ++ path, []}
  end

  # conceptually /post/:post_id -> /post/#{id}
  defp replace_path_variables([path_piece | rest], [arg | args], path_bits) do
    if String.starts_with?(path_piece, ":") do
      replace_path_variables(rest, args, [arg | path_bits])
    else
      replace_path_variables(rest, [arg | args], [path_piece | path_bits])
    end
  end

  # Turns path segments (plus leftover arguments, appended after a "?") into
  # the parts of a `<<>>` node: literals become strings, anything else becomes
  # an interpolation.
  defp format_for_sigil_binary_args(path_bits, [_ | _] = query_params) do
    format_for_sigil_binary_args(path_bits ++ ["?" | query_params], [])
  end

  defp format_for_sigil_binary_args(path_bits, []) do
    path_bits
    |> Enum.map(&decode_literal/1)
    |> Enum.map(fn
      {:ok, bit} when is_binary(bit) ->
        bit

      {:ok, bit} when is_atom(bit) or is_integer(bit) ->
        to_string(bit)

      {_, bit} ->
        {:"::", [],
         [
           {{:., [], [Kernel, :to_string]}, [from_interpolation: true], [encode_literal(bit)]},
           {:binary, [], Elixir}
         ]}
    end)
  end
end

View file

@ -1,7 +1,7 @@
defmodule Mix.Tasks.ReindexAll do
use Mix.Task
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.{
Comments.Comment,
@ -27,7 +27,7 @@ defmodule Mix.Tasks.ReindexAll do
{Filters, Filter}
]
@shortdoc "Destroys and recreates all Elasticsearch indices."
@shortdoc "Destroys and recreates all OpenSearch indices."
@requirements ["app.start"]
@impl Mix.Task
def run(args) do
@ -38,23 +38,23 @@ defmodule Mix.Tasks.ReindexAll do
@indices
|> Enum.map(fn {context, schema} ->
Task.async(fn ->
Elasticsearch.delete_index!(schema)
Elasticsearch.create_index!(schema)
Search.delete_index!(schema)
Search.create_index!(schema)
Elasticsearch.reindex(preload(schema, ^context.indexing_preloads()), schema)
Search.reindex(preload(schema, ^context.indexing_preloads()), schema)
end)
end)
|> Task.await_many(:infinity)
# Reports are a bit special
Elasticsearch.delete_index!(Report)
Elasticsearch.create_index!(Report)
Search.delete_index!(Report)
Search.create_index!(Report)
Report
|> preload([:user, :admin])
|> Repo.all()
|> Polymorphic.load_polymorphic(reportable: [reportable_id: :reportable_type])
|> Enum.map(&Elasticsearch.index_document(&1, Report))
|> Enum.map(&Search.index_document(&1, Report))
end
end

View file

@ -10,8 +10,8 @@ defmodule Mix.Tasks.UploadToS3 do
}
alias Philomena.Images.Thumbnailer
alias Philomena.Objects
alias Philomena.Batch
alias PhilomenaMedia.Objects
alias PhilomenaQuery.Batch
import Ecto.Query
@shortdoc "Dumps existing image files to S3 storage backend"

View file

@ -4,7 +4,7 @@ defmodule Philomena.Adverts.Uploader do
"""
alias Philomena.Adverts.Advert
alias Philomena.Uploader
alias PhilomenaMedia.Uploader
def analyze_upload(advert, params) do
Uploader.analyze_upload(advert, "image", params["image"], &Advert.image_changeset/2)

View file

@ -1,56 +0,0 @@
defmodule Philomena.Analyzers do
  @moduledoc """
  Utilities for analyzing the format and various attributes of uploaded files.
  """

  alias Philomena.Analyzers.{Gif, Jpeg, Png, Svg, Webm}
  alias Philomena.Mime

  @doc """
  Looks up the analyzer module for a content type.

  Returns `{:ok, analyzer}` for the supported types (GIF, JPEG, PNG, SVG and
  WebM) and `:error` for anything else.

  To use an analyzer, call the analyze/1 method on it with the path to the
  file. It will return a map such as the following:

      %{
        animated?: false,
        dimensions: {800, 600},
        duration: 0.0,
        extension: "png",
        mime_type: "image/png"
      }

  """
  @spec analyzer(binary()) :: {:ok, module()} | :error
  def analyzer(content_type) do
    case content_type do
      "image/gif" -> {:ok, Gif}
      "image/jpeg" -> {:ok, Jpeg}
      "image/png" -> {:ok, Png}
      "image/svg+xml" -> {:ok, Svg}
      "video/webm" -> {:ok, Webm}
      _ -> :error
    end
  end

  @doc """
  Runs a mime check followed by analysis on a pathname or a `Plug.Upload`.

  Whatever the mime check or the analyzer lookup returns on failure is passed
  through unchanged. Arguments that are neither an upload nor a binary path
  yield `:error`.
  """
  @spec analyze(Plug.Upload.t() | String.t()) :: {:ok, map()} | :error
  def analyze(%Plug.Upload{path: path}), do: analyze(path)

  def analyze(path) when is_binary(path) do
    with {:ok, mime} <- Mime.file(path),
         {:ok, module} <- analyzer(mime) do
      {:ok, module.analyze(path)}
    end
  end

  def analyze(_path), do: :error
end

View file

@ -14,6 +14,9 @@ defmodule Philomena.Application do
# Background queueing system
Philomena.ExqSupervisor,
# Mailer
{Task.Supervisor, name: Philomena.AsyncEmailSupervisor},
# Starts a worker by calling: Philomena.Worker.start_link(arg)
# {Philomena.Worker, arg},
{Redix, name: :redix, host: Application.get_env(:philomena, :redis_host)},

View file

@ -60,6 +60,9 @@ defmodule Philomena.ArtistLinks.ArtistLink do
|> parse_uri()
|> put_verification_code()
|> put_next_check_at()
|> unique_constraint([:uri, :tag_id, :user_id],
name: :index_artist_links_on_uri_tag_id_user_id
)
end
def validate_category(changeset) do

View file

@ -1,7 +1,7 @@
defmodule Philomena.ArtistLinks.AutomaticVerifier do
def check_link(artist_link, recheck_time) do
artist_link.uri
|> Philomena.Http.get()
|> PhilomenaProxy.Http.get()
|> contains_verification_code?(artist_link.verification_code)
|> case do
true ->

View file

@ -2,6 +2,8 @@ defmodule Philomena.Autocomplete.Autocomplete do
use Ecto.Schema
import Ecto.Changeset
@type t :: %__MODULE__{}
@primary_key false
schema "autocomplete" do
field :content, :binary

View file

@ -4,7 +4,7 @@ defmodule Philomena.Badges.Uploader do
"""
alias Philomena.Badges.Badge
alias Philomena.Uploader
alias PhilomenaMedia.Uploader
def analyze_upload(badge, params) do
Uploader.analyze_upload(badge, "image", params["image"], &Badge.image_changeset/2)

View file

@ -1,56 +0,0 @@
defmodule Philomena.Batch do
  alias Philomena.Repo
  import Ecto.Query

  @doc """
  Load records from the given queryable in batches, to avoid locking.

  Each batch is fetched with `Repo.all/1` and the resulting list is handed to
  `callback`.

  Valid options:
    * :batch_size
    * :id_field
  """
  def record_batches(queryable, opts \\ [], callback) do
    query_batches(queryable, opts, fn batch_query ->
      callback.(Repo.all(batch_query))
    end)
  end

  @doc """
  Load queries from the given queryable in batches, to avoid locking.

  `callback` receives, for each batch, the queryable narrowed down to the ids
  of that batch. Iteration starts from ids greater than -1.

  Valid options:
    * :batch_size
    * :id_field
  """
  def query_batches(queryable, opts \\ [], callback) do
    first_ids = load_ids(queryable, -1, opts)

    query_batches(queryable, opts, callback, first_ids)
  end

  # An empty id list means no rows are left.
  defp query_batches(_queryable, _opts, _callback, []), do: []

  defp query_batches(queryable, opts, callback, ids) do
    id_field = Keyword.get(opts, :id_field, :id)

    batch_query = where(queryable, [m], field(m, ^id_field) in ^ids)
    callback.(batch_query)

    # Continue after the largest id handled so far.
    next_ids = load_ids(queryable, Enum.max(ids), opts)

    query_batches(queryable, opts, callback, next_ids)
  end

  # Fetches up to :batch_size (default 1000) ids greater than `max_id`, in
  # ascending order, ignoring the queryable's own preloads and ordering.
  defp load_ids(queryable, max_id, opts) do
    id_field = Keyword.get(opts, :id_field, :id)
    batch_size = Keyword.get(opts, :batch_size, 1000)

    id_query =
      queryable
      |> exclude(:preload)
      |> exclude(:order_by)
      |> order_by(asc: ^id_field)
      |> where([m], field(m, ^id_field) > ^max_id)
      |> select([m], field(m, ^id_field))
      |> limit(^batch_size)

    Repo.all(id_query)
  end
end

View file

@ -4,7 +4,7 @@ defmodule Philomena.Channels.PicartoChannel do
@spec live_channels(DateTime.t()) :: map()
def live_channels(now) do
@api_online
|> Philomena.Http.get()
|> PhilomenaProxy.Http.get()
|> case do
{:ok, %Tesla.Env{body: body, status: 200}} ->
body

View file

@ -4,7 +4,7 @@ defmodule Philomena.Channels.PiczelChannel do
@spec live_channels(DateTime.t()) :: map()
def live_channels(now) do
@api_online
|> Philomena.Http.get()
|> PhilomenaProxy.Http.get()
|> case do
{:ok, %Tesla.Env{body: body, status: 200}} ->
body

View file

@ -7,11 +7,11 @@ defmodule Philomena.Comments do
alias Ecto.Multi
alias Philomena.Repo
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.Reports.Report
alias Philomena.UserStatistics
alias Philomena.Comments.Comment
alias Philomena.Comments.ElasticsearchIndex, as: CommentIndex
alias Philomena.Comments.SearchIndex, as: CommentIndex
alias Philomena.IndexWorker
alias Philomena.Images.Image
alias Philomena.Images
@ -265,7 +265,7 @@ defmodule Philomena.Comments do
def user_name_reindex(old_name, new_name) do
data = CommentIndex.user_name_update_by_query(old_name, new_name)
Elasticsearch.update_by_query(Comment, data.query, data.set_replacements, data.replacements)
Search.update_by_query(Comment, data.query, data.set_replacements, data.replacements)
end
def reindex_comment(%Comment{} = comment) do
@ -288,6 +288,6 @@ defmodule Philomena.Comments do
Comment
|> preload(^indexing_preloads())
|> where([c], field(c, ^column) in ^condition)
|> Elasticsearch.reindex(Comment)
|> Search.reindex(Comment)
end
end

View file

@ -1,5 +1,5 @@
defmodule Philomena.Comments.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
defp user_id_transform(_ctx, data) do
case Integer.parse(data) do

View file

@ -1,5 +1,5 @@
defmodule Philomena.Comments.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Comments.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -57,7 +57,7 @@ defmodule Philomena.Conversations.Conversation do
|> put_recipient()
|> set_slug()
|> set_last_message()
|> cast_assoc(:messages, with: {Message, :creation_changeset, [from]})
|> cast_assoc(:messages, with: &Message.creation_changeset(&1, &2, from))
|> validate_length(:messages, is: 1)
end

View file

@ -1,294 +0,0 @@
defmodule Philomena.Elasticsearch do
  @moduledoc """
  Indexing and search helpers built on top of `Elastix`.

  Public functions take a schema module (for example `Philomena.Images.Image`)
  and resolve it to the matching index module, which is expected to provide
  `index_name/0`, `mapping/0` and `as_json/1`.
  """

  alias Philomena.Batch
  alias Philomena.Repo
  require Logger
  import Ecto.Query
  import Elastix.HTTP

  alias Philomena.Comments.Comment
  alias Philomena.Galleries.Gallery
  alias Philomena.Images.Image
  alias Philomena.Posts.Post
  alias Philomena.Reports.Report
  alias Philomena.Tags.Tag
  alias Philomena.Filters.Filter

  alias Philomena.Comments.ElasticsearchIndex, as: CommentIndex
  alias Philomena.Galleries.ElasticsearchIndex, as: GalleryIndex
  alias Philomena.Images.ElasticsearchIndex, as: ImageIndex
  alias Philomena.Posts.ElasticsearchIndex, as: PostIndex
  alias Philomena.Reports.ElasticsearchIndex, as: ReportIndex
  alias Philomena.Tags.ElasticsearchIndex, as: TagIndex
  alias Philomena.Filters.ElasticsearchIndex, as: FilterIndex

  # Maps a schema module to the module describing its search index.
  defp index_for(Comment), do: CommentIndex
  defp index_for(Gallery), do: GalleryIndex
  defp index_for(Image), do: ImageIndex
  defp index_for(Post), do: PostIndex
  defp index_for(Report), do: ReportIndex
  defp index_for(Tag), do: TagIndex
  defp index_for(Filter), do: FilterIndex

  # Base URL of the search engine, read from application config at call time.
  defp elastic_url do
    Application.get_env(:philomena, :opensearch_url)
  end

  @doc """
  Creates the index for `module` using the mapping from its index module.
  """
  def create_index!(module) do
    index = index_for(module)

    Elastix.Index.create(
      elastic_url(),
      index.index_name(),
      index.mapping()
    )
  end

  @doc """
  Deletes the index for `module`.
  """
  def delete_index!(module) do
    index = index_for(module)

    Elastix.Index.delete(elastic_url(), index.index_name())
  end

  @doc """
  Pushes the `mappings.properties` section of the index mapping for `module`
  to the existing index.
  """
  def update_mapping!(module) do
    index = index_for(module)

    index_name = index.index_name()
    mapping = index.mapping().mappings.properties

    Elastix.Mapping.put(elastic_url(), index_name, "_doc", %{properties: mapping},
      include_type_name: true
    )
  end

  @doc """
  Indexes a single record, using the `id` of its `as_json/1` representation
  as the document id.
  """
  def index_document(doc, module) do
    index = index_for(module)
    data = index.as_json(doc)

    Elastix.Document.index(
      elastic_url(),
      index.index_name(),
      "_doc",
      data.id,
      data
    )
  end

  @doc """
  Removes the document with the given `id` from the index for `module`.
  """
  def delete_document(id, module) do
    index = index_for(module)

    Elastix.Document.delete(
      elastic_url(),
      index.index_name(),
      "_doc",
      id
    )
  end

  @doc """
  Bulk-indexes every record produced by `queryable`.

  Records are loaded through `Philomena.Batch.record_batches/3` (which receives
  `opts`), and each batch is sent as one bulk request with a 30 second timeout.
  """
  def reindex(queryable, module, opts \\ []) do
    index = index_for(module)

    Batch.record_batches(queryable, opts, fn records ->
      # The bulk format alternates an action line with the document itself.
      lines =
        Enum.flat_map(records, fn record ->
          doc = index.as_json(record)

          [
            %{index: %{_index: index.index_name(), _id: doc.id}},
            doc
          ]
        end)

      Elastix.Bulk.post(
        elastic_url(),
        lines,
        index: index.index_name(),
        httpoison_options: [timeout: 30_000]
      )
    end)
  end

  @doc """
  Starts an update-by-query on the index for `module`, rewriting values in
  every document matched by `query_body`.

  The script reads `path`, `old` and `new` from each replacement:

    * `set_replacements` - the value at `path` is iterated as an array and
      every element equal to `old` is replaced with `new`.
    * `replacements` - the value at `path` is replaced with `new` when it is
      non-null and equal to `old`.

  The request is sent with `conflicts=proceed` and
  `wait_for_completion=false`. Raises `MatchError` unless the server answers
  with status 200.
  """
  def update_by_query(module, query_body, set_replacements, replacements) do
    index = index_for(module)

    url =
      elastic_url()
      |> prepare_url([index.index_name(), "_update_by_query"])
      |> append_query_string(%{conflicts: "proceed", wait_for_completion: "false"})

    # Elasticsearch "Painless" scripting language
    script = """
    // Replace values in "sets" (arrays in the source document)
    for (int i = 0; i < params.set_replacements.length; ++i) {
    def replacement = params.set_replacements[i];
    def path = replacement.path;
    def old_value = replacement.old;
    def new_value = replacement.new;
    def reference = ctx._source;
    for (int j = 0; j < path.length; ++j) {
    reference = reference[path[j]];
    }
    for (int j = 0; j < reference.length; ++j) {
    if (reference[j].equals(old_value)) {
    reference[j] = new_value;
    }
    }
    }
    // Replace values in standalone fields
    for (int i = 0; i < params.replacements.length; ++i) {
    def replacement = params.replacements[i];
    def path = replacement.path;
    def old_value = replacement.old;
    def new_value = replacement.new;
    def reference = ctx._source;
    // A little bit more complicated: go up to the last one before it
    // so that the value can actually be replaced
    for (int j = 0; j < path.length - 1; ++j) {
    reference = reference[path[j]];
    }
    if (reference[path[path.length - 1]] != null && reference[path[path.length - 1]].equals(old_value)) {
    reference[path[path.length - 1]] = new_value;
    }
    }
    """

    body =
      Jason.encode!(%{
        script: %{
          source: script,
          params: %{
            set_replacements: set_replacements,
            replacements: replacements
          }
        },
        query: query_body
      })

    {:ok, %{status_code: 200}} = Elastix.HTTP.post(url, body)
  end

  @doc """
  Runs `query_body` against the index for `module` and returns the decoded
  response body. Raises `MatchError` unless the server answers with status 200.
  """
  def search(module, query_body) do
    index = index_for(module)

    {:ok, %{body: results, status_code: 200}} =
      Elastix.Search.search(
        elastic_url(),
        index.index_name(),
        [],
        query_body
      )

    results
  end

  @doc """
  Runs several search definitions in a single request and returns the
  `"responses"` list of the decoded body.

  Raises `MatchError` unless the server answers with status 200.
  """
  def msearch(definitions) do
    # Each definition contributes a header line (target index) and a body line.
    msearch_body =
      Enum.flat_map(definitions, fn definition ->
        [
          %{index: index_for(definition.module).index_name()},
          definition.body
        ]
      end)

    {:ok, %{body: results, status_code: 200}} =
      Elastix.Search.search(
        elastic_url(),
        "_all",
        [],
        msearch_body
      )

    results["responses"]
  end

  @doc """
  Builds a search definition map for `module` from a query and pagination.

  `pagination_params` may contain `:page_number` (default 1) and `:page_size`
  (default 25). The query is merged with `from`/`size` derived from those,
  `_source: false` and `track_total_hits: true`.
  """
  def search_definition(module, elastic_query, pagination_params \\ %{}) do
    page_number = pagination_params[:page_number] || 1
    page_size = pagination_params[:page_size] || 25

    elastic_query =
      Map.merge(elastic_query, %{
        from: (page_number - 1) * page_size,
        size: page_size,
        _source: false,
        track_total_hits: true
      })

    %{
      module: module,
      body: elastic_query,
      page_number: page_number,
      page_size: page_size
    }
  end

  # Converts a raw search response into a `Scrivener.Page` whose entries are
  # `{integer_id, hit}` tuples, and logs the query and its duration.
  defp process_results(results, definition) do
    time = results["took"]
    count = results["hits"]["total"]["value"]
    entries = Enum.map(results["hits"]["hits"], &{String.to_integer(&1["_id"]), &1})

    Logger.debug("[Elasticsearch] Query took #{time}ms")
    Logger.debug("[Elasticsearch] #{Jason.encode!(definition.body)}")

    %Scrivener.Page{
      entries: entries,
      page_number: definition.page_number,
      page_size: definition.page_size,
      total_entries: count,
      # Ceiling division: a partially filled last page still counts as a page.
      total_pages: div(count + definition.page_size - 1, definition.page_size)
    }
  end

  @doc """
  Executes a search definition and returns a `Scrivener.Page` of
  `{id, hit}` entries.
  """
  def search_results(definition) do
    process_results(search(definition.module, definition.body), definition)
  end

  @doc """
  Executes several search definitions in one request and returns one
  `Scrivener.Page` of `{id, hit}` entries per definition, in order.
  """
  def msearch_results(definitions) do
    Enum.map(Enum.zip(msearch(definitions), definitions), fn {result, definition} ->
      process_results(result, definition)
    end)
  end

  # Replaces the `{id, hit}` entries of each page with `{record, hit}` tuples,
  # keeping the order of the search results.
  #
  # Records are paired with hits by id rather than by position, so a hit whose
  # record is not returned by the query is dropped without shifting the
  # remaining records onto the wrong hits.
  defp load_records_from_results(results, ecto_queries) do
    Enum.map(Enum.zip(results, ecto_queries), fn {page, ecto_query} ->
      {ids, _hits} = Enum.unzip(page.entries)

      records_by_id =
        ecto_query
        |> where([m], m.id in ^ids)
        |> Repo.all()
        |> Map.new(&{&1.id, &1})

      entries =
        Enum.flat_map(page.entries, fn {id, hit} ->
          case Map.fetch(records_by_id, id) do
            {:ok, record} -> [{record, hit}]
            :error -> []
          end
        end)

      %{page | entries: entries}
    end)
  end

  @doc """
  Executes a search definition and loads the matching records through
  `ecto_query`. Page entries are `{record, hit}` tuples.
  """
  def search_records_with_hits(definition, ecto_query) do
    [page] = load_records_from_results([search_results(definition)], [ecto_query])

    page
  end

  @doc """
  Multi-search variant of `search_records_with_hits/2`; `definitions` and
  `ecto_queries` are paired by position.
  """
  def msearch_records_with_hits(definitions, ecto_queries) do
    load_records_from_results(msearch_results(definitions), ecto_queries)
  end

  @doc """
  Executes a search definition and loads the matching records through
  `ecto_query`. Page entries are the records themselves.
  """
  def search_records(definition, ecto_query) do
    page = search_records_with_hits(definition, ecto_query)
    {records, _hits} = Enum.unzip(page.entries)

    %{page | entries: records}
  end

  @doc """
  Multi-search variant of `search_records/2`; `definitions` and
  `ecto_queries` are paired by position.
  """
  def msearch_records(definitions, ecto_queries) do
    definitions
    |> msearch_records_with_hits(ecto_queries)
    |> Enum.map(fn page ->
      {records, _hits} = Enum.unzip(page.entries)

      %{page | entries: records}
    end)
  end
end

View file

@ -1,21 +0,0 @@
defmodule Philomena.Filename do
  @moduledoc """
  Utilities for building arbitrary filenames for uploaded files.
  """

  @doc """
  Builds a name of the form `year/month/day/uuid.extension`.

  The date components come from the current UTC time and are written as plain
  integers; the unique part is a version 1 UUID.
  """
  @spec build(String.t()) :: String.t()
  def build(extension) do
    date_prefix = time_identifier(DateTime.utc_now())

    "#{date_prefix}/#{UUID.uuid1()}.#{extension}"
  end

  # Renders the date of `time` as "year/month/day".
  defp time_identifier(%{year: year, month: month, day: day}) do
    "#{year}/#{month}/#{day}"
  end
end

View file

@ -7,8 +7,8 @@ defmodule Philomena.Filters do
alias Philomena.Repo
alias Philomena.Filters.Filter
alias Philomena.Elasticsearch
alias Philomena.Filters.ElasticsearchIndex, as: FilterIndex
alias PhilomenaQuery.Search
alias Philomena.Filters.SearchIndex, as: FilterIndex
alias Philomena.IndexWorker
@doc """
@ -223,7 +223,7 @@ defmodule Philomena.Filters do
def user_name_reindex(old_name, new_name) do
data = FilterIndex.user_name_update_by_query(old_name, new_name)
Elasticsearch.update_by_query(Filter, data.query, data.set_replacements, data.replacements)
Search.update_by_query(Filter, data.query, data.set_replacements, data.replacements)
end
def reindex_filter(%Filter{} = filter) do
@ -233,7 +233,7 @@ defmodule Philomena.Filters do
end
def unindex_filter(%Filter{} = filter) do
Elasticsearch.delete_document(filter.id, Filter)
Search.delete_document(filter.id, Filter)
filter
end
@ -246,6 +246,6 @@ defmodule Philomena.Filters do
Filter
|> preload(^indexing_preloads())
|> where([f], field(f, ^column) in ^condition)
|> Elasticsearch.reindex(Filter)
|> Search.reindex(Filter)
end
end

View file

@ -1,5 +1,5 @@
defmodule Philomena.Filters.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
defp user_my_transform(%{user: %{id: id}}, "filters"),
do: {:ok, %{term: %{user_id: id}}}

View file

@ -1,5 +1,5 @@
defmodule Philomena.Filters.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Filters.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -7,10 +7,10 @@ defmodule Philomena.Galleries do
alias Ecto.Multi
alias Philomena.Repo
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.Galleries.Gallery
alias Philomena.Galleries.Interaction
alias Philomena.Galleries.ElasticsearchIndex, as: GalleryIndex
alias Philomena.Galleries.SearchIndex, as: GalleryIndex
alias Philomena.IndexWorker
alias Philomena.GalleryReorderWorker
alias Philomena.Notifications
@ -135,7 +135,7 @@ defmodule Philomena.Galleries do
def user_name_reindex(old_name, new_name) do
data = GalleryIndex.user_name_update_by_query(old_name, new_name)
Elasticsearch.update_by_query(Gallery, data.query, data.set_replacements, data.replacements)
Search.update_by_query(Gallery, data.query, data.set_replacements, data.replacements)
end
defp reindex_after_update({:ok, gallery}) do
@ -155,7 +155,7 @@ defmodule Philomena.Galleries do
end
def unindex_gallery(%Gallery{} = gallery) do
Elasticsearch.delete_document(gallery.id, Gallery)
Search.delete_document(gallery.id, Gallery)
gallery
end
@ -168,7 +168,7 @@ defmodule Philomena.Galleries do
Gallery
|> preload(^indexing_preloads())
|> where([g], field(g, ^column) in ^condition)
|> Elasticsearch.reindex(Gallery)
|> Search.reindex(Gallery)
end
def add_image_to_gallery(gallery, image) do

View file

@ -1,5 +1,5 @@
defmodule Philomena.Galleries.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
defp fields do
[

View file

@ -1,5 +1,5 @@
defmodule Philomena.Galleries.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Galleries.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -1,46 +0,0 @@
defmodule Philomena.Http do
  @moduledoc """
  Thin wrapper around `Tesla` for outgoing HTTP requests.

  Every request uses the Mint adapter, follows at most one redirect, sends a
  fixed `User-Agent` header ahead of any caller-supplied headers, and is
  routed through the proxy configured under `:philomena, :proxy_host` when
  one is set.
  """

  @user_agent "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"

  @doc """
  Performs a GET request to `url`.
  """
  def get(url, headers \\ [], options \\ []) do
    Tesla.get(client(headers), url, request_opts(options))
  end

  @doc """
  Performs a HEAD request to `url`.
  """
  def head(url, headers \\ [], options \\ []) do
    Tesla.head(client(headers), url, request_opts(options))
  end

  @doc """
  Performs a POST request to `url` with the given `body`.
  """
  def post(url, body, headers \\ [], options \\ []) do
    Tesla.post(client(headers), url, body, request_opts(options))
  end

  # Wraps the adapter options in the keyword shape the Tesla calls expect.
  defp request_opts(options), do: [opts: [adapter: adapter_opts(options)]]

  # The fixed values are merged last, so they win over caller-supplied ones.
  defp adapter_opts(opts) do
    opts
    |> Keyword.merge(max_body: 125_000_000, inet6: true)
    |> with_proxy(Application.get_env(:philomena, :proxy_host))
  end

  defp with_proxy(opts, nil), do: opts

  defp with_proxy(opts, proxy_url) do
    Keyword.merge(opts, proxy: proxy_opts(URI.parse(proxy_url)))
  end

  # Only http and https proxies are recognised; anything else fails to match.
  defp proxy_opts(%{host: host, port: port, scheme: "https"}) do
    {:https, host, port, [transport_opts: [inet6: true]]}
  end

  defp proxy_opts(%{host: host, port: port, scheme: "http"}) do
    {:http, host, port, [transport_opts: [inet6: true]]}
  end

  defp client(headers) do
    middleware = [
      {Tesla.Middleware.FollowRedirects, max_redirects: 1},
      {Tesla.Middleware.Headers, [{"User-Agent", @user_agent} | headers]}
    ]

    Tesla.client(middleware, Tesla.Adapter.Mint)
  end
end

View file

@ -18,5 +18,6 @@ defmodule Philomena.ImageFaves.ImageFave do
image_fave
|> cast(attrs, [])
|> validate_required([])
|> unique_constraint([:image_id, :user_id], name: :index_image_faves_on_image_id_and_user_id)
end
end

View file

@ -18,5 +18,6 @@ defmodule Philomena.ImageHides.ImageHide do
image_hide
|> cast(attrs, [])
|> validate_required([])
|> unique_constraint([:image_id, :user_id], name: :index_image_hides_on_image_id_and_user_id)
end
end

View file

@ -36,9 +36,9 @@ defmodule Philomena.ImageIntensities do
{:error, %Ecto.Changeset{}}
"""
def create_image_intensity(image, attrs \\ %{}) do
def create_image_intensity(image, attrs \\ %PhilomenaMedia.Intensities{}) do
%ImageIntensity{image_id: image.id}
|> ImageIntensity.changeset(attrs)
|> ImageIntensity.changeset(Map.from_struct(attrs))
|> Repo.insert()
end
@ -56,7 +56,7 @@ defmodule Philomena.ImageIntensities do
"""
def update_image_intensity(%ImageIntensity{} = image_intensity, attrs) do
image_intensity
|> ImageIntensity.changeset(attrs)
|> ImageIntensity.changeset(Map.from_struct(attrs))
|> Repo.update()
end

View file

@ -19,5 +19,6 @@ defmodule Philomena.ImageVotes.ImageVote do
image_vote
|> cast(attrs, [])
|> validate_required([])
|> unique_constraint([:image_id, :user_id], name: :index_image_votes_on_image_id_and_user_id)
end
end

View file

@ -9,7 +9,7 @@ defmodule Philomena.Images do
alias Ecto.Multi
alias Philomena.Repo
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.ThumbnailWorker
alias Philomena.ImagePurgeWorker
alias Philomena.DuplicateReports.DuplicateReport
@ -18,7 +18,7 @@ defmodule Philomena.Images do
alias Philomena.Images.Tagging
alias Philomena.Images.Thumbnailer
alias Philomena.Images.Source
alias Philomena.Images.ElasticsearchIndex, as: ImageIndex
alias Philomena.Images.SearchIndex, as: ImageIndex
alias Philomena.IndexWorker
alias Philomena.ImageFeatures.ImageFeature
alias Philomena.SourceChanges.SourceChange
@ -812,7 +812,7 @@ defmodule Philomena.Images do
def user_name_reindex(old_name, new_name) do
data = ImageIndex.user_name_update_by_query(old_name, new_name)
Elasticsearch.update_by_query(Image, data.query, data.set_replacements, data.replacements)
Search.update_by_query(Image, data.query, data.set_replacements, data.replacements)
end
def reindex_image(%Image{} = image) do
@ -845,7 +845,7 @@ defmodule Philomena.Images do
Image
|> preload(^indexing_preloads())
|> where([i], field(i, ^column) in ^condition)
|> Elasticsearch.reindex(Image)
|> Search.reindex(Image)
end
def purge_files(image, hidden_key) do

View file

@ -1,5 +1,5 @@
defmodule Philomena.Images.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
alias Philomena.Repo
defp gallery_id_transform(_ctx, value) do
@ -60,7 +60,7 @@ defmodule Philomena.Images.Query do
do: {:error, "Unknown `my' value."}
defp invalid_filter_guard(ctx, search_string) do
case parse(user_fields(), ctx, Philomena.Search.String.normalize(search_string)) do
case parse(user_fields(), ctx, PhilomenaQuery.Parse.String.normalize(search_string)) do
{:ok, query} -> query
_error -> %{match_all: %{}}
end

View file

@ -1,5 +1,5 @@
defmodule Philomena.Images.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Images.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -3,15 +3,16 @@ defmodule Philomena.Images.Thumbnailer do
Prevewing and thumbnailing logic for Images.
"""
alias PhilomenaMedia.Processors
alias PhilomenaMedia.Analyzers
alias PhilomenaMedia.Uploader
alias PhilomenaMedia.Objects
alias PhilomenaMedia.Sha512
alias Philomena.DuplicateReports
alias Philomena.ImageIntensities
alias Philomena.ImagePurgeWorker
alias Philomena.Images.Image
alias Philomena.Processors
alias Philomena.Analyzers
alias Philomena.Uploader
alias Philomena.Objects
alias Philomena.Sha512
alias Philomena.Repo
@versions [

View file

@ -5,7 +5,7 @@ defmodule Philomena.Images.Uploader do
alias Philomena.Images.Thumbnailer
alias Philomena.Images.Image
alias Philomena.Uploader
alias PhilomenaMedia.Uploader
def analyze_upload(image, params) do
Uploader.analyze_upload(image, "image", params["image"], &Image.image_changeset/2)

View file

@ -1,23 +0,0 @@
defmodule Philomena.Intensities do
  @moduledoc """
  Wrapper around the external `image-intensities` command.
  """

  @doc """
  Gets the corner intensities of the given image file.
  The image file must be in the PNG or JPEG format.

  Returns `{:ok, %{nw: nw, ne: ne, sw: sw, se: se}}` with float values.
  Returns `:error` when the command exits with a non-zero status, or when its
  output is not exactly four tab-separated numbers.
  """
  @spec file(String.t()) :: {:ok, map()} | :error
  def file(input) do
    case System.cmd("image-intensities", [input]) do
      {output, 0} -> parse_output(output)
      _error -> :error
    end
  end

  # Parses "nw\tne\tsw\tse". Anything that is not four fully-consumed numbers
  # yields :error instead of raising, matching the declared return type.
  defp parse_output(output) do
    parsed =
      output
      |> String.trim()
      |> String.split("\t")
      |> Enum.map(&Float.parse/1)

    case parsed do
      [{nw, ""}, {ne, ""}, {sw, ""}, {se, ""}] ->
        {:ok, %{nw: nw, ne: ne, sw: sw, se: se}}

      _malformed ->
        :error
    end
  end
end

View file

@ -1,3 +1,9 @@
defmodule Philomena.Mailer do
use Bamboo.Mailer, otp_app: :philomena
use Swoosh.Mailer, otp_app: :philomena
@spec deliver_later(Swoosh.Email.t()) :: {:ok, Swoosh.Email.t()}
def deliver_later(mail) do
Task.Supervisor.start_child(Philomena.AsyncEmailSupervisor, fn -> deliver(mail) end)
{:ok, mail}
end
end

View file

@ -1,37 +0,0 @@
defmodule Philomena.Mime do
  @moduledoc """
  MIME type detection for files on disk, backed by the `file` command.
  """

  @type mime :: String.t()

  @doc """
  Gets the mime type of the given pathname.

  Returns `:error` when the `file` command exits with a non-zero status.
  Otherwise the detected type is passed through `true_mime/1`, so the result
  is either `{:ok, mime}` or `{:unsupported_mime, mime}`.
  """
  @spec file(String.t()) :: {:ok, mime()} | {:unsupported_mime, mime()} | :error
  def file(path) do
    case System.cmd("file", ["-b", "--mime-type", path]) do
      {output, 0} ->
        true_mime(String.trim(output))

      _error ->
        :error
    end
  end

  @doc """
  Provides the "true" content type of this file.

  Some files are identified incorrectly as a mime type they should not be.
  These incorrect mime types (and their "corrected" versions) are:

    - image/svg -> image/svg+xml
    - audio/webm -> video/webm

  Returns `{:ok, mime}` for the supported types (GIF, JPEG, PNG, SVG, WebM)
  and `{:unsupported_mime, mime}` for everything else.
  """
  @spec true_mime(String.t()) :: {:ok, mime()} | {:unsupported_mime, mime()}
  def true_mime("image/svg"), do: {:ok, "image/svg+xml"}
  def true_mime("audio/webm"), do: {:ok, "video/webm"}

  def true_mime(mime)
      when mime in ~W(image/gif image/jpeg image/png image/svg+xml video/webm),
      do: {:ok, mime}

  def true_mime(mime), do: {:unsupported_mime, mime}
end

View file

@ -1,148 +0,0 @@
defmodule Philomena.Objects do
  @moduledoc """
  Replication wrapper for object storage backends.
  """

  alias Philomena.Mime
  require Logger

  #
  # Reads `key` from the first backend that returns it successfully and
  # writes the body to `file_path`. Raises when no backend has the key.
  #
  # sobelow_skip ["Traversal.FileModule"]
  @spec download_file(String.t(), String.t()) :: any()
  def download_file(key, file_path) do
    response =
      Enum.find_value(backends(), fn backend ->
        request = ExAws.S3.get_object(backend[:bucket], key)

        case ExAws.request(request, backend[:config_overrides]) do
          {:ok, result} -> result
          _ -> nil
        end
      end)

    File.write!(file_path, response.body)
  end

  #
  # Uploads the file at `file_path` to every backend under `key` with a
  # single API call per backend, tagging it with the detected content type.
  #
  # sobelow_skip ["Traversal.FileModule"]
  @spec put(String.t(), String.t()) :: any()
  def put(key, file_path) do
    {_, mime} = Mime.file(file_path)
    contents = File.read!(file_path)

    run_all(fn backend ->
      backend[:bucket]
      |> ExAws.S3.put_object(key, contents, content_type: mime)
      |> ExAws.request!(backend[:config_overrides])
    end)
  end

  #
  # Uploads the file at `file_path` to storage under `key`.
  # Currently delegates to the single-call upload.
  #
  @spec upload(String.t(), String.t()) :: any()
  def upload(key, file_path) do
    # Workaround for API rate limit issues on R2
    put(key, file_path)
  end

  #
  # Copies `source_key` to `dest_key`, overwriting the destination object
  # if it exists. Failures are logged rather than raised.
  #
  @spec copy(String.t(), String.t()) :: any()
  def copy(source_key, dest_key) do
    # Potential workaround for inconsistent PutObjectCopy on R2
    #
    # run_all(fn opts->
    #   ExAws.S3.put_object_copy(opts[:bucket], dest_key, opts[:bucket], source_key)
    #   |> ExAws.request!(opts[:config_overrides])
    # end)

    try do
      temp_path = Briefly.create!()
      download_file(source_key, temp_path)
      upload(dest_key, temp_path)
    catch
      _kind, _value -> Logger.warning("Failed to copy #{source_key} -> #{dest_key}")
    end
  end

  #
  # Deletes `key` from every backend.
  #
  @spec delete(String.t()) :: any()
  def delete(key) do
    run_all(fn backend ->
      backend[:bucket]
      |> ExAws.S3.delete_object(key)
      |> ExAws.request!(backend[:config_overrides])
    end)
  end

  #
  # Deletes all of the given keys from every backend.
  #
  @spec delete_multiple([String.t()]) :: any()
  def delete_multiple(keys) do
    run_all(fn backend ->
      backend[:bucket]
      |> ExAws.S3.delete_multiple_objects(keys)
      |> ExAws.request!(backend[:config_overrides])
    end)
  end

  # Runs `wrapped` against every backend concurrently. Anything raised,
  # thrown or exited inside `wrapped` counts as a failure for that backend;
  # a warning is logged when at least one backend failed.
  defp run_all(wrapped) do
    attempt = fn backend ->
      try do
        wrapped.(backend)
        :ok
      catch
        _kind, _value -> :error
      end
    end

    any_failed? =
      backends()
      |> Task.async_stream(attempt, timeout: :infinity)
      |> Enum.any?(fn {_, outcome} -> outcome == :error end)

    if any_failed? do
      Logger.warning("Failed to operate on all backends")
    else
      :ok
    end
  end

  # The primary backend first, followed by the replica when configured.
  defp backends do
    primary_opts() ++ replica_opts()
  end

  defp primary_opts do
    [
      %{
        config_overrides: Application.fetch_env!(:philomena, :s3_primary_options),
        bucket: Application.fetch_env!(:philomena, :s3_primary_bucket)
      }
    ]
  end

  # The replica is optional: without a secondary bucket there is no backend.
  defp replica_opts do
    case Application.get_env(:philomena, :s3_secondary_bucket) do
      nil ->
        []

      replica_bucket ->
        [
          %{
            config_overrides: Application.fetch_env!(:philomena, :s3_secondary_options),
            bucket: replica_bucket
          }
        ]
    end
  end
end

View file

@ -7,12 +7,12 @@ defmodule Philomena.Posts do
alias Ecto.Multi
alias Philomena.Repo
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.Topics.Topic
alias Philomena.Topics
alias Philomena.UserStatistics
alias Philomena.Posts.Post
alias Philomena.Posts.ElasticsearchIndex, as: PostIndex
alias Philomena.Posts.SearchIndex, as: PostIndex
alias Philomena.IndexWorker
alias Philomena.Forums.Forum
alias Philomena.Notifications
@ -309,7 +309,7 @@ defmodule Philomena.Posts do
def user_name_reindex(old_name, new_name) do
data = PostIndex.user_name_update_by_query(old_name, new_name)
Elasticsearch.update_by_query(Post, data.query, data.set_replacements, data.replacements)
Search.update_by_query(Post, data.query, data.set_replacements, data.replacements)
end
defp reindex_after_update({:ok, post}) do
@ -336,6 +336,6 @@ defmodule Philomena.Posts do
Post
|> preload(^indexing_preloads())
|> where([p], field(p, ^column) in ^condition)
|> Elasticsearch.reindex(Post)
|> Search.reindex(Post)
end
end

View file

@ -1,5 +1,5 @@
defmodule Philomena.Posts.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
defp user_id_transform(_ctx, data) do
case Integer.parse(data) do

View file

@ -1,5 +1,5 @@
defmodule Philomena.Posts.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Posts.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -1,78 +0,0 @@
defmodule Philomena.Processors do
  @moduledoc """
  Utilities for processing uploads.

  Processors have 3 methods available:

  - process/3:
    Takes an analysis, file path, and version list and generates an
    "edit script" that represents how to store this file according to the
    given version list. See Philomena.Images.Thumbnailer for more
    information on how this works.

  - post_process/2:
    Takes an analysis and file path and performs optimizations on the
    upload. See Philomena.Images.Thumbnailer for more information on how this
    works.

  - intensities/2:
    Takes an analysis and file path and generates an intensities map
    appropriate for use by Philomena.DuplicateReports.
  """

  alias Philomena.Processors.Gif
  alias Philomena.Processors.Jpeg
  alias Philomena.Processors.Png
  alias Philomena.Processors.Svg
  alias Philomena.Processors.Webm

  # Content type => module able to process it.
  @processors %{
    "image/gif" => Gif,
    "image/jpeg" => Jpeg,
    "image/png" => Png,
    "image/svg+xml" => Svg,
    "video/webm" => Webm
  }

  @doc """
  Looks up the module capable of processing `content_type`.

  Returns `nil` when no processor handles that content type.
  """
  @spec processor(String.t()) :: module() | nil
  def processor(content_type), do: Map.get(@processors, content_type)

  @doc """
  Takes a MIME type and version list and generates a list of versions to be
  generated (e.g., ["thumb.png"]). List contents differ based on file type.
  """
  @spec versions(String.t(), keyword) :: [String.t()]
  def versions(mime_type, valid_sizes) do
    handler = processor(mime_type)
    handler.versions(valid_sizes)
  end

  @doc """
  Dispatches to `process/3` of the processor matching `analysis.mime_type`.
  """
  @spec process(map(), String.t(), keyword) :: map()
  def process(analysis, file, versions) do
    handler = processor(analysis.mime_type)
    handler.process(analysis, file, versions)
  end

  @doc """
  Dispatches to `post_process/2` of the processor matching
  `analysis.mime_type`.
  """
  @spec post_process(map(), String.t()) :: map()
  def post_process(analysis, file) do
    handler = processor(analysis.mime_type)
    handler.post_process(analysis, file)
  end

  @doc """
  Dispatches to `intensities/2` of the processor matching
  `analysis.mime_type`.
  """
  @spec intensities(map(), String.t()) :: map()
  def intensities(analysis, file) do
    handler = processor(analysis.mime_type)
    handler.intensities(analysis, file)
  end
end

View file

@ -6,9 +6,9 @@ defmodule Philomena.Reports do
import Ecto.Query, warn: false
alias Philomena.Repo
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.Reports.Report
alias Philomena.Reports.ElasticsearchIndex, as: ReportIndex
alias Philomena.Reports.SearchIndex, as: ReportIndex
alias Philomena.IndexWorker
alias Philomena.Polymorphic
@ -152,7 +152,7 @@ defmodule Philomena.Reports do
def user_name_reindex(old_name, new_name) do
data = ReportIndex.user_name_update_by_query(old_name, new_name)
Elasticsearch.update_by_query(Report, data.query, data.set_replacements, data.replacements)
Search.update_by_query(Report, data.query, data.set_replacements, data.replacements)
end
defp reindex_after_update({:ok, report}) do
@ -183,7 +183,7 @@ defmodule Philomena.Reports do
|> preload([:user, :admin])
|> Repo.all()
|> Polymorphic.load_polymorphic(reportable: [reportable_id: :reportable_type])
|> Enum.map(&Elasticsearch.index_document(&1, Report))
|> Enum.map(&Search.index_document(&1, Report))
end
def count_reports(user) do

View file

@ -1,5 +1,5 @@
defmodule Philomena.Reports.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
defp fields do
[

View file

@ -1,5 +1,5 @@
defmodule Philomena.Reports.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Reports.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -1,6 +1,6 @@
defmodule Philomena.Schema.Search do
alias Philomena.Images.Query
alias Philomena.Search.String
alias PhilomenaQuery.Parse.String
import Ecto.Changeset
def validate_search(changeset, field, user, watched \\ false) do

View file

@ -1,5 +1,5 @@
defmodule Philomena.Schema.Time do
alias Philomena.RelativeDate
alias PhilomenaQuery.RelativeDate
import Ecto.Changeset
def assign_time(changeset, field, target_field) do

View file

@ -1,25 +0,0 @@
defmodule Philomena.Scrapers do
  @moduledoc """
  Dispatches a URL to the first scraper module that can handle it.
  """

  # Tried in order; the first module whose can_handle?/2 is truthy wins.
  @scrapers [
    Philomena.Scrapers.Deviantart,
    Philomena.Scrapers.Pillowfort,
    Philomena.Scrapers.Twitter,
    Philomena.Scrapers.Tumblr,
    Philomena.Scrapers.Raw
  ]

  @doc """
  Scrapes `url` with the first matching scraper and returns its result.

  Returns `nil` when no scraper can handle the URL.
  """
  def scrape!(url) do
    uri = URI.parse(url)

    case Enum.find(@scrapers, fn scraper -> scraper.can_handle?(uri, url) end) do
      nil -> nil
      scraper -> scraper.scrape(uri, url)
    end
  end
end

View file

@ -1,13 +0,0 @@
defmodule Philomena.Search.String do
  @moduledoc """
  Helpers for preparing multi-line search strings.
  """

  @doc """
  Turns a multi-line search string into a single expression.

  Carriage returns are removed, empty lines are skipped, each remaining line
  is wrapped in parentheses, and the lines are joined with ` || `.
  `nil` is treated as an empty string.
  """
  def normalize(nil), do: ""

  def normalize(str) do
    lines =
      str
      |> String.replace("\r", "")
      |> String.split("\n", trim: true)

    Enum.map_join(lines, " || ", fn line -> "(#{line})" end)
  end
end

View file

@ -0,0 +1,55 @@
defmodule Philomena.SearchPolicy do
  @moduledoc """
  Maps schema modules to their search index modules and exposes the search
  engine location.
  """

  alias Philomena.Comments.Comment
  alias Philomena.Galleries.Gallery
  alias Philomena.Images.Image
  alias Philomena.Posts.Post
  alias Philomena.Reports.Report
  alias Philomena.Tags.Tag
  alias Philomena.Filters.Filter

  alias Philomena.Comments.SearchIndex, as: CommentIndex
  alias Philomena.Galleries.SearchIndex, as: GalleryIndex
  alias Philomena.Images.SearchIndex, as: ImageIndex
  alias Philomena.Posts.SearchIndex, as: PostIndex
  alias Philomena.Reports.SearchIndex, as: ReportIndex
  alias Philomena.Tags.SearchIndex, as: TagIndex
  alias Philomena.Filters.SearchIndex, as: FilterIndex

  @type schema_module :: Comment | Gallery | Image | Post | Report | Tag | Filter

  # Schema module paired with the module implementing its search index.
  index_modules = [
    {Comment, CommentIndex},
    {Gallery, GalleryIndex},
    {Image, ImageIndex},
    {Post, PostIndex},
    {Report, ReportIndex},
    {Tag, TagIndex},
    {Filter, FilterIndex}
  ]

  @doc """
  For a given schema module (e.g. `m:Philomena.Images.Image`), return the associated module
  which implements the `SearchIndex` behaviour (e.g. `m:Philomena.Images.SearchIndex`).

  ## Example

      iex> SearchPolicy.index_for(Gallery)
      Philomena.Galleries.SearchIndex

      iex> SearchPolicy.index_for(:foo)
      ** (FunctionClauseError) no function clause matching in Philomena.SearchPolicy.index_for/1

  """
  @spec index_for(schema_module()) :: module()
  # One clause per table entry, so unknown modules still fail to match.
  for {schema, index} <- index_modules do
    def index_for(unquote(schema)), do: unquote(index)
  end

  @doc """
  Return the path used to interact with the search engine.

  ## Example

      iex> SearchPolicy.opensearch_url()
      "http://localhost:9200"

  """
  @spec opensearch_url :: String.t()
  def opensearch_url, do: Application.get_env(:philomena, :opensearch_url)
end

View file

@ -1,11 +0,0 @@
defmodule Philomena.Sha512 do
  @moduledoc """
  SHA-512 hashing of files on disk.
  """

  @doc """
  Returns the SHA-512 digest of the file at `file` as a lowercase hex string.

  The file is read in chunks of 10_485_760 bytes, so it is never loaded into
  memory in one piece.
  """
  @spec file(String.t()) :: String.t()
  def file(file) do
    digest =
      file
      |> File.stream!([], 10_485_760)
      |> Enum.reduce(:crypto.hash_init(:sha512), fn chunk, state ->
        :crypto.hash_update(state, chunk)
      end)
      |> :crypto.hash_final()

    Base.encode16(digest, case: :lower)
  end
end

View file

@ -6,7 +6,7 @@ defmodule Philomena.Tags do
import Ecto.Query, warn: false
alias Philomena.Repo
alias Philomena.Elasticsearch
alias PhilomenaQuery.Search
alias Philomena.IndexWorker
alias Philomena.TagAliasWorker
alias Philomena.TagUnaliasWorker
@ -194,12 +194,12 @@ defmodule Philomena.Tags do
{:ok, tag} = Repo.delete(tag)
Elasticsearch.delete_document(tag.id, Tag)
Search.delete_document(tag.id, Tag)
Image
|> where([i], i.id in ^image_ids)
|> preload(^Images.indexing_preloads())
|> Elasticsearch.reindex(Image)
|> Search.reindex(Image)
end
def alias_tag(%Tag{} = tag, attrs) do
@ -301,13 +301,13 @@ defmodule Philomena.Tags do
|> join(:inner, [i], _ in assoc(i, :tags))
|> where([_i, t], t.id == ^tag.id)
|> preload(^Images.indexing_preloads())
|> Elasticsearch.reindex(Image)
|> Search.reindex(Image)
Filter
|> where([f], fragment("? @> ARRAY[?]::integer[]", f.hidden_tag_ids, ^tag.id))
|> or_where([f], fragment("? @> ARRAY[?]::integer[]", f.spoilered_tag_ids, ^tag.id))
|> preload(^Filters.indexing_preloads())
|> Elasticsearch.reindex(Filter)
|> Search.reindex(Filter)
end
def unalias_tag(%Tag{} = tag) do
@ -416,7 +416,7 @@ defmodule Philomena.Tags do
Tag
|> preload(^indexing_preloads())
|> where([t], field(t, ^column) in ^condition)
|> Elasticsearch.reindex(Tag)
|> Search.reindex(Tag)
end
alias Philomena.Tags.Implication

View file

@ -1,5 +1,5 @@
defmodule Philomena.Tags.Query do
alias Philomena.Search.Parser
alias PhilomenaQuery.Parse.Parser
defp fields do
[

View file

@ -1,5 +1,5 @@
defmodule Philomena.Tags.ElasticsearchIndex do
@behaviour Philomena.ElasticsearchIndex
defmodule Philomena.Tags.SearchIndex do
@behaviour PhilomenaQuery.SearchIndex
@impl true
def index_name do

View file

@ -4,7 +4,7 @@ defmodule Philomena.Tags.Uploader do
"""
alias Philomena.Tags.Tag
alias Philomena.Uploader
alias PhilomenaMedia.Uploader
def analyze_upload(tag, params) do
Uploader.analyze_upload(tag, "image", params["image"], &Tag.image_changeset/2)

View file

@ -59,7 +59,7 @@ defmodule Philomena.Topics.Topic do
|> change(forum: forum, user: attribution[:user])
|> validate_required(:forum)
|> cast_assoc(:poll, with: &Poll.update_changeset/2)
|> cast_assoc(:posts, with: {Post, :topic_creation_changeset, [attribution, anonymous?]})
|> cast_assoc(:posts, with: &Post.topic_creation_changeset(&1, &2, attribution, anonymous?))
|> validate_length(:posts, is: 1)
|> unique_constraint(:slug, name: :index_topics_on_forum_id_and_slug)
end

View file

@ -1,125 +0,0 @@
defmodule Philomena.Uploader do
  @moduledoc """
  Upload and processing callback logic for image files.
  """

  alias Philomena.Filename
  alias Philomena.Analyzers
  alias Philomena.Objects
  alias Philomena.Sha512
  import Ecto.Changeset

  @doc """
  Analyzes the given `Plug.Upload` and hands the resulting attributes to
  `changeset_fn` together with `model_or_changeset`.

  On success the attributes are keyed by `field_name`: the analysis values
  are stored under `"<field_name>_<attribute>"`, the generated filename under
  `field_name`, the temporary upload path under `"uploaded_<field_name>"` and
  the previous value of the field under `"removed_<field_name>"`.

  An unsupported MIME type yields only `"<field_name>_mime_type"`; any other
  analysis failure yields an empty attribute map.
  """
  @spec analyze_upload(any(), String.t(), Plug.Upload.t(), (any(), map() -> Ecto.Changeset.t())) ::
          Ecto.Changeset.t()
  def analyze_upload(model_or_changeset, field_name, upload_parameter, changeset_fn) do
    case Analyzers.analyze(upload_parameter) do
      {:ok, analysis} ->
        analysis = extra_attributes(analysis, upload_parameter)

        # Remember the current value so the old file can be removed later.
        removed =
          model_or_changeset
          |> change()
          |> get_field(field(field_name))

        attributes = upload_attributes(analysis, field_name, upload_parameter.path, removed)

        changeset_fn.(model_or_changeset, attributes)

      {:unsupported_mime, mime} ->
        attributes = prefix_attributes(%{"mime_type" => mime}, field_name)

        changeset_fn.(model_or_changeset, attributes)

      _error ->
        changeset_fn.(model_or_changeset, %{})
    end
  end

  @doc """
  Writes the file to permanent storage. This should be the second-to-last step
  in the transaction.
  """
  @spec persist_upload(any(), String.t(), String.t()) :: any()
  def persist_upload(model, file_root, field_name) do
    source = Map.get(model, field(upload_key(field_name)))
    dest = Map.get(model, field(field_name))

    file_root
    |> Path.join(dest)
    |> persist_file(source)
  end

  @doc """
  Persist an arbitrary file to storage at the given path with the correct
  content type and permissions.
  """
  def persist_file(path, file) do
    Objects.upload(path, file)
  end

  @doc """
  Removes the old file from permanent storage. This should be the last step in
  the transaction.
  """
  @spec unpersist_old_upload(any(), String.t(), String.t()) :: any()
  def unpersist_old_upload(model, file_root, field_name) do
    old_file = Map.get(model, field(remove_key(field_name)))

    try_remove(old_file, file_root)
  end

  # Builds the attribute map passed to the changeset callback on success.
  defp upload_attributes(analysis, field_name, upload_path, removed) do
    %{
      "name" => analysis.name,
      "width" => analysis.width,
      "height" => analysis.height,
      "size" => analysis.size,
      "format" => analysis.extension,
      "mime_type" => analysis.mime_type,
      "duration" => analysis.duration,
      "aspect_ratio" => analysis.aspect_ratio,
      "orig_sha512_hash" => analysis.sha512,
      "sha512_hash" => analysis.sha512,
      "is_animated" => analysis.animated?
    }
    |> prefix_attributes(field_name)
    |> Map.merge(%{
      field_name => analysis.new_name,
      upload_key(field_name) => upload_path,
      remove_key(field_name) => removed
    })
  end

  # Adds values derived from the uploaded file itself to the analysis.
  defp extra_attributes(analysis, %Plug.Upload{path: path, filename: filename}) do
    {width, height} = analysis.dimensions
    aspect_ratio = aspect_ratio(width, height)

    stat = File.stat!(path)
    sha512 = Sha512.file(path)
    new_name = Filename.build(analysis.extension)

    Map.merge(analysis, %{
      size: stat.size,
      name: filename,
      width: width,
      height: height,
      sha512: sha512,
      new_name: new_name,
      aspect_ratio: aspect_ratio
    })
  end

  # A zero height would divide by zero; report a ratio of 0.0 instead.
  defp aspect_ratio(_width, 0), do: 0.0
  defp aspect_ratio(width, height), do: width / height

  # Nothing to delete when the field had no previous file.
  defp try_remove(file, _file_root) when file in ["", nil], do: nil

  defp try_remove(file, file_root) do
    file_root
    |> Path.join(file)
    |> Objects.delete()
  end

  defp prefix_attributes(map, prefix) do
    Map.new(map, fn {key, value} -> {"#{prefix}_#{key}", value} end)
  end

  defp upload_key(field_name), do: "uploaded_#{field_name}"

  defp remove_key(field_name), do: "removed_#{field_name}"

  # Field names arrive as strings; only atoms that already exist are accepted.
  defp field(field_name), do: String.to_existing_atom(field_name)
end

View file

@ -1,6 +1,6 @@
defmodule Philomena.UserDownvoteWipe do
alias Philomena.Batch
alias Philomena.Elasticsearch
alias PhilomenaQuery.Batch
alias PhilomenaQuery.Search
alias Philomena.Users
alias Philomena.Users.User
alias Philomena.Images.Image
@ -63,7 +63,7 @@ defmodule Philomena.UserDownvoteWipe do
Image
|> where([i], i.id in ^image_ids)
|> preload(^Images.indexing_preloads())
|> Elasticsearch.reindex(Image)
|> Search.reindex(Image)
# allow time for indexing to catch up
:timer.sleep(:timer.seconds(10))

View file

@ -4,7 +4,7 @@ defmodule Philomena.Users.Uploader do
"""
alias Philomena.Users.User
alias Philomena.Uploader
alias PhilomenaMedia.Uploader
def analyze_upload(user, params) do
Uploader.analyze_upload(user, "avatar", params["avatar"], &User.avatar_changeset/2)

View file

@ -1,9 +1,9 @@
defmodule Philomena.Users.UserNotifier do
alias Bamboo.Email
alias Swoosh.Email
alias Philomena.Mailer
defp deliver(to, subject, body) do
Email.new_email(
Email.new(
to: to,
from: mailer_address(),
subject: subject,

View file

@ -15,7 +15,7 @@ defmodule Philomena.IndexWorker do
# Image
# |> preload(^indexing_preloads())
# |> where([i], field(i, ^column) in ^condition)
# |> Elasticsearch.reindex(Image)
# |> Search.reindex(Image)
# end
#
def perform(module, column, condition) do

View file

@ -1,7 +1,7 @@
defmodule Philomena.TagChangeRevertWorker do
alias Philomena.TagChanges.TagChange
alias Philomena.TagChanges
alias Philomena.Batch
alias PhilomenaQuery.Batch
alias Philomena.Repo
import Ecto.Query

View file

@ -0,0 +1,71 @@
defmodule PhilomenaMedia.Analyzers do
  @moduledoc """
  Utilities for analyzing the format and various attributes of uploaded files.
  """

  alias PhilomenaMedia.Analyzers.{Gif, Jpeg, Png, Svg, Webm}
  alias PhilomenaMedia.Analyzers.Result
  alias PhilomenaMedia.Mime

  @doc """
  Returns an `{:ok, analyzer}` tuple, with the analyzer being a module capable
  of analyzing this media type, or `:error`.

  The allowed MIME types are:
  - `image/gif`
  - `image/jpeg`
  - `image/png`
  - `image/svg+xml`
  - `video/webm`

  > #### Info {: .info}
  >
  > This is an interface intended for use when the MIME type is already known.
  > Using an analyzer not matched to the file may cause unexpected results.

  ## Examples

      {:ok, analyzer} = PhilomenaMedia.Analyzers.analyzer("image/png")
      :error = PhilomenaMedia.Analyzers.analyzer("application/octet-stream")

  """
  @spec analyzer(Mime.t()) :: {:ok, module()} | :error
  def analyzer(content_type)

  def analyzer("image/gif"), do: {:ok, Gif}
  def analyzer("image/jpeg"), do: {:ok, Jpeg}
  def analyzer("image/png"), do: {:ok, Png}
  def analyzer("image/svg+xml"), do: {:ok, Svg}
  def analyzer("video/webm"), do: {:ok, Webm}
  def analyzer(_content_type), do: :error

  @doc """
  Attempts a MIME type check and analysis on the given path or `Plug.Upload`.

  Returns:
  - `{:ok, result}` when the MIME type is supported and the file was analyzed
  - `{:unsupported_mime, mime}` when `PhilomenaMedia.Mime.file/1` detects a
    MIME type that is not supported
  - `:error` when the MIME type could not be determined, or when the argument
    is neither a path nor a `Plug.Upload`

  ## Examples

      file = "image_file.png"
      {:ok, %Result{...}} = Analyzers.analyze(file)

      file = %Plug.Upload{...}
      {:ok, %Result{...}} = Analyzers.analyze(file)

      file = "text_file.txt"
      {:unsupported_mime, "text/plain"} = Analyzers.analyze(file)

  """
  @spec analyze(Plug.Upload.t() | Path.t()) ::
          {:ok, Result.t()} | {:unsupported_mime, Mime.t()} | :error
  def analyze(%Plug.Upload{path: path}), do: analyze(path)

  def analyze(path) when is_binary(path) do
    # Without an `else`, any value that fails to match (`:error` or
    # `{:unsupported_mime, mime}`) is returned unchanged to the caller.
    with {:ok, mime} <- Mime.file(path),
         {:ok, analyzer} <- analyzer(mime) do
      {:ok, analyzer.analyze(path)}
    end
  end

  def analyze(_path), do: :error
end

View file

@ -0,0 +1,5 @@
defmodule PhilomenaMedia.Analyzers.Analyzer do
  @moduledoc false

  alias PhilomenaMedia.Analyzers.Result

  # Analyze the media file at the given path and return the analysis result.
  @callback analyze(Path.t()) :: Result.t()
end

View file

@ -1,8 +1,16 @@
defmodule Philomena.Analyzers.Gif do
defmodule PhilomenaMedia.Analyzers.Gif do
@moduledoc false
alias PhilomenaMedia.Analyzers.Analyzer
alias PhilomenaMedia.Analyzers.Result
@behaviour Analyzer
@spec analyze(Path.t()) :: Result.t()
def analyze(file) do
stats = stats(file)
%{
%Result{
extension: "gif",
mime_type: "image/gif",
animated?: stats.animated?,

View file

@ -1,8 +1,16 @@
defmodule Philomena.Analyzers.Jpeg do
defmodule PhilomenaMedia.Analyzers.Jpeg do
@moduledoc false
alias PhilomenaMedia.Analyzers.Analyzer
alias PhilomenaMedia.Analyzers.Result
@behaviour Analyzer
@spec analyze(Path.t()) :: Result.t()
def analyze(file) do
stats = stats(file)
%{
%Result{
extension: "jpg",
mime_type: "image/jpeg",
animated?: false,

View file

@ -1,8 +1,16 @@
defmodule Philomena.Analyzers.Png do
defmodule PhilomenaMedia.Analyzers.Png do
@moduledoc false
alias PhilomenaMedia.Analyzers.Analyzer
alias PhilomenaMedia.Analyzers.Result
@behaviour Analyzer
@spec analyze(Path.t()) :: Result.t()
def analyze(file) do
stats = stats(file)
%{
%Result{
extension: "png",
mime_type: "image/png",
animated?: stats.animated?,

View file

@ -0,0 +1,36 @@
defmodule PhilomenaMedia.Analyzers.Result do
  @moduledoc """
  The result of analyzing a media file.

  Fields:

  - `:animated?` - whether the media file is animated
  - `:dimensions` - the maximum dimensions of the media file, as `{width, height}`
  - `:duration` - the maximum duration of the media file, or `0.0` if not applicable
  - `:extension` - the file extension the media file should take, based on its contents
  - `:mime_type` - the MIME type the media file should take, based on its contents

  ## Example

      %Result{
        animated?: false,
        dimensions: {800, 600},
        duration: 0.0,
        extension: "png",
        mime_type: "image/png"
      }

  """

  defstruct [
    {:animated?, false},
    {:dimensions, {0, 0}},
    {:duration, 0.0},
    {:extension, ""},
    {:mime_type, "application/octet-stream"}
  ]

  @type t :: %__MODULE__{
          animated?: boolean(),
          dimensions: {integer(), integer()},
          duration: float(),
          extension: String.t(),
          mime_type: String.t()
        }
end

View file

@ -1,8 +1,16 @@
defmodule Philomena.Analyzers.Svg do
defmodule PhilomenaMedia.Analyzers.Svg do
@moduledoc false
alias PhilomenaMedia.Analyzers.Analyzer
alias PhilomenaMedia.Analyzers.Result
@behaviour Analyzer
@spec analyze(Path.t()) :: Result.t()
def analyze(file) do
stats = stats(file)
%{
%Result{
extension: "svg",
mime_type: "image/svg+xml",
animated?: false,

View file

@ -1,8 +1,16 @@
defmodule Philomena.Analyzers.Webm do
defmodule PhilomenaMedia.Analyzers.Webm do
@moduledoc false
alias PhilomenaMedia.Analyzers.Analyzer
alias PhilomenaMedia.Analyzers.Result
@behaviour Analyzer
@spec analyze(Path.t()) :: Result.t()
def analyze(file) do
stats = stats(file)
%{
%Result{
extension: "webm",
mime_type: "video/webm",
animated?: stats.animated?,

View file

@ -0,0 +1,36 @@
defmodule PhilomenaMedia.Filename do
  @moduledoc """
  Utilities for building arbitrary filenames for uploaded files.
  """

  @type extension :: String.t()

  @doc """
  Builds a replacement "filename key" from the supplied file extension.

  Names are generated in the form `year/month/day/uuid.ext`, using the current
  UTC date and a freshly generated version 1 UUID. Avoid passing
  user-controlled file extensions to this function; select them from a list of
  known extensions instead.

  ## Example

      iex> PhilomenaMedia.Filename.build("png")
      "2024/1/1/0bce8eea-17e0-11ef-b7d4-0242ac120006.png"

  """
  @spec build(extension()) :: String.t()
  def build(extension) do
    date_part = time_identifier(DateTime.utc_now())
    uuid = UUID.uuid1()

    "#{date_part}/#{uuid}.#{extension}"
  end

  # Date components are not zero-padded, e.g. "2024/1/1".
  defp time_identifier(%{year: year, month: month, day: day}) do
    "#{year}/#{month}/#{day}"
  end
end

View file

@ -0,0 +1,68 @@
defmodule PhilomenaMedia.Intensities do
  @moduledoc """
  Corner intensities are a simple mechanism for automatic image deduplication,
  designed for a time when computer vision was an expensive technology and
  resources were scarce.

  Each image is divided into quadrants; images with an odd number of pixels
  on either dimension overlap quadrants by one pixel. The luma (brightness)
  value corresponding to each pixel is computed according to ITU-R BT.709
  primaries, and its value is added to a sum for each quadrant. Finally, the
  sum is divided by the number of pixels in the quadrant to produce an average.
  The minimum luma value of any pixel is 0, and the maximum is 255, so an
  average will be between these values. Transparent pixels are composited on
  black before processing.

  By using a range search in the database, this produces a reverse image search
  which suffers no dimensionality issues, is exceptionally fast to evaluate, and
  is independent of image dimensions, with poor precision and a poor-to-fair
  accuracy.
  """

  defstruct nw: 0.0, ne: 0.0, sw: 0.0, se: 0.0

  @type t :: %__MODULE__{
          nw: float(),
          ne: float(),
          sw: float(),
          se: float()
        }

  @doc """
  Gets the corner intensities of the given image file by running the external
  `image-intensities` program on it.

  The image file must be in the PNG or JPEG format.

  > #### Info {: .info}
  >
  > Clients should prefer to use `PhilomenaMedia.Processors.intensities/2`, as it
  > handles media files of any type supported by this library, not just PNG or JPEG.

  Returns `:error` when the program exits with a non-zero status.

  ## Examples

      iex> Intensities.file("image.png")
      {:ok, %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064}}

      iex> Intensities.file("nonexistent.jpg")
      :error

  """
  @spec file(Path.t()) :: {:ok, t()} | :error
  def file(input) do
    case System.cmd("image-intensities", [input]) do
      {output, 0} -> {:ok, parse_output(output)}
      _error -> :error
    end
  end

  # The program output is expected to be exactly four tab-separated floats,
  # in the order nw, ne, sw, se.
  defp parse_output(output) do
    fields = String.split(String.trim(output), "\t")
    [nw, ne, sw, se] = Enum.map(fields, &String.to_float/1)

    %__MODULE__{nw: nw, ne: ne, sw: sw, se: se}
  end
end

View file

@ -0,0 +1,67 @@
defmodule PhilomenaMedia.Mime do
  @moduledoc """
  Utilities for determining the MIME type of a file via parsing.

  Many MIME type libraries assume the MIME type of the file by reading file extensions.
  This is inherently unreliable, as many websites disguise the content types of files with
  specific names for cost or bandwidth saving reasons. As processing depends on correctly
  identifying the type of a file, parsing the file contents is necessary.
  """

  @type t :: String.t()

  # MIME types accepted as-is by `true_mime/1`.
  @supported_mimes ~W(image/gif image/jpeg image/png image/svg+xml video/webm)

  @doc """
  Gets the MIME type of the given pathname.

  The type is detected by running the `file` command on the path and passing
  its trimmed output through `true_mime/1`. Returns `:error` when the command
  exits with a non-zero status.

  ## Examples

      iex> PhilomenaMedia.Mime.file("image.png")
      {:ok, "image/png"}

      iex> PhilomenaMedia.Mime.file("file.txt")
      {:unsupported_mime, "text/plain"}

      iex> PhilomenaMedia.Mime.file("nonexistent.file")
      :error

  """
  @spec file(Path.t()) :: {:ok, t()} | {:unsupported_mime, t()} | :error
  def file(path) do
    case System.cmd("file", ["-b", "--mime-type", path]) do
      {output, 0} -> true_mime(String.trim(output))
      _error -> :error
    end
  end

  @doc """
  Provides the "true" MIME type of this file.

  Some files are identified as a type they should not be based on how they are used by
  this library. These MIME types (and their "corrected" versions) are:

  - `image/svg` -> `image/svg+xml`
  - `audio/webm` -> `video/webm`

  Supported types are returned as `{:ok, mime}`; anything else is returned as
  `{:unsupported_mime, mime}`.

  ## Examples

      iex> PhilomenaMedia.Mime.true_mime("image/svg")
      {:ok, "image/svg+xml"}

      iex> PhilomenaMedia.Mime.true_mime("audio/webm")
      {:ok, "video/webm"}

      iex> PhilomenaMedia.Mime.true_mime("image/png")
      {:ok, "image/png"}

      iex> PhilomenaMedia.Mime.true_mime("text/plain")
      {:unsupported_mime, "text/plain"}

  """
  @spec true_mime(String.t()) :: {:ok, t()} | {:unsupported_mime, t()}
  def true_mime("image/svg"), do: {:ok, "image/svg+xml"}
  def true_mime("audio/webm"), do: {:ok, "video/webm"}
  def true_mime(mime) when mime in @supported_mimes, do: {:ok, mime}
  def true_mime(mime), do: {:unsupported_mime, mime}
end

View file

@ -0,0 +1,236 @@
defmodule PhilomenaMedia.Objects do
  @moduledoc """
  Replication wrapper for object storage backends.

  While cloud services can be an inexpensive way to access large amounts of storage, they
  are inherently less available than local file-based storage. For this reason, it is generally
  recommended to maintain a secondary storage provider, such as in the
  [3-2-1 backup strategy](https://www.backblaze.com/blog/the-3-2-1-backup-strategy/).

  Functions in this module replicate operations on both the primary and secondary storage
  providers. Alternatively, a mode with only a primary storage provider is supported.

  This module assumes storage endpoints are S3-compatible and can be communicated with via the
  `m:ExAws` module. This does not preclude the usage of local file-based storage, which can be
  accomplished with the [`s3proxy` project](https://github.com/gaul/s3proxy). The development
  repository provides an example of `s3proxy` in use.

  Bucket names should be set with configuration on `s3_primary_bucket` and `s3_secondary_bucket`.
  If `s3_secondary_bucket` is not set, then only the primary will be used. However, the primary
  bucket name must always be set.

  These are read from environment variables at runtime by Philomena.

      # S3/Object store config
      config :philomena, :s3_primary_bucket, System.fetch_env!("S3_BUCKET")
      config :philomena, :s3_secondary_bucket, System.get_env("ALT_S3_BUCKET")

  Additional options (e.g. controlling the remote endpoint used) may be set with
  `s3_primary_options` and `s3_secondary_options` keys. This allows you to use a provider other
  than AWS, like [Cloudflare R2](https://developers.cloudflare.com/r2/).

  These are read from environment variables at runtime by Philomena.

      config :philomena, :s3_primary_options,
        region: System.get_env("S3_REGION", "us-east-1"),
        scheme: System.fetch_env!("S3_SCHEME"),
        host: System.fetch_env!("S3_HOST"),
        port: System.fetch_env!("S3_PORT"),
        access_key_id: System.fetch_env!("AWS_ACCESS_KEY_ID"),
        secret_access_key: System.fetch_env!("AWS_SECRET_ACCESS_KEY"),
        http_opts: [timeout: 180_000, recv_timeout: 180_000]

  """

  alias PhilomenaMedia.Mime
  require Logger

  @type key :: String.t()

  @doc """
  Fetch a key from the storage backend and write it into the destination path.

  Backends are tried in order (primary first); the body of the first successful
  response is written to `file_path`.

  ## Example

      key = "2024/1/1/5/full.png"
      Objects.download_file(key, file_path)

  """
  # sobelow_skip ["Traversal.FileModule"]
  @spec download_file(key(), Path.t()) :: :ok
  def download_file(key, file_path) do
    fetch = fn backend ->
      request = ExAws.S3.get_object(backend[:bucket], key)

      case ExAws.request(request, backend[:config_overrides]) do
        {:ok, response} -> response
        _ -> nil
      end
    end

    response = Enum.find_value(backends(), fetch)

    File.write!(file_path, response.body)
  end

  @doc """
  Upload a file using a single API call, writing the contents from the given path to storage.

  The content type sent with the object is the MIME type detected from the file.

  ## Example

      key = "2024/1/1/5/full.png"
      Objects.put(key, file_path)

  """
  # sobelow_skip ["Traversal.FileModule"]
  @spec put(key(), Path.t()) :: :ok
  def put(key, file_path) do
    {_, mime} = Mime.file(file_path)
    contents = File.read!(file_path)

    request_all(&ExAws.S3.put_object(&1, key, contents, content_type: mime))
  end

  @doc """
  Upload a file using multiple API calls, writing the contents from the given path to storage.

  ## Example

      key = "2024/1/1/5/full.png"
      Objects.upload(key, file_path)

  """
  @spec upload(key(), Path.t()) :: :ok
  def upload(key, file_path) do
    # Workaround for API rate limit issues on R2
    put(key, file_path)
  end

  @doc """
  Copies a key from the source to the destination, overwriting the destination object if it exists.

  > #### Warning {: .warning}
  >
  > `copy/2` does not use the `PutObjectCopy` S3 request. It downloads the file and uploads it again.
  > This may use more disk space than expected if the file is large.

  A failure is logged as a warning; `:ok` is returned either way.

  ## Example

      source_key = "2024/1/1/5/full.png"
      dest_key = "2024/1/1/5-a5323e542e0f/full.png"
      Objects.copy(source_key, dest_key)

  """
  @spec copy(key(), key()) :: :ok
  def copy(source_key, dest_key) do
    # Potential workaround for inconsistent PutObjectCopy on R2
    #
    # run_all(fn opts->
    #   ExAws.S3.put_object_copy(opts[:bucket], dest_key, opts[:bucket], source_key)
    #   |> ExAws.request!(opts[:config_overrides])
    # end)

    temp_path = Briefly.create!()
    download_file(source_key, temp_path)
    upload(dest_key, temp_path)

    :ok
  catch
    _kind, _value ->
      Logger.warning("Failed to copy #{source_key} -> #{dest_key}")
      :ok
  end

  @doc """
  Removes the key from storage.

  ## Example

      key = "2024/1/1/5/full.png"
      Objects.delete(key)

  """
  @spec delete(key()) :: :ok
  def delete(key) do
    request_all(&ExAws.S3.delete_object(&1, key))
  end

  @doc """
  Removes all given keys from storage.

  ## Example

      keys = [
        "2024/1/1/5/full.png",
        "2024/1/1/5/small.png",
        "2024/1/1/5/thumb.png",
        "2024/1/1/5/thumb_tiny.png"
      ]

      Objects.delete_multiple(keys)

  """
  @spec delete_multiple([key()]) :: :ok
  def delete_multiple(keys) do
    request_all(&ExAws.S3.delete_multiple_objects(&1, keys))
  end

  # Builds an operation for each backend's bucket with `build_operation` and
  # executes it against that backend with its configuration overrides.
  defp request_all(build_operation) do
    run_all(fn backend ->
      backend[:bucket]
      |> build_operation.()
      |> ExAws.request!(backend[:config_overrides])
    end)
  end

  # Runs `wrapped` once per backend, concurrently. Anything raised, thrown or
  # exited inside `wrapped` is caught and only results in a logged warning, so
  # the caller always receives `:ok`.
  defp run_all(wrapped) do
    guarded = fn backend ->
      try do
        wrapped.(backend)
        :ok
      catch
        _kind, _value -> :error
      end
    end

    any_failed? =
      backends()
      |> Task.async_stream(guarded, timeout: :infinity)
      |> Enum.any?(fn {_status, outcome} -> outcome == :error end)

    if any_failed? do
      Logger.warning("Failed to operate on all backends")
    end

    :ok
  end

  defp backends, do: primary_opts() ++ replica_opts()

  defp primary_opts do
    overrides = Application.fetch_env!(:philomena, :s3_primary_options)
    bucket = Application.fetch_env!(:philomena, :s3_primary_bucket)

    [%{config_overrides: overrides, bucket: bucket}]
  end

  # The secondary options are only required when a secondary bucket is configured.
  defp replica_opts do
    case Application.get_env(:philomena, :s3_secondary_bucket) do
      nil ->
        []

      replica_bucket ->
        [
          %{
            config_overrides: Application.fetch_env!(:philomena, :s3_secondary_options),
            bucket: replica_bucket
          }
        ]
    end
  end
end

View file

@ -0,0 +1,202 @@
defmodule PhilomenaMedia.Processors do
  @moduledoc """
  Utilities for processing uploads.

  Processors have 4 functions available:

  - `versions/1`:
    Takes a version list and generates a list of files which the processor will generate
    during the scope of `process/3`.

  - `process/3`:
    Takes an analysis result, file path, and version list and generates an "edit script" that
    represents how to store this file according to the given version list. See
    `m:Philomena.Images.Thumbnailer` for a usage example.

  - `post_process/2`:
    Takes an analysis result and file path and performs optimizations on the upload. See
    `m:Philomena.Images.Thumbnailer` for a usage example.

  - `intensities/2`:
    Takes an analysis result and file path and generates corner intensities, performing
    any conversion necessary before processing. See `m:PhilomenaMedia.Intensities`
    for more information.

  ## Version lists

  `versions/1` and `process/3` take _version lists_ as input. A version list is a structure
  like the following, which contains pairs of _version names_ and _dimensions_:

      [
        thumb_tiny: {50, 50},
        thumb_small: {150, 150},
        thumb: {250, 250},
        small: {320, 240},
        medium: {800, 600},
        large: {1280, 1024},
        tall: {1024, 4096}
      ]

  When calling these functions, it is recommended to prefilter the version list based on the
  media dimensions to avoid generating unnecessary versions which are larger than the original
  file. See `m:Philomena.Images.Thumbnailer` for an example.

  ## Edit scripts

  `process/3` and `post_process/2` return _edit scripts_. An edit script is a list where each
  entry may be one of the following:

      {:thumbnails, [copy_requests]}
      {:replace_original, path}
      {:intensities, intensities}

  Within the thumbnail request, a copy request is defined with the following structure:

      {:copy, path, version_filename}

  See the respective functions for more information about their return values.
  """

  alias PhilomenaMedia.Analyzers.Result
  alias PhilomenaMedia.Intensities
  alias PhilomenaMedia.Processors.{Gif, Jpeg, Png, Svg, Webm}
  alias PhilomenaMedia.Mime

  # The name of a version, like :large
  @type version_name :: atom()

  @type dimensions :: {integer(), integer()}
  @type version_list :: [{version_name(), dimensions()}]

  # The file name of a processed version, like "large.png"
  @type version_filename :: String.t()

  # A single file to be copied to satisfy a request for a version name
  @type copy_request :: {:copy, Path.t(), version_filename()}

  # A list of thumbnail versions to copy into place
  @type thumbnails :: {:thumbnails, [copy_request()]}

  # Replace the original file to strip metadata or losslessly optimize
  @type replace_original :: {:replace_original, Path.t()}

  # Apply the computed corner intensities
  @type intensities :: {:intensities, Intensities.t()}

  # An edit script, representing the changes to apply to the storage backend
  # after successful processing
  @type edit_script :: [thumbnails() | replace_original() | intensities()]

  @doc """
  Returns a processor, with the processor being a module capable
  of processing this content type, or nil.

  The allowed MIME types are:
  - `image/gif`
  - `image/jpeg`
  - `image/png`
  - `image/svg+xml`
  - `video/webm`

  > #### Info {: .info}
  >
  > This is an interface intended for use when the MIME type is already known.
  > Using a processor not matched to the file may cause unexpected results.

  ## Examples

      iex> PhilomenaMedia.Processors.processor("image/png")
      PhilomenaMedia.Processors.Png

      iex> PhilomenaMedia.Processors.processor("application/octet-stream")
      nil

  """
  @spec processor(Mime.t()) :: module() | nil
  def processor(content_type)

  def processor("image/gif"), do: Gif
  def processor("image/jpeg"), do: Jpeg
  def processor("image/png"), do: Png
  def processor("image/svg+xml"), do: Svg
  def processor("video/webm"), do: Webm
  def processor(_content_type), do: nil

  @doc """
  Takes a MIME type and filtered version list and generates a list of version files to be
  generated by `process/3`. List contents may differ based on file type.

  ## Examples

      iex> PhilomenaMedia.Processors.versions("image/png", [thumb_tiny: {50, 50}])
      ["thumb_tiny.png"]

      iex> PhilomenaMedia.Processors.versions("video/webm", [thumb_tiny: {50, 50}])
      ["full.mp4", "rendered.png", "thumb_tiny.webm", "thumb_tiny.mp4", "thumb_tiny.gif"]

  """
  # Returns file names such as "thumb_tiny.png", not version name atoms.
  @spec versions(Mime.t(), version_list()) :: [version_filename()]
  def versions(mime_type, valid_sizes) do
    processor(mime_type).versions(valid_sizes)
  end

  @doc """
  Takes an analyzer result, file path, and version list and runs the appropriate processor's
  `process/3`, processing the media.

  Returns an edit script to apply changes. Depending on the media type, this may take a long
  time to execute.

  ## Example

      iex> PhilomenaMedia.Processors.process(%Result{...}, "image.png", [thumb_tiny: {50, 50}])
      [
        intensities: %Intensities{...},
        thumbnails: [
          {:copy, "/tmp/briefly-5764/vSHsM3kn7k4yvrvZH.png", "thumb_tiny.png"}
        ]
      ]

  """
  @spec process(Result.t(), Path.t(), version_list()) :: edit_script()
  def process(analysis, file, versions) do
    processor(analysis.mime_type).process(analysis, file, versions)
  end

  @doc """
  Takes an analyzer result and file path and runs the appropriate processor's `post_process/2`,
  performing long-running optimizations on the media source file.

  Returns an edit script to apply changes. Depending on the media type, this may take a long
  time to execute. This may also be an empty list, if there are no changes to perform.

  ## Example

      iex> PhilomenaMedia.Processors.post_process(%Result{...}, "image.gif")
      [replace_original: "/tmp/briefly-5764/cyZSQnmL59XDRoPoaDxr.gif"]

  """
  @spec post_process(Result.t(), Path.t()) :: edit_script()
  def post_process(analysis, file) do
    processor(analysis.mime_type).post_process(analysis, file)
  end

  @doc """
  Takes an analyzer result and file path and runs the appropriate processor's `intensities/2`,
  returning the corner intensities.

  This allows for generating intensities for file types that are not directly supported by
  `m:PhilomenaMedia.Intensities`, and should be the preferred function to call when intensities
  are needed.

  ## Example

      iex> PhilomenaMedia.Processors.intensities(%Result{...}, "video.webm")
      %Intensities{nw: 111.689148, ne: 116.228048, sw: 93.268433, se: 104.630064}

  """
  @spec intensities(Result.t(), Path.t()) :: Intensities.t()
  def intensities(analysis, file) do
    processor(analysis.mime_type).intensities(analysis, file)
  end
end

View file

@ -1,12 +1,21 @@
defmodule Philomena.Processors.Gif do
alias Philomena.Intensities
defmodule PhilomenaMedia.Processors.Gif do
@moduledoc false
alias PhilomenaMedia.Intensities
alias PhilomenaMedia.Analyzers.Result
alias PhilomenaMedia.Processors.Processor
alias PhilomenaMedia.Processors
@behaviour Processor
@spec versions(Processors.version_list()) :: [Processors.version_filename()]
def versions(sizes) do
sizes
|> Enum.map(fn {name, _} -> "#{name}.gif" end)
|> Kernel.++(["full.webm", "full.mp4", "rendered.png"])
end
@spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script()
def process(analysis, file, versions) do
duration = analysis.duration
preview = preview(duration, file)
@ -17,16 +26,18 @@ defmodule Philomena.Processors.Gif do
scaled = Enum.flat_map(versions, &scale(palette, file, &1))
videos = generate_videos(file)
%{
[
intensities: intensities,
thumbnails: scaled ++ videos ++ [{:copy, preview, "rendered.png"}]
}
]
end
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
def post_process(_analysis, file) do
%{replace_original: optimize(file)}
[replace_original: optimize(file)]
end
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
def intensities(analysis, file) do
{:ok, intensities} = Intensities.file(preview(analysis.duration, file))
intensities

View file

@ -1,10 +1,19 @@
defmodule Philomena.Processors.Jpeg do
alias Philomena.Intensities
defmodule PhilomenaMedia.Processors.Jpeg do
@moduledoc false
alias PhilomenaMedia.Intensities
alias PhilomenaMedia.Analyzers.Result
alias PhilomenaMedia.Processors.Processor
alias PhilomenaMedia.Processors
@behaviour Processor
@spec versions(Processors.version_list()) :: [Processors.version_filename()]
def versions(sizes) do
Enum.map(sizes, fn {name, _} -> "#{name}.jpg" end)
end
@spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script()
def process(_analysis, file, versions) do
stripped = optimize(strip(file))
@ -12,15 +21,17 @@ defmodule Philomena.Processors.Jpeg do
scaled = Enum.flat_map(versions, &scale(stripped, &1))
%{
[
replace_original: stripped,
intensities: intensities,
thumbnails: scaled
}
]
end
def post_process(_analysis, _file), do: %{}
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
def post_process(_analysis, _file), do: []
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
def intensities(_analysis, file) do
{:ok, intensities} = Intensities.file(file)
intensities

View file

@ -1,10 +1,19 @@
defmodule Philomena.Processors.Png do
alias Philomena.Intensities
defmodule PhilomenaMedia.Processors.Png do
@moduledoc false
alias PhilomenaMedia.Intensities
alias PhilomenaMedia.Analyzers.Result
alias PhilomenaMedia.Processors.Processor
alias PhilomenaMedia.Processors
@behaviour Processor
@spec versions(Processors.version_list()) :: [Processors.version_filename()]
def versions(sizes) do
Enum.map(sizes, fn {name, _} -> "#{name}.png" end)
end
@spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script()
def process(analysis, file, versions) do
animated? = analysis.animated?
@ -12,21 +21,23 @@ defmodule Philomena.Processors.Png do
scaled = Enum.flat_map(versions, &scale(file, animated?, &1))
%{
[
intensities: intensities,
thumbnails: scaled
}
]
end
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
def post_process(analysis, file) do
if analysis.animated? do
# libpng has trouble with animations, so skip optimization
%{}
[]
else
%{replace_original: optimize(file)}
[replace_original: optimize(file)]
end
end
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
def intensities(_analysis, file) do
{:ok, intensities} = Intensities.file(file)
intensities

View file

@ -0,0 +1,21 @@
defmodule PhilomenaMedia.Processors.Processor do
  @moduledoc false

  alias PhilomenaMedia.Analyzers.Result
  alias PhilomenaMedia.{Intensities, Processors}

  # Lists the version filenames that will be produced for the given version list.
  @callback versions(Processors.version_list()) :: [Processors.version_filename()]

  # Processes the media at the given path against the given version list and
  # returns an edit script describing the resulting files.
  @callback process(Result.t(), Path.t(), Processors.version_list()) ::
              Processors.edit_script()

  # Performs post-processing optimization on the file, to reduce its size and
  # strip non-essential metadata.
  @callback post_process(Result.t(), Path.t()) :: Processors.edit_script()

  # Generates corner intensities for the media at the given path.
  @callback intensities(Result.t(), Path.t()) :: Intensities.t()
end

View file

@ -1,12 +1,21 @@
defmodule Philomena.Processors.Svg do
alias Philomena.Intensities
defmodule PhilomenaMedia.Processors.Svg do
@moduledoc false
alias PhilomenaMedia.Intensities
alias PhilomenaMedia.Analyzers.Result
alias PhilomenaMedia.Processors.Processor
alias PhilomenaMedia.Processors
@behaviour Processor
@spec versions(Processors.version_list()) :: [Processors.version_filename()]
def versions(sizes) do
sizes
|> Enum.map(fn {name, _} -> "#{name}.png" end)
|> Kernel.++(["rendered.png", "full.png"])
end
@spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script()
def process(_analysis, file, versions) do
preview = preview(file)
@ -15,14 +24,16 @@ defmodule Philomena.Processors.Svg do
scaled = Enum.flat_map(versions, &scale(preview, &1))
full = [{:copy, preview, "full.png"}]
%{
[
intensities: intensities,
thumbnails: scaled ++ full ++ [{:copy, preview, "rendered.png"}]
}
]
end
def post_process(_analysis, _file), do: %{}
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
def post_process(_analysis, _file), do: []
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
def intensities(_analysis, file) do
{:ok, intensities} = Intensities.file(preview(file))
intensities

View file

@ -1,7 +1,15 @@
defmodule Philomena.Processors.Webm do
alias Philomena.Intensities
defmodule PhilomenaMedia.Processors.Webm do
@moduledoc false
alias PhilomenaMedia.Intensities
alias PhilomenaMedia.Analyzers.Result
alias PhilomenaMedia.Processors.Processor
alias PhilomenaMedia.Processors
import Bitwise
@behaviour Processor
@spec versions(Processors.version_list()) :: [Processors.version_filename()]
def versions(sizes) do
webm_versions = Enum.map(sizes, fn {name, _} -> "#{name}.webm" end)
mp4_versions = Enum.map(sizes, fn {name, _} -> "#{name}.mp4" end)
@ -14,6 +22,7 @@ defmodule Philomena.Processors.Webm do
["full.mp4", "rendered.png"] ++ webm_versions ++ mp4_versions ++ gif_versions
end
@spec process(Result.t(), Path.t(), Processors.version_list()) :: Processors.edit_script()
def process(analysis, file, versions) do
dimensions = analysis.dimensions
duration = analysis.duration
@ -27,15 +36,17 @@ defmodule Philomena.Processors.Webm do
scaled = Enum.flat_map(versions, &scale(stripped, palette, duration, dimensions, &1))
mp4 = [{:copy, mp4, "full.mp4"}]
%{
[
replace_original: stripped,
intensities: intensities,
thumbnails: scaled ++ mp4 ++ [{:copy, preview, "rendered.png"}]
}
]
end
def post_process(_analysis, _file), do: %{}
@spec post_process(Result.t(), Path.t()) :: Processors.edit_script()
def post_process(_analysis, _file), do: []
@spec intensities(Result.t(), Path.t()) :: Intensities.t()
def intensities(analysis, file) do
{:ok, intensities} = Intensities.file(preview(analysis.duration, file))
intensities

View file

@ -0,0 +1,42 @@
defmodule PhilomenaMedia.Sha512 do
  @moduledoc """
  Streaming SHA-512 processor.
  """

  # 10 MiB
  @chunk_size 10_485_760

  @doc """
  Generate the SHA2-512 hash of the file at the given path as a lowercase
  hexadecimal string.

  The file is streamed in 10MiB chunks and each chunk is fed into an
  incremental hash state.

  ## Example

      iex> Sha512.file("image.png")
      "97fd5243cd39e225f1478097acae71fbbff7f3027b24f0e6a8e06a0d7d3e6861cd05691d7470c76e7dfc4eb30459a906918d5ba0d144184fff02b8e34bd9ecf8"

  """
  @spec file(Path.t()) :: String.t()
  def file(path) do
    digest =
      path
      |> stream_file()
      |> Enum.reduce(:crypto.hash_init(:sha512), fn chunk, state ->
        :crypto.hash_update(state, chunk)
      end)
      |> :crypto.hash_final()

    Base.encode16(digest, case: :lower)
  end

  # The clause to compile is selected at compile time from the running Elixir version.
  if Version.match?(System.version(), ">= 1.16.0") do
    # `stream!/2` was added in Elixir 1.16 to accept a shortened form,
    # where we only need to specify the size of each stream chunk
    defp stream_file(file), do: File.stream!(file, @chunk_size)
  else
    # Use legacy stream/3 for older Elixir versions
    defp stream_file(file), do: File.stream!(file, [], @chunk_size)
  end
end

View file

@ -0,0 +1,360 @@
defmodule PhilomenaMedia.Uploader do
@moduledoc """
Upload and processing callback logic for media files.
To use the uploader, the target schema must be modified to add at least the
following fields, assuming the name of the field to write to the database is `foo`:
field :foo, :string
field :uploaded_foo, :string, virtual: true
field :removed_foo, :string, virtual: true
The schema should also define a changeset function which casts the file parameters. This may be
the default changeset function, or a function specialized to accept only the file parameters. A
minimal schema must cast at least the following to successfully upload and replace files:
def foo_changeset(schema, attrs) do
cast(schema, attrs, [:foo, :uploaded_foo, :removed_foo])
end
Additional fields may be added to perform validations. For example, specifying a field name
`foo_mime_type` allows the creation of a MIME type filter in the changeset:
def foo_changeset(schema, attrs) do
schema
|> cast(attrs, [:foo, :foo_mime_type, :uploaded_foo, :removed_foo])
|> validate_required([:foo, :foo_mime_type])
|> validate_inclusion(:foo_mime_type, ["image/svg+xml"])
end
See `analyze_upload/4` for more information about what fields may be validated in this
fashion.
Generally, you should expect to create a `Schemas.Uploader` module, which defines functions as
follows, pointing to `m:PhilomenaMedia.Uploader`. Assuming the target field name is `"foo"`, then:
defmodule Philomena.Schemas.Uploader do
alias Philomena.Schemas.Schema
alias PhilomenaMedia.Uploader
@field_name "foo"
def analyze_upload(schema, params) do
Uploader.analyze_upload(schema, @field_name, params[@field_name], &Schema.foo_changeset/2)
end
def persist_upload(schema) do
Uploader.persist_upload(schema, schema_file_root(), @field_name)
end
def unpersist_old_upload(schema) do
Uploader.unpersist_old_upload(schema, schema_file_root(), @field_name)
end
defp schema_file_root do
Application.get_env(:philomena, :schema_file_root)
end
end
A typical context usage may then look like:
alias Philomena.Schemas.Schema
alias Philomena.Schemas.Uploader
@spec create_schema(map()) :: {:ok, Schema.t()} | {:error, Ecto.Changeset.t()}
def create_schema(attrs) do
%Schema{}
|> Uploader.analyze_upload(attrs)
|> Repo.insert()
|> case do
{:ok, schema} ->
Uploader.persist_upload(schema)
{:ok, schema}
error ->
error
end
end
@spec update_schema(Schema.t(), map()) :: {:ok, Schema.t()} | {:error, Ecto.Changeset.t()}
def update_schema(%Schema{} = schema, attrs) do
schema
|> Uploader.analyze_upload(attrs)
|> Repo.update()
|> case do
{:ok, schema} ->
Uploader.persist_upload(schema)
Uploader.unpersist_old_upload(schema)
{:ok, schema}
error ->
error
end
end
This forwards to the core `m:PhilomenaMedia.Uploader` logic with information about the file root.
The file root is the location at which files of the given schema type are located under
the storage path. For example, the file root for the Adverts schema may be
`/srv/philomena/priv/s3/philomena/adverts` in development with the file backend,
and just `adverts` in production with the S3 backend.
It is not recommended to perform persist or unpersist operations in the scope of an `m:Ecto.Multi`,
as they may block indefinitely.
"""
alias PhilomenaMedia.Analyzers
alias PhilomenaMedia.Filename
alias PhilomenaMedia.Objects
alias PhilomenaMedia.Sha512
import Ecto.Changeset
@type schema :: struct()
@type schema_or_changeset :: struct() | Ecto.Changeset.t()
@type field_name :: String.t()
@type file_root :: String.t()
@doc """
Analyzes the given `m:Plug.Upload` and hands the results to a changeset callback, which is
invoked on the schema or changeset passed in.

The generated file name is assigned under the schema's `field_name`, and every attribute
listed below is prefixed with `field_name`.

When the file is analyzed successfully, the following attributes are offered to the
changeset function:

  * `name` (String) - the name of the file
  * `width` (integer) - the width of the file
  * `height` (integer) - the height of the file
  * `size` (integer) - the size of the file, in bytes
  * `format` (String) - the file extension, one of `~w(gif jpg png svg webm)`, determined by reading the file
  * `mime_type` (String) - the file's sniffed MIME type, determined by reading the file
  * `duration` (float) - the duration of the media file
  * `aspect_ratio` (float) - width divided by height
  * `orig_sha512_hash` (String) - the SHA-512 hash of the file
  * `sha512_hash` (String) - the SHA-512 hash of the file (set to the same value as
    `orig_sha512_hash` by this function)
  * `is_animated` (boolean) - whether the file contains animation

In addition, `uploaded_<field_name>` receives the path of the uploaded temporary file and
`removed_<field_name>` receives the value currently stored under `field_name`.

Your changeset callback may accept any of these. Here is an example which accepts all of
them:

    def foo_changeset(schema, attrs) do
      cast(schema, attrs, [
        :foo,
        :foo_name,
        :foo_width,
        :foo_height,
        :foo_size,
        :foo_format,
        :foo_mime_type,
        :foo_duration,
        :foo_aspect_ratio,
        :foo_orig_sha512_hash,
        :foo_sha512_hash,
        :foo_is_animated,
        :uploaded_foo,
        :removed_foo
      ])
    end

Because attributes are prefixed, a `field_name` of `"foo"` results in the changeset function
receiving attributes `"foo_name"`, `"foo_width"`, ... etc.

The changeset callback may also validate the uploaded media. For example,
`m:Philomena.Adverts.Advert` performs validations on MIME type and width of its field, named
`image`:

    def image_changeset(advert, attrs) do
      advert
      |> cast(attrs, [
        :image,
        :image_mime_type,
        :image_size,
        :image_width,
        :image_height,
        :uploaded_image,
        :removed_image
      ])
      |> validate_required([:image])
      |> validate_inclusion(:image_mime_type, ["image/png", "image/jpeg", "image/gif"])
      |> validate_inclusion(:image_width, 699..729)
    end

The key (location to write the persisted file) is passed with the `field_name` attribute into
the changeset callback. The key is calculated using the current date, a UUID, and the computed
extension. A file uploaded may therefore be given a key such as
`2024/1/1/0bce8eea-17e0-11ef-b7d4-0242ac120006.png`. See `PhilomenaMedia.Filename.build/1` for
the actual construction.

If analysis reports an unsupported MIME type, the callback receives only the prefixed
`mime_type` attribute. On any other analysis failure, the callback receives an empty map.

This function does not persist an upload to storage.

See the module documentation for a complete example.

## Example

    @spec analyze_upload(Uploader.schema_or_changeset(), map()) :: Ecto.Changeset.t()
    def analyze_upload(schema, params) do
      Uploader.analyze_upload(schema, "foo", params["foo"], &Schema.foo_changeset/2)
    end

"""
@spec analyze_upload(
        schema_or_changeset(),
        field_name(),
        Plug.Upload.t(),
        (schema_or_changeset(), map() -> Ecto.Changeset.t())
      ) :: Ecto.Changeset.t()
def analyze_upload(schema_or_changeset, field_name, upload_parameter, changeset_fn) do
  case Analyzers.analyze(upload_parameter) do
    {:ok, analysis} ->
      analysis = extra_attributes(analysis, upload_parameter)

      # Value currently stored in the field; recorded so the old file can be removed later.
      previous =
        schema_or_changeset
        |> change()
        |> get_field(field(field_name))

      prefixed =
        prefix_attributes(
          %{
            "name" => analysis.name,
            "width" => analysis.width,
            "height" => analysis.height,
            "size" => analysis.size,
            "format" => analysis.extension,
            "mime_type" => analysis.mime_type,
            "duration" => analysis.duration,
            "aspect_ratio" => analysis.aspect_ratio,
            "orig_sha512_hash" => analysis.sha512,
            "sha512_hash" => analysis.sha512,
            "is_animated" => analysis.animated?
          },
          field_name
        )

      # These three keys are deliberately left unprefixed.
      attributes =
        Map.merge(prefixed, %{
          field_name => analysis.new_name,
          upload_key(field_name) => upload_parameter.path,
          remove_key(field_name) => previous
        })

      changeset_fn.(schema_or_changeset, attributes)

    {:unsupported_mime, mime} ->
      # Surface the rejected MIME type so changeset validations can report it.
      changeset_fn.(schema_or_changeset, prefix_attributes(%{"mime_type" => mime}, field_name))

    _error ->
      changeset_fn.(schema_or_changeset, %{})
  end
end
@doc """
Writes the file to permanent storage. This should be the second-to-last step
before completing a file operation.

The key (location to write the persisted file) is read from the schema's `field_name`
field and prefixed with the `file_root` given by the caller. The local file to upload is
read from the schema's `uploaded_<field_name>` field. The file is then written to storage
via `persist_file/2`.

See the module documentation for a complete example.

## Example

    @spec persist_upload(Schema.t()) :: :ok
    def persist_upload(schema) do
      Uploader.persist_upload(schema, schema_file_root(), "foo")
    end

"""
@spec persist_upload(schema(), file_root(), field_name()) :: :ok
def persist_upload(schema, file_root, field_name) do
  uploaded_field = field(upload_key(field_name))
  uploaded_path = Map.get(schema, uploaded_field)

  key = Path.join(file_root, Map.get(schema, field(field_name)))

  persist_file(key, uploaded_path)
end
@doc """
Persist an arbitrary file to storage with the given key.

> #### Warning {: .warning}
>
> This is exposed for schemas which do not store their files at an offset from a file root,
> to allow overriding the key. If you do not need to override the key, use
> `persist_upload/3` instead.

Both the key (location to write the persisted file) and the path of the file to upload are
handed to `PhilomenaMedia.Objects.upload/2` unchanged. See the definition of that function
for additional details.

## Example

    key = "2024/1/1/5/full.png"
    Uploader.persist_file(key, file_path)

"""
@spec persist_file(Objects.key(), Path.t()) :: :ok
def persist_file(key, file_path), do: Objects.upload(key, file_path)
@doc """
Removes the old file from permanent storage. This should be the last step in
completing a file operation.

The key of the file to remove is read from the schema's `removed_<field_name>` field and
prefixed with the `file_root` given by the caller. The file is then purged from storage.
Nothing is deleted when that field is `nil` or an empty string.

See the module documentation for a complete example.

## Example

    @spec unpersist_old_upload(Schema.t()) :: :ok
    def unpersist_old_upload(schema) do
      Uploader.unpersist_old_upload(schema, schema_file_root(), "foo")
    end

"""
@spec unpersist_old_upload(schema(), file_root(), field_name()) :: :ok
def unpersist_old_upload(schema, file_root, field_name) do
  removed_field = field(remove_key(field_name))

  try_remove(Map.get(schema, removed_field), file_root)
end
# Augments the analysis result with values derived from the upload itself: byte size,
# original filename, split-out width/height, SHA-512 hash, generated storage name and
# aspect ratio.
defp extra_attributes(analysis, %Plug.Upload{path: path, filename: filename}) do
  {width, height} = analysis.dimensions

  aspect_ratio = aspect_ratio(width, height)
  stat = File.stat!(path)
  sha512 = Sha512.file(path)
  new_name = Filename.build(analysis.extension)

  Map.merge(analysis, %{
    size: stat.size,
    name: filename,
    width: width,
    height: height,
    sha512: sha512,
    new_name: new_name,
    aspect_ratio: aspect_ratio
  })
end
# Width divided by height. An integer height of zero yields 0.0 instead of raising
# on division by zero.
defp aspect_ratio(_width, 0) do
  0.0
end

defp aspect_ratio(width, height) do
  width / height
end
# Deletes `file` (relative to `file_root`) from storage. A `nil` or empty name means
# there is nothing to remove, so those cases return `:ok` without touching storage.
defp try_remove(file, _file_root) when file in ["", nil], do: :ok

defp try_remove(file, file_root) do
  file_root
  |> Path.join(file)
  |> Objects.delete()
end
# Returns a copy of `map` in which every key is rewritten to "<prefix>_<key>".
defp prefix_attributes(map, prefix) do
  for {key, value} <- map, into: %{} do
    {"#{prefix}_#{key}", value}
  end
end
# Name of the virtual field which holds the path of the uploaded file.
defp upload_key(field_name) do
  "uploaded_#{field_name}"
end
# Name of the virtual field which holds the key of the file being replaced.
defp remove_key(field_name) do
  "removed_#{field_name}"
end
# Converts a field name to its atom. Only already-existing atoms are accepted, so an
# unknown name raises instead of creating a new atom.
defp field(field_name) do
  String.to_existing_atom(field_name)
end
end

View file

@ -0,0 +1,24 @@
defmodule PhilomenaProxy.Camo do
  @moduledoc """
  Image proxying utilities.
  """

  alias Philomena.Native

  @doc """
  Turn a potentially untrusted external image URL into a trusted one which is
  loaded through a gocamo proxy (specified by the environment).

  The conversion itself is performed by `Philomena.Native.camo_image_url/1`.

  Configuration is read from environment variables at runtime by Philomena.

      config :philomena,
        camo_host: System.get_env("CAMO_HOST"),
        camo_key: System.get_env("CAMO_KEY")

  ## Example

      iex> PhilomenaProxy.Camo.image_url("https://example.org/img/view/2024/1/1/1.png")
      "https://example.net/L5MqSmYq1ZEqiBGGvsvSDpILyJI/aHR0cHM6Ly9leGFtcGxlLm9yZy9pbWcvdmlldy8yMDI0LzEvMS8xLnBuZwo"

  """
  @spec image_url(String.t()) :: String.t()
  def image_url(input) do
    Native.camo_image_url(input)
  end
end

107
lib/philomena_proxy/http.ex Normal file
View file

@ -0,0 +1,107 @@
defmodule PhilomenaProxy.Http do
  @moduledoc """
  HTTP client implementation.

  Every request carries the Philomena User-Agent header. Traffic can optionally be routed
  through a proxy, so that the application can still connect when the local network is
  restricted. Only proxy URLs with an `http` or `https` scheme are accepted.

  When no proxy host is present in the configuration, no proxy is used and external
  traffic originates from the same network as the application.

  Proxy options are read from environment variables at runtime by Philomena.

      config :philomena,
        proxy_host: System.get_env("PROXY_HOST")

  """

  @type url :: String.t()
  @type header_list :: [{String.t(), String.t()}]
  @type body :: binary()
  @type client_options :: keyword()

  @user_agent "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"

  @doc ~S"""
  Perform an HTTP GET request.

  ## Example

      iex> PhilomenaProxy.Http.get("http://example.com", [{"authorization", "Bearer #{token}"}])
      {:ok, %Tesla.Env{...}}

      iex> PhilomenaProxy.Http.get("http://nonexistent.example.com")
      {:error, %Mint.TransportError{reason: :nxdomain}}

  """
  @spec get(url(), header_list(), client_options()) :: Tesla.Env.result()
  def get(url, headers \\ [], options \\ []) do
    headers
    |> client()
    |> Tesla.get(url, request_opts(options))
  end

  @doc ~S"""
  Perform an HTTP HEAD request.

  ## Example

      iex> PhilomenaProxy.Http.head("http://example.com", [{"authorization", "Bearer #{token}"}])
      {:ok, %Tesla.Env{...}}

      iex> PhilomenaProxy.Http.head("http://nonexistent.example.com")
      {:error, %Mint.TransportError{reason: :nxdomain}}

  """
  @spec head(url(), header_list(), client_options()) :: Tesla.Env.result()
  def head(url, headers \\ [], options \\ []) do
    headers
    |> client()
    |> Tesla.head(url, request_opts(options))
  end

  @doc ~S"""
  Perform an HTTP POST request.

  ## Example

      iex> PhilomenaProxy.Http.post("http://example.com", "", [{"authorization", "Bearer #{token}"}])
      {:ok, %Tesla.Env{...}}

      iex> PhilomenaProxy.Http.post("http://nonexistent.example.com", "")
      {:error, %Mint.TransportError{reason: :nxdomain}}

  """
  @spec post(url(), body(), header_list(), client_options()) :: Tesla.Env.result()
  def post(url, body, headers \\ [], options \\ []) do
    headers
    |> client()
    |> Tesla.post(url, body, request_opts(options))
  end

  # Per-request Tesla options: the caller's options are forwarded to the adapter.
  defp request_opts(options), do: [opts: [adapter: adapter_opts(options)]]

  # Builds the adapter options. The fixed `max_body` and `inet6` values are merged last,
  # so they take precedence over values of the same name supplied by the caller.
  defp adapter_opts(opts) do
    base = Keyword.merge(opts, max_body: 125_000_000, inet6: true)
    proxy_url = Application.get_env(:philomena, :proxy_host)

    if is_nil(proxy_url) do
      base
    else
      Keyword.merge(base, proxy: proxy_opts(URI.parse(proxy_url)))
    end
  end

  # Translates a parsed proxy URL into the adapter's proxy tuple. Any scheme other than
  # "https" or "http" matches no clause.
  defp proxy_opts(%{scheme: "https", host: host, port: port}) do
    {:https, host, port, [transport_opts: [inet6: true]]}
  end

  defp proxy_opts(%{scheme: "http", host: host, port: port}) do
    {:http, host, port, [transport_opts: [inet6: true]]}
  end

  # Tesla client which follows at most one redirect and sends the Philomena User-Agent
  # ahead of the caller's headers.
  defp client(headers) do
    request_headers = [{"User-Agent", @user_agent} | headers]

    middleware = [
      {Tesla.Middleware.FollowRedirects, max_redirects: 1},
      {Tesla.Middleware.Headers, request_headers}
    ]

    Tesla.client(middleware, Tesla.Adapter.Mint)
  end
end

View file

@ -0,0 +1,71 @@
defmodule PhilomenaProxy.Scrapers do
  @moduledoc """
  Scrape utilities to facilitate uploading media from other websites.
  """

  # A URL to fetch, given as a string.
  @type url :: String.t()

  # One image entry in the list attached to a scrape result.
  @type image_result :: %{
          url: url(),
          camo_url: url()
        }

  # What a successful scrape produces.
  @type scrape_result :: %{
          source_url: url(),
          description: String.t() | nil,
          author_name: String.t() | nil,
          images: [image_result()]
        }

  # Candidate scrapers, tried in this order; the first one that can handle the URL is used.
  @scrapers [
    PhilomenaProxy.Scrapers.Deviantart,
    PhilomenaProxy.Scrapers.Pillowfort,
    PhilomenaProxy.Scrapers.Twitter,
    PhilomenaProxy.Scrapers.Tumblr,
    PhilomenaProxy.Scrapers.Raw
  ]

  @doc """
  Scrape a URL for content.

  The URL is offered to each known scraper in turn. The first scraper whose
  `can_handle?/2` accepts it performs the scrape, and its result is returned as-is.
  When no scraper accepts the URL, `nil` is returned.

  The scrape result is intended for serialization to JSON.

  ## Examples

      iex> PhilomenaProxy.Scrapers.scrape!("http://example.org/image-page")
      %{
        source_url: "http://example.org/image-page",
        description: "Test",
        author_name: "myself",
        images: [
          %{
            url: "http://example.org/image.png",
            camo_url: "http://example.net/UT2YIjkWDas6CQBmQcYlcNGmKfQ/aHR0cDovL2V4YW1wbGUub3JnL2ltY"
          }
        ]
      }

      iex> PhilomenaProxy.Scrapers.scrape!("http://example.org/nonexistent-path")
      nil

  """
  @spec scrape!(url()) :: scrape_result() | nil
  def scrape!(url) do
    uri = URI.parse(url)

    case Enum.find(@scrapers, fn scraper -> scraper.can_handle?(uri, url) end) do
      nil -> nil
      scraper -> scraper.scrape(uri, url)
    end
  end
end

Some files were not shown because too many files have changed in this diff Show more