Update settings.yml.default

This commit is contained in:
Manuel
2025-09-18 17:17:31 +02:00
committed by GitHub
parent 39214e1b2b
commit 507ca51ba6

View File

@@ -1,9 +1,5 @@
# settings.yml
# General application settings
app_settings: app_settings:
max_file_size_mb: 2000 # Maximum upload size in Megabytes max_file_size_mb: '2000'
# Allowed extensions (list will be normalized to a set by the server)
allowed_all_extensions: allowed_all_extensions:
- .pdf - .pdf
- .ps - .ps
@@ -44,229 +40,203 @@ app_settings:
- .azw3 - .azw3
- .pptx - .pptx
- .xlsx - .xlsx
# Settings for Optical Character Recognition (OCR) tasks
ocr_settings: ocr_settings:
ocrmypdf: ocrmypdf:
deskew: true deskew: true
clean: true clean: true
optimize: 1 optimize: 1
force_ocr: true force_ocr: true
# Settings for audio transcription tasks
transcription_settings: transcription_settings:
whisper: whisper:
compute_type: "int8" compute_type: int8
allowed_models: allowed_models:
- "tiny" - tiny
- "base" - base
- "small" - small
- "medium" - medium
- "large-v3" - large-v3
- "distil-large-v2" - distil-large-v2
# optional: specify device if workers have GPU (e.g. "cuda" or "cpu")
# device: "cpu"
# --- Conversion Tool Definitions ---
# The server validates placeholders against an allowlist:
# {input}, {output}, {output_dir}, {output_ext}, {quality}, {speed},
# {preset}, {device}, {dpi}, {samplerate}, {bitdepth}
conversion_tools: conversion_tools:
libreoffice: libreoffice:
name: "LibreOffice" name: LibreOffice
# Use {filter} so we can supply LibreOffice export filters like "txt:Text" command_template: libreoffice --headless --convert-to {filter} --outdir {output_dir}
command_template: 'libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}' {input}
timeout: 120 timeout: 300
# Optional: per-format export filter. If missing for a format, server falls back to the extension.
filters: filters:
pdf: "pdf" pdf: pdf
docx: "docx" docx: docx
odt: "odt" odt: odt
html: "html" html: html
rtf: "rtf" rtf: rtf
txt: "txt:Text" txt: txt:Text
xml: "xml" xml: xml
epub: "epub" epub: epub
xlsx: "xlsx" xlsx: xlsx
ods: "ods" ods: ods
csv: "csv:Text" csv: csv:Text
pptx: "pptx" pptx: pptx
odp: "odp" odp: odp
svg: "svg" svg: svg
formats: formats:
pdf: "PDF" pdf: PDF
docx: "Word Document" docx: Word Document
odt: "OpenDocument Text" odt: OpenDocument Text
html: "HTML" html: HTML
rtf: "Rich Text Format" rtf: Rich Text Format
txt: "Plain Text" txt: Plain Text
xml: "Word 2003 XML" xml: Word 2003 XML
epub: "EPUB" epub: EPUB
xlsx: "Excel Spreadsheet" xlsx: Excel Spreadsheet
ods: "OpenDocument Spreadsheet" ods: OpenDocument Spreadsheet
csv: "CSV" csv: CSV
pptx: "PowerPoint Presentation" pptx: PowerPoint Presentation
odp: "OpenDocument Presentation" odp: OpenDocument Presentation
svg: "SVG" svg: SVG
pandoc: pandoc:
name: "Pandoc" name: Pandoc
command_template: 'pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex' command_template: pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex
timeout: 60
formats:
docx: "Word Document"
odt: "OpenDocument Text"
pdf: "PDF"
rtf: "Rich Text Format"
txt: "Plain Text"
tex: "LaTeX"
man: "Groff Man Page"
epub: "EPUB v3 Book"
epub2: "EPUB v2 Book"
html: "HTML"
html5: "HTML5"
pptx: "PowerPoint Presentation"
beamer: "Beamer PDF Slides"
slidy: "Slidy HTML Slides"
md: "Markdown"
rst: "reStructuredText"
jira: "Jira Wiki Markup"
mediawiki: "MediaWiki Markup"
ghostscript_pdf:
name: "Ghostscript (PDF)"
# placeholders used: {preset}, {output}, {input}
command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
timeout: 60
formats:
screen: "PDF (Optimized for Screen)"
ebook: "PDF (Optimized for Ebooks)"
printer: "PDF (Optimized for Print)"
archive: "PDF/A (for Archiving)"
ghostscript_image:
name: "Ghostscript (Image)"
# placeholders used: {device}, {dpi}, {output}, {input}
command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
timeout: 60
formats:
jpeg_72: "JPEG Image (72 DPI)"
jpeg_300: "JPEG Image (300 DPI)"
png16m_150: "PNG Image (150 DPI)"
png16m_300: "PNG Image (300 DPI)"
tiff24nc_300: "TIFF Image (300 DPI)"
tiff24nc_600: "TIFF Image (600 DPI)"
calibre:
name: "Calibre (ebook-convert)"
command_template: 'ebook-convert {input} {output}'
timeout: 60
formats:
epub: "EPUB"
mobi: "MOBI"
azw3: "Amazon Kindle"
pdf: "PDF"
docx: "Word Document"
ffmpeg:
name: "FFmpeg"
command_template: 'ffmpeg -i {input} -y -preset medium {output}'
timeout: 300 timeout: 300
formats: formats:
mp4: "MP4 Video" docx: Word Document
mkv: "MKV Video" odt: OpenDocument Text
mov: "MOV Video" pdf: PDF
webm: "WebM Video" rtf: Rich Text Format
mp3: "MP3 Audio" txt: Plain Text
wav: "WAV Audio" tex: LaTeX
flac: "FLAC Audio" man: Groff Man Page
gif: "Animated GIF" epub: EPUB v3 Book
epub2: EPUB v2 Book
html: HTML
html5: HTML5
pptx: PowerPoint Presentation
beamer: Beamer PDF Slides
slidy: Slidy HTML Slides
md: Markdown
rst: reStructuredText
jira: Jira Wiki Markup
mediawiki: MediaWiki Markup
ghostscript_pdf:
name: Ghostscript (PDF)
command_template: gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET
-dBATCH {preset} -sOutputFile={output} {input}
timeout: 60
formats:
screen: PDF (Optimized for Screen)
ebook: PDF (Optimized for Ebooks)
printer: PDF (Optimized for Print)
archive: PDF/A (for Archiving)
ghostscript_image:
name: Ghostscript (Image)
command_template: gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output}
{input}
timeout: 60
formats:
jpeg_72: JPEG Image (72 DPI)
jpeg_300: JPEG Image (300 DPI)
png16m_150: PNG Image (150 DPI)
png16m_300: PNG Image (300 DPI)
tiff24nc_300: TIFF Image (300 DPI)
tiff24nc_600: TIFF Image (600 DPI)
calibre:
name: Calibre (ebook-convert)
command_template: ebook-convert {input} {output}
timeout: 600
formats:
epub: EPUB
mobi: MOBI
azw3: Amazon Kindle
pdf: PDF
docx: Word Document
ffmpeg:
name: FFmpeg
command_template: ffmpeg -i {input} -y -preset medium {output}
timeout: 600
formats:
mp4: MP4 Video
mkv: MKV Video
mov: MOV Video
webm: WebM Video
mp3: MP3 Audio
wav: WAV Audio
flac: FLAC Audio
gif: Animated GIF
vips: vips:
name: "VIPS" name: VIPS
command_template: 'vips copy {input} {output}[Q=90]' command_template: vips copy {input} {output}[Q=90]
timeout: 60 timeout: 60
formats: formats:
jpg: "JPEG Image (Q90)" jpg: JPEG Image (Q90)
png: "PNG Image" png: PNG Image
webp: "WebP Image (Q90)" webp: WebP Image (Q90)
tiff: "TIFF Image" tiff: TIFF Image
avif: "AVIF Image" avif: AVIF Image
graphicsmagick: graphicsmagick:
name: "GraphicsMagick" name: GraphicsMagick
command_template: 'gm convert {input} -quality 90 {output}' command_template: gm convert {input} -quality 90 {output}
timeout: 60 timeout: 60
formats: formats:
jpg: "JPEG Image (Q90)" jpg: JPEG Image (Q90)
png: "PNG Image" png: PNG Image
webp: "WebP Image" webp: WebP Image
tiff: "TIFF Image" tiff: TIFF Image
pdf: "PDF from Images" pdf: PDF from Images
inkscape: inkscape:
name: "Inkscape" name: Inkscape
command_template: 'inkscape {input} --export-filename={output}' command_template: inkscape {input} --export-filename={output}
timeout: 30 timeout: 30
formats: formats:
svg: "SVG (Plain)" svg: SVG (Plain)
png: "PNG Image (96 DPI)" png: PNG Image (96 DPI)
pdf: "PDF Document" pdf: PDF Document
libjxl: libjxl:
name: "libjxl (cjxl)" name: libjxl (cjxl)
command_template: 'cjxl {input} {output} -q 90' command_template: cjxl {input} {output} -q 90
timeout: 30 timeout: 30
formats: formats:
jxl: "JPEG XL (Q90)" jxl: JPEG XL (Q90)
resvg: resvg:
name: "resvg" name: resvg
command_template: 'resvg {input} {output}' command_template: resvg {input} {output}
timeout: 30 timeout: 30
formats: formats:
png: "PNG from SVG" png: PNG from SVG
potrace: potrace:
name: "Potrace" name: Potrace
command_template: 'potrace {input} --svg -o {output}' command_template: potrace {input} --svg -o {output}
timeout: 30 timeout: 30
formats: formats:
svg: "SVG from Bitmap" svg: SVG from Bitmap
markitdown: markitdown:
name: "Markitdown" name: Markitdown
command_template: 'markitdown {input} -o {output}' command_template: markitdown {input} -o {output}
timeout: 30 timeout: 300
formats: formats:
md: "Markdown from Everything!" md: Markdown from Everything!
pngquant: pngquant:
name: "pngquant" name: pngquant
command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}' command_template: pngquant --quality={quality} --speed {speed} --force --output
timeout: 30 {output} {input}
timeout: 300
formats: formats:
png_hq: "PNG (High Quality Compression)" png_hq: PNG (High Quality Compression)
png_mq: "PNG (Medium Quality Compression)" png_mq: PNG (Medium Quality Compression)
png_fast: "PNG (Fast Compression)" png_fast: PNG (Fast Compression)
sox: sox:
name: "SoX Audio Converter" name: SoX Audio Converter
command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}' command_template: sox {input} -r {samplerate} -b {bitdepth} {output}
timeout: 120 timeout: 600
formats: formats:
wav_48k_24b: "WAV (48kHz, 24-bit)" wav_48k_24b: WAV (48kHz, 24-bit)
wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)" wav_44k_16b: WAV (CD, 44.1kHz, 16-bit)
flac_48k_24b: "FLAC (48kHz, 24-bit)" flac_48k_24b: FLAC (48kHz, 24-bit)
flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)" flac_44k_16b: FLAC (CD, 44.1kHz, 16-bit)
ogg_32k_16b: "Ogg Vorbis (32kHz)" ogg_32k_16b: Ogg Vorbis (32kHz)
ogg_16k_16b: "Ogg Vorbis (16kHz, Voice)" ogg_16k_16b: Ogg Vorbis (16kHz, Voice)
mozjpeg: mozjpeg:
name: "MozJPEG" name: MozJPEG
command_template: 'cjpeg -quality {quality} -outfile {output} {input}' command_template: cjpeg -quality {quality} -outfile {output} {input}
timeout: 30 timeout: 30
formats: formats:
jpg_q85: "JPEG (High Quality)" jpg_q85: JPEG (High Quality)
jpg_q75: "JPEG (Web Quality)" jpg_q75: JPEG (Web Quality)
jpg_q60: "JPEG (Aggressive Compression)" jpg_q60: JPEG (Aggressive Compression)