academic texts

This commit is contained in:
Manuel
2025-09-23 20:01:52 +02:00
parent 2658a71651
commit 918889e6df
24 changed files with 1040 additions and 867 deletions

0
.dockerignore Normal file → Executable file
View File

0
.env.example Normal file → Executable file
View File

0
.gitignore vendored Normal file → Executable file
View File

1
Dockerfile Normal file → Executable file
View File

@@ -36,6 +36,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
pandoc texlive-xetex \ pandoc texlive-xetex \
texlive-latex-recommended \ texlive-latex-recommended \
texlive-fonts-recommended \ texlive-fonts-recommended \
unpaper \
calibre \ calibre \
ffmpeg \ ffmpeg \
libvips-tools \ libvips-tools \

0
LICENSE Normal file → Executable file
View File

0
README.md Normal file → Executable file
View File

0
docker-compose.yml Normal file → Executable file
View File

810
main.py Normal file → Executable file

File diff suppressed because it is too large Load Diff

0
requirements.txt Normal file → Executable file
View File

2
run.sh
View File

@@ -9,7 +9,7 @@ SECRET_KEY=
UPLOADS_DIR=./uploads UPLOADS_DIR=./uploads
PROCESSED_DIR=./processed PROCESSED_DIR=./processed
# Start Gunicorn in the background # Start Gunicorn in the background
gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' --error-logfile - --access-logfile - main:app -b 0.0.0.0:8000 & gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' --error-logfile - --access-logfile - main:app -b 0.0.0.0:8008 &
echo "Started Gunicorn..." echo "Started Gunicorn..."
# Store the Gunicorn process ID # Store the Gunicorn process ID
GUNICORN_PID=$! GUNICORN_PID=$!

24
settings.default.yml Normal file → Executable file
View File

@@ -6,11 +6,11 @@ auth_settings:
oidc_end_session_endpoint: https://accounts.example.com/oidc/session/end oidc_end_session_endpoint: https://accounts.example.com/oidc/session/end
admin_users: admin_users:
- user@example.com - user@example.com
web_hook_settings: webhook_settings:
enabled: False enabled: False
allow_chunked_api_uploads": False allow_chunked_api_uploads: False
allowed_callback_urls: allowed_callback_urls: []
callback_bearer_token": callback_bearer_token:
tts_settings: tts_settings:
piper: piper:
model_dir: "./models/tts" model_dir: "./models/tts"
@@ -99,6 +99,13 @@ ocr_settings:
clean: true clean: true
optimize: 1 optimize: 1
force_ocr: true force_ocr: true
academic_settings:
pandoc:
csl_files:
apa: https://www.zotero.org/styles/apa
mla: https://www.zotero.org/styles/modern-language-association
chicago: https://www.zotero.org/styles/chicago-author-date
chicago-fullnote: https://www.zotero.org/styles/chicago-fullnote-bibliography
transcription_settings: transcription_settings:
whisper: whisper:
compute_type: int8 compute_type: int8
@@ -371,3 +378,12 @@ conversion_tools:
jpg_q85: JPEG (High Quality) jpg_q85: JPEG (High Quality)
jpg_q75: JPEG (Web Quality) jpg_q75: JPEG (Web Quality)
jpg_q60: JPEG (Aggressive Compression) jpg_q60: JPEG (Aggressive Compression)
pandoc_academic:
name: Pandoc (Academic Document)
command_template: "pandoc {main_document} -o {output} --bibliography {bib_file} --citeproc --csl {csl_style}"
timeout: 300
formats:
pdf_apa: "PDF with Bibliography (APA Style)"
pdf_mla: "PDF with Bibliography (MLA Style)"
pdf_chicago: "PDF with Bibliography (Chicago Style)"
pdf_chicago_fullnote: "PDF with Bibliography (Chicago Full Note)"

60
static/css/settings.css Normal file → Executable file
View File

@@ -9,13 +9,8 @@
border-bottom: 1px solid var(--divider-color); border-bottom: 1px solid var(--divider-color);
} }
.settings-header h1 {e .settings-header h1 {
margin: 0 0 0.25rem 0;
}
.settings-header p {
margin: 0; margin: 0;
color: var(--muted-text);
} }
.back-button { .back-button {
@@ -32,11 +27,18 @@
background-color: var(--primary-hover); background-color: var(--primary-hover);
} }
/* Main layout grid for settings */
.settings-main-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 2rem;
}
.settings-group { .settings-group {
border: 1px solid var(--border-color); border: 1px solid var(--border-color);
border-radius: 8px; border-radius: 8px;
padding: 1.5rem; padding: 1.5rem;
margin-bottom: 2rem; margin-bottom: 2rem; /* Kept for spacing when grid stacks */
} }
.settings-group legend { .settings-group legend {
@@ -70,36 +72,39 @@
.form-textarea { .form-textarea {
resize: vertical; resize: vertical;
min-height: 60px; min-height: 60px;
font-family: 'Courier New', Courier, monospace; /* Use a more standard monospace font stack */
font-family: Consolas, 'Courier New', Courier, monospace;
} }
.field-description { .field-description {
font-size: 0.85rem; font-size: 0.9rem;
color: var(--muted-text); color: var(--muted-text);
margin-top: -0.5rem; margin-top: 0.25rem;
margin-bottom: 1rem; margin-bottom: 0.75rem;
line-height: 1.4;
} }
.field-description code { .field-description code {
background-color: rgba(255,255,255,0.1); background-color: rgba(255,255,255,0.1);
padding: 0.1rem 0.3rem; padding: 0.1rem 0.3rem;
border-radius: 3px; border-radius: 3px;
font-size: 0.8rem; font-size: 0.85rem;
} }
.checkbox-group { .checkbox-group {
display: flex; display: flex;
align-items: center; align-items: center;
gap: 0.75rem; gap: 0.75rem;
margin-bottom: 0.5rem; margin-bottom: 0.75rem;
} }
.checkbox-group input[type="checkbox"] { .checkbox-group input[type="checkbox"] {
width: 1rem; width: 1.1rem;
height: 1rem; height: 1.1rem;
accent-color: var(--primary-color);
} }
.tool-grid { .tool-grid {
display: grid; display: grid;
grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)); grid-template-columns: 1fr; /* Simplified to single column within a settings group */
gap: 1rem; gap: 1rem;
} }
@@ -121,22 +126,23 @@
gap: 1rem; gap: 1rem;
margin-top: 1.5rem; margin-top: 1.5rem;
} }
.button-primary { .button-primary {
display: inline-block; display: inline-block;
background: var(--primary-color); background: var(--primary-color);
background-color: transparent; color: var(--bg-color);
border-color: var(--border-color); border: 1px solid var(--primary-color);
border-width: 1px;
color: #ffffff;
padding: 0.65rem 1.5rem; padding: 0.65rem 1.5rem;
font-size: 1rem; font-size: 1rem;
font-weight: 600; font-weight: 600;
border-radius: 5px; border-radius: 5px;
cursor: pointer; cursor: pointer;
transition: background-color 0.15s ease; transition: all 0.15s ease;
} }
.button-primary:hover { .button-primary:hover {
background: var(--primary-hover); background: var(--primary-hover);
color: var(--text-color);
border-color: var(--primary-hover);
} }
.save-status { .save-status {
@@ -192,3 +198,15 @@
.button-danger:hover { .button-danger:hover {
background-color: #ff8f8f; background-color: #ff8f8f;
} }
/* Responsive adjustments */
@media (max-width: 768px) {
.danger-action {
flex-direction: column;
align-items: flex-start;
}
.button-danger {
width: 100%;
text-align: center;
}
}

2
static/css/style.css Normal file → Executable file
View File

@@ -18,7 +18,7 @@
--border-color: rgba(255, 255, 255, 0.1); --border-color: rgba(255, 255, 255, 0.1);
--divider-color: rgba(255, 255, 255, 0.06); --divider-color: rgba(255, 255, 255, 0.06);
--font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; --font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
} }
/* Page */ /* Page */

0
static/css/style.old Normal file → Executable file
View File

0
static/favicon.ico Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

0
static/favicon.png Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 702 B

After

Width:  |  Height:  |  Size: 702 B

792
static/js/script.js Normal file → Executable file

File diff suppressed because it is too large Load Diff

0
static/js/script.old Normal file → Executable file
View File

50
static/js/settings.js Normal file → Executable file
View File

@@ -11,41 +11,45 @@ document.addEventListener('DOMContentLoaded', () => {
saveStatus.textContent = 'Saving...'; saveStatus.textContent = 'Saving...';
saveStatus.classList.remove('success', 'error'); saveStatus.classList.remove('success', 'error');
const formData = new FormData(settingsForm);
const settingsObject = {}; const settingsObject = {};
const elements = Array.from(settingsForm.elements);
// Convert FormData to a nested object for (const el of elements) {
formData.forEach((value, key) => { if (!el.name || el.type === 'submit') continue; // Skip elements without a name and submit buttons
// Handle checkboxes that might not be submitted if unchecked
if (key.includes('ocr_settings')) { let value;
const checkbox = document.querySelector(`[name="${key}"]`); const keys = el.name.split('.');
if (checkbox && checkbox.type === 'checkbox') {
value = checkbox.checked; // Determine value based on element type
} if (el.type === 'checkbox') {
value = el.checked;
} else if (el.tagName === 'TEXTAREA') {
// Convert comma-separated text into an array of strings
value = el.value.split(',')
.map(item => item.trim())
.filter(item => item); // Remove empty strings from the list
} else if (el.type === 'number') {
value = parseFloat(el.value);
if (isNaN(value)) {
value = null; // Represent empty number fields as null
}
} else {
value = el.value;
} }
const keys = key.split('.'); // Build nested object from dot-notation name
let current = settingsObject; let current = settingsObject;
keys.forEach((k, index) => { keys.forEach((k, index) => {
if (index === keys.length - 1) { if (index === keys.length - 1) {
current[k] = value; current[k] = value;
} else { } else {
current[k] = current[k] || {}; if (!current[k]) {
current[k] = {};
}
current = current[k]; current = current[k];
} }
}); });
}); }
// Ensure unchecked OCR boxes are sent as false
const ocrCheckboxes = settingsForm.querySelectorAll('input[type="checkbox"][name^="ocr_settings"]');
ocrCheckboxes.forEach(cb => {
const keys = cb.name.split('.');
if (!formData.has(cb.name)) {
// this is a bit of a hack but gets the job done for this specific form
settingsObject[keys[0]][keys[1]][keys[2]] = false;
}
});
try { try {
const response = await fetch('/settings/save', { const response = await fetch('/settings/save', {

0
supervisor.conf Normal file → Executable file
View File

0
swappy-20250920_155526.png Normal file → Executable file
View File

Before

Width:  |  Height:  |  Size: 358 KiB

After

Width:  |  Height:  |  Size: 358 KiB

0
templates/index.html Normal file → Executable file
View File

0
templates/index.old Normal file → Executable file
View File

114
templates/settings.html Normal file → Executable file
View File

@@ -6,9 +6,7 @@
<title>Settings - File Wizard</title> <title>Settings - File Wizard</title>
<link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}"> <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
<link rel="stylesheet" href="{{ url_for('static', path='/css/settings.css') }}"> <link rel="stylesheet" href="{{ url_for('static', path='/css/settings.css') }}">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap" rel="stylesheet">
</head> </head>
<body> <body>
<div class="container"> <div class="container">
@@ -21,14 +19,44 @@
<main> <main>
<form id="settings-form"> <form id="settings-form">
<div class="settings-main-grid">
<fieldset class="settings-group">
<legend><h2>General Settings</h2></legend>
<div class="form-control">
<label for="app-public-url">App Public URL</label>
<p class="field-description">The public-facing base URL of the application (e.g., https://files.example.com). Used for generating absolute URLs in webhooks.</p>
<input type="text" id="app-public-url" name="app_settings.app_public_url" value="{{ config.app_settings.get('app_public_url', '') }}" class="form-input" placeholder="https://... ">
</div>
<div class="form-control">
<label for="app-max-file-size">Max Upload Size (MB)</label>
<input type="number" id="app-max-file-size" name="app_settings.max_file_size_mb" value="{{ config.app_settings.max_file_size_mb }}" class="form-input">
</div>
<div class="form-control">
<label for="app-allowed-extensions">Allowed File Extensions for Conversion</label>
<p class="field-description">A comma-separated list of file extensions (e.g., .pdf, .docx, .png). If empty, all files are allowed.</p>
<textarea id="app-allowed-extensions" name="app_settings.allowed_all_extensions" class="form-textarea" rows="2">{{ config.app_settings.get('allowed_all_extensions', []) | join(', ') }}</textarea>
</div>
</fieldset>
<fieldset class="settings-group"> <fieldset class="settings-group">
<legend><h2>General Settings</h2></legend> <legend><h2>Performance Tuning</h2></legend>
<div class="form-control"> <div class="form-control">
<label for="app-max-file-size">Max Upload Size (MB)</label> <label for="perf-model-concurrency">Model Concurrency Limit</label>
<input type="number" id="app-max-file-size" name="app_settings.max_file_size_mb" value="{{ config.app_settings.max_file_size_mb }}" class="form-input"> <p class="field-description">Maximum number of AI models (e.g., Piper TTS) that can run in parallel. Helps prevent CPU/GPU overload.</p>
</div> <input type="number" id="perf-model-concurrency" name="app_settings.model_concurrency" value="{{ config.app_settings.get('model_concurrency', 1) }}" class="form-input">
</fieldset> </div>
<div class="form-control">
<label for="perf-model-timeout">Model Inactivity Timeout (seconds)</label>
<p class="field-description">Time in seconds before an unused Whisper model is unloaded from memory.</p>
<input type="number" id="perf-model-timeout" name="app_settings.model_inactivity_timeout" value="{{ config.app_settings.get('model_inactivity_timeout', 1800) }}" class="form-input">
</div>
<div class="form-control">
<label for="perf-cache-interval">Cache Check Interval (seconds)</label>
<p class="field-description">How often to check for inactive models to unload.</p>
<input type="number" id="perf-cache-interval" name="app_settings.cache_check_interval" value="{{ config.app_settings.get('cache_check_interval', 300) }}" class="form-input">
</div>
</fieldset>
</div>
<fieldset class="settings-group"> <fieldset class="settings-group">
<legend><h2>OCR (ocrmypdf)</h2></legend> <legend><h2>OCR (ocrmypdf)</h2></legend>
@@ -48,6 +76,7 @@
<fieldset class="settings-group"> <fieldset class="settings-group">
<legend><h2>Transcription (Whisper)</h2></legend> <legend><h2>Transcription (Whisper)</h2></legend>
<p class="field-description">Device settings (CPU/GPU) are configured via environment variables (see documentation).</p>
<div class="form-control"> <div class="form-control">
<label for="whisper-compute-type">Compute Type</label> <label for="whisper-compute-type">Compute Type</label>
<select id="whisper-compute-type" name="transcription_settings.whisper.compute_type" class="form-select"> <select id="whisper-compute-type" name="transcription_settings.whisper.compute_type" class="form-select">
@@ -58,6 +87,71 @@
</div> </div>
</fieldset> </fieldset>
<fieldset class="settings-group">
<legend><h2>Authentication (OIDC)</h2></legend>
<p class="field-description">Used for logging in users. Requires `LOCAL_ONLY=False` environment variable.</p>
<div class="form-control">
<label for="auth-client-id">Client ID</label>
<input type="text" id="auth-client-id" name="auth_settings.oidc_client_id" value="{{ config.auth_settings.get('oidc_client_id', '') }}" class="form-input">
</div>
<div class="form-control">
<label for="auth-client-secret">Client Secret</label>
<input type="password" id="auth-client-secret" name="auth_settings.oidc_client_secret" value="{{ config.auth_settings.get('oidc_client_secret', '') }}" class="form-input">
</div>
<div class="form-control">
<label for="auth-metadata-url">Server Metadata URL</label>
<input type="text" id="auth-metadata-url" name="auth_settings.oidc_server_metadata_url" value="{{ config.auth_settings.get('oidc_server_metadata_url', '') }}" class="form-input" placeholder="https://your-auth-server/.well-known/openid-configuration">
</div>
<div class="form-control">
<label for="auth-admin-users">Admin User Emails</label>
<p class="field-description">Comma-separated list of email addresses for users who should have admin rights.</p>
<textarea id="auth-admin-users" name="auth_settings.admin_users" class="form-textarea" rows="2">{{ config.auth_settings.get('admin_users', []) | join(', ') }}</textarea>
</div>
</fieldset>
<fieldset class="settings-group">
<legend><h2>Webhooks</h2></legend>
<p class="field-description">Allow programmatic access and job status callbacks.</p>
<div class="form-control checkbox-group">
<input type="checkbox" id="webhook-enabled" name="webhook_settings.enabled" {% if config.webhook_settings.get('enabled') %}checked{% endif %}>
<label for="webhook-enabled">Enable Webhook API</label>
</div>
<div class="form-control checkbox-group">
<input type="checkbox" id="webhook-chunked-uploads" name="webhook_settings.allow_chunked_api_uploads" {% if config.webhook_settings.get('allow_chunked_api_uploads') %}checked{% endif %}>
<label for="webhook-chunked-uploads">Allow Chunked Uploads via API</label>
</div>
<div class="form-control">
<label for="webhook-allowed-urls">Allowed Callback URLs</label>
<p class="field-description">Comma-separated list of URLs or domain prefixes that are allowed for callbacks (e.g., https://n8n.example.com).</p>
<textarea id="webhook-allowed-urls" name="webhook_settings.allowed_callback_urls" class="form-textarea" rows="2">{{ config.webhook_settings.get('allowed_callback_urls', []) | join(', ') }}</textarea>
</div>
<div class="form-control">
<label for="webhook-token">Callback Bearer Token</label>
<p class="field-description">If set, this token will be sent in the `Authorization` header for all callback requests.</p>
<input type="password" id="webhook-token" name="webhook_settings.callback_bearer_token" value="{{ config.webhook_settings.get('callback_bearer_token', '') }}" class="form-input">
</div>
</fieldset>
<fieldset class="settings-group">
<legend><h2>TTS (Piper)</h2></legend>
<div class="form-control checkbox-group">
<input type="checkbox" id="tts-piper-cuda" name="tts_settings.piper.use_cuda" {% if config.tts_settings.piper.get('use_cuda') %}checked{% endif %}>
<label for="tts-piper-cuda">Use CUDA (GPU)</label>
</div>
<div class="form-control">
<label for="tts-piper-length">Length Scale</label>
<input type="number" step="0.1" id="tts-piper-length" name="tts_settings.piper.synthesis_config.length_scale" value="{{ config.tts_settings.piper.synthesis_config.get('length_scale', 1.0) }}" class="form-input">
</div>
<div class="form-control">
<label for="tts-piper-noise">Noise Scale</label>
<input type="number" step="0.1" id="tts-piper-noise" name="tts_settings.piper.synthesis_config.noise_scale" value="{{ config.tts_settings.piper.synthesis_config.get('noise_scale', 0.667) }}" class="form-input">
</div>
<div class="form-control">
<label for="tts-piper-noise-w">Noise W</label>
<input type="number" step="0.1" id="tts-piper-noise-w" name="tts_settings.piper.synthesis_config.noise_w" value="{{ config.tts_settings.piper.synthesis_config.get('noise_w', 0.8) }}" class="form-input">
</div>
</fieldset>
<fieldset class="settings-group"> <fieldset class="settings-group">
<legend><h2>Conversion Tools</h2></legend> <legend><h2>Conversion Tools</h2></legend>
<p class="field-description"> <p class="field-description">