Drag and Drop

2025-09-17 18:45:55 +00:00
parent 2115238217
commit 20e41b67a7
10 changed files with 1358 additions and 379 deletions

Dockerfile (new file, +42 lines)

@@ -0,0 +1,42 @@
# Dockerfile
FROM python:3.13.7-slim
RUN apt-get update && apt-get install -y --no-install-recommends \
tesseract-ocr \
ghostscript \
poppler-utils \
libreoffice \
imagemagick \
graphicsmagick \
libvips-tools \
ffmpeg \
libheif-examples \
inkscape \
calibre \
build-essential \
pkg-config \
git \
curl \
texlive \
texlive-latex-extra \
texlive-xetex \
&& rm -rf /var/lib/apt/lists/*
# Set working directory inside the container
WORKDIR /app
# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the app
COPY . .
# Expose the app port
EXPOSE 8000
RUN chmod +x run.sh
# Command to run when container starts
CMD ["./run.sh"]

docker-compose.yml (new file, +10 lines)

@@ -0,0 +1,10 @@
version: "3.9"
services:
  web:
    build: .
    ports:
      - "8000:8000"  # the image EXPOSEs 8000 and run.sh binds gunicorn to 0.0.0.0:8000
    volumes:
      - .:/app  # optional: mount code for live changes
    environment:
      - FLASK_ENV=development

main.py (modified, 401 changed lines)

@@ -6,7 +6,7 @@ import uuid
 import shlex
 import yaml
 from contextlib import asynccontextmanager
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Any
@@ -21,17 +21,21 @@ from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from huey import SqliteHuey
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, field_serializer # MODIFIED: Import field_serializer
-from sqlalchemy import (Column, DateTime, Integer, String, Text,
-                        create_engine, delete, event)
-from sqlalchemy.orm import Session, declarative_base, sessionmaker
+from sqlalchemy import (Column, DateTime, Integer, String, Text,
+                        create_engine, delete, event, text)
 from sqlalchemy.pool import NullPool
 from string import Formatter
+from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from werkzeug.utils import secure_filename
+from typing import List as TypingList

 # --------------------------------------------------------------------------------
 # --- 1. CONFIGURATION
 # --------------------------------------------------------------------------------
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)

 class AppPaths(BaseModel):
     BASE_DIR: Path = Path(__file__).resolve().parent
@@ -43,30 +47,46 @@ class AppPaths(BaseModel):
 PATHS = AppPaths()
 APP_CONFIG: Dict[str, Any] = {}
+PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
+PATHS.PROCESSED_DIR.mkdir(exist_ok=True)

 def load_app_config():
     global APP_CONFIG
     try:
-        with open(PATHS.SETTINGS_FILE, 'r') as f:
-            APP_CONFIG = yaml.safe_load(f)
-        APP_CONFIG['app_settings']['max_file_size_bytes'] = APP_CONFIG['app_settings']['max_file_size_mb'] * 1024 * 1024
-        allowed_extensions = {
-            ".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif",
-            ".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg",
-            ".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi",
-            ".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi",
-            ".azw3", ".pptx", ".xlsx"
-        }
-        APP_CONFIG['app_settings']['allowed_all_extensions'] = allowed_extensions
+        with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
+            cfg_raw = yaml.safe_load(f) or {}
+        # basic defaults
+        defaults = {
+            "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []},
+            "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
+            "conversion_tools": {},
+            "ocr_settings": {"ocrmypdf": {}}
+        }
+        # shallow merge (safe for top-level keys)
+        cfg = defaults.copy()
+        cfg.update(cfg_raw)
+        # normalize app settings
+        app_settings = cfg.get("app_settings", {})
+        max_mb = app_settings.get("max_file_size_mb", 100)
+        app_settings["max_file_size_bytes"] = int(max_mb) * 1024 * 1024
+        allowed = app_settings.get("allowed_all_extensions", [])
+        if not isinstance(allowed, (list, set)):
+            allowed = list(allowed)
+        app_settings["allowed_all_extensions"] = set(allowed)
+        cfg["app_settings"] = app_settings
+        APP_CONFIG = cfg
         logger.info("Successfully loaded settings from settings.yml")
     except (FileNotFoundError, yaml.YAMLError) as e:
-        logger.error(f"Could not load settings.yml: {e}. App may not function correctly.")
-        APP_CONFIG = {}
+        logging.getLogger(__name__).exception(f"Could not load settings.yml: {e}. Using defaults.")
+        APP_CONFIG = {
+            "app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set()},
+            "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
+            "conversion_tools": {},
+            "ocr_settings": {"ocrmypdf": {}}
+        }
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
-PATHS.PROCESSED_DIR.mkdir(exist_ok=True)

 # --------------------------------------------------------------------------------
 # --- 2. DATABASE & Schemas
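A note on the merge strategy above (not part of the diff): `defaults.copy()` followed by `cfg.update(cfg_raw)` merges only at the top level, which is why the in-code comment calls it safe for top-level keys. A minimal Python sketch of the consequence:

defaults = {"app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []}}
user_cfg = {"app_settings": {"max_file_size_mb": 2000}}

merged = defaults.copy()
merged.update(user_cfg)  # the nested dict is replaced wholesale, not merged

print(merged["app_settings"])  # {'max_file_size_mb': 2000}
# 'allowed_all_extensions' is gone, which is why load_app_config re-reads
# app_settings with .get() defaults after the merge.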
@@ -77,8 +97,6 @@ engine = create_engine(
     poolclass=NullPool,
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-# THIS IS THE CRITICAL FIX
 Base = declarative_base()

 @event.listens_for(engine, "connect")
@@ -102,11 +120,13 @@ class Job(Base):
     progress = Column(Integer, default=0)
     original_filename = Column(String)
     input_filepath = Column(String)
+    input_filesize = Column(Integer, nullable=True)
     processed_filepath = Column(String, nullable=True)
+    output_filesize = Column(Integer, nullable=True)
     result_preview = Column(Text, nullable=True)
     error_message = Column(Text, nullable=True)
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))

 def get_db():
     db = SessionLocal()
@@ -120,6 +140,7 @@ class JobCreate(BaseModel):
     task_type: str
     original_filename: str
     input_filepath: str
+    input_filesize: int | None = None
     processed_filepath: str | None = None

 class JobSchema(BaseModel):
@@ -128,6 +149,8 @@ class JobSchema(BaseModel):
     status: str
     progress: int
     original_filename: str
+    input_filesize: int | None = None
+    output_filesize: int | None = None
     processed_filepath: str | None = None
     result_preview: str | None = None
     error_message: str | None = None
@@ -135,8 +158,14 @@ class JobSchema(BaseModel):
     updated_at: datetime
     model_config = ConfigDict(from_attributes=True)

+    # NEW: This serializer ensures the datetime string sent to the frontend ALWAYS
+    # includes the 'Z' UTC indicator, fixing the timezone bug.
+    @field_serializer('created_at', 'updated_at')
+    def serialize_dt(self, dt: datetime, _info):
+        return dt.isoformat() + "Z"

 # --------------------------------------------------------------------------------
-# --- 3. CRUD OPERATIONS (No Changes)
+# --- 3. CRUD OPERATIONS
 # --------------------------------------------------------------------------------
 def get_job(db: Session, job_id: str):
     return db.query(Job).filter(Job.id == job_id).first()
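An aside on the serializer (not part of the diff): SQLAlchemy's plain DateTime column hands naive datetimes back from SQLite, so appending "Z" produces a valid UTC timestamp. A self-contained sketch of the same pattern (StampedModel is illustrative, not from the repo):

from datetime import datetime
from pydantic import BaseModel, field_serializer

class StampedModel(BaseModel):  # hypothetical stand-in for JobSchema
    created_at: datetime

    @field_serializer('created_at')
    def serialize_dt(self, dt: datetime, _info):
        return dt.isoformat() + "Z"

m = StampedModel(created_at=datetime(2025, 9, 17, 18, 45, 55))
print(m.model_dump())  # {'created_at': '2025-09-17T18:45:55Z'}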
@@ -163,80 +192,120 @@ def update_job_status(db: Session, job_id: str, status: str, progress: int = Non
     db.refresh(db_job)
     return db_job

-def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
+def mark_job_as_completed(db: Session, job_id: str, output_filepath_str: str | None = None, preview: str | None = None):
     db_job = get_job(db, job_id)
     if db_job and db_job.status != 'cancelled':
         db_job.status = "completed"
         db_job.progress = 100
         if preview:
             db_job.result_preview = preview.strip()[:2000]
+        if output_filepath_str:
+            try:
+                output_path = Path(output_filepath_str)
+                if output_path.exists():
+                    db_job.output_filesize = output_path.stat().st_size
+            except Exception:
+                logger.exception(f"Could not stat output file {output_filepath_str} for job {job_id}")
         db.commit()
     return db_job

-# ... (The rest of the file is unchanged and remains the same) ...
 # --------------------------------------------------------------------------------
 # --- 4. BACKGROUND TASK SETUP
 # --------------------------------------------------------------------------------
 huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH)

-# --- START: NEW WHISPER MODEL CACHING ---
-# This dictionary will live in the memory of the Huey worker process,
-# allowing us to reuse loaded models across tasks.
+# Whisper model cache per worker process
 WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {}

 def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel:
-    """
-    Loads a Whisper model into the cache if not present, and returns the model.
-    This ensures a model is only loaded into memory once per worker process.
-    """
-    if model_size not in WHISPER_MODELS_CACHE:
-        compute_type = whisper_settings.get('compute_type', 'int8')
-        logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory...")
-        model = WhisperModel(model_size, device="cpu", compute_type=compute_type)
-        WHISPER_MODELS_CACHE[model_size] = model
-        logger.info(f"Model '{model_size}' loaded successfully.")
-    else:
+    if model_size in WHISPER_MODELS_CACHE:
         logger.info(f"Found model '{model_size}' in cache. Reusing.")
-    return WHISPER_MODELS_CACHE[model_size]
-# --- END: NEW WHISPER MODEL CACHING ---
+        return WHISPER_MODELS_CACHE[model_size]
+    device = whisper_settings.get("device", "cpu")
+    compute_type = whisper_settings.get('compute_type', 'int8')
+    logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory on device={device}...")
+    try:
+        model = WhisperModel(model_size, device=device, compute_type=compute_type)
+    except Exception:
+        logger.exception("Failed to load whisper model")
+        raise
+    WHISPER_MODELS_CACHE[model_size] = model
+    logger.info(f"Model '{model_size}' loaded successfully.")
+    return model

+# Helper: safe run_command (trimmed logs + timeout)
+def run_command(argv: TypingList[str], timeout: int = 300):
+    try:
+        res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        raise Exception(f"Command timed out after {timeout}s")
+    if res.returncode != 0:
+        stderr = (res.stderr or "")[:4000]
+        stdout = (res.stdout or "")[:4000]
+        raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
+    return res

+# Helper: validate and build command from template with allowlist
+ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth", "filter"}

+def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> TypingList[str]:
+    """
+    Validate placeholders against ALLOWED_VARS and build a safe argv list.
+    If a template uses allowed placeholders that are missing from `mapping`,
+    auto-fill sensible defaults:
+      - 'filter' -> mapping.get('output_ext', '')
+      - others -> empty string
+    This prevents KeyError while preserving the allowlist security check.
+    """
+    fmt = Formatter()
+    used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
+    bad = used - ALLOWED_VARS
+    if bad:
+        raise ValueError(f"Command template contains disallowed placeholders: {bad}")
+    # auto-fill missing allowed placeholders with safe defaults
+    safe_mapping = dict(mapping)  # shallow copy to avoid mutating caller mapping
+    for name in used:
+        if name not in safe_mapping:
+            if name == "filter":
+                safe_mapping[name] = safe_mapping.get("output_ext", "")
+            else:
+                safe_mapping[name] = ""
+    formatted = template_str.format(**safe_mapping)
+    return shlex.split(formatted)

 @huey.task()
 def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict):
     db = SessionLocal()
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing")
-        # --- MODIFIED: Use the caching function to get the model ---
         model = get_whisper_model(model_size, whisper_settings)
         logger.info(f"Starting transcription for job {job_id}")
         segments, info = model.transcribe(input_path_str, beam_size=5)
         full_transcript = []
         for segment in segments:
             job_check = get_job(db, job_id) # Check for cancellation during long tasks
             if job_check.status == 'cancelled':
                 logger.info(f"Job {job_id} cancelled during transcription.")
                 return
             if info.duration > 0:
                 progress = int((segment.end / info.duration) * 100)
                 update_job_status(db, job_id, "processing", progress=progress)
             full_transcript.append(segment.text.strip())
         transcript_text = "\n".join(full_transcript)
-        # write atomically to avoid partial files
+        # atomic write of transcript — keep the real extension and mark tmp in the name
         out_path = Path(output_path_str)
-        tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp")
+        tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
         with tmp_out.open("w", encoding="utf-8") as f:
             f.write(transcript_text)
         tmp_out.replace(out_path)
-        mark_job_as_completed(db, job_id, preview=transcript_text)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=transcript_text)
         logger.info(f"Transcription for job {job_id} completed.")
     except Exception:
         logger.exception(f"ERROR during transcription for job {job_id}")
@@ -245,13 +314,13 @@ def run_transcription_task(job_id: str, input_path_str: str, output_path_str: st
         Path(input_path_str).unlink(missing_ok=True)
         db.close()

-# Other tasks remain unchanged
 @huey.task()
 def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict):
     db = SessionLocal()
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing")
         logger.info(f"Starting PDF OCR for job {job_id}")
         ocrmypdf.ocr(input_path_str, output_path_str,
@@ -263,7 +332,7 @@ def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr
         with open(output_path_str, "rb") as f:
             reader = pypdf.PdfReader(f)
             preview = "\n".join(page.extract_text() or "" for page in reader.pages)
-        mark_job_as_completed(db, job_id, preview=preview)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=preview)
         logger.info(f"PDF OCR for job {job_id} completed.")
     except Exception:
         logger.exception(f"ERROR during PDF OCR for job {job_id}")
@@ -277,13 +346,18 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
     db = SessionLocal()
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing", progress=50)
         logger.info(f"Starting Image OCR for job {job_id}")
         text = pytesseract.image_to_string(Image.open(input_path_str))
-        with open(output_path_str, "w", encoding="utf-8") as f:
+        # atomic write of OCR text
+        out_path = Path(output_path_str)
+        tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
+        with tmp_out.open("w", encoding="utf-8") as f:
             f.write(text)
-        mark_job_as_completed(db, job_id, preview=text)
+        tmp_out.replace(out_path)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=text)
         logger.info(f"Image OCR for job {job_id} completed.")
     except Exception:
         logger.exception(f"ERROR during Image OCR for job {job_id}")
@@ -300,14 +374,18 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
     temp_output_file = None
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing", progress=25)
         logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}")
         tool_config = conversion_tools_config.get(tool)
-        if not tool_config: raise ValueError(f"Unknown conversion tool: {tool}")
+        if not tool_config:
+            raise ValueError(f"Unknown conversion tool: {tool}")
         input_path = Path(input_path_str)
         output_path = Path(output_path_str)
         current_input_path = input_path

-        # Pre-processing for specific tools
         if tool == "mozjpeg":
             temp_input_file = input_path.with_suffix('.temp.ppm')
             logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}")
@@ -317,22 +395,12 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
                 err = (pre_conv_result.stderr or "")[:4000]
                 raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}")
             current_input_path = temp_input_file

         update_job_status(db, job_id, "processing", progress=50)

-        # Build safe mapping for formatting and validate placeholders
-        ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"}
-        def validate_and_build_command(template_str: str, mapping: dict):
-            fmt = Formatter()
-            used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
-            bad = used - ALLOWED_VARS
-            if bad:
-                raise ValueError(f"Command template contains disallowed placeholders: {bad}")
-            formatted = template_str.format(**mapping)
-            return shlex.split(formatted)

-        # Use a temporary output path and atomically move into place after success
-        temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp")
+        # prepare temporary output and mapping
+        # use a temp filename that preserves the real extension, e.g. file.tmp-<uuid>.pdf
+        temp_output_file = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")

-        # Prepare mapping
         mapping = {
             "input": str(current_input_path),
             "output": str(temp_output_file),
@@ -340,7 +408,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
             "output_ext": output_path.suffix.lstrip('.'),
         }

-        # Allow tool-specific adjustments to mapping
+        # tool specific mapping adjustments
        if tool.startswith("ghostscript"):
             device, setting = task_key.split('_')
             mapping.update({"device": device, "dpi": setting, "preset": setting})
@@ -358,38 +426,30 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
             _, quality = task_key.split('_')
             quality = quality.replace('q', '')
             mapping.update({"quality": quality})
+        elif tool == "libreoffice":
+            target_ext = output_path.suffix.lstrip('.')
+            # tool_config may include a 'filters' mapping (see settings.yml example)
+            filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)
+            mapping["filter"] = filter_val

         command_template_str = tool_config["command_template"]
         command = validate_and_build_command(command_template_str, mapping)
         logger.info(f"Executing command: {' '.join(command)}")

-        # run with timeout and capture output; run_command helper ensures trimmed logs on failure
-        def run_command(argv: List[str], timeout: int = 300):
-            try:
-                res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
-            except subprocess.TimeoutExpired:
-                raise Exception(f"Command timed out after {timeout}s")
-            if res.returncode != 0:
-                stderr = (res.stderr or "")[:4000]
-                stdout = (res.stdout or "")[:4000]
-                raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
-            return res
+        # execute command with timeout and trimmed logs on error
         result = run_command(command, timeout=tool_config.get("timeout", 300))

-        if tool == "libreoffice":
-            expected_output_filename = input_path.with_suffix(output_path.suffix).name
-            generated_file = output_path.parent / expected_output_filename
-            if generated_file.exists():
-                # move generated file into place
-                generated_file.replace(output_path)
-            else:
-                raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}")
+        # handle LibreOffice special case: sometimes it writes differently
+        # Special-case LibreOffice: support per-format export filters via settings.yml

         # move temp output into final location atomically
         if temp_output_file and temp_output_file.exists():
             temp_output_file.replace(output_path)
-        mark_job_as_completed(db, job_id, preview=f"Successfully converted file.")
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=f"Successfully converted file.")
         logger.info(f"Conversion for job {job_id} completed.")
-    except Exception as e:
+    except Exception:
         logger.exception(f"ERROR during conversion for job {job_id}")
         update_job_status(db, job_id, "failed", error="See server logs for details.")
     finally:
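A small sketch (not part of the diff) of the temp-name scheme introduced above. Keeping the real extension matters because tools such as FFmpeg and vips infer the output format from the suffix they are handed:

import uuid
from pathlib import Path

output_path = Path("/app/processed/report_ab12.pdf")
tmp = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")
print(tmp.name)  # report_ab12.tmp-<32 hex chars>.pdf

# After the tool succeeds, Path.replace() is an atomic rename on the same filesystem:
# tmp.replace(output_path)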
@@ -415,13 +475,14 @@ app = FastAPI(lifespan=lifespan)
 app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static")
 templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates")

-async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
+async def save_upload_file_chunked(upload_file: UploadFile, destination: Path) -> int:
     """
-    Streams the uploaded file in chunks directly to a file on disk.
-    This is memory-efficient and reliable for large files.
+    Write upload to a tmp file in chunks, then atomically move to final destination.
+    Returns the final size of the file in bytes.
     """
     max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024)
-    tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp")
+    # make a temp filename that keeps the real extension, e.g. file.tmp-<uuid>.pdf
+    tmp = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}")
     size = 0
     try:
         with tmp.open("wb") as buffer:
@@ -433,17 +494,16 @@ async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
                 if size > max_size:
                     raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
                 buffer.write(chunk)
-        # atomic move into place
         tmp.replace(destination)
+        return size
     except Exception:
         tmp.unlink(missing_ok=True)
         raise

 def is_allowed_file(filename: str, allowed_extensions: set) -> bool:
     return Path(filename).suffix.lower() in allowed_extensions
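The read loop itself sits in unchanged context, so the diff elides it. Below is a sketch (not part of the commit) of the usual FastAPI UploadFile pattern, matching the size check visible above; the helper name copy_capped and the 1 MiB chunk size are assumptions:

from fastapi import HTTPException, UploadFile

async def copy_capped(upload_file: UploadFile, buffer, max_size: int) -> int:
    """Stream an upload into `buffer`, enforcing the configured size cap."""
    size = 0
    while True:
        chunk = await upload_file.read(1024 * 1024)  # 1 MiB per read
        if not chunk:
            break
        size += len(chunk)
        if size > max_size:
            raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
        buffer.write(chunk)
    return size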
-# --- Routes (only transcription route is modified) ---
+# --- Routes (transcription route uses Huey task enqueuing) ---
 @app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED)
 async def submit_audio_transcription(
@@ -453,7 +513,7 @@ async def submit_audio_transcription(
 ):
     if not is_allowed_file(file.filename, {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus"}):
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file type.")
     whisper_config = APP_CONFIG.get("transcription_settings", {}).get("whisper", {})
     if model_size not in whisper_config.get("allowed_models", []):
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.")
@@ -461,24 +521,29 @@ async def submit_audio_transcription(
     job_id = uuid.uuid4().hex
     safe_basename = secure_filename(file.filename)
     stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix
     audio_filename = f"{stem}_{job_id}{suffix}"
     transcript_filename = f"{stem}_{job_id}.txt"
     upload_path = PATHS.UPLOADS_DIR / audio_filename
     processed_path = PATHS.PROCESSED_DIR / transcript_filename
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(
+        id=job_id,
+        task_type="transcription",
+        original_filename=file.filename,
+        input_filepath=str(upload_path),
+        input_filesize=input_size,
+        processed_filepath=str(processed_path)
+    )
     new_job = create_job(db=db, job=job_data)
-    # --- MODIFIED: Pass whisper_config to the task ---
+    # enqueue the Huey task (decorated function call enqueues when using huey)
     run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

-# --- Other routes remain unchanged ---
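A hypothetical client call (not part of the commit) against the route above. httpx is not in requirements.txt, and whether model_size travels as form data or a query parameter depends on the route signature, which the diff elides; form data is assumed here:

import httpx

with open("interview.mp3", "rb") as f:
    resp = httpx.post(
        "http://localhost:8000/transcribe-audio",
        files={"file": ("interview.mp3", f, "audio/mpeg")},
        data={"model_size": "base"},  # assumed form field
    )
resp.raise_for_status()
job = resp.json()
print(job["status_url"])  # poll /job/<id> for progress, then /download/<filename>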
@app.get("/") @app.get("/")
async def get_index(request: Request): async def get_index(request: Request):
@@ -493,23 +558,55 @@ async def get_index(request: Request):
@app.get("/settings") @app.get("/settings")
async def get_settings_page(request: Request): async def get_settings_page(request: Request):
try: try:
with open(PATHS.SETTINGS_FILE, 'r') as f: with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
current_config = yaml.safe_load(f) current_config = yaml.safe_load(f) or {}
except Exception as e: except Exception:
logger.error(f"Could not load settings.yml for settings page: {e}") logger.exception("Could not load settings.yml for settings page")
current_config = {} current_config = {}
return templates.TemplateResponse("settings.html", {"request": request, "config": current_config}) return templates.TemplateResponse("settings.html", {"request": request, "config": current_config})
def deep_merge(base: dict, updates: dict) -> dict:
"""
Recursively merge `updates` into `base`. Lists and scalars are replaced.
"""
for key, value in updates.items():
if (
key in base
and isinstance(base[key], dict)
and isinstance(value, dict)
):
base[key] = deep_merge(base[key], value)
else:
base[key] = value
return base
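A quick demonstration (not part of the diff) of deep_merge, which is what lets /settings/save update one nested key without wiping its siblings, in contrast to the shallow merge in load_app_config:

base = {"transcription_settings": {"whisper": {"compute_type": "int8",
                                               "allowed_models": ["tiny", "base"]}}}
updates = {"transcription_settings": {"whisper": {"compute_type": "float16"}}}

merged = deep_merge(base, updates)
print(merged["transcription_settings"]["whisper"])
# {'compute_type': 'float16', 'allowed_models': ['tiny', 'base']}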
@app.post("/settings/save") @app.post("/settings/save")
async def save_settings(new_config: Dict = Body(...)): async def save_settings(new_config: Dict = Body(...)):
tmp = PATHS.SETTINGS_FILE.with_suffix(".tmp")
try: try:
with open(PATHS.SETTINGS_FILE, 'w') as f: # load existing config if present
yaml.dump(new_config, f, default_flow_style=False, sort_keys=False) try:
with PATHS.SETTINGS_FILE.open("r", encoding="utf8") as f:
current_config = yaml.safe_load(f) or {}
except FileNotFoundError:
current_config = {}
# deep merge new values
merged = deep_merge(current_config, new_config)
# atomic write back
with tmp.open("w", encoding="utf8") as f:
yaml.safe_dump(merged, f, default_flow_style=False, sort_keys=False)
tmp.replace(PATHS.SETTINGS_FILE)
load_app_config() load_app_config()
return JSONResponse({"message": "Settings saved successfully."}) return JSONResponse({"message": "Settings updated successfully."})
except Exception as e: except Exception:
logger.error(f"Failed to save settings: {e}") logger.exception("Failed to update settings")
raise HTTPException(status_code=500, detail="Could not write to settings.yml.") tmp.unlink(missing_ok=True)
raise HTTPException(status_code=500, detail="Could not update settings.yml.")
@app.post("/settings/clear-history") @app.post("/settings/clear-history")
async def clear_job_history(db: Session = Depends(get_db)): async def clear_job_history(db: Session = Depends(get_db)):
@@ -518,9 +615,9 @@ async def clear_job_history(db: Session = Depends(get_db)):
         db.commit()
         logger.info(f"Cleared {num_deleted} jobs from history.")
         return {"deleted_count": num_deleted}
-    except Exception as e:
+    except Exception:
         db.rollback()
-        logger.error(f"Failed to clear job history: {e}")
+        logger.exception("Failed to clear job history")
         raise HTTPException(status_code=500, detail="Database error while clearing history.")

 @app.post("/settings/delete-files")
@@ -532,9 +629,9 @@ async def delete_processed_files():
             if f.is_file():
                 f.unlink()
                 deleted_count += 1
-        except Exception as e:
+        except Exception:
             errors.append(f.name)
-            logger.error(f"Could not delete processed file {f.name}: {e}")
+            logger.exception(f"Could not delete processed file {f.name}")
     if errors:
         raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}")
     logger.info(f"Deleted {deleted_count} files from processed directory.")
@@ -562,12 +659,14 @@ async def submit_file_conversion(file: UploadFile = File(...), output_format: st
     processed_filename = f"{original_stem}_{job_id}.{target_ext}"
     upload_path = PATHS.UPLOADS_DIR / upload_filename
     processed_path = PATHS.PROCESSED_DIR / processed_filename
-    await save_upload_file_chunked(file, upload_path)
+    input_size = await save_upload_file_chunked(file, upload_path)
     job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename,
-                         input_filepath=str(upload_path), processed_filepath=str(processed_path))
+                         input_filepath=str(upload_path),
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
     new_job = create_job(db=db, job=job_data)
     run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

 @app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
 async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -578,12 +677,15 @@ async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get
     unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}"
     upload_path = PATHS.UPLOADS_DIR / unique_filename
     processed_path = PATHS.PROCESSED_DIR / unique_filename
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename,
+                         input_filepath=str(upload_path),
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
     new_job = create_job(db=db, job=job_data)
     ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})
     run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

 @app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
 async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -596,11 +698,14 @@ async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(g
     unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}"
     upload_path = PATHS.UPLOADS_DIR / unique_filename
     processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt"
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename,
+                         input_filepath=str(upload_path),
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
     new_job = create_job(db=db, job=job_data)
     run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

 @app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED)
 async def cancel_job(job_id: str, db: Session = Depends(get_db)):
@@ -626,8 +731,7 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)):
 @app.get("/download/{filename}")
 async def download_file(filename: str):
     safe_filename = secure_filename(filename)
-    file_path = PATHS.PROCESSED_DIR / safe_filename
-    file_path = file_path.resolve()
+    file_path = (PATHS.PROCESSED_DIR / safe_filename).resolve()
     base = PATHS.PROCESSED_DIR.resolve()
     try:
         file_path.relative_to(base)
@@ -635,4 +739,15 @@ async def download_file(filename: str):
         raise HTTPException(status_code=403, detail="Access denied.")
     if not file_path.is_file():
         raise HTTPException(status_code=404, detail="File not found.")
     return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream")
+# Small health endpoint
+@app.get("/health")
+async def health():
+    try:
+        with engine.connect() as conn:
+            # SQLAlchemy 2.x requires raw SQL to be wrapped in text()
+            conn.execute(text("SELECT 1"))
+    except Exception:
+        logger.exception("Health check failed")
+        return JSONResponse({"ok": False}, status_code=500)
+    return {"ok": True}

requirements.txt (modified)

@@ -1,22 +1,145 @@
-# requirements.txt
-
-# Web framework
-fastapi
-uvicorn[standard]
-python-multipart
-jinja2
-
-# PDF OCR
-ocrmypdf
-PyPDF2
-
-# Audio Transcription
-faster-whisper
-# The following are core dependencies for faster-whisper,
-# but it's good to list them explicitly.
-# ctranslate2
-# transformers
-# torch # Note: torch is a dependency of transformers
-
-# Utilities
-werkzeug
+annotated-types==0.7.0
+anyio==4.10.0
+audioop-lts==0.2.2
+av==15.1.0
+azure-ai-documentintelligence==1.0.2
+azure-core==1.35.1
+azure-identity==1.25.0
+beautifulsoup4==4.13.5
+certifi==2025.8.3
+cffi==2.0.0
+chardet==5.2.0
+charset-normalizer==3.4.3
+click==8.2.1
+cobble==0.1.4
+coloredlogs==15.0.1
+cryptography==45.0.7
+css-parser==1.0.10
+ctranslate2==4.6.0
+defusedxml==0.7.1
+Deprecated==1.2.18
+deprecation==2.1.0
+et_xmlfile==2.0.0
+fastapi==0.116.1
+faster-whisper==1.2.0
+filelock==3.19.1
+flatbuffers==25.2.10
+fsspec==2025.9.0
+greenlet==3.2.4
+gunicorn==23.0.0
+h11==0.16.0
+hf-xet==1.1.10
+html5-parser==0.4.12
+html5lib==1.1
+httptools==0.6.4
+huey==2.5.3
+huggingface-hub==0.34.4
+humanfriendly==10.0
+idna==3.10
+imageio==2.37.0
+img2pdf==0.6.1
+isodate==0.7.2
+Jinja2==3.1.6
+lazy_loader==0.4
+lxml==6.0.1
+magika==0.6.2
+mammoth==1.10.0
+markdown-it-py==4.0.0
+markdownify==1.2.0
+markitdown==0.1.3
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mechanize==0.4.10
+mpmath==1.3.0
+msal==1.33.0
+msal-extensions==1.3.1
+msgpack==1.1.1
+networkx==3.5
+ninja==1.13.0
+numpy==2.2.6
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.3
+nvidia-nvjitlink-cu12==12.8.93
+nvidia-nvtx-cu12==12.8.90
+ocrmypdf==16.11.0
+olefile==0.47
+onnxruntime==1.22.1
+opencv-python-headless==4.12.0.88
+openpyxl==3.1.5
+packaging==25.0
+pandas==2.3.2
+pdfminer.six==20250506
+pi_heif==1.1.0
+pikepdf==9.11.0
+pillow==11.3.0
+pluggy==1.6.0
+protobuf==6.32.1
+pyclipper==1.3.0.post6
+pycparser==2.23
+pydantic==2.11.9
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+PyJWT==2.10.1
+pypdf==6.0.0
+PyPDF2==3.0.1
+PyQt6==6.9.1
+PyQt6-Qt6==6.9.2
+PyQt6-WebEngine==6.9.0
+PyQt6-WebEngine-Qt6==6.9.2
+PyQt6_sip==13.10.2
+pytesseract==0.3.13
+python-bidi==0.6.6
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-multipart==0.0.20
+python-pptx==1.0.2
+pytz==2025.2
+PyYAML==6.0.2
+regex==2025.9.1
+requests==2.32.5
+rich==14.1.0
+scikit-image==0.25.2
+scipy==1.16.2
+setuptools==80.9.0
+shapely==2.1.1
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.8
+SpeechRecognition==3.14.3
+SQLAlchemy==2.0.43
+standard-aifc==3.13.0
+standard-chunk==3.13.0
+starlette==0.47.3
+sympy==1.14.0
+tifffile==2025.9.9
+tokenizers==0.22.0
+torch==2.8.0
+torchvision==0.23.0
+tqdm==4.67.1
+triton==3.4.0
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+uvloop==0.21.0
+watchfiles==1.1.0
+webencodings==0.5.1
+websockets==15.0.1
+Werkzeug==3.1.3
+wrapt==1.17.3
+xlrd==2.0.2
+xlsxwriter==3.2.9
+youtube-transcript-api==1.0.3

run.sh (modified, 4 changed lines)

@@ -3,8 +3,8 @@
 echo "Starting DocProcessor with Gunicorn..."
-exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
+exec gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
 echo "Done"
 echo "Starting huey..."
-exec huey_consumer.py main.huey -w 2 &
+exec huey_consumer.py main.huey -w 4 &
 echo "Done"

settings.yml.default (new file, +272 lines)

@@ -0,0 +1,272 @@
# settings.yml
# General application settings
app_settings:
  max_file_size_mb: 2000 # Maximum upload size in Megabytes
  # Allowed extensions (list will be normalized to a set by the server)
  allowed_all_extensions:
    - .pdf
    - .ps
    - .eps
    - .png
    - .jpg
    - .jpeg
    - .tiff
    - .tif
    - .gif
    - .bmp
    - .webp
    - .svg
    - .jxl
    - .avif
    - .ppm
    - .mp3
    - .m4a
    - .ogg
    - .flac
    - .opus
    - .wav
    - .aac
    - .mp4
    - .mkv
    - .mov
    - .webm
    - .avi
    - .flv
    - .md
    - .txt
    - .html
    - .docx
    - .odt
    - .rst
    - .epub
    - .mobi
    - .azw3
    - .pptx
    - .xlsx

# Settings for Optical Character Recognition (OCR) tasks
ocr_settings:
  ocrmypdf:
    deskew: true
    clean: true
    optimize: 1
    force_ocr: true

# Settings for audio transcription tasks
transcription_settings:
  whisper:
    compute_type: "int8"
    allowed_models:
      - "tiny"
      - "base"
      - "small"
      - "medium"
      - "large-v3"
      - "distil-large-v2"
    # optional: specify device if workers have GPU (e.g. "cuda" or "cpu")
    # device: "cpu"

# --- Conversion Tool Definitions ---
# The server validates placeholders against an allowlist:
# {input}, {output}, {output_dir}, {output_ext}, {quality}, {speed},
# {preset}, {device}, {dpi}, {samplerate}, {bitdepth}, {filter}
conversion_tools:
  libreoffice:
    name: "LibreOffice"
    # Use {filter} so we can supply LibreOffice export filters like "txt:Text"
    command_template: 'libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}'
    timeout: 120
    # Optional: per-format export filter. If missing for a format, server falls back to the extension.
    filters:
      pdf: "pdf"
      docx: "docx"
      odt: "odt"
      html: "html"
      rtf: "rtf"
      txt: "txt:Text"
      xml: "xml"
      epub: "epub"
      xlsx: "xlsx"
      ods: "ods"
      csv: "csv:Text"
      pptx: "pptx"
      odp: "odp"
      svg: "svg"
    formats:
      pdf: "PDF"
      docx: "Word Document"
      odt: "OpenDocument Text"
      html: "HTML"
      rtf: "Rich Text Format"
      txt: "Plain Text"
      xml: "Word 2003 XML"
      epub: "EPUB"
      xlsx: "Excel Spreadsheet"
      ods: "OpenDocument Spreadsheet"
      csv: "CSV"
      pptx: "PowerPoint Presentation"
      odp: "OpenDocument Presentation"
      svg: "SVG"
  pandoc:
    name: "Pandoc"
    command_template: 'pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex'
    timeout: 60
    formats:
      docx: "Word Document"
      odt: "OpenDocument Text"
      pdf: "PDF"
      rtf: "Rich Text Format"
      txt: "Plain Text"
      tex: "LaTeX"
      man: "Groff Man Page"
      epub: "EPUB v3 Book"
      epub2: "EPUB v2 Book"
      html: "HTML"
      html5: "HTML5"
      pptx: "PowerPoint Presentation"
      beamer: "Beamer PDF Slides"
      slidy: "Slidy HTML Slides"
      md: "Markdown"
      rst: "reStructuredText"
      jira: "Jira Wiki Markup"
      mediawiki: "MediaWiki Markup"
  ghostscript_pdf:
    name: "Ghostscript (PDF)"
    # placeholders used: {preset}, {output}, {input}
    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
    timeout: 60
    formats:
      screen: "PDF (Optimized for Screen)"
      ebook: "PDF (Optimized for Ebooks)"
      printer: "PDF (Optimized for Print)"
      archive: "PDF/A (for Archiving)"
  ghostscript_image:
    name: "Ghostscript (Image)"
    # placeholders used: {device}, {dpi}, {output}, {input}
    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
    timeout: 60
    formats:
      jpeg_72: "JPEG Image (72 DPI)"
      jpeg_300: "JPEG Image (300 DPI)"
      png16m_150: "PNG Image (150 DPI)"
      png16m_300: "PNG Image (300 DPI)"
      tiff24nc_300: "TIFF Image (300 DPI)"
      tiff24nc_600: "TIFF Image (600 DPI)"
  calibre:
    name: "Calibre (ebook-convert)"
    command_template: 'ebook-convert {input} {output}'
    timeout: 60
    formats:
      epub: "EPUB"
      mobi: "MOBI"
      azw3: "Amazon Kindle"
      pdf: "PDF"
      docx: "Word Document"
  ffmpeg:
    name: "FFmpeg"
    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
    timeout: 300
    formats:
      mp4: "MP4 Video"
      mkv: "MKV Video"
      mov: "MOV Video"
      webm: "WebM Video"
      mp3: "MP3 Audio"
      wav: "WAV Audio"
      flac: "FLAC Audio"
      gif: "Animated GIF"
  vips:
    name: "VIPS"
    command_template: 'vips copy {input} {output}[Q=90]'
    timeout: 60
    formats:
      jpg: "JPEG Image (Q90)"
      png: "PNG Image"
      webp: "WebP Image (Q90)"
      tiff: "TIFF Image"
      avif: "AVIF Image"
  graphicsmagick:
    name: "GraphicsMagick"
    command_template: 'gm convert {input} -quality 90 {output}'
    timeout: 60
    formats:
      jpg: "JPEG Image (Q90)"
      png: "PNG Image"
      webp: "WebP Image"
      tiff: "TIFF Image"
      pdf: "PDF from Images"
  inkscape:
    name: "Inkscape"
    command_template: 'inkscape {input} --export-filename={output}'
    timeout: 30
    formats:
      svg: "SVG (Plain)"
      png: "PNG Image (96 DPI)"
      pdf: "PDF Document"
  libjxl:
    name: "libjxl (cjxl)"
    command_template: 'cjxl {input} {output} -q 90'
    timeout: 30
    formats:
      jxl: "JPEG XL (Q90)"
  resvg:
    name: "resvg"
    command_template: 'resvg {input} {output}'
    timeout: 30
    formats:
      png: "PNG from SVG"
  potrace:
    name: "Potrace"
    command_template: 'potrace {input} --svg -o {output}'
    timeout: 30
    formats:
      svg: "SVG from Bitmap"
  markitdown:
    name: "Markitdown"
    command_template: 'markitdown {input} -o {output}'
    timeout: 30
    formats:
      md: "Markdown from Everything!"
  pngquant:
    name: "pngquant"
    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
    timeout: 30
    formats:
      png_hq: "PNG (High Quality Compression)"
      png_mq: "PNG (Medium Quality Compression)"
      png_fast: "PNG (Fast Compression)"
  sox:
    name: "SoX Audio Converter"
    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
    timeout: 120
    formats:
      wav_48k_24b: "WAV (48kHz, 24-bit)"
      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
      flac_48k_24b: "FLAC (48kHz, 24-bit)"
      flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
      ogg_32k_16b: "Ogg Vorbis (32kHz)"
      ogg_16k_16b: "Ogg Vorbis (16kHz, Voice)"
  mozjpeg:
    name: "MozJPEG"
    command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
    timeout: 30
    formats:
      jpg_q85: "JPEG (High Quality)"
      jpg_q75: "JPEG (Web Quality)"
      jpg_q60: "JPEG (Aggressive Compression)"
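A short Python sketch (not part of the repo) of how the worker consumes a tool entry, mirroring run_conversion_task's LibreOffice branch and the filters table above:

tool_config = {
    "command_template": "libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}",
    "filters": {"txt": "txt:Text", "csv": "csv:Text"},
}
target_ext = "txt"
# fall back to the bare extension when no explicit filter is configured
filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)

mapping = {"input": "/app/uploads/notes_ab12.docx", "output_dir": "/app/processed", "filter": filter_val}
print(tool_config["command_template"].format(**mapping))
# libreoffice --headless --convert-to txt:Text --outdir /app/processed /app/uploads/notes_ab12.docx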

settings.yml (modified)

@@ -1,179 +1,242 @@
-# settings.yml
-# General application settings
 app_settings:
-  max_file_size_mb: 2000 # Maximum upload size in Megabytes
+  max_file_size_mb: '2000'
+  allowed_all_extensions:
+  - .pdf
+  - .ps
+  - .eps
+  - .png
+  - .jpg
+  - .jpeg
+  - .tiff
+  - .tif
+  - .gif
+  - .bmp
+  - .webp
+  - .svg
+  - .jxl
+  - .avif
+  - .ppm
+  - .mp3
+  - .m4a
+  - .ogg
+  - .flac
+  - .opus
+  - .wav
+  - .aac
+  - .mp4
+  - .mkv
+  - .mov
+  - .webm
+  - .avi
+  - .flv
+  - .md
+  - .txt
+  - .html
+  - .docx
+  - .odt
+  - .rst
+  - .epub
+  - .mobi
+  - .azw3
+  - .pptx
+  - .xlsx
-# Settings for Optical Character Recognition (OCR) tasks
 ocr_settings:
   ocrmypdf:
     deskew: true
     clean: true
     optimize: 1
     force_ocr: true
-# Settings for audio transcription tasks
 transcription_settings:
   whisper:
-    compute_type: "int8"
+    compute_type: int8
     allowed_models:
-    - "tiny"
-    - "base"
-    - "small"
-    - "medium"
-    - "large-v3"
-    - "distil-large-v2"
+    - tiny
+    - base
+    - small
+    - medium
+    - large-v3
+    - distil-large-v2
-# --- Conversion Tool Definitions ---
-# Each tool's command is a single string. The backend uses shlex to parse it,
-# so you can use quotes for arguments with spaces.
-# Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc.
 conversion_tools:
   libreoffice:
-    name: "LibreOffice"
-    command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}'
+    name: LibreOffice
+    command_template: libreoffice --headless --convert-to {filter} --outdir {output_dir}
+      {input}
+    timeout: 300
+    filters:
+      pdf: pdf
+      docx: docx
+      odt: odt
+      html: html
+      rtf: rtf
+      txt: txt:Text
+      xml: xml
+      epub: epub
+      xlsx: xlsx
+      ods: ods
+      csv: csv:Text
+      pptx: pptx
+      odp: odp
+      svg: svg
     formats:
-      pdf: "PDF"
-      docx: "Word Document"
-      odt: "OpenDocument Text"
-      html: "HTML"
-      rtf: "Rich Text Format"
-      txt: "Plain Text"
-      xml: "Word 2003 XML"
-      epub: "EPUB"
-      xlsx: "Excel Spreadsheet"
-      ods: "OpenDocument Spreadsheet"
-      csv: "CSV"
-      pptx: "PowerPoint Presentation"
-      odp: "OpenDocument Presentation"
-      svg: "SVG"
+      pdf: PDF
+      docx: Word Document
+      odt: OpenDocument Text
+      html: HTML
+      rtf: Rich Text Format
+      txt: Plain Text
+      xml: Word 2003 XML
+      epub: EPUB
+      xlsx: Excel Spreadsheet
+      ods: OpenDocument Spreadsheet
+      csv: CSV
+      pptx: PowerPoint Presentation
+      odp: OpenDocument Presentation
+      svg: SVG
   pandoc:
-    name: "Pandoc"
-    command_template: 'pandoc --standalone {input} -o {output}'
+    name: Pandoc
+    command_template: pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex
+    timeout: 300
     formats:
-      docx: "Word Document"
-      odt: "OpenDocument Text"
-      pdf: "PDF"
-      rtf: "Rich Text Format"
-      txt: "Plain Text"
-      tex: "LaTeX"
-      man: "Groff Man Page"
-      epub: "EPUB v3 Book"
-      epub2: "EPUB v2 Book"
-      html: "HTML"
-      html5: "HTML5"
-      pptx: "PowerPoint Presentation"
-      beamer: "Beamer PDF Slides"
-      slidy: "Slidy HTML Slides"
-      md: "Markdown"
-      rst: "reStructuredText"
-      jira: "Jira Wiki Markup"
-      mediawiki: "MediaWiki Markup"
+      docx: Word Document
+      odt: OpenDocument Text
+      pdf: PDF
+      rtf: Rich Text Format
+      txt: Plain Text
+      tex: LaTeX
+      man: Groff Man Page
+      epub: EPUB v3 Book
+      epub2: EPUB v2 Book
+      html: HTML
+      html5: HTML5
+      pptx: PowerPoint Presentation
+      beamer: Beamer PDF Slides
+      slidy: Slidy HTML Slides
+      md: Markdown
+      rst: reStructuredText
+      jira: Jira Wiki Markup
+      mediawiki: MediaWiki Markup
   ghostscript_pdf:
-    name: "Ghostscript (PDF)"
-    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
+    name: Ghostscript (PDF)
+    command_template: gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET
+      -dBATCH {preset} -sOutputFile={output} {input}
+    timeout: 60
     formats:
-      screen: "PDF (Optimized for Screen)"
-      ebook: "PDF (Optimized for Ebooks)"
-      printer: "PDF (Optimized for Print)"
-      archive: "PDF/A (for Archiving)"
+      screen: PDF (Optimized for Screen)
+      ebook: PDF (Optimized for Ebooks)
+      printer: PDF (Optimized for Print)
+      archive: PDF/A (for Archiving)
   ghostscript_image:
-    name: "Ghostscript (Image)"
-    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
+    name: Ghostscript (Image)
+    command_template: gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output}
+      {input}
+    timeout: 60
     formats:
-      jpeg_72: "JPEG Image (72 DPI)"
-      jpeg_300: "JPEG Image (300 DPI)"
-      png16m_150: "PNG Image (150 DPI)"
-      png16m_300: "PNG Image (300 DPI)"
-      tiff24nc_300: "TIFF Image (300 DPI)"
-      tiff24nc_600: "TIFF Image (600 DPI)"
+      jpeg_72: JPEG Image (72 DPI)
+      jpeg_300: JPEG Image (300 DPI)
+      png16m_150: PNG Image (150 DPI)
+      png16m_300: PNG Image (300 DPI)
+      tiff24nc_300: TIFF Image (300 DPI)
+      tiff24nc_600: TIFF Image (600 DPI)
   calibre:
-    name: "Calibre (ebook-convert)"
-    command_template: 'ebook-convert {input} {output}'
+    name: Calibre (ebook-convert)
+    command_template: ebook-convert {input} {output}
+    timeout: 600
     formats:
-      epub: "EPUB"
-      mobi: "MOBI"
-      azw3: "Amazon Kindle"
-      pdf: "PDF"
-      docx: "Word Document"
+      epub: EPUB
+      mobi: MOBI
+      azw3: Amazon Kindle
+      pdf: PDF
+      docx: Word Document
   ffmpeg:
-    name: "FFmpeg"
-    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
+    name: FFmpeg
+    command_template: ffmpeg -i {input} -y -preset medium {output}
+    timeout: 600
     formats:
-      mp4: "MP4 Video"
-      mkv: "MKV Video"
-      mov: "MOV Video"
-      webm: "WebM Video"
-      mp3: "MP3 Audio"
-      wav: "WAV Audio"
-      flac: "FLAC Audio"
-      gif: "Animated GIF"
+      mp4: MP4 Video
+      mkv: MKV Video
+      mov: MOV Video
+      webm: WebM Video
+      mp3: MP3 Audio
+      wav: WAV Audio
+      flac: FLAC Audio
+      gif: Animated GIF
   vips:
-    name: "VIPS"
-    command_template: 'vips copy {input} {output}[Q=90]'
+    name: VIPS
+    command_template: vips copy {input} {output}[Q=90]
+    timeout: 60
     formats:
-      jpg: "JPEG Image (Q90)"
-      png: "PNG Image"
-      webp: "WebP Image (Q90)"
-      tiff: "TIFF Image"
-      avif: "AVIF Image"
+      jpg: JPEG Image (Q90)
+      png: PNG Image
+      webp: WebP Image (Q90)
+      tiff: TIFF Image
+      avif: AVIF Image
   graphicsmagick:
-    name: "GraphicsMagick"
-    command_template: 'gm convert {input} -quality 90 {output}'
+    name: GraphicsMagick
+    command_template: gm convert {input} -quality 90 {output}
+    timeout: 60
     formats:
-      jpg: "JPEG Image (Q90)"
-      png: "PNG Image"
-      webp: "WebP Image"
-      tiff: "TIFF Image"
-      pdf: "PDF from Images"
+      jpg: JPEG Image (Q90)
+      png: PNG Image
+      webp: WebP Image
+      tiff: TIFF Image
+      pdf: PDF from Images
   inkscape:
-    name: "Inkscape"
-    command_template: 'inkscape {input} --export-filename={output}'
+    name: Inkscape
+    command_template: inkscape {input} --export-filename={output}
+    timeout: 30
     formats:
-      svg: "SVG (Plain)"
-      png: "PNG Image (96 DPI)"
-      pdf: "PDF Document"
+      svg: SVG (Plain)
+      png: PNG Image (96 DPI)
+      pdf: PDF Document
   libjxl:
-    name: "libjxl (cjxl)"
-    command_template: 'cjxl {input} {output} -q 90'
+    name: libjxl (cjxl)
+    command_template: cjxl {input} {output} -q 90
+    timeout: 30
     formats:
-      jxl: "JPEG XL (Q90)"
+      jxl: JPEG XL (Q90)
   resvg:
-    name: "resvg"
-    command_template: 'resvg {input} {output}'
+    name: resvg
+    command_template: resvg {input} {output}
+    timeout: 30
     formats:
-      png: "PNG from SVG"
+      png: PNG from SVG
   potrace:
-    name: "Potrace"
-    command_template: 'potrace {input} --svg -o {output}'
+    name: Potrace
+    command_template: potrace {input} --svg -o {output}
+    timeout: 30
     formats:
-      svg: "SVG from Bitmap"
+      svg: SVG from Bitmap
   markitdown:
-    name: "Markitdown"
-    command_template: 'markitdown {input} -o {output}'
+    name: Markitdown
+    command_template: markitdown {input} -o {output}
+    timeout: 300
     formats:
-      md: "Markdown from Everything!"
+      md: Markdown from Everything!
   pngquant:
-    name: "pngquant"
-    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
+    name: pngquant
+    command_template: pngquant --quality={quality} --speed {speed} --force --output
+      {output} {input}
+    timeout: 300
     formats:
-      png_hq: "PNG (High Quality Compression)"
-      png_mq: "PNG (Medium Quality Compression)"
-      png_fast: "PNG (Fast Compression)"
+      png_hq: PNG (High Quality Compression)
+      png_mq: PNG (Medium Quality Compression)
+      png_fast: PNG (Fast Compression)
   sox:
-    name: "SoX Audio Converter"
-    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
+    name: SoX Audio Converter
+    command_template: sox {input} -r {samplerate} -b {bitdepth} {output}
+    timeout: 600
     formats:
-      wav_48k_24b: "WAV (48kHz, 24-bit)"
-      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
+      wav_48k_24b: WAV (48kHz, 24-bit)
+      wav_44k_16b: WAV (CD, 44.1kHz, 16-bit)
flac_48k_24b: "FLAC (48kHz, 24-bit)" flac_48k_24b: FLAC (48kHz, 24-bit)
flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)" flac_44k_16b: FLAC (CD, 44.1kHz, 16-bit)
ogg_32k: "Ogg Vorbis (32kHz)" ogg_32k_16b: Ogg Vorbis (32kHz)
ogg_16k: "Ogg Vorbis (16kHz, Voice)" ogg_16k_16b: Ogg Vorbis (16kHz, Voice)
mozjpeg: mozjpeg:
name: "MozJPEG" name: MozJPEG
command_template: 'cjpeg -quality {quality} -outfile {output} {input}' command_template: cjpeg -quality {quality} -outfile {output} {input}
timeout: 30
formats: formats:
jpg_q85: "JPEG (High Quality)" jpg_q85: JPEG (High Quality)
jpg_q75: "JPEG (Web Quality)" jpg_q75: JPEG (Web Quality)
jpg_q60: "JPEG (Aggressive Compression)" jpg_q60: JPEG (Aggressive Compression)
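Note on the settings above: each tool entry now carries a per-tool timeout, and LibreOffice gains a filters map that supplies the {filter} placeholder (e.g. txt:Text). A minimal sketch of how a worker could expand a command_template and enforce that timeout, assuming the YAML has been loaded into a dict — the run_conversion helper and its mapping argument are illustrative, not the app's actual code:

# Hypothetical sketch, not the app's implementation.
import shlex
import subprocess

def run_conversion(tool: dict, mapping: dict) -> None:
    # Split the template first so paths containing spaces stay single
    # arguments, then substitute {input}, {output_dir}, {filter}, ... per token.
    args = [token.format_map(mapping) for token in shlex.split(tool["command_template"])]
    subprocess.run(args, check=True, timeout=tool.get("timeout", 300))

# e.g. the libreoffice entry, with {filter} drawn from its filters map:
# run_conversion(cfg["conversion_tools"]["libreoffice"],
#                {"filter": "txt:Text", "output_dir": "/tmp/out", "input": "report.docx"})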

static/css/style.css
View File

@@ -1,5 +1,3 @@
:root {
  /* Core */
  --bg-color: #000000;
@@ -449,6 +447,110 @@ button[type="submit"]:disabled {
}

/* --- START: Drag and Drop and Dialog Styles --- */
.drag-overlay {
  position: fixed;
  inset: 0;
  z-index: 9999;
  display: none; /* Hidden by default */
  justify-content: center;
  align-items: center;
  background-color: rgba(0, 0, 0, 0.7);
  backdrop-filter: blur(5px);
}

body.dragging .drag-overlay {
  display: flex; /* Shown when body has .dragging class */
}

.drag-overlay-content {
  border: 3px dashed var(--primary-color);
  border-radius: 12px;
  padding: 2rem 4rem;
  text-align: center;
  background-color: rgba(0, 0, 0, 0.2);
}

.drag-overlay-content p {
  margin: 0;
  font-size: 1.5rem;
  font-weight: 500;
  color: var(--primary-color);
}

.dialog-overlay {
  position: fixed;
  inset: 0;
  z-index: 10000;
  display: none; /* Hidden by default */
  justify-content: center;
  align-items: center;
  background-color: rgba(0, 0, 0, 0.7);
  backdrop-filter: blur(5px);
}

.dialog-overlay.visible {
  display: flex; /* Show when .visible class is added */
}

.dialog-box {
  background: var(--card-bg);
  border: 1px solid var(--border-color);
  border-radius: 8px;
  padding: 1.5rem;
  width: 100%;
  max-width: 450px;
  text-align: center;
  box-shadow: 0 10px 30px rgba(0,0,0,0.5);
}

.dialog-box h2 {
  margin-top: 0;
  font-size: 1.5rem;
}

.dialog-box p {
  color: var(--muted-text);
  margin-bottom: 1.5rem;
}

.dialog-actions {
  display: grid;
  grid-template-columns: 1fr;
  gap: 0.75rem;
  margin-bottom: 1rem;
}

.dialog-actions button {
  display: block;
  width: 100%;
  background: transparent;
  border: 1px solid var(--border-color);
  color: var(--text-color);
  padding: 0.65rem 1rem;
  font-size: 1rem;
  font-weight: 600;
  border-radius: 5px;
  cursor: pointer;
  transition: background-color 0.15s ease, border-color 0.15s ease;
}

.dialog-actions button:hover {
  background: var(--primary-hover);
  border-color: var(--primary-hover);
}

.dialog-secondary-action {
  background-color: transparent !important;
  border: 1px solid var(--border-color) !important;
}

.dialog-secondary-action:hover {
  background-color: rgba(255, 255, 255, 0.05) !important;
}

.dialog-cancel {
  background: none;
  border: none;
  color: var(--muted-text);
  cursor: pointer;
  font-size: 0.9rem;
  padding: 0.5rem;
}

.dialog-cancel:hover {
  color: var(--text-color);
}
/* --- END: Drag and Drop and Dialog Styles --- */
/* Spinner */
.spinner-small {
  border: 3px solid rgba(255,255,255,0.1);
@@ -467,7 +569,6 @@ button[type="submit"]:disabled {
/* Mobile responsive table */
@media (max-width: 768px) {
  .table-wrapper {
    border: none;
    background-color: transparent;
@@ -513,17 +614,17 @@ button[type="submit"]:disabled {
  .cell-value {
    min-width: 0;
    max-width: 20em;
    text-wrap: nowrap;
    overflow: scroll;
  }
  #job-table td[data-label="File"],
  #job-table td[data-label="Task"] {
    overflow: scroll;
    text-overflow: ellipsis;
    text-wrap: nowrap;
    max-width: 100em;
  }
}

static/js/script.js
View File

@@ -1,6 +1,17 @@
document.addEventListener('DOMContentLoaded', () => {
    // --- User Locale and Timezone Detection (Corrected Implementation) ---
    const USER_LOCALE = navigator.language || 'en-US'; // Fallback to en-US
    const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone;
    const DATETIME_FORMAT_OPTIONS = {
        year: 'numeric',
        month: 'short',
        day: 'numeric',
        hour: 'numeric',
        minute: '2-digit',
        timeZone: USER_TIMEZONE,
    };
    console.log(`Using locale: ${USER_LOCALE} and timezone: ${USER_TIMEZONE}`);

    // --- Element Selectors ---
    const jobListBody = document.getElementById('job-list-body');
@@ -11,16 +22,35 @@ document.addEventListener('DOMContentLoaded', () => {
    const audioForm = document.getElementById('audio-form');
    const audioFileInput = document.getElementById('audio-file-input');
    const audioFileName = document.getElementById('audio-file-name');
    const modelSizeSelect = document.getElementById('model-size-select');
    const conversionForm = document.getElementById('conversion-form');
    const conversionFileInput = document.getElementById('conversion-file-input');
    const conversionFileName = document.getElementById('conversion-file-name');
    const outputFormatSelect = document.getElementById('output-format-select');
    // START: Drag and Drop additions
    const dragOverlay = document.getElementById('drag-overlay');
    const actionDialog = document.getElementById('action-dialog');
    const dialogFileCount = document.getElementById('dialog-file-count');
    // Dialog Views
    const dialogInitialView = document.getElementById('dialog-initial-actions');
    const dialogConvertView = document.getElementById('dialog-convert-view');
    // Dialog Buttons
    const dialogConvertBtn = document.getElementById('dialog-action-convert');
    const dialogOcrBtn = document.getElementById('dialog-action-ocr');
    const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe');
    const dialogCancelBtn = document.getElementById('dialog-action-cancel');
    const dialogStartConversionBtn = document.getElementById('dialog-start-conversion');
    const dialogBackBtn = document.getElementById('dialog-back');
    // Dialog Select
    const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select');
    // END: Drag and Drop additions

    let conversionChoices = null;
    let dialogConversionChoices = null; // For the dialog's format selector
    const activePolls = new Map();
    let stagedFiles = null; // To hold files from a drop event

    // --- Main Event Listeners ---
    pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName));
@@ -37,13 +67,222 @@ document.addEventListener('DOMContentLoaded', () => {
            handleCancelJob(jobId);
        }
    });

    // --- Helper Functions ---
    function formatBytes(bytes, decimals = 1) {
        if (!+bytes) return '0 Bytes'; // Handles 0, null, undefined
        const k = 1024;
        const dm = decimals < 0 ? 0 : decimals;
        const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
        const i = Math.floor(Math.log(bytes) / Math.log(k));
        return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
    }

    // --- Core Job Submission Logic (Refactored for reuse) ---
    async function submitJob(endpoint, formData, originalFilename) {
        try {
            const response = await fetch(endpoint, { method: 'POST', body: formData });
            if (!response.ok) {
                const errorData = await response.json();
                throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`);
            }
            const result = await response.json();
            const preliminaryJob = {
                id: result.job_id,
                status: 'pending',
                progress: 0,
                original_filename: originalFilename,
                input_filesize: formData.get('file').size,
                task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
                created_at: new Date().toISOString() // Create preliminary UTC timestamp
            };
            renderJobRow(preliminaryJob);
            startPolling(result.job_id);
        } catch (error) {
            console.error('Error submitting job:', error);
            alert(`Submission failed for ${originalFilename}: ${error.message}`);
        }
    }

    // --- Original Form Submission Handler (Now uses submitJob) ---
    async function handleFormSubmit(event, endpoint, form) {
        event.preventDefault();
        const fileInput = form.querySelector('input[type="file"]');
        if (fileInput.files.length === 0) return;

        const submitButton = form.querySelector('button[type="submit"]');
        submitButton.disabled = true;

        // Convert FileList to an array to loop through it
        const files = Array.from(fileInput.files);

        // Process each file as a separate job
        for (const file of files) {
            const formData = new FormData();
            formData.append('file', file);

            // Append other form data if it exists
            const outputFormat = form.querySelector('select[name="output_format"]');
            if (outputFormat) {
                formData.append('output_format', outputFormat.value);
            }
            const modelSize = form.querySelector('select[name="model_size"]');
            if (modelSize) {
                formData.append('model_size', modelSize.value);
            }

            // Await each job submission to process them sequentially
            await submitJob(endpoint, formData, file.name);
        }

        // Reset the form UI after all jobs have been submitted
        const fileNameDisplay = form.querySelector('.file-name');
        form.reset();
        if (fileNameDisplay) {
            fileNameDisplay.textContent = 'No file chosen';
            fileNameDisplay.title = 'No file chosen';
        }
        if (form.id === 'conversion-form' && conversionChoices) {
            conversionChoices.clearInput();
            conversionChoices.setValue([]);
        }
        submitButton.disabled = false;
    }

    // --- START: Drag and Drop Implementation ---
    function setupDragAndDropListeners() {
        let dragCounter = 0; // Counter to manage enter/leave events reliably

        window.addEventListener('dragenter', (e) => {
            e.preventDefault();
            dragCounter++;
            document.body.classList.add('dragging');
        });

        window.addEventListener('dragleave', (e) => {
            e.preventDefault();
            dragCounter--;
            if (dragCounter === 0) {
                document.body.classList.remove('dragging');
            }
        });

        window.addEventListener('dragover', (e) => {
            e.preventDefault(); // This is necessary to allow a drop
        });

        window.addEventListener('drop', (e) => {
            e.preventDefault();
            dragCounter = 0; // Reset counter
            document.body.classList.remove('dragging');

            // Only handle the drop if it's on our designated overlay
            if (e.target === dragOverlay || dragOverlay.contains(e.target)) {
                const files = e.dataTransfer.files;
                if (files && files.length > 0) {
                    stagedFiles = files;
                    showActionDialog();
                }
            }
        });
    }

    function showActionDialog() {
        dialogFileCount.textContent = stagedFiles.length;

        // Clone options from main form's select to the dialog's select
        dialogOutputFormatSelect.innerHTML = outputFormatSelect.innerHTML;

        // Clean up previous Choices.js instance if it exists
        if (dialogConversionChoices) {
            dialogConversionChoices.destroy();
        }

        // Initialize a new Choices.js instance for the dialog
        dialogConversionChoices = new Choices(dialogOutputFormatSelect, {
            searchEnabled: true,
            itemSelectText: 'Select',
            shouldSort: false,
            placeholder: true,
            placeholderValue: 'Select a format...',
        });

        // Ensure the initial view is shown
        dialogInitialView.style.display = 'grid';
        dialogConvertView.style.display = 'none';
        actionDialog.classList.add('visible');
    }

    function closeActionDialog() {
        actionDialog.classList.remove('visible');
        stagedFiles = null;

        // Important: Destroy the Choices instance to prevent memory leaks
        if (dialogConversionChoices) {
            // Explicitly hide the dropdown before destroying
            dialogConversionChoices.hideDropdown();
            dialogConversionChoices.destroy();
            dialogConversionChoices = null;
        }
    }

    // --- Dialog Button and Action Listeners ---
    dialogConvertBtn.addEventListener('click', () => {
        // Switch to the conversion view
        dialogInitialView.style.display = 'none';
        dialogConvertView.style.display = 'block';
    });

    dialogBackBtn.addEventListener('click', () => {
        // Switch back to the initial view
        dialogInitialView.style.display = 'grid';
        dialogConvertView.style.display = 'none';
    });

    dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('convert'));
    dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr'));
    dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcribe'));
    dialogCancelBtn.addEventListener('click', closeActionDialog);

    function handleDialogAction(action) {
        if (!stagedFiles) return;

        let endpoint = '';
        const formDataArray = [];

        for (const file of stagedFiles) {
            const formData = new FormData();
            formData.append('file', file);

            if (action === 'convert') {
                const selectedFormat = dialogConversionChoices.getValue(true);
                if (!selectedFormat) {
                    alert('Please select a format to convert to.');
                    return;
                }
                formData.append('output_format', selectedFormat);
                endpoint = '/convert-file';
            } else if (action === 'ocr') {
                endpoint = '/ocr-pdf';
            } else if (action === 'transcribe') {
                formData.append('model_size', modelSizeSelect.value);
                endpoint = '/transcribe-audio';
            }

            formDataArray.push({ formData, name: file.name });
        }

        formDataArray.forEach(item => {
            submitJob(endpoint, item.formData, item.name);
        });

        closeActionDialog();
    }
    // --- END: Drag and Drop Implementation ---

    function initializeConversionSelector() {
        if (conversionChoices) {
            conversionChoices.destroy();
        }
        conversionChoices = new Choices(outputFormatSelect, {
            searchEnabled: true,
            itemSelectText: 'Select',
@@ -65,7 +304,7 @@ document.addEventListener('DOMContentLoaded', () => {
            for (const formatKey in tool.formats) {
                group.choices.push({
                    value: `${toolKey}_${formatKey}`,
                    label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})`
                });
            }
            choicesArray.push(group);
@@ -73,58 +312,23 @@ document.addEventListener('DOMContentLoaded', () => {
        conversionChoices.setChoices(choicesArray, 'value', 'label', true);
    }

    function updateFileName(input, nameDisplay) {
        const numFiles = input.files.length;
        let displayText = 'No file chosen';
        let displayTitle = 'No file chosen';

        if (numFiles === 1) {
            displayText = input.files[0].name;
            displayTitle = input.files[0].name;
        } else if (numFiles > 1) {
            displayText = `${numFiles} files selected`;
            // Create a title attribute to show all filenames on hover
            displayTitle = Array.from(input.files).map(file => file.name).join(', ');
        }
        nameDisplay.textContent = displayText;
        nameDisplay.title = displayTitle;
    }

    async function handleCancelJob(jobId) {
        if (!confirm('Are you sure you want to cancel this job?')) return;
        try {
@@ -161,7 +365,7 @@ document.addEventListener('DOMContentLoaded', () => {
            }
        } catch (error) {
            console.error("Couldn't load job history:", error);
            jobListBody.innerHTML = '<tr><td colspan="6" style="text-align: center;">Could not load job history.</td></tr>';
        }
    }
@@ -214,7 +418,12 @@ document.addEventListener('DOMContentLoaded', () => {
            taskTypeLabel = 'Conversion';
        }

        // --- CORRECTED DATE FORMATTING ---
        // Takes the UTC string from the server (or the preliminary job)
        // and formats it using the user's detected locale and timezone.
        const submittedDate = new Date(job.created_at);
        const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS);

        let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
        if (job.status === 'processing') {
            const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
@@ -233,9 +442,21 @@ document.addEventListener('DOMContentLoaded', () => {
            actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`;
        }

        // --- File Size Logic ---
        let fileSizeHtml = '<span>-</span>';
        if (job.input_filesize) {
            let sizeString = formatBytes(job.input_filesize);
            if (job.status === 'completed' && job.output_filesize) {
                sizeString += ` → ${formatBytes(job.output_filesize)}`;
            }
            fileSizeHtml = `<span class="cell-value">${sizeString}</span>`;
        }

        const escapedFilename = job.original_filename ? job.original_filename.replace(/</g, "&lt;").replace(/>/g, "&gt;") : "No filename";

        row.innerHTML = `
            <td data-label="File"><span class="cell-value" title="${escapedFilename}">${escapedFilename}</span></td>
            <td data-label="File Size">${fileSizeHtml}</td>
            <td data-label="Task"><span class="cell-value">${taskTypeLabel}</span></td>
            <td data-label="Submitted"><span class="cell-value">${formattedDate}</span></td>
            <td data-label="Status"><span class="cell-value">${statusHtml}</span></td>
@@ -246,4 +467,5 @@ document.addEventListener('DOMContentLoaded', () => {
    // --- Initial Load ---
    initializeConversionSelector();
    loadInitialJobs();
    setupDragAndDropListeners();
});
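
The date handling above assumes the backend hands out created_at as a UTC ISO-8601 string, which new Date(...) then renders in the user's locale and timezone. A minimal sketch of that server-side contract, assuming a Pydantic v2 response model — JobOut and its fields are illustrative, not the app's exact schema:

# Hypothetical sketch of the serialization side.
from datetime import datetime, timezone
from pydantic import BaseModel, field_serializer

class JobOut(BaseModel):
    id: str
    created_at: datetime

    @field_serializer('created_at')
    def serialize_created_at(self, dt: datetime) -> str:
        # Treat naive DB timestamps as UTC and emit an offset-aware ISO string
        # so the browser parses it unambiguously.
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc).isoformat()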

View File

@@ -27,7 +27,7 @@
            <fieldset>
                <legend><h2>File Conversion</h2></legend>
                <div class="file-input-wrapper">
                    <input type="file" name="file" id="conversion-file-input" required multiple>
                    <label for="conversion-file-input" class="file-input-label">Choose File...</label>
                    <span id="conversion-file-name" class="file-name">No file chosen</span>
                </div>
@@ -45,7 +45,7 @@
            <fieldset>
                <legend><h2>PDF OCR</h2></legend>
                <div class="file-input-wrapper">
                    <input type="file" name="file" id="pdf-file-input" accept=".pdf" required multiple>
                    <label for="pdf-file-input" class="file-input-label">Choose PDF...</label>
                    <span id="pdf-file-name" class="file-name">No file chosen</span>
                </div>
@@ -59,7 +59,7 @@
            <fieldset>
                <legend><h2>Transcribe Audio</h2></legend>
                <div class="file-input-wrapper">
                    <input type="file" name="file" id="audio-file-input" accept="audio/*" required multiple>
                    <label for="audio-file-input" class="file-input-label">Choose Audio...</label>
                    <span id="audio-file-name" class="file-name">No file chosen</span>
                </div>
@@ -87,6 +87,7 @@
                <thead>
                    <tr>
                        <th>File</th>
                        <th>File Size</th>
                        <th>Task</th>
                        <th>Submitted</th>
                        <th>Status</th>
@@ -100,7 +101,37 @@
            </section>
        </main>
    </div>

    <div id="drag-overlay" class="drag-overlay">
        <div class="drag-overlay-content">
            <p>Drop files anywhere to begin</p>
        </div>
    </div>

    <div id="action-dialog" class="dialog-overlay">
        <div class="dialog-box">
            <h2>Choose Action</h2>
            <p><span id="dialog-file-count"></span> file(s) dropped. What would you like to do?</p>
            <div id="dialog-initial-actions" class="dialog-actions">
                <button id="dialog-action-convert">Convert</button>
                <button id="dialog-action-ocr">OCR</button>
                <button id="dialog-action-transcribe">Transcribe</button>
            </div>
            <div id="dialog-convert-view" style="display: none;">
                <div class="form-control" style="text-align: left; margin-bottom: 1rem;">
                    <label for="dialog-output-format-select">Convert To</label>
                    <select id="dialog-output-format-select" required></select>
                </div>
                <div class="dialog-actions">
                    <button id="dialog-start-conversion">Start Conversion</button>
                    <button id="dialog-back" class="dialog-secondary-action">Back</button>
                </div>
            </div>
            <button id="dialog-action-cancel" class="dialog-cancel">Cancel</button>
        </div>
    </div>
    <script>
        window.APP_CONFIG = {
            conversionTools: {{ conversion_tools | tojson }}