Drag and Drop

2025-09-17 18:45:55 +00:00
parent 2115238217
commit 20e41b67a7
10 changed files with 1358 additions and 379 deletions

Dockerfile (new file, +42 lines)

@@ -0,0 +1,42 @@
# Dockerfile
FROM python:3.13.7-slim
RUN apt-get update && apt-get install -y --no-install-recommends \
tesseract-ocr \
ghostscript \
poppler-utils \
libreoffice \
imagemagick \
graphicsmagick \
libvips-tools \
ffmpeg \
libheif-examples \
inkscape \
calibre \
build-essential \
pkg-config \
git \
curl \
texlive \
texlive-latex-extra \
texlive-xetex \
&& rm -rf /var/lib/apt/lists/*
# Set working directory inside the container
WORKDIR /app
# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the app
COPY . .
# Expose the app port
EXPOSE 8000
RUN chmod +x run.sh
# Command to run when container starts
CMD ["./run.sh"]

docker-compose.yml (new file, +10 lines)

@@ -0,0 +1,10 @@
version: "3.9"
services:
  web:
    build: .
    ports:
      - "8000:8000"  # the image EXPOSEs 8000 and run.sh binds gunicorn to 0.0.0.0:8000
    volumes:
      - .:/app  # optional: mount code for live changes
    environment:
      - FLASK_ENV=development

main.py (modified, 401 changed lines)

@@ -6,7 +6,7 @@ import uuid
 import shlex
 import yaml
 from contextlib import asynccontextmanager
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Any
@@ -21,17 +21,21 @@ from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from huey import SqliteHuey
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, field_serializer # MODIFIED: Import field_serializer
-from sqlalchemy import (Column, DateTime, Integer, String, Text,
-                        create_engine, delete, event)
-from sqlalchemy.orm import Session, declarative_base, sessionmaker
+from sqlalchemy import (Column, DateTime, Integer, String, Text,
+                        create_engine, delete, event, text)
 from sqlalchemy.pool import NullPool
 from string import Formatter
+from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from werkzeug.utils import secure_filename
+from typing import List as TypingList

 # --------------------------------------------------------------------------------
 # --- 1. CONFIGURATION
 # --------------------------------------------------------------------------------
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)

 class AppPaths(BaseModel):
     BASE_DIR: Path = Path(__file__).resolve().parent
@@ -43,30 +47,46 @@ class AppPaths(BaseModel):
 PATHS = AppPaths()
 APP_CONFIG: Dict[str, Any] = {}
+PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
+PATHS.PROCESSED_DIR.mkdir(exist_ok=True)

 def load_app_config():
     global APP_CONFIG
     try:
-        with open(PATHS.SETTINGS_FILE, 'r') as f:
-            APP_CONFIG = yaml.safe_load(f)
-        APP_CONFIG['app_settings']['max_file_size_bytes'] = APP_CONFIG['app_settings']['max_file_size_mb'] * 1024 * 1024
-        allowed_extensions = {
-            ".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif",
-            ".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg",
-            ".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi",
-            ".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi",
-            ".azw3", ".pptx", ".xlsx"
-        }
-        APP_CONFIG['app_settings']['allowed_all_extensions'] = allowed_extensions
+        with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
+            cfg_raw = yaml.safe_load(f) or {}
+        # basic defaults
+        defaults = {
+            "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []},
+            "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
+            "conversion_tools": {},
+            "ocr_settings": {"ocrmypdf": {}}
+        }
+        # shallow merge (safe for top-level keys)
+        cfg = defaults.copy()
+        cfg.update(cfg_raw)
+        # normalize app settings
+        app_settings = cfg.get("app_settings", {})
+        max_mb = app_settings.get("max_file_size_mb", 100)
+        app_settings["max_file_size_bytes"] = int(max_mb) * 1024 * 1024
+        allowed = app_settings.get("allowed_all_extensions", [])
+        if not isinstance(allowed, (list, set)):
+            allowed = list(allowed)
+        app_settings["allowed_all_extensions"] = set(allowed)
+        cfg["app_settings"] = app_settings
+        APP_CONFIG = cfg
         logger.info("Successfully loaded settings from settings.yml")
     except (FileNotFoundError, yaml.YAMLError) as e:
-        logger.error(f"Could not load settings.yml: {e}. App may not function correctly.")
-        APP_CONFIG = {}
+        logging.getLogger(__name__).exception(f"Could not load settings.yml: {e}. Using defaults.")
+        APP_CONFIG = {
+            "app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set()},
+            "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
+            "conversion_tools": {},
+            "ocr_settings": {"ocrmypdf": {}}
+        }
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
-PATHS.PROCESSED_DIR.mkdir(exist_ok=True)

 # --------------------------------------------------------------------------------
 # --- 2. DATABASE & Schemas
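A note on the merge strategy above (not part of the diff): `defaults.copy()` followed by `cfg.update(cfg_raw)` merges only at the top level, which is why the in-code comment calls it safe for top-level keys. A minimal Python sketch of the consequence:

defaults = {"app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []}}
user_cfg = {"app_settings": {"max_file_size_mb": 2000}}

merged = defaults.copy()
merged.update(user_cfg)  # the nested dict is replaced wholesale, not merged

print(merged["app_settings"])  # {'max_file_size_mb': 2000}
# 'allowed_all_extensions' is gone, which is why load_app_config re-reads
# app_settings with .get() defaults after the merge.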
@@ -77,8 +97,6 @@ engine = create_engine(
     poolclass=NullPool,
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-# THIS IS THE CRITICAL FIX
 Base = declarative_base()

 @event.listens_for(engine, "connect")
@@ -102,11 +120,13 @@ class Job(Base):
     progress = Column(Integer, default=0)
     original_filename = Column(String)
     input_filepath = Column(String)
+    input_filesize = Column(Integer, nullable=True)
     processed_filepath = Column(String, nullable=True)
+    output_filesize = Column(Integer, nullable=True)
     result_preview = Column(Text, nullable=True)
     error_message = Column(Text, nullable=True)
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))

 def get_db():
     db = SessionLocal()
@@ -120,6 +140,7 @@ class JobCreate(BaseModel):
     task_type: str
     original_filename: str
     input_filepath: str
+    input_filesize: int | None = None
     processed_filepath: str | None = None

 class JobSchema(BaseModel):
@@ -128,6 +149,8 @@ class JobSchema(BaseModel):
     status: str
     progress: int
     original_filename: str
+    input_filesize: int | None = None
+    output_filesize: int | None = None
     processed_filepath: str | None = None
     result_preview: str | None = None
     error_message: str | None = None
@@ -135,8 +158,14 @@ class JobSchema(BaseModel):
     updated_at: datetime
     model_config = ConfigDict(from_attributes=True)

+    # NEW: This serializer ensures the datetime string sent to the frontend ALWAYS
+    # includes the 'Z' UTC indicator, fixing the timezone bug.
+    @field_serializer('created_at', 'updated_at')
+    def serialize_dt(self, dt: datetime, _info):
+        return dt.isoformat() + "Z"

 # --------------------------------------------------------------------------------
-# --- 3. CRUD OPERATIONS (No Changes)
+# --- 3. CRUD OPERATIONS
 # --------------------------------------------------------------------------------
 def get_job(db: Session, job_id: str):
     return db.query(Job).filter(Job.id == job_id).first()
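An aside on the serializer (not part of the diff): SQLAlchemy's plain DateTime column hands naive datetimes back from SQLite, so appending "Z" produces a valid UTC timestamp. A self-contained sketch of the same pattern (StampedModel is illustrative, not from the repo):

from datetime import datetime
from pydantic import BaseModel, field_serializer

class StampedModel(BaseModel):  # hypothetical stand-in for JobSchema
    created_at: datetime

    @field_serializer('created_at')
    def serialize_dt(self, dt: datetime, _info):
        return dt.isoformat() + "Z"

m = StampedModel(created_at=datetime(2025, 9, 17, 18, 45, 55))
print(m.model_dump())  # {'created_at': '2025-09-17T18:45:55Z'}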
@@ -163,80 +192,120 @@ def update_job_status(db: Session, job_id: str, status: str, progress: int = Non
     db.refresh(db_job)
     return db_job

-def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
+def mark_job_as_completed(db: Session, job_id: str, output_filepath_str: str | None = None, preview: str | None = None):
     db_job = get_job(db, job_id)
     if db_job and db_job.status != 'cancelled':
         db_job.status = "completed"
         db_job.progress = 100
         if preview:
             db_job.result_preview = preview.strip()[:2000]
+        if output_filepath_str:
+            try:
+                output_path = Path(output_filepath_str)
+                if output_path.exists():
+                    db_job.output_filesize = output_path.stat().st_size
+            except Exception:
+                logger.exception(f"Could not stat output file {output_filepath_str} for job {job_id}")
         db.commit()
     return db_job

-# ... (The rest of the file is unchanged and remains the same) ...
 # --------------------------------------------------------------------------------
 # --- 4. BACKGROUND TASK SETUP
 # --------------------------------------------------------------------------------
 huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH)

-# --- START: NEW WHISPER MODEL CACHING ---
-# This dictionary will live in the memory of the Huey worker process,
-# allowing us to reuse loaded models across tasks.
+# Whisper model cache per worker process
 WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {}

 def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel:
-    """
-    Loads a Whisper model into the cache if not present, and returns the model.
-    This ensures a model is only loaded into memory once per worker process.
-    """
-    if model_size not in WHISPER_MODELS_CACHE:
-        compute_type = whisper_settings.get('compute_type', 'int8')
-        logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory...")
-        model = WhisperModel(model_size, device="cpu", compute_type=compute_type)
-        WHISPER_MODELS_CACHE[model_size] = model
-        logger.info(f"Model '{model_size}' loaded successfully.")
-    else:
+    if model_size in WHISPER_MODELS_CACHE:
         logger.info(f"Found model '{model_size}' in cache. Reusing.")
-    return WHISPER_MODELS_CACHE[model_size]
-# --- END: NEW WHISPER MODEL CACHING ---
+        return WHISPER_MODELS_CACHE[model_size]
+    device = whisper_settings.get("device", "cpu")
+    compute_type = whisper_settings.get('compute_type', 'int8')
+    logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory on device={device}...")
+    try:
+        model = WhisperModel(model_size, device=device, compute_type=compute_type)
+    except Exception:
+        logger.exception("Failed to load whisper model")
+        raise
+    WHISPER_MODELS_CACHE[model_size] = model
+    logger.info(f"Model '{model_size}' loaded successfully.")
+    return model

+# Helper: safe run_command (trimmed logs + timeout)
+def run_command(argv: TypingList[str], timeout: int = 300):
+    try:
+        res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        raise Exception(f"Command timed out after {timeout}s")
+    if res.returncode != 0:
+        stderr = (res.stderr or "")[:4000]
+        stdout = (res.stdout or "")[:4000]
+        raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
+    return res

+# Helper: validate and build command from template with allowlist
+ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth", "filter"}

+def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> TypingList[str]:
+    """
+    Validate placeholders against ALLOWED_VARS and build a safe argv list.
+    If a template uses allowed placeholders that are missing from `mapping`,
+    auto-fill sensible defaults:
+      - 'filter' -> mapping.get('output_ext', '')
+      - others -> empty string
+    This prevents KeyError while preserving the allowlist security check.
+    """
+    fmt = Formatter()
+    used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
+    bad = used - ALLOWED_VARS
+    if bad:
+        raise ValueError(f"Command template contains disallowed placeholders: {bad}")
+    # auto-fill missing allowed placeholders with safe defaults
+    safe_mapping = dict(mapping)  # shallow copy to avoid mutating caller mapping
+    for name in used:
+        if name not in safe_mapping:
+            if name == "filter":
+                safe_mapping[name] = safe_mapping.get("output_ext", "")
+            else:
+                safe_mapping[name] = ""
+    formatted = template_str.format(**safe_mapping)
+    return shlex.split(formatted)

 @huey.task()
 def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict):
     db = SessionLocal()
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing")
-        # --- MODIFIED: Use the caching function to get the model ---
         model = get_whisper_model(model_size, whisper_settings)
         logger.info(f"Starting transcription for job {job_id}")
         segments, info = model.transcribe(input_path_str, beam_size=5)
         full_transcript = []
         for segment in segments:
             job_check = get_job(db, job_id) # Check for cancellation during long tasks
             if job_check.status == 'cancelled':
                 logger.info(f"Job {job_id} cancelled during transcription.")
                 return
             if info.duration > 0:
                 progress = int((segment.end / info.duration) * 100)
                 update_job_status(db, job_id, "processing", progress=progress)
             full_transcript.append(segment.text.strip())
         transcript_text = "\n".join(full_transcript)
-        # write atomically to avoid partial files
+        # atomic write of transcript — keep the real extension and mark tmp in the name
         out_path = Path(output_path_str)
-        tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp")
+        tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
         with tmp_out.open("w", encoding="utf-8") as f:
             f.write(transcript_text)
         tmp_out.replace(out_path)
-        mark_job_as_completed(db, job_id, preview=transcript_text)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=transcript_text)
         logger.info(f"Transcription for job {job_id} completed.")
     except Exception:
         logger.exception(f"ERROR during transcription for job {job_id}")
@@ -245,13 +314,13 @@ def run_transcription_task(job_id: str, input_path_str: str, output_path_str: st
         Path(input_path_str).unlink(missing_ok=True)
         db.close()

-# Other tasks remain unchanged
 @huey.task()
 def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict):
     db = SessionLocal()
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing")
         logger.info(f"Starting PDF OCR for job {job_id}")
         ocrmypdf.ocr(input_path_str, output_path_str,
@@ -263,7 +332,7 @@ def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr
         with open(output_path_str, "rb") as f:
             reader = pypdf.PdfReader(f)
             preview = "\n".join(page.extract_text() or "" for page in reader.pages)
-        mark_job_as_completed(db, job_id, preview=preview)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=preview)
         logger.info(f"PDF OCR for job {job_id} completed.")
     except Exception:
         logger.exception(f"ERROR during PDF OCR for job {job_id}")
@@ -277,13 +346,18 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
     db = SessionLocal()
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing", progress=50)
         logger.info(f"Starting Image OCR for job {job_id}")
         text = pytesseract.image_to_string(Image.open(input_path_str))
-        with open(output_path_str, "w", encoding="utf-8") as f:
+        # atomic write of OCR text
+        out_path = Path(output_path_str)
+        tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
+        with tmp_out.open("w", encoding="utf-8") as f:
             f.write(text)
-        mark_job_as_completed(db, job_id, preview=text)
+        tmp_out.replace(out_path)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=text)
         logger.info(f"Image OCR for job {job_id} completed.")
     except Exception:
         logger.exception(f"ERROR during Image OCR for job {job_id}")
@@ -300,14 +374,18 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
     temp_output_file = None
     try:
         job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
         update_job_status(db, job_id, "processing", progress=25)
         logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}")
         tool_config = conversion_tools_config.get(tool)
-        if not tool_config: raise ValueError(f"Unknown conversion tool: {tool}")
+        if not tool_config:
+            raise ValueError(f"Unknown conversion tool: {tool}")
         input_path = Path(input_path_str)
         output_path = Path(output_path_str)
         current_input_path = input_path

-        # Pre-processing for specific tools
         if tool == "mozjpeg":
             temp_input_file = input_path.with_suffix('.temp.ppm')
             logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}")
@@ -317,22 +395,12 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
                 err = (pre_conv_result.stderr or "")[:4000]
                 raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}")
             current_input_path = temp_input_file

         update_job_status(db, job_id, "processing", progress=50)

-        # Build safe mapping for formatting and validate placeholders
-        ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"}
-        def validate_and_build_command(template_str: str, mapping: dict):
-            fmt = Formatter()
-            used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
-            bad = used - ALLOWED_VARS
-            if bad:
-                raise ValueError(f"Command template contains disallowed placeholders: {bad}")
-            formatted = template_str.format(**mapping)
-            return shlex.split(formatted)

-        # Use a temporary output path and atomically move into place after success
-        temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp")
+        # prepare temporary output and mapping
+        # use a temp filename that preserves the real extension, e.g. file.tmp-<uuid>.pdf
+        temp_output_file = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")

-        # Prepare mapping
         mapping = {
             "input": str(current_input_path),
             "output": str(temp_output_file),
@@ -340,7 +408,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
             "output_ext": output_path.suffix.lstrip('.'),
         }

-        # Allow tool-specific adjustments to mapping
+        # tool specific mapping adjustments
        if tool.startswith("ghostscript"):
             device, setting = task_key.split('_')
             mapping.update({"device": device, "dpi": setting, "preset": setting})
@@ -358,38 +426,30 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
             _, quality = task_key.split('_')
             quality = quality.replace('q', '')
             mapping.update({"quality": quality})
+        elif tool == "libreoffice":
+            target_ext = output_path.suffix.lstrip('.')
+            # tool_config may include a 'filters' mapping (see settings.yml example)
+            filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)
+            mapping["filter"] = filter_val

         command_template_str = tool_config["command_template"]
         command = validate_and_build_command(command_template_str, mapping)
         logger.info(f"Executing command: {' '.join(command)}")

-        # run with timeout and capture output; run_command helper ensures trimmed logs on failure
-        def run_command(argv: List[str], timeout: int = 300):
-            try:
-                res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
-            except subprocess.TimeoutExpired:
-                raise Exception(f"Command timed out after {timeout}s")
-            if res.returncode != 0:
-                stderr = (res.stderr or "")[:4000]
-                stdout = (res.stdout or "")[:4000]
-                raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
-            return res
+        # execute command with timeout and trimmed logs on error
         result = run_command(command, timeout=tool_config.get("timeout", 300))

-        if tool == "libreoffice":
-            expected_output_filename = input_path.with_suffix(output_path.suffix).name
-            generated_file = output_path.parent / expected_output_filename
-            if generated_file.exists():
-                # move generated file into place
-                generated_file.replace(output_path)
-            else:
-                raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}")
+        # handle LibreOffice special case: sometimes it writes differently
+        # Special-case LibreOffice: support per-format export filters via settings.yml

         # move temp output into final location atomically
         if temp_output_file and temp_output_file.exists():
             temp_output_file.replace(output_path)
-        mark_job_as_completed(db, job_id, preview=f"Successfully converted file.")
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=f"Successfully converted file.")
         logger.info(f"Conversion for job {job_id} completed.")
-    except Exception as e:
+    except Exception:
         logger.exception(f"ERROR during conversion for job {job_id}")
         update_job_status(db, job_id, "failed", error="See server logs for details.")
     finally:
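A small sketch (not part of the diff) of the temp-name scheme introduced above. Keeping the real extension matters because tools such as FFmpeg and vips infer the output format from the suffix they are handed:

import uuid
from pathlib import Path

output_path = Path("/app/processed/report_ab12.pdf")
tmp = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")
print(tmp.name)  # report_ab12.tmp-<32 hex chars>.pdf

# After the tool succeeds, Path.replace() is an atomic rename on the same filesystem:
# tmp.replace(output_path)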
@@ -415,13 +475,14 @@ app = FastAPI(lifespan=lifespan)
 app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static")
 templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates")

-async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
+async def save_upload_file_chunked(upload_file: UploadFile, destination: Path) -> int:
     """
-    Streams the uploaded file in chunks directly to a file on disk.
-    This is memory-efficient and reliable for large files.
+    Write upload to a tmp file in chunks, then atomically move to final destination.
+    Returns the final size of the file in bytes.
     """
     max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024)
-    tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp")
+    # make a temp filename that keeps the real extension, e.g. file.tmp-<uuid>.pdf
+    tmp = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}")
     size = 0
     try:
         with tmp.open("wb") as buffer:
@@ -433,17 +494,16 @@ async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
                 if size > max_size:
                     raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
                 buffer.write(chunk)
-        # atomic move into place
         tmp.replace(destination)
+        return size
     except Exception:
         tmp.unlink(missing_ok=True)
         raise

 def is_allowed_file(filename: str, allowed_extensions: set) -> bool:
     return Path(filename).suffix.lower() in allowed_extensions
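The read loop itself sits in unchanged context, so the diff elides it. Below is a sketch (not part of the commit) of the usual FastAPI UploadFile pattern, matching the size check visible above; the helper name copy_capped and the 1 MiB chunk size are assumptions:

from fastapi import HTTPException, UploadFile

async def copy_capped(upload_file: UploadFile, buffer, max_size: int) -> int:
    """Stream an upload into `buffer`, enforcing the configured size cap."""
    size = 0
    while True:
        chunk = await upload_file.read(1024 * 1024)  # 1 MiB per read
        if not chunk:
            break
        size += len(chunk)
        if size > max_size:
            raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
        buffer.write(chunk)
    return size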
-# --- Routes (only transcription route is modified) ---
+# --- Routes (transcription route uses Huey task enqueuing) ---
 @app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED)
 async def submit_audio_transcription(
@@ -453,7 +513,7 @@ async def submit_audio_transcription(
 ):
     if not is_allowed_file(file.filename, {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus"}):
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file type.")
     whisper_config = APP_CONFIG.get("transcription_settings", {}).get("whisper", {})
     if model_size not in whisper_config.get("allowed_models", []):
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.")
@@ -461,24 +521,29 @@ async def submit_audio_transcription(
     job_id = uuid.uuid4().hex
     safe_basename = secure_filename(file.filename)
     stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix
     audio_filename = f"{stem}_{job_id}{suffix}"
     transcript_filename = f"{stem}_{job_id}.txt"
     upload_path = PATHS.UPLOADS_DIR / audio_filename
     processed_path = PATHS.PROCESSED_DIR / transcript_filename
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(
+        id=job_id,
+        task_type="transcription",
+        original_filename=file.filename,
+        input_filepath=str(upload_path),
+        input_filesize=input_size,
+        processed_filepath=str(processed_path)
+    )
     new_job = create_job(db=db, job=job_data)
-    # --- MODIFIED: Pass whisper_config to the task ---
+    # enqueue the Huey task (decorated function call enqueues when using huey)
     run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

-# --- Other routes remain unchanged ---
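A hypothetical client call (not part of the commit) against the route above. httpx is not in requirements.txt, and whether model_size travels as form data or a query parameter depends on the route signature, which the diff elides; form data is assumed here:

import httpx

with open("interview.mp3", "rb") as f:
    resp = httpx.post(
        "http://localhost:8000/transcribe-audio",
        files={"file": ("interview.mp3", f, "audio/mpeg")},
        data={"model_size": "base"},  # assumed form field
    )
resp.raise_for_status()
job = resp.json()
print(job["status_url"])  # poll /job/<id> for progress, then /download/<filename>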
@app.get("/") @app.get("/")
async def get_index(request: Request): async def get_index(request: Request):
@@ -493,23 +558,55 @@ async def get_index(request: Request):
@app.get("/settings") @app.get("/settings")
async def get_settings_page(request: Request): async def get_settings_page(request: Request):
try: try:
with open(PATHS.SETTINGS_FILE, 'r') as f: with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
current_config = yaml.safe_load(f) current_config = yaml.safe_load(f) or {}
except Exception as e: except Exception:
logger.error(f"Could not load settings.yml for settings page: {e}") logger.exception("Could not load settings.yml for settings page")
current_config = {} current_config = {}
return templates.TemplateResponse("settings.html", {"request": request, "config": current_config}) return templates.TemplateResponse("settings.html", {"request": request, "config": current_config})
def deep_merge(base: dict, updates: dict) -> dict:
"""
Recursively merge `updates` into `base`. Lists and scalars are replaced.
"""
for key, value in updates.items():
if (
key in base
and isinstance(base[key], dict)
and isinstance(value, dict)
):
base[key] = deep_merge(base[key], value)
else:
base[key] = value
return base
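A quick demonstration (not part of the diff) of deep_merge, which is what lets /settings/save update one nested key without wiping its siblings, in contrast to the shallow merge in load_app_config:

base = {"transcription_settings": {"whisper": {"compute_type": "int8",
                                               "allowed_models": ["tiny", "base"]}}}
updates = {"transcription_settings": {"whisper": {"compute_type": "float16"}}}

merged = deep_merge(base, updates)
print(merged["transcription_settings"]["whisper"])
# {'compute_type': 'float16', 'allowed_models': ['tiny', 'base']}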
@app.post("/settings/save") @app.post("/settings/save")
async def save_settings(new_config: Dict = Body(...)): async def save_settings(new_config: Dict = Body(...)):
tmp = PATHS.SETTINGS_FILE.with_suffix(".tmp")
try: try:
with open(PATHS.SETTINGS_FILE, 'w') as f: # load existing config if present
yaml.dump(new_config, f, default_flow_style=False, sort_keys=False) try:
with PATHS.SETTINGS_FILE.open("r", encoding="utf8") as f:
current_config = yaml.safe_load(f) or {}
except FileNotFoundError:
current_config = {}
# deep merge new values
merged = deep_merge(current_config, new_config)
# atomic write back
with tmp.open("w", encoding="utf8") as f:
yaml.safe_dump(merged, f, default_flow_style=False, sort_keys=False)
tmp.replace(PATHS.SETTINGS_FILE)
load_app_config() load_app_config()
return JSONResponse({"message": "Settings saved successfully."}) return JSONResponse({"message": "Settings updated successfully."})
except Exception as e: except Exception:
logger.error(f"Failed to save settings: {e}") logger.exception("Failed to update settings")
raise HTTPException(status_code=500, detail="Could not write to settings.yml.") tmp.unlink(missing_ok=True)
raise HTTPException(status_code=500, detail="Could not update settings.yml.")
@app.post("/settings/clear-history") @app.post("/settings/clear-history")
async def clear_job_history(db: Session = Depends(get_db)): async def clear_job_history(db: Session = Depends(get_db)):
@@ -518,9 +615,9 @@ async def clear_job_history(db: Session = Depends(get_db)):
         db.commit()
         logger.info(f"Cleared {num_deleted} jobs from history.")
         return {"deleted_count": num_deleted}
-    except Exception as e:
+    except Exception:
         db.rollback()
-        logger.error(f"Failed to clear job history: {e}")
+        logger.exception("Failed to clear job history")
         raise HTTPException(status_code=500, detail="Database error while clearing history.")

 @app.post("/settings/delete-files")
@@ -532,9 +629,9 @@ async def delete_processed_files():
             if f.is_file():
                 f.unlink()
                 deleted_count += 1
-        except Exception as e:
+        except Exception:
             errors.append(f.name)
-            logger.error(f"Could not delete processed file {f.name}: {e}")
+            logger.exception(f"Could not delete processed file {f.name}")
     if errors:
         raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}")
     logger.info(f"Deleted {deleted_count} files from processed directory.")
@@ -562,12 +659,14 @@ async def submit_file_conversion(file: UploadFile = File(...), output_format: st
     processed_filename = f"{original_stem}_{job_id}.{target_ext}"
     upload_path = PATHS.UPLOADS_DIR / upload_filename
     processed_path = PATHS.PROCESSED_DIR / processed_filename
-    await save_upload_file_chunked(file, upload_path)
+    input_size = await save_upload_file_chunked(file, upload_path)
     job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename,
-                         input_filepath=str(upload_path), processed_filepath=str(processed_path))
+                         input_filepath=str(upload_path),
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
     new_job = create_job(db=db, job=job_data)
     run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

 @app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
 async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -578,12 +677,15 @@ async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get
     unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}"
     upload_path = PATHS.UPLOADS_DIR / unique_filename
     processed_path = PATHS.PROCESSED_DIR / unique_filename
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename,
+                         input_filepath=str(upload_path),
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
     new_job = create_job(db=db, job=job_data)
     ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})
     run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

 @app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
 async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -596,11 +698,14 @@ async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(g
     unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}"
     upload_path = PATHS.UPLOADS_DIR / unique_filename
     processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt"
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename,
+                         input_filepath=str(upload_path),
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
     new_job = create_job(db=db, job=job_data)
     run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

 @app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED)
 async def cancel_job(job_id: str, db: Session = Depends(get_db)):
@@ -626,8 +731,7 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)):
 @app.get("/download/{filename}")
 async def download_file(filename: str):
     safe_filename = secure_filename(filename)
-    file_path = PATHS.PROCESSED_DIR / safe_filename
-    file_path = file_path.resolve()
+    file_path = (PATHS.PROCESSED_DIR / safe_filename).resolve()
     base = PATHS.PROCESSED_DIR.resolve()
     try:
         file_path.relative_to(base)
@@ -635,4 +739,15 @@ async def download_file(filename: str):
         raise HTTPException(status_code=403, detail="Access denied.")
     if not file_path.is_file():
         raise HTTPException(status_code=404, detail="File not found.")
     return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream")
+# Small health endpoint
+@app.get("/health")
+async def health():
+    try:
+        with engine.connect() as conn:
+            # SQLAlchemy 2.x requires raw SQL to be wrapped in text()
+            conn.execute(text("SELECT 1"))
+    except Exception:
+        logger.exception("Health check failed")
+        return JSONResponse({"ok": False}, status_code=500)
+    return {"ok": True}

requirements.txt (modified)

@@ -1,22 +1,145 @@
-# requirements.txt
-
-# Web framework
-fastapi
-uvicorn[standard]
-python-multipart
-jinja2
-
-# PDF OCR
-ocrmypdf
-PyPDF2
-
-# Audio Transcription
-faster-whisper
-# The following are core dependencies for faster-whisper,
-# but it's good to list them explicitly.
-# ctranslate2
-# transformers
-# torch # Note: torch is a dependency of transformers
-
-# Utilities
-werkzeug
+annotated-types==0.7.0
+anyio==4.10.0
+audioop-lts==0.2.2
+av==15.1.0
+azure-ai-documentintelligence==1.0.2
+azure-core==1.35.1
+azure-identity==1.25.0
+beautifulsoup4==4.13.5
+certifi==2025.8.3
+cffi==2.0.0
+chardet==5.2.0
+charset-normalizer==3.4.3
+click==8.2.1
+cobble==0.1.4
+coloredlogs==15.0.1
+cryptography==45.0.7
+css-parser==1.0.10
+ctranslate2==4.6.0
+defusedxml==0.7.1
+Deprecated==1.2.18
+deprecation==2.1.0
+et_xmlfile==2.0.0
+fastapi==0.116.1
+faster-whisper==1.2.0
+filelock==3.19.1
+flatbuffers==25.2.10
+fsspec==2025.9.0
+greenlet==3.2.4
+gunicorn==23.0.0
+h11==0.16.0
+hf-xet==1.1.10
+html5-parser==0.4.12
+html5lib==1.1
+httptools==0.6.4
+huey==2.5.3
+huggingface-hub==0.34.4
+humanfriendly==10.0
+idna==3.10
+imageio==2.37.0
+img2pdf==0.6.1
+isodate==0.7.2
+Jinja2==3.1.6
+lazy_loader==0.4
+lxml==6.0.1
+magika==0.6.2
+mammoth==1.10.0
+markdown-it-py==4.0.0
+markdownify==1.2.0
+markitdown==0.1.3
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mechanize==0.4.10
+mpmath==1.3.0
+msal==1.33.0
+msal-extensions==1.3.1
+msgpack==1.1.1
+networkx==3.5
+ninja==1.13.0
+numpy==2.2.6
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.3
+nvidia-nvjitlink-cu12==12.8.93
+nvidia-nvtx-cu12==12.8.90
+ocrmypdf==16.11.0
+olefile==0.47
+onnxruntime==1.22.1
+opencv-python-headless==4.12.0.88
+openpyxl==3.1.5
+packaging==25.0
+pandas==2.3.2
+pdfminer.six==20250506
+pi_heif==1.1.0
+pikepdf==9.11.0
+pillow==11.3.0
+pluggy==1.6.0
+protobuf==6.32.1
+pyclipper==1.3.0.post6
+pycparser==2.23
+pydantic==2.11.9
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+PyJWT==2.10.1
+pypdf==6.0.0
+PyPDF2==3.0.1
+PyQt6==6.9.1
+PyQt6-Qt6==6.9.2
+PyQt6-WebEngine==6.9.0
+PyQt6-WebEngine-Qt6==6.9.2
+PyQt6_sip==13.10.2
+pytesseract==0.3.13
+python-bidi==0.6.6
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-multipart==0.0.20
+python-pptx==1.0.2
+pytz==2025.2
+PyYAML==6.0.2
+regex==2025.9.1
+requests==2.32.5
+rich==14.1.0
+scikit-image==0.25.2
+scipy==1.16.2
+setuptools==80.9.0
+shapely==2.1.1
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.8
+SpeechRecognition==3.14.3
+SQLAlchemy==2.0.43
+standard-aifc==3.13.0
+standard-chunk==3.13.0
+starlette==0.47.3
+sympy==1.14.0
+tifffile==2025.9.9
+tokenizers==0.22.0
+torch==2.8.0
+torchvision==0.23.0
+tqdm==4.67.1
+triton==3.4.0
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+uvloop==0.21.0
+watchfiles==1.1.0
+webencodings==0.5.1
+websockets==15.0.1
+Werkzeug==3.1.3
+wrapt==1.17.3
+xlrd==2.0.2
+xlsxwriter==3.2.9
+youtube-transcript-api==1.0.3

run.sh (modified, 4 changed lines)

@@ -3,8 +3,8 @@
 echo "Starting DocProcessor with Gunicorn..."
-exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
+exec gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
 echo "Done"
 echo "Starting huey..."
-exec huey_consumer.py main.huey -w 2 &
+exec huey_consumer.py main.huey -w 4 &
 echo "Done"

settings.yml.default (new file, +272 lines)

@@ -0,0 +1,272 @@
# settings.yml
# General application settings
app_settings:
  max_file_size_mb: 2000 # Maximum upload size in Megabytes
  # Allowed extensions (list will be normalized to a set by the server)
  allowed_all_extensions:
    - .pdf
    - .ps
    - .eps
    - .png
    - .jpg
    - .jpeg
    - .tiff
    - .tif
    - .gif
    - .bmp
    - .webp
    - .svg
    - .jxl
    - .avif
    - .ppm
    - .mp3
    - .m4a
    - .ogg
    - .flac
    - .opus
    - .wav
    - .aac
    - .mp4
    - .mkv
    - .mov
    - .webm
    - .avi
    - .flv
    - .md
    - .txt
    - .html
    - .docx
    - .odt
    - .rst
    - .epub
    - .mobi
    - .azw3
    - .pptx
    - .xlsx

# Settings for Optical Character Recognition (OCR) tasks
ocr_settings:
  ocrmypdf:
    deskew: true
    clean: true
    optimize: 1
    force_ocr: true

# Settings for audio transcription tasks
transcription_settings:
  whisper:
    compute_type: "int8"
    allowed_models:
      - "tiny"
      - "base"
      - "small"
      - "medium"
      - "large-v3"
      - "distil-large-v2"
    # optional: specify device if workers have GPU (e.g. "cuda" or "cpu")
    # device: "cpu"

# --- Conversion Tool Definitions ---
# The server validates placeholders against an allowlist:
# {input}, {output}, {output_dir}, {output_ext}, {quality}, {speed},
# {preset}, {device}, {dpi}, {samplerate}, {bitdepth}, {filter}
conversion_tools:
  libreoffice:
    name: "LibreOffice"
    # Use {filter} so we can supply LibreOffice export filters like "txt:Text"
    command_template: 'libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}'
    timeout: 120
    # Optional: per-format export filter. If missing for a format, server falls back to the extension.
    filters:
      pdf: "pdf"
      docx: "docx"
      odt: "odt"
      html: "html"
      rtf: "rtf"
      txt: "txt:Text"
      xml: "xml"
      epub: "epub"
      xlsx: "xlsx"
      ods: "ods"
      csv: "csv:Text"
      pptx: "pptx"
      odp: "odp"
      svg: "svg"
    formats:
      pdf: "PDF"
      docx: "Word Document"
      odt: "OpenDocument Text"
      html: "HTML"
      rtf: "Rich Text Format"
      txt: "Plain Text"
      xml: "Word 2003 XML"
      epub: "EPUB"
      xlsx: "Excel Spreadsheet"
      ods: "OpenDocument Spreadsheet"
      csv: "CSV"
      pptx: "PowerPoint Presentation"
      odp: "OpenDocument Presentation"
      svg: "SVG"
  pandoc:
    name: "Pandoc"
    command_template: 'pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex'
    timeout: 60
    formats:
      docx: "Word Document"
      odt: "OpenDocument Text"
      pdf: "PDF"
      rtf: "Rich Text Format"
      txt: "Plain Text"
      tex: "LaTeX"
      man: "Groff Man Page"
      epub: "EPUB v3 Book"
      epub2: "EPUB v2 Book"
      html: "HTML"
      html5: "HTML5"
      pptx: "PowerPoint Presentation"
      beamer: "Beamer PDF Slides"
      slidy: "Slidy HTML Slides"
      md: "Markdown"
      rst: "reStructuredText"
      jira: "Jira Wiki Markup"
      mediawiki: "MediaWiki Markup"
  ghostscript_pdf:
    name: "Ghostscript (PDF)"
    # placeholders used: {preset}, {output}, {input}
    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
    timeout: 60
    formats:
      screen: "PDF (Optimized for Screen)"
      ebook: "PDF (Optimized for Ebooks)"
      printer: "PDF (Optimized for Print)"
      archive: "PDF/A (for Archiving)"
  ghostscript_image:
    name: "Ghostscript (Image)"
    # placeholders used: {device}, {dpi}, {output}, {input}
    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
    timeout: 60
    formats:
      jpeg_72: "JPEG Image (72 DPI)"
      jpeg_300: "JPEG Image (300 DPI)"
      png16m_150: "PNG Image (150 DPI)"
      png16m_300: "PNG Image (300 DPI)"
      tiff24nc_300: "TIFF Image (300 DPI)"
      tiff24nc_600: "TIFF Image (600 DPI)"
  calibre:
    name: "Calibre (ebook-convert)"
    command_template: 'ebook-convert {input} {output}'
    timeout: 60
    formats:
      epub: "EPUB"
      mobi: "MOBI"
      azw3: "Amazon Kindle"
      pdf: "PDF"
      docx: "Word Document"
  ffmpeg:
    name: "FFmpeg"
    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
    timeout: 300
    formats:
      mp4: "MP4 Video"
      mkv: "MKV Video"
      mov: "MOV Video"
      webm: "WebM Video"
      mp3: "MP3 Audio"
      wav: "WAV Audio"
      flac: "FLAC Audio"
      gif: "Animated GIF"
  vips:
    name: "VIPS"
    command_template: 'vips copy {input} {output}[Q=90]'
    timeout: 60
    formats:
      jpg: "JPEG Image (Q90)"
      png: "PNG Image"
      webp: "WebP Image (Q90)"
      tiff: "TIFF Image"
      avif: "AVIF Image"
  graphicsmagick:
    name: "GraphicsMagick"
    command_template: 'gm convert {input} -quality 90 {output}'
    timeout: 60
    formats:
      jpg: "JPEG Image (Q90)"
      png: "PNG Image"
      webp: "WebP Image"
      tiff: "TIFF Image"
      pdf: "PDF from Images"
  inkscape:
    name: "Inkscape"
    command_template: 'inkscape {input} --export-filename={output}'
    timeout: 30
    formats:
      svg: "SVG (Plain)"
      png: "PNG Image (96 DPI)"
      pdf: "PDF Document"
  libjxl:
    name: "libjxl (cjxl)"
    command_template: 'cjxl {input} {output} -q 90'
    timeout: 30
    formats:
      jxl: "JPEG XL (Q90)"
  resvg:
    name: "resvg"
    command_template: 'resvg {input} {output}'
    timeout: 30
    formats:
      png: "PNG from SVG"
  potrace:
    name: "Potrace"
    command_template: 'potrace {input} --svg -o {output}'
    timeout: 30
    formats:
      svg: "SVG from Bitmap"
  markitdown:
    name: "Markitdown"
    command_template: 'markitdown {input} -o {output}'
    timeout: 30
    formats:
      md: "Markdown from Everything!"
  pngquant:
    name: "pngquant"
    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
    timeout: 30
    formats:
      png_hq: "PNG (High Quality Compression)"
      png_mq: "PNG (Medium Quality Compression)"
      png_fast: "PNG (Fast Compression)"
  sox:
    name: "SoX Audio Converter"
    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
    timeout: 120
    formats:
      wav_48k_24b: "WAV (48kHz, 24-bit)"
      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
      flac_48k_24b: "FLAC (48kHz, 24-bit)"
      flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
      ogg_32k_16b: "Ogg Vorbis (32kHz)"
      ogg_16k_16b: "Ogg Vorbis (16kHz, Voice)"
  mozjpeg:
    name: "MozJPEG"
    command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
    timeout: 30
    formats:
      jpg_q85: "JPEG (High Quality)"
      jpg_q75: "JPEG (Web Quality)"
      jpg_q60: "JPEG (Aggressive Compression)"
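A short Python sketch (not part of the repo) of how the worker consumes a tool entry, mirroring run_conversion_task's LibreOffice branch and the filters table above:

tool_config = {
    "command_template": "libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}",
    "filters": {"txt": "txt:Text", "csv": "csv:Text"},
}
target_ext = "txt"
# fall back to the bare extension when no explicit filter is configured
filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)

mapping = {"input": "/app/uploads/notes_ab12.docx", "output_dir": "/app/processed", "filter": filter_val}
print(tool_config["command_template"].format(**mapping))
# libreoffice --headless --convert-to txt:Text --outdir /app/processed /app/uploads/notes_ab12.docx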

settings.yml (modified)

@@ -1,179 +1,242 @@
-# settings.yml
-# General application settings
 app_settings:
-  max_file_size_mb: 2000 # Maximum upload size in Megabytes
+  max_file_size_mb: '2000'
+  allowed_all_extensions:
+  - .pdf
+  - .ps
+  - .eps
+  - .png
+  - .jpg
+  - .jpeg
+  - .tiff
+  - .tif
+  - .gif
+  - .bmp
+  - .webp
+  - .svg
+  - .jxl
+  - .avif
+  - .ppm
+  - .mp3
+  - .m4a
+  - .ogg
+  - .flac
+  - .opus
+  - .wav
+  - .aac
+  - .mp4
+  - .mkv
+  - .mov
+  - .webm
+  - .avi
+  - .flv
+  - .md
+  - .txt
+  - .html
+  - .docx
+  - .odt
+  - .rst
+  - .epub
+  - .mobi
+  - .azw3
+  - .pptx
+  - .xlsx
-# Settings for Optical Character Recognition (OCR) tasks
 ocr_settings:
   ocrmypdf:
     deskew: true
     clean: true
     optimize: 1
     force_ocr: true
-# Settings for audio transcription tasks
 transcription_settings:
   whisper:
-    compute_type: "int8"
+    compute_type: int8
     allowed_models:
-    - "tiny"
-    - "base"
-    - "small"
-    - "medium"
-    - "large-v3"
-    - "distil-large-v2"
+    - tiny
+    - base
+    - small
+    - medium
+    - large-v3
+    - distil-large-v2
-# --- Conversion Tool Definitions ---
-# Each tool's command is a single string. The backend uses shlex to parse it,
-# so you can use quotes for arguments with spaces.
-# Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc.
 conversion_tools:
   libreoffice:
-    name: "LibreOffice"
-    command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}'
+    name: LibreOffice
+    command_template: libreoffice --headless --convert-to {filter} --outdir {output_dir}
+      {input}
+    timeout: 300
+    filters:
+      pdf: pdf
+      docx: docx
+      odt: odt
+      html: html
+      rtf: rtf
+      txt: txt:Text
+      xml: xml
+      epub: epub
+      xlsx: xlsx
+      ods: ods
+      csv: csv:Text
+      pptx: pptx
+      odp: odp
+      svg: svg
     formats:
-      pdf: "PDF"
-      docx: "Word Document"
-      odt: "OpenDocument Text"
-      html: "HTML"
-      rtf: "Rich Text Format"
-      txt: "Plain Text"
-      xml: "Word 2003 XML"
-      epub: "EPUB"
-      xlsx: "Excel Spreadsheet"
-      ods: "OpenDocument Spreadsheet"
-      csv: "CSV"
-      pptx: "PowerPoint Presentation"
-      odp: "OpenDocument Presentation"
-      svg: "SVG"
+      pdf: PDF
+      docx: Word Document
+      odt: OpenDocument Text
+      html: HTML
+      rtf: Rich Text Format
+      txt: Plain Text
+      xml: Word 2003 XML
+      epub: EPUB
+      xlsx: Excel Spreadsheet
+      ods: OpenDocument Spreadsheet
+      csv: CSV
+      pptx: PowerPoint Presentation
+      odp: OpenDocument Presentation
+      svg: SVG
   pandoc:
-    name: "Pandoc"
-    command_template: 'pandoc --standalone {input} -o {output}'
+    name: Pandoc
+    command_template: pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex
+    timeout: 300
     formats:
-      docx: "Word Document"
-      odt: "OpenDocument Text"
-      pdf: "PDF"
-      rtf: "Rich Text Format"
-      txt: "Plain Text"
-      tex: "LaTeX"
-      man: "Groff Man Page"
-      epub: "EPUB v3 Book"
-      epub2: "EPUB v2 Book"
-      html: "HTML"
-      html5: "HTML5"
-      pptx: "PowerPoint Presentation"
-      beamer: "Beamer PDF Slides"
-      slidy: "Slidy HTML Slides"
-      md: "Markdown"
-      rst: "reStructuredText"
-      jira: "Jira Wiki Markup"
-      mediawiki: "MediaWiki Markup"
+      docx: Word Document
+      odt: OpenDocument Text
+      pdf: PDF
+      rtf: Rich Text Format
+      txt: Plain Text
+      tex: LaTeX
+      man: Groff Man Page
+      epub: EPUB v3 Book
+      epub2: EPUB v2 Book
+      html: HTML
+      html5: HTML5
+      pptx: PowerPoint Presentation
+      beamer: Beamer PDF Slides
+      slidy: Slidy HTML Slides
+      md: Markdown
+      rst: reStructuredText
+      jira: Jira Wiki Markup
+      mediawiki: MediaWiki Markup
   ghostscript_pdf:
-    name: "Ghostscript (PDF)"
-    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
+    name: Ghostscript (PDF)
+    command_template: gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET
+      -dBATCH {preset} -sOutputFile={output} {input}
+    timeout: 60
     formats:
-      screen: "PDF (Optimized for Screen)"
-      ebook: "PDF (Optimized for Ebooks)"
-      printer: "PDF (Optimized for Print)"
-      archive: "PDF/A (for Archiving)"
+      screen: PDF (Optimized for Screen)
+      ebook: PDF (Optimized for Ebooks)
+      printer: PDF (Optimized for Print)
+      archive: PDF/A (for Archiving)
   ghostscript_image:
-    name: "Ghostscript (Image)"
-    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
+    name: Ghostscript (Image)
+    command_template: gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output}
+      {input}
+    timeout: 60
     formats:
-      jpeg_72: "JPEG Image (72 DPI)"
-      jpeg_300: "JPEG Image (300 DPI)"
-      png16m_150: "PNG Image (150 DPI)"
-      png16m_300: "PNG Image (300 DPI)"
-      tiff24nc_300: "TIFF Image (300 DPI)"
-      tiff24nc_600: "TIFF Image (600 DPI)"
+      jpeg_72: JPEG Image (72 DPI)
+      jpeg_300: JPEG Image (300 DPI)
+      png16m_150: PNG Image (150 DPI)
+      png16m_300: PNG Image (300 DPI)
+      tiff24nc_300: TIFF Image (300 DPI)
+      tiff24nc_600: TIFF Image (600 DPI)
   calibre:
-    name: "Calibre (ebook-convert)"
-    command_template: 'ebook-convert {input} {output}'
+    name: Calibre (ebook-convert)
+    command_template: ebook-convert {input} {output}
+    timeout: 600
     formats:
-      epub: "EPUB"
-      mobi: "MOBI"
-      azw3: "Amazon Kindle"
-      pdf: "PDF"
-      docx: "Word Document"
+      epub: EPUB
+      mobi: MOBI
+      azw3: Amazon Kindle
+      pdf: PDF
+      docx: Word Document
   ffmpeg:
-    name: "FFmpeg"
-    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
+    name: FFmpeg
+    command_template: ffmpeg -i {input} -y -preset medium {output}
+    timeout: 600
     formats:
-      mp4: "MP4 Video"
-      mkv: "MKV Video"
-      mov: "MOV Video"
-      webm: "WebM Video"
-      mp3: "MP3 Audio"
-      wav: "WAV Audio"
-      flac: "FLAC Audio"
-      gif: "Animated GIF"
+      mp4: MP4 Video
+      mkv: MKV Video
+      mov: MOV Video
+      webm: WebM Video
+      mp3: MP3 Audio
+      wav: WAV Audio
+      flac: FLAC Audio
+      gif: Animated GIF
   vips:
-    name: "VIPS"
-    command_template: 'vips copy {input} {output}[Q=90]'
+    name: VIPS
+    command_template: vips copy {input} {output}[Q=90]
+    timeout: 60
     formats:
-      jpg: "JPEG Image (Q90)"
-      png: "PNG Image"
-      webp: "WebP Image (Q90)"
-      tiff: "TIFF Image"
-      avif: "AVIF Image"
+      jpg: JPEG Image (Q90)
+      png: PNG Image
+      webp: WebP Image (Q90)
+      tiff: TIFF Image
+      avif: AVIF Image
   graphicsmagick:
-    name: "GraphicsMagick"
-    command_template: 'gm convert {input} -quality 90 {output}'
+    name: GraphicsMagick
+    command_template: gm convert {input} -quality 90 {output}
+    timeout: 60
     formats:
-      jpg: "JPEG Image (Q90)"
-      png: "PNG Image"
-      webp: "WebP Image"
-      tiff: "TIFF Image"
-      pdf: "PDF from Images"
+      jpg: JPEG Image (Q90)
+      png: PNG Image
+      webp: WebP Image
+      tiff: TIFF Image
+      pdf: PDF from Images
   inkscape:
-    name: "Inkscape"
-    command_template: 'inkscape {input} --export-filename={output}'
+    name: Inkscape
+    command_template: inkscape {input} --export-filename={output}
+    timeout: 30
     formats:
-      svg: "SVG (Plain)"
-      png: "PNG Image (96 DPI)"
-      pdf: "PDF Document"
+      svg: SVG (Plain)
+      png: PNG Image (96 DPI)
+      pdf: PDF Document
   libjxl:
-    name: "libjxl (cjxl)"
-    command_template: 'cjxl {input} {output} -q 90'
+    name: libjxl (cjxl)
+    command_template: cjxl {input} {output} -q 90
+    timeout: 30
     formats:
-      jxl: "JPEG XL (Q90)"
+      jxl: JPEG XL (Q90)
   resvg:
-    name: "resvg"
-    command_template: 'resvg {input} {output}'
+    name: resvg
+    command_template: resvg {input} {output}
+    timeout: 30
     formats:
-      png: "PNG from SVG"
+      png: PNG from SVG
   potrace:
-    name: "Potrace"
-    command_template: 'potrace {input} --svg -o {output}'
+    name: Potrace
+    command_template: potrace {input} --svg -o {output}
+    timeout: 30
     formats:
-      svg: "SVG from Bitmap"
+      svg: SVG from Bitmap
   markitdown:
-    name: "Markitdown"
-    command_template: 'markitdown {input} -o {output}'
+    name: Markitdown
+    command_template: markitdown {input} -o {output}
+    timeout: 300
     formats:
-      md: "Markdown from Everything!"
+      md: Markdown from Everything!
   pngquant:
-    name: "pngquant"
-    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
+    name: pngquant
+    command_template: pngquant --quality={quality} --speed {speed} --force --output
+      {output} {input}
+    timeout: 300
     formats:
-      png_hq: "PNG (High Quality Compression)"
-      png_mq: "PNG (Medium Quality Compression)"
-      png_fast: "PNG (Fast Compression)"
+      png_hq: PNG (High Quality Compression)
+      png_mq: PNG (Medium Quality Compression)
+      png_fast: PNG (Fast Compression)
   sox:
-    name: "SoX Audio Converter"
-    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
+    name: SoX Audio Converter
+    command_template: sox {input} -r {samplerate} -b {bitdepth} {output}
+    timeout: 600
     formats:
-      wav_48k_24b: "WAV (48kHz, 24-bit)"
-      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
+      wav_48k_24b: WAV (48kHz, 24-bit)
+      wav_44k_16b: WAV (CD, 44.1kHz, 16-bit)
flac_48k_24b: "FLAC (48kHz, 24-bit)" flac_48k_24b: FLAC (48kHz, 24-bit)
flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)" flac_44k_16b: FLAC (CD, 44.1kHz, 16-bit)
ogg_32k: "Ogg Vorbis (32kHz)" ogg_32k_16b: Ogg Vorbis (32kHz)
ogg_16k: "Ogg Vorbis (16kHz, Voice)" ogg_16k_16b: Ogg Vorbis (16kHz, Voice)
mozjpeg: mozjpeg:
name: "MozJPEG" name: MozJPEG
command_template: 'cjpeg -quality {quality} -outfile {output} {input}' command_template: cjpeg -quality {quality} -outfile {output} {input}
timeout: 30
formats: formats:
jpg_q85: "JPEG (High Quality)" jpg_q85: JPEG (High Quality)
jpg_q75: "JPEG (Web Quality)" jpg_q75: JPEG (Web Quality)
jpg_q60: "JPEG (Aggressive Compression)" jpg_q60: JPEG (Aggressive Compression)
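Note on the settings above: each tool entry now carries a per-tool timeout, and LibreOffice gains a filters map that supplies the {filter} placeholder (e.g. txt:Text). A minimal sketch of how a worker could expand a command_template and enforce that timeout, assuming the YAML has been loaded into a dict — the run_conversion helper and its mapping argument are illustrative, not the app's actual code:

# Hypothetical sketch, not the app's implementation.
import shlex
import subprocess

def run_conversion(tool: dict, mapping: dict) -> None:
    # Split the template first so paths containing spaces stay single
    # arguments, then substitute {input}, {output_dir}, {filter}, ... per token.
    args = [token.format_map(mapping) for token in shlex.split(tool["command_template"])]
    subprocess.run(args, check=True, timeout=tool.get("timeout", 300))

# e.g. the libreoffice entry, with {filter} drawn from its filters map:
# run_conversion(cfg["conversion_tools"]["libreoffice"],
#                {"filter": "txt:Text", "output_dir": "/tmp/out", "input": "report.docx"})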

static/css/style.css
View File

@@ -1,5 +1,3 @@
:root {
  /* Core */
  --bg-color: #000000;
@@ -449,6 +447,110 @@ button[type="submit"]:disabled {
}

/* --- START: Drag and Drop and Dialog Styles --- */
.drag-overlay {
  position: fixed;
  inset: 0;
  z-index: 9999;
  display: none; /* Hidden by default */
  justify-content: center;
  align-items: center;
  background-color: rgba(0, 0, 0, 0.7);
  backdrop-filter: blur(5px);
}

body.dragging .drag-overlay {
  display: flex; /* Shown when body has .dragging class */
}

.drag-overlay-content {
  border: 3px dashed var(--primary-color);
  border-radius: 12px;
  padding: 2rem 4rem;
  text-align: center;
  background-color: rgba(0, 0, 0, 0.2);
}

.drag-overlay-content p {
  margin: 0;
  font-size: 1.5rem;
  font-weight: 500;
  color: var(--primary-color);
}

.dialog-overlay {
  position: fixed;
  inset: 0;
  z-index: 10000;
  display: none; /* Hidden by default */
  justify-content: center;
  align-items: center;
  background-color: rgba(0, 0, 0, 0.7);
  backdrop-filter: blur(5px);
}

.dialog-overlay.visible {
  display: flex; /* Show when .visible class is added */
}

.dialog-box {
  background: var(--card-bg);
  border: 1px solid var(--border-color);
  border-radius: 8px;
  padding: 1.5rem;
  width: 100%;
  max-width: 450px;
  text-align: center;
  box-shadow: 0 10px 30px rgba(0,0,0,0.5);
}

.dialog-box h2 {
  margin-top: 0;
  font-size: 1.5rem;
}

.dialog-box p {
  color: var(--muted-text);
  margin-bottom: 1.5rem;
}

.dialog-actions {
  display: grid;
  grid-template-columns: 1fr;
  gap: 0.75rem;
  margin-bottom: 1rem;
}

.dialog-actions button {
  display: block;
  width: 100%;
  background: transparent;
  border: 1px solid var(--border-color);
  color: var(--text-color);
  padding: 0.65rem 1rem;
  font-size: 1rem;
  font-weight: 600;
  border-radius: 5px;
  cursor: pointer;
  transition: background-color 0.15s ease, border-color 0.15s ease;
}

.dialog-actions button:hover {
  background: var(--primary-hover);
  border-color: var(--primary-hover);
}

.dialog-secondary-action {
  background-color: transparent !important;
  border: 1px solid var(--border-color) !important;
}

.dialog-secondary-action:hover {
  background-color: rgba(255, 255, 255, 0.05) !important;
}

.dialog-cancel {
  background: none;
  border: none;
  color: var(--muted-text);
  cursor: pointer;
  font-size: 0.9rem;
  padding: 0.5rem;
}

.dialog-cancel:hover {
  color: var(--text-color);
}
/* --- END: Drag and Drop and Dialog Styles --- */
/* Spinner */
.spinner-small {
  border: 3px solid rgba(255,255,255,0.1);
@@ -467,7 +569,6 @@ button[type="submit"]:disabled {
/* Mobile responsive table */
@media (max-width: 768px) {
  .table-wrapper {
    border: none;
    background-color: transparent;
@@ -513,17 +614,17 @@ button[type="submit"]:disabled {
  .cell-value {
    min-width: 0;
    max-width: 20em;
    text-wrap: nowrap;
    overflow: scroll;
  }
  #job-table td[data-label="File"],
  #job-table td[data-label="Task"] {
    overflow: scroll;
    text-overflow: ellipsis;
    text-wrap: nowrap;
    max-width: 100em;
  }
}

static/js/script.js
View File

@@ -1,6 +1,17 @@
document.addEventListener('DOMContentLoaded', () => {
    // --- User Locale and Timezone Detection (Corrected Implementation) ---
    const USER_LOCALE = navigator.language || 'en-US'; // Fallback to en-US
    const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone;
    const DATETIME_FORMAT_OPTIONS = {
        year: 'numeric',
        month: 'short',
        day: 'numeric',
        hour: 'numeric',
        minute: '2-digit',
        timeZone: USER_TIMEZONE,
    };
    console.log(`Using locale: ${USER_LOCALE} and timezone: ${USER_TIMEZONE}`);

    // --- Element Selectors ---
    const jobListBody = document.getElementById('job-list-body');
@@ -11,16 +22,35 @@ document.addEventListener('DOMContentLoaded', () => {
    const audioForm = document.getElementById('audio-form');
    const audioFileInput = document.getElementById('audio-file-input');
    const audioFileName = document.getElementById('audio-file-name');
    const modelSizeSelect = document.getElementById('model-size-select');
    const conversionForm = document.getElementById('conversion-form');
    const conversionFileInput = document.getElementById('conversion-file-input');
    const conversionFileName = document.getElementById('conversion-file-name');
    const outputFormatSelect = document.getElementById('output-format-select');
    // START: Drag and Drop additions
    const dragOverlay = document.getElementById('drag-overlay');
    const actionDialog = document.getElementById('action-dialog');
    const dialogFileCount = document.getElementById('dialog-file-count');
    // Dialog Views
    const dialogInitialView = document.getElementById('dialog-initial-actions');
    const dialogConvertView = document.getElementById('dialog-convert-view');
    // Dialog Buttons
    const dialogConvertBtn = document.getElementById('dialog-action-convert');
    const dialogOcrBtn = document.getElementById('dialog-action-ocr');
    const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe');
    const dialogCancelBtn = document.getElementById('dialog-action-cancel');
    const dialogStartConversionBtn = document.getElementById('dialog-start-conversion');
    const dialogBackBtn = document.getElementById('dialog-back');
    // Dialog Select
    const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select');
    // END: Drag and Drop additions

    let conversionChoices = null;
    let dialogConversionChoices = null; // For the dialog's format selector
    const activePolls = new Map();
    let stagedFiles = null; // To hold files from a drop event

    // --- Main Event Listeners ---
    pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName));
@@ -37,13 +67,222 @@ document.addEventListener('DOMContentLoaded', () => {
            handleCancelJob(jobId);
        }
    });

    // --- Helper Functions ---
    function formatBytes(bytes, decimals = 1) {
        if (!+bytes) return '0 Bytes'; // Handles 0, null, undefined
        const k = 1024;
        const dm = decimals < 0 ? 0 : decimals;
        const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
        const i = Math.floor(Math.log(bytes) / Math.log(k));
        return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
    }

    // --- Core Job Submission Logic (Refactored for reuse) ---
    async function submitJob(endpoint, formData, originalFilename) {
        try {
            const response = await fetch(endpoint, { method: 'POST', body: formData });
            if (!response.ok) {
                const errorData = await response.json();
                throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`);
            }
            const result = await response.json();
            const preliminaryJob = {
                id: result.job_id,
                status: 'pending',
                progress: 0,
                original_filename: originalFilename,
                input_filesize: formData.get('file').size,
                task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
                created_at: new Date().toISOString() // Create preliminary UTC timestamp
            };
            renderJobRow(preliminaryJob);
            startPolling(result.job_id);
        } catch (error) {
            console.error('Error submitting job:', error);
            alert(`Submission failed for ${originalFilename}: ${error.message}`);
        }
    }

    // --- Original Form Submission Handler (Now uses submitJob) ---
    async function handleFormSubmit(event, endpoint, form) {
        event.preventDefault();
        const fileInput = form.querySelector('input[type="file"]');
        if (fileInput.files.length === 0) return;

        const submitButton = form.querySelector('button[type="submit"]');
        submitButton.disabled = true;

        // Convert FileList to an array to loop through it
        const files = Array.from(fileInput.files);

        // Process each file as a separate job
        for (const file of files) {
            const formData = new FormData();
            formData.append('file', file);

            // Append other form data if it exists
            const outputFormat = form.querySelector('select[name="output_format"]');
            if (outputFormat) {
                formData.append('output_format', outputFormat.value);
            }
            const modelSize = form.querySelector('select[name="model_size"]');
            if (modelSize) {
                formData.append('model_size', modelSize.value);
            }

            // Await each job submission to process them sequentially
            await submitJob(endpoint, formData, file.name);
        }

        // Reset the form UI after all jobs have been submitted
        const fileNameDisplay = form.querySelector('.file-name');
        form.reset();
        if (fileNameDisplay) {
            fileNameDisplay.textContent = 'No file chosen';
            fileNameDisplay.title = 'No file chosen';
        }
        if (form.id === 'conversion-form' && conversionChoices) {
            conversionChoices.clearInput();
            conversionChoices.setValue([]);
        }
        submitButton.disabled = false;
    }

    // --- START: Drag and Drop Implementation ---
    function setupDragAndDropListeners() {
        let dragCounter = 0; // Counter to manage enter/leave events reliably

        window.addEventListener('dragenter', (e) => {
            e.preventDefault();
            dragCounter++;
            document.body.classList.add('dragging');
        });

        window.addEventListener('dragleave', (e) => {
            e.preventDefault();
            dragCounter--;
            if (dragCounter === 0) {
                document.body.classList.remove('dragging');
            }
        });

        window.addEventListener('dragover', (e) => {
            e.preventDefault(); // This is necessary to allow a drop
        });

        window.addEventListener('drop', (e) => {
            e.preventDefault();
            dragCounter = 0; // Reset counter
            document.body.classList.remove('dragging');

            // Only handle the drop if it's on our designated overlay
            if (e.target === dragOverlay || dragOverlay.contains(e.target)) {
                const files = e.dataTransfer.files;
                if (files && files.length > 0) {
                    stagedFiles = files;
                    showActionDialog();
                }
            }
        });
    }

    function showActionDialog() {
        dialogFileCount.textContent = stagedFiles.length;

        // Clone options from main form's select to the dialog's select
        dialogOutputFormatSelect.innerHTML = outputFormatSelect.innerHTML;

        // Clean up previous Choices.js instance if it exists
        if (dialogConversionChoices) {
            dialogConversionChoices.destroy();
        }

        // Initialize a new Choices.js instance for the dialog
        dialogConversionChoices = new Choices(dialogOutputFormatSelect, {
            searchEnabled: true,
            itemSelectText: 'Select',
            shouldSort: false,
            placeholder: true,
            placeholderValue: 'Select a format...',
        });

        // Ensure the initial view is shown
        dialogInitialView.style.display = 'grid';
        dialogConvertView.style.display = 'none';
        actionDialog.classList.add('visible');
    }

    function closeActionDialog() {
        actionDialog.classList.remove('visible');
        stagedFiles = null;

        // Important: Destroy the Choices instance to prevent memory leaks
        if (dialogConversionChoices) {
            // Explicitly hide the dropdown before destroying
            dialogConversionChoices.hideDropdown();
            dialogConversionChoices.destroy();
            dialogConversionChoices = null;
        }
    }

    // --- Dialog Button and Action Listeners ---
    dialogConvertBtn.addEventListener('click', () => {
        // Switch to the conversion view
        dialogInitialView.style.display = 'none';
        dialogConvertView.style.display = 'block';
    });

    dialogBackBtn.addEventListener('click', () => {
        // Switch back to the initial view
        dialogInitialView.style.display = 'grid';
        dialogConvertView.style.display = 'none';
    });

    dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('convert'));
    dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr'));
    dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcribe'));
    dialogCancelBtn.addEventListener('click', closeActionDialog);

    function handleDialogAction(action) {
        if (!stagedFiles) return;

        let endpoint = '';
        const formDataArray = [];

        for (const file of stagedFiles) {
            const formData = new FormData();
            formData.append('file', file);

            if (action === 'convert') {
                const selectedFormat = dialogConversionChoices.getValue(true);
                if (!selectedFormat) {
                    alert('Please select a format to convert to.');
                    return;
                }
                formData.append('output_format', selectedFormat);
                endpoint = '/convert-file';
            } else if (action === 'ocr') {
                endpoint = '/ocr-pdf';
            } else if (action === 'transcribe') {
                formData.append('model_size', modelSizeSelect.value);
                endpoint = '/transcribe-audio';
            }

            formDataArray.push({ formData, name: file.name });
        }

        formDataArray.forEach(item => {
            submitJob(endpoint, item.formData, item.name);
        });

        closeActionDialog();
    }
    // --- END: Drag and Drop Implementation ---

    function initializeConversionSelector() {
        if (conversionChoices) {
            conversionChoices.destroy();
        }
        conversionChoices = new Choices(outputFormatSelect, {
            searchEnabled: true,
            itemSelectText: 'Select',
@@ -65,7 +304,7 @@ document.addEventListener('DOMContentLoaded', () => {
            for (const formatKey in tool.formats) {
                group.choices.push({
                    value: `${toolKey}_${formatKey}`,
                    label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})`
                });
            }
            choicesArray.push(group);
@@ -73,58 +312,23 @@ document.addEventListener('DOMContentLoaded', () => {
        conversionChoices.setChoices(choicesArray, 'value', 'label', true);
    }

    function updateFileName(input, nameDisplay) {
        const numFiles = input.files.length;
        let displayText = 'No file chosen';
        let displayTitle = 'No file chosen';

        if (numFiles === 1) {
            displayText = input.files[0].name;
            displayTitle = input.files[0].name;
        } else if (numFiles > 1) {
            displayText = `${numFiles} files selected`;
            // Create a title attribute to show all filenames on hover
            displayTitle = Array.from(input.files).map(file => file.name).join(', ');
        }
        nameDisplay.textContent = displayText;
        nameDisplay.title = displayTitle;
    }

    async function handleCancelJob(jobId) {
        if (!confirm('Are you sure you want to cancel this job?')) return;
        try {
@@ -161,7 +365,7 @@ document.addEventListener('DOMContentLoaded', () => {
            }
        } catch (error) {
            console.error("Couldn't load job history:", error);
            jobListBody.innerHTML = '<tr><td colspan="6" style="text-align: center;">Could not load job history.</td></tr>';
        }
    }
@@ -214,7 +418,12 @@ document.addEventListener('DOMContentLoaded', () => {
            taskTypeLabel = 'Conversion';
        }

        // --- CORRECTED DATE FORMATTING ---
        // Takes the UTC string from the server (or the preliminary job)
        // and formats it using the user's detected locale and timezone.
        const submittedDate = new Date(job.created_at);
        const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS);

        let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
        if (job.status === 'processing') {
            const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
@@ -233,9 +442,21 @@ document.addEventListener('DOMContentLoaded', () => {
            actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`;
        }

        // --- File Size Logic ---
        let fileSizeHtml = '<span>-</span>';
        if (job.input_filesize) {
            let sizeString = formatBytes(job.input_filesize);
            if (job.status === 'completed' && job.output_filesize) {
                sizeString += ` → ${formatBytes(job.output_filesize)}`;
            }
            fileSizeHtml = `<span class="cell-value">${sizeString}</span>`;
        }

        const escapedFilename = job.original_filename ? job.original_filename.replace(/</g, "&lt;").replace(/>/g, "&gt;") : "No filename";

        row.innerHTML = `
            <td data-label="File"><span class="cell-value" title="${escapedFilename}">${escapedFilename}</span></td>
            <td data-label="File Size">${fileSizeHtml}</td>
            <td data-label="Task"><span class="cell-value">${taskTypeLabel}</span></td>
            <td data-label="Submitted"><span class="cell-value">${formattedDate}</span></td>
            <td data-label="Status"><span class="cell-value">${statusHtml}</span></td>
@@ -246,4 +467,5 @@ document.addEventListener('DOMContentLoaded', () => {
    // --- Initial Load ---
    initializeConversionSelector();
    loadInitialJobs();
    setupDragAndDropListeners();
});
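
The date handling above assumes the backend hands out created_at as a UTC ISO-8601 string, which new Date(...) then renders in the user's locale and timezone. A minimal sketch of that server-side contract, assuming a Pydantic v2 response model — JobOut and its fields are illustrative, not the app's exact schema:

# Hypothetical sketch of the serialization side.
from datetime import datetime, timezone
from pydantic import BaseModel, field_serializer

class JobOut(BaseModel):
    id: str
    created_at: datetime

    @field_serializer('created_at')
    def serialize_created_at(self, dt: datetime) -> str:
        # Treat naive DB timestamps as UTC and emit an offset-aware ISO string
        # so the browser parses it unambiguously.
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc).isoformat()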

View File

@@ -27,7 +27,7 @@
            <fieldset>
                <legend><h2>File Conversion</h2></legend>
                <div class="file-input-wrapper">
                    <input type="file" name="file" id="conversion-file-input" required multiple>
                    <label for="conversion-file-input" class="file-input-label">Choose File...</label>
                    <span id="conversion-file-name" class="file-name">No file chosen</span>
                </div>
@@ -45,7 +45,7 @@
            <fieldset>
                <legend><h2>PDF OCR</h2></legend>
                <div class="file-input-wrapper">
                    <input type="file" name="file" id="pdf-file-input" accept=".pdf" required multiple>
                    <label for="pdf-file-input" class="file-input-label">Choose PDF...</label>
                    <span id="pdf-file-name" class="file-name">No file chosen</span>
                </div>
@@ -59,7 +59,7 @@
            <fieldset>
                <legend><h2>Transcribe Audio</h2></legend>
                <div class="file-input-wrapper">
                    <input type="file" name="file" id="audio-file-input" accept="audio/*" required multiple>
                    <label for="audio-file-input" class="file-input-label">Choose Audio...</label>
                    <span id="audio-file-name" class="file-name">No file chosen</span>
                </div>
@@ -87,6 +87,7 @@
                <thead>
                    <tr>
                        <th>File</th>
                        <th>File Size</th>
                        <th>Task</th>
                        <th>Submitted</th>
                        <th>Status</th>
@@ -100,7 +101,37 @@
            </section>
        </main>
    </div>

    <div id="drag-overlay" class="drag-overlay">
        <div class="drag-overlay-content">
            <p>Drop files anywhere to begin</p>
        </div>
    </div>

    <div id="action-dialog" class="dialog-overlay">
        <div class="dialog-box">
            <h2>Choose Action</h2>
            <p><span id="dialog-file-count"></span> file(s) dropped. What would you like to do?</p>
            <div id="dialog-initial-actions" class="dialog-actions">
                <button id="dialog-action-convert">Convert</button>
                <button id="dialog-action-ocr">OCR</button>
                <button id="dialog-action-transcribe">Transcribe</button>
            </div>
            <div id="dialog-convert-view" style="display: none;">
                <div class="form-control" style="text-align: left; margin-bottom: 1rem;">
                    <label for="dialog-output-format-select">Convert To</label>
                    <select id="dialog-output-format-select" required></select>
                </div>
                <div class="dialog-actions">
                    <button id="dialog-start-conversion">Start Conversion</button>
                    <button id="dialog-back" class="dialog-secondary-action">Back</button>
                </div>
            </div>
            <button id="dialog-action-cancel" class="dialog-cancel">Cancel</button>
        </div>
    </div>
    <script>
        window.APP_CONFIG = {
            conversionTools: {{ conversion_tools | tojson }}