Drag and Drop

2025-09-17 18:45:55 +00:00
parent 2115238217
commit 20e41b67a7
10 changed files with 1358 additions and 379 deletions
--- a/42
+++ b/42
@@ -0,0 +1,42 @@
+# Dockerfile
+FROM python:3.13.7-slim
+
+# System tools for OCR, document/image/media conversion and LaTeX; the apt lists are removed in the same layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    tesseract-ocr \
+    ghostscript \
+    poppler-utils \
+    libreoffice \
+    imagemagick \
+    graphicsmagick \
+    libvips-tools \
+    ffmpeg \
+    libheif-examples \
+    inkscape \
+    calibre \
+    build-essential \
+    pkg-config \
+    git \
+    curl \
+    texlive \
+    texlive-latex-extra \
+    texlive-xetex \
+    && rm -rf /var/lib/apt/lists/*
+
+
+
+# Set working directory inside the container
+WORKDIR /app
+
+# Copy requirements and install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the app
+COPY . .
+
+# Expose the app port
+EXPOSE 8000
+RUN chmod +x run.sh
+# Command to run when container starts
+CMD ["./run.sh"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,10 @@
+version: "3.9"
+services:
+  web:
+    build: .
+    ports:
+      - "5000:5000"  # NOTE(review): the Dockerfile in this commit EXPOSEs 8000 — confirm the port run.sh binds
+    volumes:
+      - .:/app  # optional: mount code for live changes
+    environment:
+      - FLASK_ENV=development  # NOTE(review): main.py builds a FastAPI app — confirm this variable is still read
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ import uuid
 import shlex
 import yaml
 from contextlib import asynccontextmanager
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Any

@@ -21,17 +21,21 @@ from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from huey import SqliteHuey
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, field_serializer # MODIFIED: Import field_serializer
 from sqlalchemy import (Column, DateTime, Integer, String, Text,
                        create_engine, delete, event)
+from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from sqlalchemy.pool import NullPool
 from string import Formatter
-from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from werkzeug.utils import secure_filename
+from typing import List as TypingList

 # --------------------------------------------------------------------------------
 # --- 1. CONFIGURATION
 # --------------------------------------------------------------------------------
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+

 class AppPaths(BaseModel):
    BASE_DIR: Path = Path(__file__).resolve().parent
@@ -43,30 +47,46 @@ class AppPaths(BaseModel):

 PATHS = AppPaths()
 APP_CONFIG: Dict[str, Any] = {}
+PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
+PATHS.PROCESSED_DIR.mkdir(exist_ok=True)

 def load_app_config():
+    """Populate the global APP_CONFIG from settings.yml; fall back to built-in defaults if the file is missing or not valid YAML."""
     global APP_CONFIG
     try:
-        with open(PATHS.SETTINGS_FILE, 'r') as f:
-            APP_CONFIG = yaml.safe_load(f)
-        APP_CONFIG['app_settings']['max_file_size_bytes'] = APP_CONFIG['app_settings']['max_file_size_mb'] * 1024 * 1024
-        allowed_extensions = {
-            ".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif",
-            ".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg",
-            ".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi",
-            ".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi",
-            ".azw3", ".pptx", ".xlsx"
+        with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
+            cfg_raw = yaml.safe_load(f) or {}
+        # basic defaults
+        defaults = {
+            "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []},
+            "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
+            "conversion_tools": {},
+            "ocr_settings": {"ocrmypdf": {}}
         }
-        APP_CONFIG['app_settings']['allowed_all_extensions'] = allowed_extensions
+        # shallow merge: a top-level section present in the file replaces the default section wholesale
+        cfg = defaults.copy()
+        cfg.update(cfg_raw)
+        # normalize app settings; `or {}` also covers an `app_settings:` key with no value (None)
+        app_settings = cfg.get("app_settings") or {}
+        max_mb = app_settings.get("max_file_size_mb", 100)
+        app_settings["max_file_size_bytes"] = int(max_mb) * 1024 * 1024
+        allowed = app_settings.get("allowed_all_extensions", [])
+        if allowed is None or isinstance(allowed, str):
+            allowed = [allowed] if allowed else []  # a lone string is one extension, not a sequence of characters
+        app_settings["allowed_all_extensions"] = set(allowed)
+        cfg["app_settings"] = app_settings
+        APP_CONFIG = cfg
         logger.info("Successfully loaded settings from settings.yml")
     except (FileNotFoundError, yaml.YAMLError) as e:
-        logger.error(f"Could not load settings.yml: {e}. App may not function correctly.")
-        APP_CONFIG = {}
+        logger.exception(f"Could not load settings.yml: {e}. Using defaults.")
+        APP_CONFIG = {
+            "app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set()},
+            "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
+            "conversion_tools": {},
+            "ocr_settings": {"ocrmypdf": {}}
+        }
+

-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
-PATHS.PROCESSED_DIR.mkdir(exist_ok=True)

 # --------------------------------------------------------------------------------
 # --- 2. DATABASE & Schemas
@@ -77,8 +97,6 @@ engine = create_engine(
    poolclass=NullPool,
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-# THIS IS THE CRITICAL FIX
 Base = declarative_base()

@event.listens_for(engine, "connect")
@@ -102,11 +120,13 @@ class Job(Base):
    progress = Column(Integer, default=0)
    original_filename = Column(String)
    input_filepath = Column(String)
+    input_filesize = Column(Integer, nullable=True)
    processed_filepath = Column(String, nullable=True)
+    output_filesize = Column(Integer, nullable=True)
    result_preview = Column(Text, nullable=True)
    error_message = Column(Text, nullable=True)
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))

 def get_db():
    db = SessionLocal()
@@ -120,6 +140,7 @@ class JobCreate(BaseModel):
    task_type: str
    original_filename: str
    input_filepath: str
+    input_filesize: int | None = None
    processed_filepath: str | None = None

 class JobSchema(BaseModel):
@@ -128,6 +149,8 @@ class JobSchema(BaseModel):
    status: str
    progress: int
    original_filename: str
+    input_filesize: int | None = None
+    output_filesize: int | None = None
    processed_filepath: str | None = None
    result_preview: str | None = None
    error_message: str | None = None
@@ -135,8 +158,14 @@ class JobSchema(BaseModel):
    updated_at: datetime
    model_config = ConfigDict(from_attributes=True)

+    # Emit UTC with exactly one 'Z': aware values are converted to UTC and stripped of their offset; naive values are assumed UTC.
+    @field_serializer('created_at', 'updated_at')
+    def serialize_dt(self, dt: datetime, _info):
+        naive_utc = dt.astimezone(timezone.utc).replace(tzinfo=None) if dt.tzinfo is not None else dt
+        return naive_utc.isoformat() + "Z"
+
 # --------------------------------------------------------------------------------
-# --- 3. CRUD OPERATIONS (No Changes)
+# --- 3. CRUD OPERATIONS
 # --------------------------------------------------------------------------------
 def get_job(db: Session, job_id: str):
    return db.query(Job).filter(Job.id == job_id).first()
@@ -163,80 +192,120 @@ def update_job_status(db: Session, job_id: str, status: str, progress: int = Non
        db.refresh(db_job)
    return db_job

-def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
+def mark_job_as_completed(db: Session, job_id: str, output_filepath_str: str | None = None, preview: str | None = None):
    db_job = get_job(db, job_id)
    if db_job and db_job.status != 'cancelled':
        db_job.status = "completed"
        db_job.progress = 100
        if preview:
            db_job.result_preview = preview.strip()[:2000]
+        if output_filepath_str:
+            try:
+                output_path = Path(output_filepath_str)
+                if output_path.exists():
+                    db_job.output_filesize = output_path.stat().st_size
+            except Exception:
+                logger.exception(f"Could not stat output file {output_filepath_str} for job {job_id}")
        db.commit()
    return db_job

+# ... (The rest of the file is unchanged and remains the same) ...
+
 # --------------------------------------------------------------------------------
 # --- 4. BACKGROUND TASK SETUP
 # --------------------------------------------------------------------------------
 huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH)

-# --- START: NEW WHISPER MODEL CACHING ---
-# This dictionary will live in the memory of the Huey worker process,
-# allowing us to reuse loaded models across tasks.
+# Whisper model cache per worker process
 WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {}

 def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel:
-    """
-    Loads a Whisper model into the cache if not present, and returns the model.
-    This ensures a model is only loaded into memory once per worker process.
-    """
-    if model_size not in WHISPER_MODELS_CACHE:
-        compute_type = whisper_settings.get('compute_type', 'int8')
-        logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory...")
-        model = WhisperModel(model_size, device="cpu", compute_type=compute_type)
-        WHISPER_MODELS_CACHE[model_size] = model
-        logger.info(f"Model '{model_size}' loaded successfully.")
-    else:
+    if model_size in WHISPER_MODELS_CACHE:
        logger.info(f"Found model '{model_size}' in cache. Reusing.")
        return WHISPER_MODELS_CACHE[model_size]
-# --- END: NEW WHISPER MODEL CACHING ---
+    device = whisper_settings.get("device", "cpu")
+    compute_type = whisper_settings.get('compute_type', 'int8')
+    logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory on device={device}...")
+    try:
+        model = WhisperModel(model_size, device=device, compute_type=compute_type)
+    except Exception:
+        logger.exception("Failed to load whisper model")
+        raise
+    WHISPER_MODELS_CACHE[model_size] = model
+    logger.info(f"Model '{model_size}' loaded successfully.")
+    return model

+# Helper: safe run_command (trimmed logs + timeout)
+def run_command(argv: TypingList[str], timeout: int = 300):
+    try:
+        res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        raise Exception(f"Command timed out after {timeout}s")
+    if res.returncode != 0:
+        stderr = (res.stderr or "")[:4000]
+        stdout = (res.stdout or "")[:4000]
+        raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
+    return res
+
+# Helper: validate and build command from template with allowlist
+ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth", "filter"}
+
+def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> TypingList[str]:
+    """
+    Validate placeholders against ALLOWED_VARS and build a safe argv list.
+
+    The template is tokenized with shlex before substitution, so a substituted
+    value containing spaces or quotes (e.g. a path) stays a single argument.
+    Allowed placeholders missing from `mapping` are auto-filled with defaults:
+      - 'filter' -> mapping.get('output_ext', '')
+      - others -> empty string
+    A token that becomes empty after substitution is dropped from the result.
+    Raises ValueError if the template uses a placeholder outside ALLOWED_VARS.
+    """
+    used = {fname for _, fname, _, _ in Formatter().parse(template_str) if fname}
+    bad = used - ALLOWED_VARS
+    if bad:
+        raise ValueError(f"Command template contains disallowed placeholders: {bad}")
+
+    # auto-fill on a copy so the caller's mapping is not mutated
+    safe_mapping = dict(mapping)
+    for name in used.difference(safe_mapping):
+        safe_mapping[name] = safe_mapping.get("output_ext", "") if name == "filter" else ""
+
+    # substitute per token; keep tokens that were already empty in the template
+    tokens = shlex.split(template_str)
+    formatted = [(token, token.format(**safe_mapping)) for token in tokens]
+    return [arg for token, arg in formatted if arg or not token]

@huey.task()
 def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict):
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
-
+        if not job or job.status == 'cancelled':
+            return
        update_job_status(db, job_id, "processing")
-
-        # --- MODIFIED: Use the caching function to get the model ---
        model = get_whisper_model(model_size, whisper_settings)
-
        logger.info(f"Starting transcription for job {job_id}")
        segments, info = model.transcribe(input_path_str, beam_size=5)
-
        full_transcript = []
        for segment in segments:
            job_check = get_job(db, job_id)  # Check for cancellation during long tasks
            if job_check.status == 'cancelled':
                logger.info(f"Job {job_id} cancelled during transcription.")
                return
-
            if info.duration > 0:
                progress = int((segment.end / info.duration) * 100)
                update_job_status(db, job_id, "processing", progress=progress)
-
            full_transcript.append(segment.text.strip())
-
        transcript_text = "\n".join(full_transcript)
-        # write atomically to avoid partial files
+        # atomic write of transcript — keep the real extension and mark tmp in the name
        out_path = Path(output_path_str)
-        tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp")
+        tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
        with tmp_out.open("w", encoding="utf-8") as f:
            f.write(transcript_text)
        tmp_out.replace(out_path)
-
-        mark_job_as_completed(db, job_id, preview=transcript_text)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=transcript_text)
        logger.info(f"Transcription for job {job_id} completed.")
    except Exception:
        logger.exception(f"ERROR during transcription for job {job_id}")
@@ -245,13 +314,13 @@ def run_transcription_task(job_id: str, input_path_str: str, output_path_str: st
        Path(input_path_str).unlink(missing_ok=True)
        db.close()

-# Other tasks remain unchanged
@huey.task()
 def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict):
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
        update_job_status(db, job_id, "processing")
        logger.info(f"Starting PDF OCR for job {job_id}")
        ocrmypdf.ocr(input_path_str, output_path_str,
@@ -263,7 +332,7 @@ def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr
        with open(output_path_str, "rb") as f:
            reader = pypdf.PdfReader(f)
            preview = "\n".join(page.extract_text() or "" for page in reader.pages)
-        mark_job_as_completed(db, job_id, preview=preview)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=preview)
        logger.info(f"PDF OCR for job {job_id} completed.")
    except Exception:
        logger.exception(f"ERROR during PDF OCR for job {job_id}")
@@ -277,13 +346,18 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
        update_job_status(db, job_id, "processing", progress=50)
        logger.info(f"Starting Image OCR for job {job_id}")
        text = pytesseract.image_to_string(Image.open(input_path_str))
-        with open(output_path_str, "w", encoding="utf-8") as f:
+        # atomic write of OCR text
+        out_path = Path(output_path_str)
+        tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")        
+        with tmp_out.open("w", encoding="utf-8") as f:
            f.write(text)
-        mark_job_as_completed(db, job_id, preview=text)
+        tmp_out.replace(out_path)
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=text)
        logger.info(f"Image OCR for job {job_id} completed.")
    except Exception:
        logger.exception(f"ERROR during Image OCR for job {job_id}")
@@ -300,14 +374,18 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
    temp_output_file = None
    try:
        job = get_job(db, job_id)
-        if not job or job.status == 'cancelled': return
+        if not job or job.status == 'cancelled':
+            return
        update_job_status(db, job_id, "processing", progress=25)
        logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}")
        tool_config = conversion_tools_config.get(tool)
-        if not tool_config: raise ValueError(f"Unknown conversion tool: {tool}")
+        if not tool_config:
+            raise ValueError(f"Unknown conversion tool: {tool}")
        input_path = Path(input_path_str)
        output_path = Path(output_path_str)
        current_input_path = input_path
+
+        # Pre-processing for specific tools
        if tool == "mozjpeg":
            temp_input_file = input_path.with_suffix('.temp.ppm')
            logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}")
@@ -317,22 +395,12 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
                err = (pre_conv_result.stderr or "")[:4000]
                raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}")
            current_input_path = temp_input_file
+
        update_job_status(db, job_id, "processing", progress=50)
-        # Build safe mapping for formatting and validate placeholders
-        ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"}
-        def validate_and_build_command(template_str: str, mapping: dict):
-            fmt = Formatter()
-            used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
-            bad = used - ALLOWED_VARS
-            if bad:
-                raise ValueError(f"Command template contains disallowed placeholders: {bad}")
-            formatted = template_str.format(**mapping)
-            return shlex.split(formatted)

-        # Use a temporary output path and atomically move into place after success
-        temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp")
-
-        # Prepare mapping
+        # prepare temporary output and mapping
+        # use a temp filename that preserves the real extension, e.g. file.tmp-<uuid>.pdf
+        temp_output_file = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")
        mapping = {
            "input": str(current_input_path),
            "output": str(temp_output_file),
@@ -340,7 +408,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
            "output_ext": output_path.suffix.lstrip('.'),
        }

-        # Allow tool-specific adjustments to mapping
+        # tool specific mapping adjustments
        if tool.startswith("ghostscript"):
            device, setting = task_key.split('_')
            mapping.update({"device": device, "dpi": setting, "preset": setting})
@@ -358,38 +426,30 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
            _, quality = task_key.split('_')
            quality = quality.replace('q', '')
            mapping.update({"quality": quality})
+        elif tool == "libreoffice":
+            target_ext = output_path.suffix.lstrip('.')
+            # tool_config may include a 'filters' mapping (see settings.yml example)
+            filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)
+            mapping["filter"] = filter_val

        command_template_str = tool_config["command_template"]
        command = validate_and_build_command(command_template_str, mapping)
        logger.info(f"Executing command: {' '.join(command)}")
-        # run with timeout and capture output; run_command helper ensures trimmed logs on failure
-        def run_command(argv: List[str], timeout: int = 300):
-            try:
-                res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
-            except subprocess.TimeoutExpired:
-                raise Exception(f"Command timed out after {timeout}s")
-            if res.returncode != 0:
-                stderr = (res.stderr or "")[:4000]
-                stdout = (res.stdout or "")[:4000]
-                raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
-            return res

+        # execute command with timeout and trimmed logs on error
        result = run_command(command, timeout=tool_config.get("timeout", 300))
-        if tool == "libreoffice":
-            expected_output_filename = input_path.with_suffix(output_path.suffix).name
-            generated_file = output_path.parent / expected_output_filename
-            if generated_file.exists():
-                # move generated file into place
-                generated_file.replace(output_path)
-            else:
-                raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}")
+
+        # NOTE(review): the LibreOffice-specific step that moved the generated file into place was
+        # removed; confirm the command template writes to {output}, or the move below is skipped.
+
+
        # move temp output into final location atomically
        if temp_output_file and temp_output_file.exists():
            temp_output_file.replace(output_path)

-        mark_job_as_completed(db, job_id, preview=f"Successfully converted file.")
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=f"Successfully converted file.")
        logger.info(f"Conversion for job {job_id} completed.")
-    except Exception as e:
+    except Exception:
        logger.exception(f"ERROR during conversion for job {job_id}")
        update_job_status(db, job_id, "failed", error="See server logs for details.")
    finally:
@@ -415,13 +475,14 @@ app = FastAPI(lifespan=lifespan)
 app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static")
 templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates")

-async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
+async def save_upload_file_chunked(upload_file: UploadFile, destination: Path) -> int:
    """
-    Streams the uploaded file in chunks directly to a file on disk.
-    This is memory-efficient and reliable for large files.
+    Write upload to a tmp file in chunks, then atomically move to final destination.
+    Returns the final size of the file in bytes.
    """
    max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024)
-    tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp")
+    # make a temp filename that keeps the real extension, e.g. file.tmp-<uuid>.pdf
+    tmp = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}")
    size = 0
    try:
        with tmp.open("wb") as buffer:
@@ -433,17 +494,16 @@ async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
                if size > max_size:
                    raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
                buffer.write(chunk)
-        # atomic move into place
        tmp.replace(destination)
+        return size
    except Exception:
        tmp.unlink(missing_ok=True)
        raise

-
 def is_allowed_file(filename: str, allowed_extensions: set) -> bool:
    return Path(filename).suffix.lower() in allowed_extensions

-# --- Routes (only transcription route is modified) ---
+# --- Routes (transcription route uses Huey task enqueuing) ---

@app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED)
 async def submit_audio_transcription(
@@ -467,19 +527,24 @@ async def submit_audio_transcription(
    upload_path = PATHS.UPLOADS_DIR / audio_filename
    processed_path = PATHS.PROCESSED_DIR / transcript_filename

-    await save_upload_file_chunked(file, upload_path)
+    input_size = await save_upload_file_chunked(file, upload_path)
    
-    job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    job_data = JobCreate(
+        id=job_id, 
+        task_type="transcription", 
+        original_filename=file.filename, 
+        input_filepath=str(upload_path), 
+        input_filesize=input_size,
+        processed_filepath=str(processed_path)
+    )
    new_job = create_job(db=db, job=job_data)
    
-    # --- MODIFIED: Pass whisper_config to the task ---
+    # enqueue the Huey task (decorated function call enqueues when using huey)
    run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config)
    
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}


-# --- Other routes remain unchanged ---
-
@app.get("/")
 async def get_index(request: Request):
    whisper_models = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}).get("allowed_models", [])
@@ -493,23 +558,55 @@ async def get_index(request: Request):
@app.get("/settings")
 async def get_settings_page(request: Request):
    try:
-        with open(PATHS.SETTINGS_FILE, 'r') as f:
-            current_config = yaml.safe_load(f)
-    except Exception as e:
-        logger.error(f"Could not load settings.yml for settings page: {e}")
+        with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
+            current_config = yaml.safe_load(f) or {}
+    except Exception:
+        logger.exception("Could not load settings.yml for settings page")
        current_config = {}
    return templates.TemplateResponse("settings.html", {"request": request, "config": current_config})

+def deep_merge(base: dict, updates: dict) -> dict:
+    """
+    Recursively merge `updates` into `base`. Lists and scalars are replaced.
+    """
+    for key, value in updates.items():
+        if (
+            key in base
+            and isinstance(base[key], dict)
+            and isinstance(value, dict)
+        ):
+            base[key] = deep_merge(base[key], value)
+        else:
+            base[key] = value
+    return base
+
+
@app.post("/settings/save")
 async def save_settings(new_config: Dict = Body(...)):
+    tmp = PATHS.SETTINGS_FILE.with_suffix(".tmp")
    try:
-        with open(PATHS.SETTINGS_FILE, 'w') as f:
-            yaml.dump(new_config, f, default_flow_style=False, sort_keys=False)
+        # load existing config if present
+        try:
+            with PATHS.SETTINGS_FILE.open("r", encoding="utf8") as f:
+                current_config = yaml.safe_load(f) or {}
+        except FileNotFoundError:
+            current_config = {}
+
+        # deep merge new values
+        merged = deep_merge(current_config, new_config)
+
+        # atomic write back
+        with tmp.open("w", encoding="utf8") as f:
+            yaml.safe_dump(merged, f, default_flow_style=False, sort_keys=False)
+        tmp.replace(PATHS.SETTINGS_FILE)
+
        load_app_config()
-        return JSONResponse({"message": "Settings saved successfully."})
-    except Exception as e:
-        logger.error(f"Failed to save settings: {e}")
-        raise HTTPException(status_code=500, detail="Could not write to settings.yml.")
+        return JSONResponse({"message": "Settings updated successfully."})
+    except Exception:
+        logger.exception("Failed to update settings")
+        tmp.unlink(missing_ok=True)
+        raise HTTPException(status_code=500, detail="Could not update settings.yml.")
+

@app.post("/settings/clear-history")
 async def clear_job_history(db: Session = Depends(get_db)):
@@ -518,9 +615,9 @@ async def clear_job_history(db: Session = Depends(get_db)):
        db.commit()
        logger.info(f"Cleared {num_deleted} jobs from history.")
        return {"deleted_count": num_deleted}
-    except Exception as e:
+    except Exception:
        db.rollback()
-        logger.error(f"Failed to clear job history: {e}")
+        logger.exception("Failed to clear job history")
        raise HTTPException(status_code=500, detail="Database error while clearing history.")

@app.post("/settings/delete-files")
@@ -532,9 +629,9 @@ async def delete_processed_files():
            if f.is_file():
                f.unlink()
                deleted_count += 1
-        except Exception as e:
+        except Exception:
            errors.append(f.name)
-            logger.error(f"Could not delete processed file {f.name}: {e}")
+            logger.exception(f"Could not delete processed file {f.name}")
    if errors:
        raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}")
    logger.info(f"Deleted {deleted_count} files from processed directory.")
@@ -562,12 +659,14 @@ async def submit_file_conversion(file: UploadFile = File(...), output_format: st
    processed_filename = f"{original_stem}_{job_id}.{target_ext}"
    upload_path = PATHS.UPLOADS_DIR / upload_filename
    processed_path = PATHS.PROCESSED_DIR / processed_filename
-    await save_upload_file_chunked(file, upload_path)
+    input_size = await save_upload_file_chunked(file, upload_path)
    job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename,
-                         input_filepath=str(upload_path), processed_filepath=str(processed_path))
+                         input_filepath=str(upload_path), 
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
    run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
 async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -578,12 +677,15 @@ async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get
    unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}"
    upload_path = PATHS.UPLOADS_DIR / unique_filename
    processed_path = PATHS.PROCESSED_DIR / unique_filename
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename,
+                         input_filepath=str(upload_path), 
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
    ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})
    run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings)
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
 async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -596,11 +698,14 @@ async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(g
    unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}"
    upload_path = PATHS.UPLOADS_DIR / unique_filename
    processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt"
-    await save_upload_file_chunked(file, upload_path)
-    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
+    input_size = await save_upload_file_chunked(file, upload_path)
+    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename,
+                         input_filepath=str(upload_path), 
+                         input_filesize=input_size,
+                         processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
    run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
-    return {"job_id": new_job.id, "status": new_job.status}
+    return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}

@app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED)
 async def cancel_job(job_id: str, db: Session = Depends(get_db)):
@@ -626,8 +731,7 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)):
@app.get("/download/{filename}")
 async def download_file(filename: str):
    safe_filename = secure_filename(filename)
-    file_path = PATHS.PROCESSED_DIR / safe_filename
-    file_path = file_path.resolve()
+    file_path = (PATHS.PROCESSED_DIR / safe_filename).resolve()
    base = PATHS.PROCESSED_DIR.resolve()
    try:
        file_path.relative_to(base)
@@ -636,3 +740,14 @@ async def download_file(filename: str):
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found.")
    return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream")
+
+# Small health endpoint
+@app.get("/health")
+async def health():
+    try:
+        with engine.connect() as conn:
+            conn.execute("SELECT 1")
+    except Exception:
+        logger.exception("Health check failed")
+        return JSONResponse({"ok": False}, status_code=500)
+    return {"ok": True}
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,145 @@
-# requirements.txt
-
-# Web framework
-fastapi
-uvicorn[standard]
-python-multipart
-jinja2
-
-# PDF OCR
-ocrmypdf
-PyPDF2
-
-# Audio Transcription
-faster-whisper
-# The following are core dependencies for faster-whisper,
-# but it's good to list them explicitly.
-# ctranslate2
-# transformers
-# torch # Note: torch is a dependency of transformers
-
-# Utilities
-werkzeug
+annotated-types==0.7.0
+anyio==4.10.0
+audioop-lts==0.2.2
+av==15.1.0
+azure-ai-documentintelligence==1.0.2
+azure-core==1.35.1
+azure-identity==1.25.0
+beautifulsoup4==4.13.5
+certifi==2025.8.3
+cffi==2.0.0
+chardet==5.2.0
+charset-normalizer==3.4.3
+click==8.2.1
+cobble==0.1.4
+coloredlogs==15.0.1
+cryptography==45.0.7
+css-parser==1.0.10
+ctranslate2==4.6.0
+defusedxml==0.7.1
+Deprecated==1.2.18
+deprecation==2.1.0
+et_xmlfile==2.0.0
+fastapi==0.116.1
+faster-whisper==1.2.0
+filelock==3.19.1
+flatbuffers==25.2.10
+fsspec==2025.9.0
+greenlet==3.2.4
+gunicorn==23.0.0
+h11==0.16.0
+hf-xet==1.1.10
+html5-parser==0.4.12
+html5lib==1.1
+httptools==0.6.4
+huey==2.5.3
+huggingface-hub==0.34.4
+humanfriendly==10.0
+idna==3.10
+imageio==2.37.0
+img2pdf==0.6.1
+isodate==0.7.2
+Jinja2==3.1.6
+lazy_loader==0.4
+lxml==6.0.1
+magika==0.6.2
+mammoth==1.10.0
+markdown-it-py==4.0.0
+markdownify==1.2.0
+markitdown==0.1.3
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mechanize==0.4.10
+mpmath==1.3.0
+msal==1.33.0
+msal-extensions==1.3.1
+msgpack==1.1.1
+networkx==3.5
+ninja==1.13.0
+numpy==2.2.6
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.3
+nvidia-nvjitlink-cu12==12.8.93
+nvidia-nvtx-cu12==12.8.90
+ocrmypdf==16.11.0
+olefile==0.47
+onnxruntime==1.22.1
+opencv-python-headless==4.12.0.88
+openpyxl==3.1.5
+packaging==25.0
+pandas==2.3.2
+pdfminer.six==20250506
+pi_heif==1.1.0
+pikepdf==9.11.0
+pillow==11.3.0
+pluggy==1.6.0
+protobuf==6.32.1
+pyclipper==1.3.0.post6
+pycparser==2.23
+pydantic==2.11.9
+pydantic-settings==2.10.1
+pydantic_core==2.33.2
+pydub==0.25.1
+Pygments==2.19.2
+PyJWT==2.10.1
+pypdf==6.0.0
+PyPDF2==3.0.1
+PyQt6==6.9.1
+PyQt6-Qt6==6.9.2
+PyQt6-WebEngine==6.9.0
+PyQt6-WebEngine-Qt6==6.9.2
+PyQt6_sip==13.10.2
+pytesseract==0.3.13
+python-bidi==0.6.6
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.1
+python-multipart==0.0.20
+python-pptx==1.0.2
+pytz==2025.2
+PyYAML==6.0.2
+regex==2025.9.1
+requests==2.32.5
+rich==14.1.0
+scikit-image==0.25.2
+scipy==1.16.2
+setuptools==80.9.0
+shapely==2.1.1
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.8
+SpeechRecognition==3.14.3
+SQLAlchemy==2.0.43
+standard-aifc==3.13.0
+standard-chunk==3.13.0
+starlette==0.47.3
+sympy==1.14.0
+tifffile==2025.9.9
+tokenizers==0.22.0
+torch==2.8.0
+torchvision==0.23.0
+tqdm==4.67.1
+triton==3.4.0
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+uvloop==0.21.0
+watchfiles==1.1.0
+webencodings==0.5.1
+websockets==15.0.1
+Werkzeug==3.1.3
+wrapt==1.17.3
+xlrd==2.0.2
+xlsxwriter==3.2.9
+youtube-transcript-api==1.0.3
--- a/run.sh
+++ b/run.sh
@@ -3,8 +3,8 @@

 echo "Starting DocProcessor with Gunicorn..."

-exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
+exec gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
 echo "Done"
 echo "Starting huey..."
-exec huey_consumer.py main.huey -w 2 &
+exec huey_consumer.py main.huey -w 4 &
 echo "Done"
--- a/.yml.default
+++ b/.yml.default
@@ -0,0 +1,272 @@
+# settings.yml
+
+# General application settings
+app_settings:
+  max_file_size_mb: 2000 # Maximum upload size in Megabytes
+  # Allowed extensions (list will be normalized to a set by the server)
+  allowed_all_extensions:
+    - .pdf
+    - .ps
+    - .eps
+    - .png
+    - .jpg
+    - .jpeg
+    - .tiff
+    - .tif
+    - .gif
+    - .bmp
+    - .webp
+    - .svg
+    - .jxl
+    - .avif
+    - .ppm
+    - .mp3
+    - .m4a
+    - .ogg
+    - .flac
+    - .opus
+    - .wav
+    - .aac
+    - .mp4
+    - .mkv
+    - .mov
+    - .webm
+    - .avi
+    - .flv
+    - .md
+    - .txt
+    - .html
+    - .docx
+    - .odt
+    - .rst
+    - .epub
+    - .mobi
+    - .azw3
+    - .pptx
+    - .xlsx
+
+# Settings for Optical Character Recognition (OCR) tasks
+ocr_settings:
+  ocrmypdf:
+    deskew: true
+    clean: true
+    optimize: 1
+    force_ocr: true
+
+# Settings for audio transcription tasks
+transcription_settings:
+  whisper:
+    compute_type: "int8"
+    allowed_models:
+      - "tiny"
+      - "base"
+      - "small"
+      - "medium"
+      - "large-v3"
+      - "distil-large-v2"
+    # optional: specify device if workers have GPU (e.g. "cuda" or "cpu")
+    # device: "cpu"
+
+# --- Conversion Tool Definitions ---
+# The server validates placeholders against an allowlist:
+# {input}, {output}, {output_dir}, {output_ext}, {quality}, {speed},
+# {preset}, {device}, {dpi}, {samplerate}, {bitdepth}
+conversion_tools:
+  libreoffice:
+    name: "LibreOffice"
+    # Use {filter} so we can supply LibreOffice export filters like "txt:Text"
+    command_template: 'libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}'
+    timeout: 120
+    # Optional: per-format export filter. If missing for a format, server falls back to the extension.
+    filters:
+      pdf: "pdf"
+      docx: "docx"
+      odt: "odt"
+      html: "html"
+      rtf: "rtf"
+      txt: "txt:Text"
+      xml: "xml"
+      epub: "epub"
+      xlsx: "xlsx"
+      ods: "ods"
+      csv: "csv:Text"
+      pptx: "pptx"
+      odp: "odp"
+      svg: "svg"
+    formats:
+      pdf: "PDF"
+      docx: "Word Document"
+      odt: "OpenDocument Text"
+      html: "HTML"
+      rtf: "Rich Text Format"
+      txt: "Plain Text"
+      xml: "Word 2003 XML"
+      epub: "EPUB"
+      xlsx: "Excel Spreadsheet"
+      ods: "OpenDocument Spreadsheet"
+      csv: "CSV"
+      pptx: "PowerPoint Presentation"
+      odp: "OpenDocument Presentation"
+      svg: "SVG"
+
+  pandoc:
+    name: "Pandoc"
+    command_template: 'pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex'
+    timeout: 60
+    formats:
+      docx: "Word Document"
+      odt: "OpenDocument Text"
+      pdf: "PDF"
+      rtf: "Rich Text Format"
+      txt: "Plain Text"
+      tex: "LaTeX"
+      man: "Groff Man Page"
+      epub: "EPUB v3 Book"
+      epub2: "EPUB v2 Book"
+      html: "HTML"
+      html5: "HTML5"
+      pptx: "PowerPoint Presentation"
+      beamer: "Beamer PDF Slides"
+      slidy: "Slidy HTML Slides"
+      md: "Markdown"
+      rst: "reStructuredText"
+      jira: "Jira Wiki Markup"
+      mediawiki: "MediaWiki Markup"
+
+  ghostscript_pdf:
+    name: "Ghostscript (PDF)"
+    # placeholders used: {preset}, {output}, {input}
+    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
+    timeout: 60
+    formats:
+      screen: "PDF (Optimized for Screen)"
+      ebook: "PDF (Optimized for Ebooks)"
+      printer: "PDF (Optimized for Print)"
+      archive: "PDF/A (for Archiving)"
+
+  ghostscript_image:
+    name: "Ghostscript (Image)"
+    # placeholders used: {device}, {dpi}, {output}, {input}
+    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
+    timeout: 60
+    formats:
+      jpeg_72: "JPEG Image (72 DPI)"
+      jpeg_300: "JPEG Image (300 DPI)"
+      png16m_150: "PNG Image (150 DPI)"
+      png16m_300: "PNG Image (300 DPI)"
+      tiff24nc_300: "TIFF Image (300 DPI)"
+      tiff24nc_600: "TIFF Image (600 DPI)"
+
+  calibre:
+    name: "Calibre (ebook-convert)"
+    command_template: 'ebook-convert {input} {output}'
+    timeout: 60
+    formats:
+      epub: "EPUB"
+      mobi: "MOBI"
+      azw3: "Amazon Kindle"
+      pdf: "PDF"
+      docx: "Word Document"
+
+  ffmpeg:
+    name: "FFmpeg"
+    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
+    timeout: 300
+    formats:
+      mp4: "MP4 Video"
+      mkv: "MKV Video"
+      mov: "MOV Video"
+      webm: "WebM Video"
+      mp3: "MP3 Audio"
+      wav: "WAV Audio"
+      flac: "FLAC Audio"
+      gif: "Animated GIF"
+
+  vips:
+    name: "VIPS"
+    command_template: 'vips copy {input} {output}[Q=90]'
+    timeout: 60
+    formats:
+      jpg: "JPEG Image (Q90)"
+      png: "PNG Image"
+      webp: "WebP Image (Q90)"
+      tiff: "TIFF Image"
+      avif: "AVIF Image"
+
+  graphicsmagick:
+    name: "GraphicsMagick"
+    command_template: 'gm convert {input} -quality 90 {output}'
+    timeout: 60
+    formats:
+      jpg: "JPEG Image (Q90)"
+      png: "PNG Image"
+      webp: "WebP Image"
+      tiff: "TIFF Image"
+      pdf: "PDF from Images"
+
+  inkscape:
+    name: "Inkscape"
+    command_template: 'inkscape {input} --export-filename={output}'
+    timeout: 30
+    formats:
+      svg: "SVG (Plain)"
+      png: "PNG Image (96 DPI)"
+      pdf: "PDF Document"
+
+  libjxl:
+    name: "libjxl (cjxl)"
+    command_template: 'cjxl {input} {output} -q 90'
+    timeout: 30
+    formats:
+      jxl: "JPEG XL (Q90)"
+
+  resvg:
+    name: "resvg"
+    command_template: 'resvg {input} {output}'
+    timeout: 30
+    formats:
+      png: "PNG from SVG"
+
+  potrace:
+    name: "Potrace"
+    command_template: 'potrace {input} --svg -o {output}'
+    timeout: 30
+    formats:
+      svg: "SVG from Bitmap"
+
+  markitdown:
+    name: "Markitdown"
+    command_template: 'markitdown {input} -o {output}'
+    timeout: 30
+    formats:
+      md: "Markdown from Everything!"
+
+  pngquant:
+    name: "pngquant"
+    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
+    timeout: 30
+    formats:
+      png_hq: "PNG (High Quality Compression)"
+      png_mq: "PNG (Medium Quality Compression)"
+      png_fast: "PNG (Fast Compression)"
+
+  sox:
+    name: "SoX Audio Converter"
+    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
+    timeout: 120
+    formats:
+      wav_48k_24b: "WAV (48kHz, 24-bit)"
+      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
+      flac_48k_24b: "FLAC (48kHz, 24-bit)"
+      flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
+      ogg_32k_16b: "Ogg Vorbis (32kHz)"
+      ogg_16k_16b: "Ogg Vorbis (16kHz, Voice)"
+
+  mozjpeg:
+    name: "MozJPEG"
+    command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
+    timeout: 30
+    formats:
+      jpg_q85: "JPEG (High Quality)"
+      jpg_q75: "JPEG (Web Quality)"
+      jpg_q60: "JPEG (Aggressive Compression)"
--- a/settings.yml
+++ b/settings.yml
@@ -1,179 +1,242 @@
-# settings.yml
-
-# General application settings
 app_settings:
-  max_file_size_mb: 2000 # Maximum upload size in Megabytes
-
-# Settings for Optical Character Recognition (OCR) tasks
+  max_file_size_mb: '2000'
+  allowed_all_extensions:
+  - .pdf
+  - .ps
+  - .eps
+  - .png
+  - .jpg
+  - .jpeg
+  - .tiff
+  - .tif
+  - .gif
+  - .bmp
+  - .webp
+  - .svg
+  - .jxl
+  - .avif
+  - .ppm
+  - .mp3
+  - .m4a
+  - .ogg
+  - .flac
+  - .opus
+  - .wav
+  - .aac
+  - .mp4
+  - .mkv
+  - .mov
+  - .webm
+  - .avi
+  - .flv
+  - .md
+  - .txt
+  - .html
+  - .docx
+  - .odt
+  - .rst
+  - .epub
+  - .mobi
+  - .azw3
+  - .pptx
+  - .xlsx
 ocr_settings:
  ocrmypdf:
    deskew: true
    clean: true
    optimize: 1
    force_ocr: true
-
-# Settings for audio transcription tasks
 transcription_settings:
  whisper:
-    compute_type: "int8"
+    compute_type: int8
    allowed_models:
-      - "tiny"
-      - "base"
-      - "small"
-      - "medium"
-      - "large-v3"
-      - "distil-large-v2"
-
-# --- Conversion Tool Definitions ---
-# Each tool's command is a single string. The backend uses shlex to parse it,
-# so you can use quotes for arguments with spaces.
-# Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc.
+    - tiny
+    - base
+    - small
+    - medium
+    - large-v3
+    - distil-large-v2
 conversion_tools:
  libreoffice:
-    name: "LibreOffice"
-    command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}'
+    name: LibreOffice
+    command_template: libreoffice --headless --convert-to {filter} --outdir {output_dir}
+      {input}
+    timeout: 300
+    filters:
+      pdf: pdf
+      docx: docx
+      odt: odt
+      html: html
+      rtf: rtf
+      txt: txt:Text
+      xml: xml
+      epub: epub
+      xlsx: xlsx
+      ods: ods
+      csv: csv:Text
+      pptx: pptx
+      odp: odp
+      svg: svg
    formats:
-      pdf: "PDF"
-      docx: "Word Document"
-      odt: "OpenDocument Text"
-      html: "HTML"
-      rtf: "Rich Text Format"
-      txt: "Plain Text"
-      xml: "Word 2003 XML"
-      epub: "EPUB"
-      xlsx: "Excel Spreadsheet"
-      ods: "OpenDocument Spreadsheet"
-      csv: "CSV"
-      pptx: "PowerPoint Presentation"
-      odp: "OpenDocument Presentation"
-      svg: "SVG"
+      pdf: PDF
+      docx: Word Document
+      odt: OpenDocument Text
+      html: HTML
+      rtf: Rich Text Format
+      txt: Plain Text
+      xml: Word 2003 XML
+      epub: EPUB
+      xlsx: Excel Spreadsheet
+      ods: OpenDocument Spreadsheet
+      csv: CSV
+      pptx: PowerPoint Presentation
+      odp: OpenDocument Presentation
+      svg: SVG
  pandoc:
-    name: "Pandoc"
-    command_template: 'pandoc --standalone {input} -o {output}'
+    name: Pandoc
+    command_template: pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex
+    timeout: 300
    formats:
-      docx: "Word Document"
-      odt: "OpenDocument Text"
-      pdf: "PDF"
-      rtf: "Rich Text Format"
-      txt: "Plain Text"
-      tex: "LaTeX"
-      man: "Groff Man Page"
-      epub: "EPUB v3 Book"
-      epub2: "EPUB v2 Book"
-      html: "HTML"
-      html5: "HTML5"
-      pptx: "PowerPoint Presentation"
-      beamer: "Beamer PDF Slides"
-      slidy: "Slidy HTML Slides"
-      md: "Markdown"
-      rst: "reStructuredText"
-      jira: "Jira Wiki Markup"
-      mediawiki: "MediaWiki Markup"
+      docx: Word Document
+      odt: OpenDocument Text
+      pdf: PDF
+      rtf: Rich Text Format
+      txt: Plain Text
+      tex: LaTeX
+      man: Groff Man Page
+      epub: EPUB v3 Book
+      epub2: EPUB v2 Book
+      html: HTML
+      html5: HTML5
+      pptx: PowerPoint Presentation
+      beamer: Beamer PDF Slides
+      slidy: Slidy HTML Slides
+      md: Markdown
+      rst: reStructuredText
+      jira: Jira Wiki Markup
+      mediawiki: MediaWiki Markup
  ghostscript_pdf:
-    name: "Ghostscript (PDF)"
-    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
+    name: Ghostscript (PDF)
+    command_template: gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET
+      -dBATCH {preset} -sOutputFile={output} {input}
+    timeout: 60
    formats:
-      screen: "PDF (Optimized for Screen)"
-      ebook: "PDF (Optimized for Ebooks)"
-      printer: "PDF (Optimized for Print)"
-      archive: "PDF/A (for Archiving)"
+      screen: PDF (Optimized for Screen)
+      ebook: PDF (Optimized for Ebooks)
+      printer: PDF (Optimized for Print)
+      archive: PDF/A (for Archiving)
  ghostscript_image:
-    name: "Ghostscript (Image)"
-    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
+    name: Ghostscript (Image)
+    command_template: gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output}
+      {input}
+    timeout: 60
    formats:
-      jpeg_72: "JPEG Image (72 DPI)"
-      jpeg_300: "JPEG Image (300 DPI)"
-      png16m_150: "PNG Image (150 DPI)"
-      png16m_300: "PNG Image (300 DPI)"
-      tiff24nc_300: "TIFF Image (300 DPI)"
-      tiff24nc_600: "TIFF Image (600 DPI)"
+      jpeg_72: JPEG Image (72 DPI)
+      jpeg_300: JPEG Image (300 DPI)
+      png16m_150: PNG Image (150 DPI)
+      png16m_300: PNG Image (300 DPI)
+      tiff24nc_300: TIFF Image (300 DPI)
+      tiff24nc_600: TIFF Image (600 DPI)
  calibre:
-    name: "Calibre (ebook-convert)"
-    command_template: 'ebook-convert {input} {output}'
+    name: Calibre (ebook-convert)
+    command_template: ebook-convert {input} {output}
+    timeout: 600
    formats:
-      epub: "EPUB"
-      mobi: "MOBI"
-      azw3: "Amazon Kindle"
-      pdf: "PDF"
-      docx: "Word Document"
+      epub: EPUB
+      mobi: MOBI
+      azw3: Amazon Kindle
+      pdf: PDF
+      docx: Word Document
  ffmpeg:
-    name: "FFmpeg"
-    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
+    name: FFmpeg
+    command_template: ffmpeg -i {input} -y -preset medium {output}
+    timeout: 600
    formats:
-      mp4: "MP4 Video"
-      mkv: "MKV Video"
-      mov: "MOV Video"
-      webm: "WebM Video"
-      mp3: "MP3 Audio"
-      wav: "WAV Audio"
-      flac: "FLAC Audio"
-      gif: "Animated GIF"
+      mp4: MP4 Video
+      mkv: MKV Video
+      mov: MOV Video
+      webm: WebM Video
+      mp3: MP3 Audio
+      wav: WAV Audio
+      flac: FLAC Audio
+      gif: Animated GIF
  vips:
-    name: "VIPS"
-    command_template: 'vips copy {input} {output}[Q=90]'
+    name: VIPS
+    command_template: vips copy {input} {output}[Q=90]
+    timeout: 60
    formats:
-      jpg: "JPEG Image (Q90)"
-      png: "PNG Image"
-      webp: "WebP Image (Q90)"
-      tiff: "TIFF Image"
-      avif: "AVIF Image"
+      jpg: JPEG Image (Q90)
+      png: PNG Image
+      webp: WebP Image (Q90)
+      tiff: TIFF Image
+      avif: AVIF Image
  graphicsmagick:
-    name: "GraphicsMagick"
-    command_template: 'gm convert {input} -quality 90 {output}'
+    name: GraphicsMagick
+    command_template: gm convert {input} -quality 90 {output}
+    timeout: 60
    formats:
-      jpg: "JPEG Image (Q90)"
-      png: "PNG Image"
-      webp: "WebP Image"
-      tiff: "TIFF Image"
-      pdf: "PDF from Images"
+      jpg: JPEG Image (Q90)
+      png: PNG Image
+      webp: WebP Image
+      tiff: TIFF Image
+      pdf: PDF from Images
  inkscape:
-    name: "Inkscape"
-    command_template: 'inkscape {input} --export-filename={output}'
+    name: Inkscape
+    command_template: inkscape {input} --export-filename={output}
+    timeout: 30
    formats:
-      svg: "SVG (Plain)"
-      png: "PNG Image (96 DPI)"
-      pdf: "PDF Document"
+      svg: SVG (Plain)
+      png: PNG Image (96 DPI)
+      pdf: PDF Document
  libjxl:
-    name: "libjxl (cjxl)"
-    command_template: 'cjxl {input} {output} -q 90'
+    name: libjxl (cjxl)
+    command_template: cjxl {input} {output} -q 90
+    timeout: 30
    formats:
-      jxl: "JPEG XL (Q90)"
+      jxl: JPEG XL (Q90)
  resvg:
-    name: "resvg"
-    command_template: 'resvg {input} {output}'
+    name: resvg
+    command_template: resvg {input} {output}
+    timeout: 30
    formats:
-      png: "PNG from SVG"
+      png: PNG from SVG
  potrace:
-    name: "Potrace"
-    command_template: 'potrace {input} --svg -o {output}'
+    name: Potrace
+    command_template: potrace {input} --svg -o {output}
+    timeout: 30
    formats:
-      svg: "SVG from Bitmap"
+      svg: SVG from Bitmap
  markitdown:
-    name: "Markitdown"
-    command_template: 'markitdown {input} -o {output}'
+    name: Markitdown
+    command_template: markitdown {input} -o {output}
+    timeout: 300
    formats:
-      md: "Markdown from Everything!"
+      md: Markdown from Everything!
  pngquant:
-    name: "pngquant"
-    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
+    name: pngquant
+    command_template: pngquant --quality={quality} --speed {speed} --force --output
+      {output} {input}
+    timeout: 300
    formats:
-      png_hq: "PNG (High Quality Compression)"
-      png_mq: "PNG (Medium Quality Compression)"
-      png_fast: "PNG (Fast Compression)"
+      png_hq: PNG (High Quality Compression)
+      png_mq: PNG (Medium Quality Compression)
+      png_fast: PNG (Fast Compression)
  sox:
-    name: "SoX Audio Converter"
-    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
+    name: SoX Audio Converter
+    command_template: sox {input} -r {samplerate} -b {bitdepth} {output}
+    timeout: 600
    formats:
-      wav_48k_24b: "WAV (48kHz, 24-bit)"
-      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
-      flac_48k_24b: "FLAC (48kHz, 24-bit)"
-      flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
-      ogg_32k: "Ogg Vorbis (32kHz)"
-      ogg_16k: "Ogg Vorbis (16kHz, Voice)"
+      wav_48k_24b: WAV (48kHz, 24-bit)
+      wav_44k_16b: WAV (CD, 44.1kHz, 16-bit)
+      flac_48k_24b: FLAC (48kHz, 24-bit)
+      flac_44k_16b: FLAC (CD, 44.1kHz, 16-bit)
+      ogg_32k_16b: Ogg Vorbis (32kHz)
+      ogg_16k_16b: Ogg Vorbis (16kHz, Voice)
  mozjpeg:
-    name: "MozJPEG"
-    command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
+    name: MozJPEG
+    command_template: cjpeg -quality {quality} -outfile {output} {input}
+    timeout: 30
    formats:
-      jpg_q85: "JPEG (High Quality)"
-      jpg_q75: "JPEG (Web Quality)"
-      jpg_q60: "JPEG (Aggressive Compression)"
+      jpg_q85: JPEG (High Quality)
+      jpg_q75: JPEG (Web Quality)
+      jpg_q60: JPEG (Aggressive Compression)
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -1,5 +1,3 @@
-/* static/css/style.css */
-
 :root {
    /* Core */
    --bg-color: #000000;
@@ -449,6 +447,110 @@ button[type="submit"]:disabled {

 }

+/* --- START: Drag and Drop and Dialog Styles --- */
+.drag-overlay {
+    position: fixed;
+    inset: 0;
+    z-index: 9999;
+    display: none; /* Hidden by default */
+    justify-content: center;
+    align-items: center;
+    background-color: rgba(0, 0, 0, 0.7);
+    backdrop-filter: blur(5px);
+}
+body.dragging .drag-overlay {
+    display: flex; /* Shown when body has .dragging class */
+}
+.drag-overlay-content {
+    border: 3px dashed var(--primary-color);
+    border-radius: 12px;
+    padding: 2rem 4rem;
+    text-align: center;
+    background-color: rgba(0, 0, 0, 0.2);
+}
+.drag-overlay-content p {
+    margin: 0;
+    font-size: 1.5rem;
+    font-weight: 500;
+    color: var(--primary-color);
+}
+
+.dialog-overlay {
+    position: fixed;
+    inset: 0;
+    z-index: 10000;
+    display: none; /* Hidden by default */
+    justify-content: center;
+    align-items: center;
+    background-color: rgba(0, 0, 0, 0.7);
+    backdrop-filter: blur(5px);
+}
+.dialog-overlay.visible {
+    display: flex; /* Show when .visible class is added */
+}
+
+.dialog-box {
+    background: var(--card-bg);
+    border: 1px solid var(--border-color);
+    border-radius: 8px;
+    padding: 1.5rem;
+    width: 100%;
+    max-width: 450px;
+    text-align: center;
+    box-shadow: 0 10px 30px rgba(0,0,0,0.5);
+}
+.dialog-box h2 {
+    margin-top: 0;
+    font-size: 1.5rem;
+}
+.dialog-box p {
+    color: var(--muted-text);
+    margin-bottom: 1.5rem;
+}
+
+.dialog-actions {
+    display: grid;
+    grid-template-columns: 1fr;
+    gap: 0.75rem;
+    margin-bottom: 1rem;
+}
+.dialog-actions button {
+    display: block;
+    width: 100%;
+    background: transparent;
+    border: 1px solid var(--border-color);
+    color: var(--text-color);
+    padding: 0.65rem 1rem;
+    font-size: 1rem;
+    font-weight: 600;
+    border-radius: 5px;
+    cursor: pointer;
+    transition: background-color 0.15s ease, border-color 0.15s ease;
+}
+.dialog-actions button:hover {
+    background: var(--primary-hover);
+    border-color: var(--primary-hover);
+}
+.dialog-secondary-action {
+    background-color: transparent !important;
+    border: 1px solid var(--border-color) !important;
+}
+.dialog-secondary-action:hover {
+    background-color: rgba(255, 255, 255, 0.05) !important;
+}
+.dialog-cancel {
+    background: none;
+    border: none;
+    color: var(--muted-text);
+    cursor: pointer;
+    font-size: 0.9rem;
+    padding: 0.5rem;
+}
+.dialog-cancel:hover {
+    color: var(--text-color);
+}
+/* --- END: Drag and Drop and Dialog Styles --- */
+
 /* Spinner */
 .spinner-small {
    border: 3px solid rgba(255,255,255,0.1);
@@ -467,7 +569,6 @@ button[type="submit"]:disabled {

 /* Mobile responsive table */
@media (max-width: 768px) {
-    /* ... (no changes in this section) ... */
    .table-wrapper {
        border: none;
        background-color: transparent;
--- a/static/js/script.js
+++ b/static/js/script.js
@@ -1,6 +1,17 @@
-// static/js/script.js
-
 document.addEventListener('DOMContentLoaded', () => {
+    // --- User Locale and Timezone Detection (Corrected Implementation) ---
+    const USER_LOCALE = navigator.language || 'en-US'; // Fallback to en-US
+    const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone;
+    const DATETIME_FORMAT_OPTIONS = {
+        year: 'numeric',
+        month: 'short',
+        day: 'numeric',
+        hour: 'numeric',
+        minute: '2-digit',
+        timeZone: USER_TIMEZONE,
+    };
+    console.log(`Using locale: ${USER_LOCALE} and timezone: ${USER_TIMEZONE}`);
+
    // --- Element Selectors ---
    const jobListBody = document.getElementById('job-list-body');
    
@@ -11,16 +22,35 @@ document.addEventListener('DOMContentLoaded', () => {
    const audioForm = document.getElementById('audio-form');
    const audioFileInput = document.getElementById('audio-file-input');
    const audioFileName = document.getElementById('audio-file-name');
+    const modelSizeSelect = document.getElementById('model-size-select');
    
    const conversionForm = document.getElementById('conversion-form');
    const conversionFileInput = document.getElementById('conversion-file-input');
    const conversionFileName = document.getElementById('conversion-file-name');
    const outputFormatSelect = document.getElementById('output-format-select');

-    // MODIFICATION: Store the Choices.js instance in a variable
-    let conversionChoices = null;
+    // START: Drag and Drop additions
+    const dragOverlay = document.getElementById('drag-overlay');
+    const actionDialog = document.getElementById('action-dialog');
+    const dialogFileCount = document.getElementById('dialog-file-count');
+    // Dialog Views
+    const dialogInitialView = document.getElementById('dialog-initial-actions');
+    const dialogConvertView = document.getElementById('dialog-convert-view');
+    // Dialog Buttons
+    const dialogConvertBtn = document.getElementById('dialog-action-convert');
+    const dialogOcrBtn = document.getElementById('dialog-action-ocr');
+    const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe');
+    const dialogCancelBtn = document.getElementById('dialog-action-cancel');
+    const dialogStartConversionBtn = document.getElementById('dialog-start-conversion');
+    const dialogBackBtn = document.getElementById('dialog-back');
+    // Dialog Select
+    const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select');
+    // END: Drag and Drop additions

+    let conversionChoices = null;
+    let dialogConversionChoices = null; // For the dialog's format selector
    const activePolls = new Map();
+    let stagedFiles = null; // To hold files from a drop event

    // --- Main Event Listeners ---
    pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName));
@@ -38,12 +68,221 @@ document.addEventListener('DOMContentLoaded', () => {
        }
    });

+    // --- Helper Functions ---
+    // Format a byte count as a short human-readable size string.
+    //   bytes    - size in bytes (number or numeric string). Anything that is not a
+    //              finite positive number (0, null, undefined, NaN, negatives,
+    //              Infinity) is reported as '0 Bytes'.
+    //   decimals - maximum number of fraction digits (default 1); negatives act as 0.
+    // Returns e.g. '0 Bytes', '512 Bytes', '1.5 KB', '2 GB'.
+    function formatBytes(bytes, decimals = 1) {
+        const value = Number(bytes);
+        // Negatives and Infinity previously fell through and rendered "NaN undefined".
+        if (!Number.isFinite(value) || value <= 0) return '0 Bytes';
+
+        const k = 1024;
+        const dm = decimals < 0 ? 0 : decimals;
+        const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
+        // Clamp the unit index: sub-byte values give a negative exponent and sizes of
+        // 1024 TB or more give an index past the end of `sizes`; both used to print
+        // "undefined" as the unit.
+        const exponent = Math.floor(Math.log(value) / Math.log(k));
+        const i = Math.min(Math.max(exponent, 0), sizes.length - 1);
+        return `${parseFloat((value / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
+    }
+
+    // --- Core Job Submission Logic (Refactored for reuse) ---
+    // POST a single job to `endpoint`; on success render a preliminary "pending"
+    // row for it and start polling its status.
+    //   endpoint         - URL to POST to; also used to derive the row's task_type.
+    //   formData         - FormData to send; the 'file' entry supplies the input size.
+    //   originalFilename - name shown in the job row and in failure alerts.
+    // Never rejects: every failure is logged and reported to the user via alert().
+    async function submitJob(endpoint, formData, originalFilename) {
+        try {
+            const response = await fetch(endpoint, { method: 'POST', body: formData });
+            if (!response.ok) {
+                // Error bodies are not guaranteed to be JSON (e.g. an HTML error page
+                // from a proxy). Fall back to the HTTP status instead of surfacing a
+                // JSON parse error that hides what actually went wrong.
+                let detail = null;
+                try {
+                    const errorData = await response.json();
+                    detail = errorData ? errorData.detail : null;
+                } catch (parseError) {
+                    detail = null;
+                }
+                // `detail` may be structured (list/object); stringify it so the
+                // message is readable rather than "[object Object]".
+                if (detail && typeof detail !== 'string') {
+                    detail = JSON.stringify(detail);
+                }
+                throw new Error(detail || `HTTP error! Status: ${response.status}`);
+            }
+            const result = await response.json();
+            // FormData.get() returns null when there is no 'file' entry.
+            const uploadedFile = formData.get('file');
+            const preliminaryJob = {
+                id: result.job_id,
+                status: 'pending',
+                progress: 0,
+                original_filename: originalFilename,
+                input_filesize: uploadedFile ? uploadedFile.size : null,
+                task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
+                created_at: new Date().toISOString() // Create preliminary UTC timestamp
+            };
+            renderJobRow(preliminaryJob);
+            startPolling(result.job_id);
+        } catch (error) {
+            console.error('Error submitting job:', error);
+            alert(`Submission failed for ${originalFilename}: ${error.message}`);
+        }
+    }
+
+    // --- Original Form Submission Handler (Now uses submitJob) ---
+    // Submit handler for the upload forms: sends every selected file as its own
+    // job, one after another, then resets the form UI.
+    //   event    - the submit event; its default action is always suppressed.
+    //   endpoint - URL passed through to submitJob for each file.
+    //   form     - the <form> element that was submitted.
+    async function handleFormSubmit(event, endpoint, form) {
+        event.preventDefault();
+        const fileInput = form.querySelector('input[type="file"]');
+        if (fileInput.files.length === 0) return;
+
+        const submitButton = form.querySelector('button[type="submit"]');
+        submitButton.disabled = true;
+
+        try {
+            // Snapshot the files and option values once, so every job in this batch
+            // uses the settings that were selected when the form was submitted.
+            const files = Array.from(fileInput.files);
+            const extraFields = [];
+            const outputFormat = form.querySelector('select[name="output_format"]');
+            if (outputFormat) {
+                extraFields.push(['output_format', outputFormat.value]);
+            }
+            const modelSize = form.querySelector('select[name="model_size"]');
+            if (modelSize) {
+                extraFields.push(['model_size', modelSize.value]);
+            }
+
+            // Process each file as a separate job, sequentially.
+            for (const file of files) {
+                const formData = new FormData();
+                formData.append('file', file);
+                for (const [fieldName, fieldValue] of extraFields) {
+                    formData.append(fieldName, fieldValue);
+                }
+                await submitJob(endpoint, formData, file.name);
+            }
+        } finally {
+            // Re-enable first so the button can never be left stuck disabled, even
+            // if resetting the UI below throws.
+            submitButton.disabled = false;
+
+            // Reset the form UI after all jobs have been submitted
+            const fileNameDisplay = form.querySelector('.file-name');
+            form.reset();
+            if (fileNameDisplay) {
+                fileNameDisplay.textContent = 'No file chosen';
+                fileNameDisplay.title = 'No file chosen';
+            }
+            if (form.id === 'conversion-form' && conversionChoices) {
+                conversionChoices.clearInput();
+                conversionChoices.setValue([]);
+            }
+        }
+    }
+
+    // --- START: Drag and Drop Implementation ---
+   function setupDragAndDropListeners() {
+        // Registers window-level drag listeners. They toggle the 'dragging' class on
+        // <body> while files are dragged over the page and, when files are dropped on
+        // the overlay, stage them and open the action dialog.
+
+        // dragenter/dragleave fire for every element crossed, so a counter is used
+        // to detect when the drag has really left the window.
+        let dragCounter = 0;
+
+        // True only for drags that carry files. Text/link drags are ignored so they
+        // neither show the overlay nor lose their default drop behaviour.
+        const isFileDrag = (e) =>
+            Boolean(e.dataTransfer) && Array.from(e.dataTransfer.types || []).includes('Files');
+
+        window.addEventListener('dragenter', (e) => {
+            if (!isFileDrag(e)) return;
+            e.preventDefault();
+            dragCounter++;
+            document.body.classList.add('dragging');
+        });
+
+        window.addEventListener('dragleave', (e) => {
+            if (!isFileDrag(e)) return;
+            e.preventDefault();
+            // Never go below zero: an unmatched dragleave would otherwise leave the
+            // counter negative and the overlay stuck on the next drag.
+            dragCounter = Math.max(dragCounter - 1, 0);
+            if (dragCounter === 0) {
+                document.body.classList.remove('dragging');
+            }
+        });
+
+        window.addEventListener('dragover', (e) => {
+            if (!isFileDrag(e)) return;
+            e.preventDefault(); // This is necessary to allow a drop
+        });
+
+        window.addEventListener('drop', (e) => {
+            if (!isFileDrag(e)) return;
+            // Always suppress the default for file drops so the browser does not
+            // navigate to the dropped file.
+            e.preventDefault();
+            dragCounter = 0; // Reset counter
+            document.body.classList.remove('dragging');
+
+            // Only handle the drop if it's on our designated overlay
+            if (e.target === dragOverlay || dragOverlay.contains(e.target)) {
+                // Copy into a plain array so the staged files do not depend on the
+                // event's DataTransfer object after the handler returns.
+                const files = Array.from(e.dataTransfer.files || []);
+                if (files.length > 0) {
+                    stagedFiles = files;
+                    showActionDialog();
+                }
+            }
+        });
+    }
+
+    // Opens the action dialog for the files currently held in `stagedFiles`:
+    // shows the file count, rebuilds the dialog's format selector and resets the
+    // dialog to its initial action-picker view.
+    function showActionDialog() {
+        const choicesConfig = {
+            searchEnabled: true,
+            itemSelectText: 'Select',
+            shouldSort: false,
+            placeholder: true,
+            placeholderValue: 'Select a format...',
+        };
+
+        dialogFileCount.textContent = stagedFiles.length;
+
+        // Mirror the main form's <option> markup into the dialog's <select>.
+        // NOTE(review): the main select is wrapped by Choices.js, which may not keep
+        // every option in the underlying element; confirm the copied list is complete.
+        dialogOutputFormatSelect.innerHTML = outputFormatSelect.innerHTML;
+
+        // Tear down any wrapper left over from a previous opening before creating
+        // a fresh one on the same <select>.
+        if (dialogConversionChoices) dialogConversionChoices.destroy();
+        dialogConversionChoices = new Choices(dialogOutputFormatSelect, choicesConfig);
+
+        // Always start on the action picker rather than the convert sub-view.
+        dialogConvertView.style.display = 'none';
+        dialogInitialView.style.display = 'grid';
+        actionDialog.classList.add('visible');
+    }
+
+    // Hides the action dialog, discards the staged files and disposes of the
+    // dialog's Choices.js instance, if one exists.
+    function closeActionDialog() {
+        actionDialog.classList.remove('visible');
+        stagedFiles = null;
+
+        const choices = dialogConversionChoices;
+        if (!choices) return;
+
+        // Close the dropdown explicitly before destroying the instance, then drop
+        // the reference so the instance is not kept alive between openings.
+        choices.hideDropdown();
+        choices.destroy();
+        dialogConversionChoices = null;
+    }
+
+    // --- Dialog Button and Action Listeners ---
+    // Switches the dialog between its action picker and its "convert to" sub-view.
+    const toggleDialogView = (showConvertView) => {
+        dialogInitialView.style.display = showConvertView ? 'none' : 'grid';
+        dialogConvertView.style.display = showConvertView ? 'block' : 'none';
+    };
+    dialogConvertBtn.addEventListener('click', () => toggleDialogView(true));
+    dialogBackBtn.addEventListener('click', () => toggleDialogView(false));
+
+    // Each of these buttons submits the staged files with a fixed action name.
+    const dialogActionButtons = [
+        [dialogStartConversionBtn, 'convert'],
+        [dialogOcrBtn, 'ocr'],
+        [dialogTranscribeBtn, 'transcribe'],
+    ];
+    for (const [button, action] of dialogActionButtons) {
+        button.addEventListener('click', () => handleDialogAction(action));
+    }
+    dialogCancelBtn.addEventListener('click', closeActionDialog);
+
+
+    // Submits every staged (dropped) file as its own job for the chosen action,
+    // then closes the dialog.
+    //   action - 'convert', 'ocr' or 'transcribe'. Any other value is logged and
+    //            ignored instead of being POSTed to an empty endpoint.
+    // Jobs are started without awaiting one another; submitJob reports its own errors.
+    function handleDialogAction(action) {
+        if (!stagedFiles) return;
+
+        // Resolve the endpoint and extra form fields once, before touching any file,
+        // so validation happens a single time and nothing is half-prepared.
+        let endpoint;
+        const extraFields = [];
+        if (action === 'convert') {
+            const selectedFormat = dialogConversionChoices.getValue(true);
+            if (!selectedFormat) {
+                alert('Please select a format to convert to.');
+                return;
+            }
+            extraFields.push(['output_format', selectedFormat]);
+            endpoint = '/convert-file';
+        } else if (action === 'ocr') {
+            endpoint = '/ocr-pdf';
+        } else if (action === 'transcribe') {
+            extraFields.push(['model_size', modelSizeSelect.value]);
+            endpoint = '/transcribe-audio';
+        } else {
+            // An empty endpoint would make fetch() POST the files to the page URL.
+            console.error('Unknown dialog action:', action);
+            return;
+        }
+
+        for (const file of stagedFiles) {
+            const formData = new FormData();
+            formData.append('file', file);
+            for (const [fieldName, fieldValue] of extraFields) {
+                formData.append(fieldName, fieldValue);
+            }
+            submitJob(endpoint, formData, file.name);
+        }
+
+        closeActionDialog();
+    }
+    // --- END: Drag and Drop Implementation ---
+
    function initializeConversionSelector() {
-        // MODIFICATION: Destroy the old instance if it exists before creating a new one
        if (conversionChoices) {
            conversionChoices.destroy();
        }
-
        conversionChoices = new Choices(outputFormatSelect, {
            searchEnabled: true,
            itemSelectText: 'Select',
@@ -65,7 +304,7 @@ document.addEventListener('DOMContentLoaded', () => {
            for (const formatKey in tool.formats) {
                group.choices.push({
                    value: `${toolKey}_${formatKey}`,
-                    label: `${formatKey.toUpperCase()} - ${tool.formats[formatKey]}`
+                    label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})`
                });
            }
            choicesArray.push(group);
@@ -73,56 +312,21 @@ document.addEventListener('DOMContentLoaded', () => {
        conversionChoices.setChoices(choicesArray, 'value', 'label', true);
    }
    
-    // --- Helper Functions ---
    function updateFileName(input, nameDisplay) {
-        const fileName = input.files.length > 0 ? input.files[0].name : 'No file chosen';
-        nameDisplay.textContent = fileName;
-        nameDisplay.title = fileName;
-    }
-
-    async function handleFormSubmit(event, endpoint, form) {
-        event.preventDefault();
-        const fileInput = form.querySelector('input[type="file"]');
-        const fileNameDisplay = form.querySelector('.file-name');
-        if (!fileInput.files[0]) return;
-
-        const formData = new FormData(form);
-        const submitButton = form.querySelector('button[type="submit"]');
-        submitButton.disabled = true;
-
-        try {
-            const response = await fetch(endpoint, { method: 'POST', body: formData });
-            if (!response.ok) {
-                const errorData = await response.json();
-                throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`);
-            }
-            const result = await response.json();
-            const preliminaryJob = {
-                id: result.job_id,
-                status: 'pending',
-                progress: 0,
-                original_filename: fileInput.files[0].name,
-                task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
-                created_at: new Date().toISOString()
-            };
-            renderJobRow(preliminaryJob);
-            startPolling(result.job_id);
-        } catch (error) {
-            console.error('Error submitting job:', error);
-            alert(`Submission failed: ${error.message}`);
-        } finally {
-            form.reset();
-            if (fileNameDisplay) fileNameDisplay.textContent = 'No file chosen';
-            
-            // MODIFICATION: Use the stored instance to correctly reset the dropdown
-            // without causing an error.
-            if (form.id === 'conversion-form' && conversionChoices) {
-                 conversionChoices.clearInput();
-                 conversionChoices.setValue([]); // Clears the selected value
-            }
-            
-            submitButton.disabled = false;
+        const numFiles = input.files.length;
+        let displayText = 'No file chosen';
+        let displayTitle = 'No file chosen';
+
+        if (numFiles === 1) {
+            displayText = input.files[0].name;
+            displayTitle = input.files[0].name;
+        } else if (numFiles > 1) {
+            displayText = `${numFiles} files selected`;
+            // Create a title attribute to show all filenames on hover
+            displayTitle = Array.from(input.files).map(file => file.name).join(', ');
        }
+        nameDisplay.textContent = displayText;
+        nameDisplay.title = displayTitle;
    }

    async function handleCancelJob(jobId) {
@@ -161,7 +365,7 @@ document.addEventListener('DOMContentLoaded', () => {
            }
        } catch (error) {
            console.error("Couldn't load job history:", error);
-            jobListBody.innerHTML = '<tr><td colspan="5" style="text-align: center;">Could not load job history.</td></tr>';
+            jobListBody.innerHTML = '<tr><td colspan="6" style="text-align: center;">Could not load job history.</td></tr>';
        }
    }

@@ -214,7 +418,12 @@ document.addEventListener('DOMContentLoaded', () => {
            taskTypeLabel = 'Conversion';
        }

-        const formattedDate = new Date(job.created_at).toLocaleString();
+        // --- CORRECTED DATE FORMATTING ---
+        // Takes the UTC string from the server (or the preliminary job)
+        // and formats it using the user's detected locale and timezone.
+        const submittedDate = new Date(job.created_at);
+        const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS);
+
        let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
        if (job.status === 'processing') {
            const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
@@ -233,9 +442,21 @@ document.addEventListener('DOMContentLoaded', () => {
            actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`;
        }

+        // --- File Size Logic ---
+        let fileSizeHtml = '<span>-</span>';
+        if (job.input_filesize) {
+            let sizeString = formatBytes(job.input_filesize);
+            if (job.status === 'completed' && job.output_filesize) {
+                sizeString += ` → ${formatBytes(job.output_filesize)}`;
+            }
+            fileSizeHtml = `<span class="cell-value">${sizeString}</span>`;
+        }
+
        const escapedFilename = job.original_filename ? job.original_filename.replace(/</g, "&lt;").replace(/>/g, "&gt;") : "No filename";
+        
        row.innerHTML = `
            <td data-label="File"><span class="cell-value" title="${escapedFilename}">${escapedFilename}</span></td>
+            <td data-label="File Size">${fileSizeHtml}</td>
            <td data-label="Task"><span class="cell-value">${taskTypeLabel}</span></td>
            <td data-label="Submitted"><span class="cell-value">${formattedDate}</span></td>
            <td data-label="Status"><span class="cell-value">${statusHtml}</span></td>
@@ -246,4 +467,5 @@ document.addEventListener('DOMContentLoaded', () => {
    // --- Initial Load ---
    initializeConversionSelector();
    loadInitialJobs();
+    setupDragAndDropListeners();
 });
--- a/templates/index.html
+++ b/templates/index.html
@@ -27,7 +27,7 @@
                        <fieldset>
                            <legend><h2>File Conversion</h2></legend>
                            <div class="file-input-wrapper">
-                                 <input type="file" name="file" id="conversion-file-input" required>
+                                 <input type="file" name="file" id="conversion-file-input" required multiple>
                                 <label for="conversion-file-input" class="file-input-label">Choose File...</label>
                                 <span id="conversion-file-name" class="file-name">No file chosen</span>
                            </div>
@@ -45,7 +45,7 @@
                        <fieldset>
                            <legend><h2>PDF OCR</h2></legend>
                            <div class="file-input-wrapper">
-                                <input type="file" name="file" id="pdf-file-input" accept=".pdf" required>
+                                <input type="file" name="file" id="pdf-file-input" accept=".pdf" required multiple>
                                <label for="pdf-file-input" class="file-input-label">Choose PDF...</label>
                                <span id="pdf-file-name" class="file-name">No file chosen</span>
                            </div>
@@ -59,7 +59,7 @@
                        <fieldset>
                            <legend><h2>Transcribe Audio</h2></legend>
                            <div class="file-input-wrapper">
-                                 <input type="file" name="file" id="audio-file-input" accept="audio/*" required>
+                                 <input type="file" name="file" id="audio-file-input" accept="audio/*" required multiple>
                                 <label for="audio-file-input" class="file-input-label">Choose Audio...</label>
                                 <span id="audio-file-name" class="file-name">No file chosen</span>
                            </div>
@@ -87,6 +87,7 @@
                        <thead>
                            <tr>
                                <th>File</th>
+                                <th>File Size</th>
                                <th>Task</th>
                                <th>Submitted</th>
                                <th>Status</th>
@@ -101,6 +102,36 @@
        </main>
    </div>

+    <div id="drag-overlay" class="drag-overlay">
+        <div class="drag-overlay-content">
+             <p>Drop files anywhere to begin</p>
+        </div>
+    </div>
+    <div id="action-dialog" class="dialog-overlay">
+        <div class="dialog-box">
+            <h2>Choose Action</h2>
+            <p><span id="dialog-file-count"></span> file(s) dropped. What would you like to do?</p>
+            
+            <div id="dialog-initial-actions" class="dialog-actions">
+                <button id="dialog-action-convert">Convert</button>
+                <button id="dialog-action-ocr">OCR</button>
+                <button id="dialog-action-transcribe">Transcribe</button>
+            </div>
+            
+            <div id="dialog-convert-view" style="display: none;">
+                <div class="form-control" style="text-align: left; margin-bottom: 1rem;">
+                    <label for="dialog-output-format-select">Convert To</label>
+                    <select id="dialog-output-format-select" required></select>
+                </div>
+                <div class="dialog-actions">
+                    <button id="dialog-start-conversion">Start Conversion</button>
+                    <button id="dialog-back" class="dialog-secondary-action">Back</button>
+                </div>
+            </div>
+
+            <button id="dialog-action-cancel" class="dialog-cancel">Cancel</button>
+        </div>
+    </div>
    <script>
        window.APP_CONFIG = {
            conversionTools: {{ conversion_tools | tojson }}