stability and settings

2025-09-17 12:36:24 +00:00
parent 4d586a46e9
commit 2115238217
9 changed files with 1271 additions and 265 deletions
--- a/main.py
+++ b/main.py
@@ -1,64 +1,99 @@
 import logging
 import shutil
 import subprocess
 import traceback
 import uuid
 import shlex
 import yaml
 from contextlib import asynccontextmanager
 from datetime import datetime
 from pathlib import Path
-from typing import List, Set
+from typing import Dict, List, Any
 import ocrmypdf
 import pypdf
 import pytesseract
 from PIL import Image
 from faster_whisper import WhisperModel
 # MODIFICATION: Added Form for model selection
 from fastapi import (Depends, FastAPI, File, Form, HTTPException, Request,
-                     UploadFile, status)
+                     UploadFile, status, Body)
-from fastapi.responses import FileResponse
+from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from huey import SqliteHuey
 from pydantic import BaseModel, ConfigDict
 from pydantic_settings import BaseSettings
 from sqlalchemy import (Column, DateTime, Integer, String, Text,
-                        create_engine)
+                        create_engine, delete, event)
 from sqlalchemy.pool import NullPool
 from string import Formatter
 from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from werkzeug.utils import secure_filename
 # --------------------------------------------------------------------------------
 # --- 1. CONFIGURATION
 # --------------------------------------------------------------------------------
-class Settings(BaseSettings):
+
 class AppPaths(BaseModel):
    """Filesystem locations and built-in defaults for the application.

    Every path is derived from the directory that contains this module.
    """
    # Directory containing this module; all other paths hang off it.
    BASE_DIR: Path = Path(__file__).resolve().parent
    UPLOADS_DIR: Path = BASE_DIR / "uploads"
    PROCESSED_DIR: Path = BASE_DIR / "processed"
    DATABASE_URL: str = f"sqlite:///{BASE_DIR / 'jobs.db'}"
    HUEY_DB_PATH: str = str(BASE_DIR / "huey.db")
    # User-editable YAML configuration read by load_app_config().
    SETTINGS_FILE: Path = BASE_DIR / "settings.yml"
    WHISPER_COMPUTE_TYPE: str = "int8"
    # Builtin generic: typing.Set is no longer imported by this module.
    ALLOWED_WHISPER_MODELS: set[str] = {"tiny", "base", "small", "medium", "large-v3", "distil-large-v2"}
    MAX_FILE_SIZE_BYTES: int = 500 * 1024 * 1024  # 500 MB
    ALLOWED_PDF_EXTENSIONS: set = {".pdf"}
    ALLOWED_IMAGE_EXTENSIONS: set = {".png", ".jpg", ".jpeg", ".tiff", ".tif"}
    # Extensions are compared against Path.suffix, so each needs its leading dot.
    ALLOWED_AUDIO_EXTENSIONS: set = {".mp3", ".m4a", ".ogg", ".flac", ".opus"}
-settings = Settings()
+PATHS = AppPaths()
 APP_CONFIG: Dict[str, Any] = {}
 def load_app_config():
    """Load settings.yml into the module-level ``APP_CONFIG`` dict.

    Besides the raw YAML content, two derived entries are stored under
    ``app_settings``: ``max_file_size_bytes`` (computed from
    ``max_file_size_mb`` when that value is a number) and
    ``allowed_all_extensions``.

    If the file cannot be read or parsed, or its top-level value is not a
    mapping, the problem is logged and ``APP_CONFIG`` is reset to ``{}``.
    ``APP_CONFIG`` is only replaced once the new config is fully built.
    """
    global APP_CONFIG
    try:
        with open(PATHS.SETTINGS_FILE, 'r') as f:
            loaded = yaml.safe_load(f)
    except (OSError, yaml.YAMLError) as e:
        logger.error(f"Could not load settings.yml: {e}. App may not function correctly.")
        APP_CONFIG = {}
        return
    # safe_load returns None for an empty file and may return a list/scalar.
    if not isinstance(loaded, dict):
        logger.error("Could not load settings.yml: top-level value is not a mapping. App may not function correctly.")
        APP_CONFIG = {}
        return
    app_settings = loaded.get('app_settings')
    if not isinstance(app_settings, dict):
        app_settings = {}
        loaded['app_settings'] = app_settings
    max_mb = app_settings.get('max_file_size_mb')
    # Only derive the byte limit from a real number; consumers fall back to
    # their own default when the key is absent.
    if isinstance(max_mb, (int, float)) and not isinstance(max_mb, bool):
        app_settings['max_file_size_bytes'] = max_mb * 1024 * 1024
    app_settings['allowed_all_extensions'] = {
        ".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif",
        ".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg",
        ".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi",
        ".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi",
        ".azw3", ".pptx", ".xlsx"
    }
    APP_CONFIG = loaded
    logger.info("Successfully loaded settings from settings.yml")
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 # Create the working directories up front; PATHS replaces the former
 # `settings` object, which no longer exists in this module.
 PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
 PATHS.PROCESSED_DIR.mkdir(exist_ok=True)
 # --------------------------------------------------------------------------------
-# --- 2. DATABASE (for Job Tracking) - NO CHANGES
+# --- 2. DATABASE & Schemas
 # --------------------------------------------------------------------------------
-engine = create_engine(settings.DATABASE_URL, connect_args={"check_same_thread": False})
+engine = create_engine(
    PATHS.DATABASE_URL,
    connect_args={"check_same_thread": False, "timeout": 30},
    poolclass=NullPool,
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 # THIS IS THE CRITICAL FIX
 Base = declarative_base()
@event.listens_for(engine, "connect")
def _set_sqlite_pragmas(dbapi_connection, connection_record):
    """Apply SQLite pragmas to every new DBAPI connection.

    Switches the journal to WAL and sets ``synchronous`` to NORMAL, which is
    intended to improve concurrency between the FastAPI process and the
    Huey worker processes.
    """
    pragmas = ("PRAGMA journal_mode=WAL;", "PRAGMA synchronous=NORMAL;")
    cursor = dbapi_connection.cursor()
    try:
        for statement in pragmas:
            cursor.execute(statement)
    finally:
        # Always release the cursor, even if a pragma fails.
        cursor.close()
 class Job(Base):
    __tablename__ = "jobs"
    id = Column(String, primary_key=True, index=True)
@@ -80,10 +115,6 @@ def get_db():
    finally:
        db.close()
 # --------------------------------------------------------------------------------
 # --- 3. PYDANTIC SCHEMAS (Data Validation) - NO CHANGES
 # --------------------------------------------------------------------------------
 class JobCreate(BaseModel):
    id: str
    task_type: str
@@ -104,9 +135,8 @@ class JobSchema(BaseModel):
    updated_at: datetime
    model_config = ConfigDict(from_attributes=True)
 # --------------------------------------------------------------------------------
-# --- 4. CRUD OPERATIONS (Database Interactions) - NO CHANGES
+# --- 3. CRUD OPERATIONS (No Changes)
 # --------------------------------------------------------------------------------
 def get_job(db: Session, job_id: str):
    """Return the first Job row whose id equals ``job_id`` (None if there is none)."""
    matching_jobs = db.query(Job).filter(Job.id == job_id)
    return matching_jobs.first()
@@ -143,37 +173,101 @@ def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
        db.commit()
    return db_job
 # --------------------------------------------------------------------------------
-# --- 5. BACKGROUND TASKS (Huey)
+# --- 4. BACKGROUND TASK SETUP
 # --------------------------------------------------------------------------------
-huey = SqliteHuey(filename=settings.HUEY_DB_PATH)
+huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH)
 # --- START: NEW WHISPER MODEL CACHING ---
 # This dictionary will live in the memory of the Huey worker process,
 # allowing us to reuse loaded models across tasks.
 WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {}
 def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel:
    """Return the Whisper model for ``model_size``, loading it on first use.

    Loaded models are kept in ``WHISPER_MODELS_CACHE`` keyed by model size,
    so later calls in the same process reuse the instance. The
    ``compute_type`` entry of ``whisper_settings`` (default ``'int8'``) is
    only consulted when the model has to be loaded.
    """
    if model_size in WHISPER_MODELS_CACHE:
        logger.info(f"Found model '{model_size}' in cache. Reusing.")
        return WHISPER_MODELS_CACHE[model_size]

    # Cache miss: load on CPU with the configured compute type and remember it.
    compute_type = whisper_settings.get('compute_type', 'int8')
    logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory...")
    WHISPER_MODELS_CACHE[model_size] = WhisperModel(model_size, device="cpu", compute_type=compute_type)
    logger.info(f"Model '{model_size}' loaded successfully.")
    return WHISPER_MODELS_CACHE[model_size]
 # --- END: NEW WHISPER MODEL CACHING ---
 # MODIFICATION: Removed global whisper model and lazy loader.
 # The model will now be loaded inside the task itself based on user selection.
@huey.task()
def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict):
    """Transcribe an uploaded audio file and write the text to ``output_path_str``.

    Args:
        job_id: Id of the Job row tracking this task.
        input_path_str: Path of the uploaded audio file; removed when the task ends.
        output_path_str: Path the transcript is written to.
        model_size: Whisper model name handed to ``get_whisper_model``.
        whisper_settings: Whisper options handed to ``get_whisper_model``.

    The job is marked "failed" on any exception; details go to the log only.
    """
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
        if not job or job.status == 'cancelled':
            logger.info(f"Job {job_id} was cancelled before starting.")
            return
        update_job_status(db, job_id, "processing")
        model = get_whisper_model(model_size, whisper_settings)
        logger.info(f"Starting transcription for job {job_id}")
        segments, info = model.transcribe(input_path_str, beam_size=5)
        full_transcript = []
        for segment in segments:
            # Re-read the job for every segment so a cancel request stops the
            # task; the row can also disappear if the history is cleared.
            job_check = get_job(db, job_id)
            if job_check is None or job_check.status == 'cancelled':
                logger.info(f"Job {job_id} cancelled during transcription.")
                return
            if info.duration > 0:
                progress = int((segment.end / info.duration) * 100)
                update_job_status(db, job_id, "processing", progress=progress)
            full_transcript.append(segment.text.strip())
        transcript_text = "\n".join(full_transcript)
        # Write to a uniquely named temp file, then rename, to avoid partial files.
        out_path = Path(output_path_str)
        tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp")
        with tmp_out.open("w", encoding="utf-8") as f:
            f.write(transcript_text)
        tmp_out.replace(out_path)
        mark_job_as_completed(db, job_id, preview=transcript_text)
        logger.info(f"Transcription for job {job_id} completed.")
    except Exception:
        logger.exception(f"ERROR during transcription for job {job_id}")
        update_job_status(db, job_id, "failed", error="See server logs for details.")
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        db.close()
 # Other tasks remain unchanged
@huey.task()
def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict):
    """Run OCRmyPDF on an uploaded PDF and store a text preview on the job.

    Args:
        job_id: Id of the Job row tracking this task.
        input_path_str: Path of the uploaded PDF; removed when the task ends.
        output_path_str: Path the OCRed PDF is written to.
        ocr_settings: Optional overrides for ``deskew``, ``force_ocr``,
            ``clean`` and ``optimize``; missing keys use the defaults below.

    The job is marked "failed" on any exception; details go to the log only.
    """
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
        if not job or job.status == 'cancelled':
            return
        update_job_status(db, job_id, "processing")
        logger.info(f"Starting PDF OCR for job {job_id}")
        ocrmypdf.ocr(input_path_str, output_path_str,
                     deskew=ocr_settings.get('deskew', True),
                     force_ocr=ocr_settings.get('force_ocr', True),
                     clean=ocr_settings.get('clean', True),
                     optimize=ocr_settings.get('optimize', 1),
                     progress_bar=False)
        # Build the preview from the text layer of the OCRed output.
        with open(output_path_str, "rb") as f:
            reader = pypdf.PdfReader(f)
            preview = "\n".join(page.extract_text() or "" for page in reader.pages)
        mark_job_as_completed(db, job_id, preview=preview)
        logger.info(f"PDF OCR for job {job_id} completed.")
    except Exception:
        logger.exception(f"ERROR during PDF OCR for job {job_id}")
        update_job_status(db, job_id, "failed", error="See server logs for details.")
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        db.close()
@@ -183,10 +277,7 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
-        if not job or job.status == 'cancelled':
+        if not job or job.status == 'cancelled': return
            logger.info(f"Job {job_id} was cancelled before starting.")
            return
        update_job_status(db, job_id, "processing", progress=50)
        logger.info(f"Starting Image OCR for job {job_id}")
        text = pytesseract.image_to_string(Image.open(input_path_str))
@@ -194,154 +285,177 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
            f.write(text)
        mark_job_as_completed(db, job_id, preview=text)
        logger.info(f"Image OCR for job {job_id} completed.")
-    except Exception as e:
+    except Exception:
-        logger.error(f"ERROR during Image OCR for job {job_id}: {e}\n{traceback.format_exc()}")
+        logger.exception(f"ERROR during Image OCR for job {job_id}")
-        update_job_status(db, job_id, "failed", error=str(e))
+        update_job_status(db, job_id, "failed", error="See server logs for details.")
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        db.close()
-# MODIFICATION: The task now accepts `model_size` and loads the model dynamically.
+
@huey.task()
def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, tool: str, task_key: str, conversion_tools_config: dict):
    """Convert an uploaded file by running the configured external tool.

    Args:
        job_id: Id of the Job row tracking this task.
        input_path_str: Path of the uploaded file; removed when the task ends.
        output_path_str: Final path of the converted file.
        tool: Key into ``conversion_tools_config`` selecting the tool.
        task_key: Tool-specific variant; parsed per tool to fill template values.
        conversion_tools_config: Mapping of tool name to its config, which must
            contain ``command_template`` and may contain ``timeout`` (seconds).

    The command writes to a temporary output path that is renamed onto
    ``output_path_str`` on success. The job is marked "failed" on any
    exception; details go to the log only.
    """
    db = SessionLocal()
    temp_input_file = None
    temp_output_file = None
    try:
        job = get_job(db, job_id)
        if not job or job.status == 'cancelled':
            return
        update_job_status(db, job_id, "processing", progress=25)
        logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}")
        tool_config = conversion_tools_config.get(tool)
        if not tool_config:
            raise ValueError(f"Unknown conversion tool: {tool}")
        input_path = Path(input_path_str)
        output_path = Path(output_path_str)
        current_input_path = input_path
        if tool == "mozjpeg":
            # The MozJPEG command is given a PPM produced by `vips copy` first.
            temp_input_file = input_path.with_suffix('.temp.ppm')
            logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}")
            pre_conv_cmd = ["vips", "copy", str(input_path), str(temp_input_file)]
            pre_conv_result = subprocess.run(pre_conv_cmd, capture_output=True, text=True, check=False, timeout=tool_config.get("timeout", 300))
            if pre_conv_result.returncode != 0:
                err = (pre_conv_result.stderr or "")[:4000]
                raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}")
            current_input_path = temp_input_file
        update_job_status(db, job_id, "processing", progress=50)
        # Placeholders a command template is allowed to reference.
        ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"}

        def validate_and_build_command(template_str: str, mapping: dict):
            """Reject unknown placeholders, fill the template, split into argv."""
            fmt = Formatter()
            used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
            bad = used - ALLOWED_VARS
            if bad:
                raise ValueError(f"Command template contains disallowed placeholders: {bad}")
            formatted = template_str.format(**mapping)
            return shlex.split(formatted)

        # Use a temporary output path and atomically move into place after success.
        temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp")
        mapping = {
            "input": str(current_input_path),
            "output": str(temp_output_file),
            "output_dir": str(output_path.parent),
            "output_ext": output_path.suffix.lstrip('.'),
        }
        # Tool-specific values derived from task_key.
        if tool.startswith("ghostscript"):
            device, setting = task_key.split('_')
            mapping.update({"device": device, "dpi": setting, "preset": setting})
        elif tool == "pngquant":
            _, quality_key = task_key.split('_')
            quality_map = {"hq": "80-95", "mq": "65-80", "fast": "65-80"}
            speed_map = {"hq": "1", "mq": "3", "fast": "11"}
            mapping.update({"quality": quality_map.get(quality_key, "65-80"), "speed": speed_map.get(quality_key, "3")})
        elif tool == "sox":
            _, rate, depth = task_key.split('_')
            rate = rate.replace('k', '000') if 'k' in rate else rate
            depth = depth.replace('b', '') if 'b' in depth else '16'
            mapping.update({"samplerate": rate, "bitdepth": depth})
        elif tool == "mozjpeg":
            _, quality = task_key.split('_')
            quality = quality.replace('q', '')
            mapping.update({"quality": quality})
        command_template_str = tool_config["command_template"]
        command = validate_and_build_command(command_template_str, mapping)
        logger.info(f"Executing command: {' '.join(command)}")

        def run_command(argv: List[str], timeout: int = 300):
            """Run argv with a timeout; raise with trimmed output on failure."""
            try:
                res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
            except subprocess.TimeoutExpired as exc:
                raise Exception(f"Command timed out after {timeout}s") from exc
            if res.returncode != 0:
                stderr = (res.stderr or "")[:4000]
                stdout = (res.stdout or "")[:4000]
                raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
            return res

        run_command(command, timeout=tool_config.get("timeout", 300))
        if tool == "libreoffice":
            # The result is expected in the output directory, named after the
            # input file with the target suffix; move it to the final path.
            expected_output_filename = input_path.with_suffix(output_path.suffix).name
            generated_file = output_path.parent / expected_output_filename
            if generated_file.exists():
                generated_file.replace(output_path)
            else:
                raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}")
        # Move temp output into final location atomically.
        if temp_output_file and temp_output_file.exists():
            temp_output_file.replace(output_path)
        mark_job_as_completed(db, job_id, preview="Successfully converted file.")
        logger.info(f"Conversion for job {job_id} completed.")
    except Exception:
        logger.exception(f"ERROR during conversion for job {job_id}")
        update_job_status(db, job_id, "failed", error="See server logs for details.")
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        if temp_input_file:
            temp_input_file.unlink(missing_ok=True)
        if temp_output_file:
            temp_output_file.unlink(missing_ok=True)
        db.close()
 # --------------------------------------------------------------------------------
-# --- 6. FASTAPI APPLICATION
+# --- 5. FASTAPI APPLICATION
 # --------------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: prepare the database and config, then serve.

    Before yielding, creates any missing tables for ``Base`` on ``engine``
    and loads settings via ``load_app_config``. A log line is written
    before startup work and after the yield returns.
    """
    logger.info("Application starting up...")
    # Startup work: schema first, then user settings.
    Base.metadata.create_all(engine)
    load_app_config()
    yield
    logger.info("Application shutting down...")
 app = FastAPI(lifespan=lifespan)
-app.mount("/static", StaticFiles(directory=settings.BASE_DIR / "static"), name="static")
+app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static")
-templates = Jinja2Templates(directory=settings.BASE_DIR / "templates")
+templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates")
 # --- Helper Functions ---
 async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
    """Stream an uploaded file to ``destination`` in 1 MiB chunks.

    Data is written to a uniquely named temp file next to ``destination``
    and renamed into place only after the whole upload has been received,
    so a partial upload never appears under the final name.

    The size limit comes from ``app_settings.max_file_size_bytes`` in
    ``APP_CONFIG`` (100 MiB when unset).

    Raises:
        HTTPException: 413 when the upload exceeds the size limit.
    """
    max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024)
    tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp")
    size = 0
    try:
        with tmp.open("wb") as buffer:
            while True:
                chunk = await upload_file.read(1024 * 1024)
                if not chunk:
                    break
                size += len(chunk)
                if size > max_size:
                    raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
                buffer.write(chunk)
        # Atomic move into place.
        tmp.replace(destination)
    except BaseException:
        # BaseException so the temp file is also removed when the request is
        # cancelled: asyncio.CancelledError is not an Exception subclass.
        tmp.unlink(missing_ok=True)
        raise
 def is_allowed_file(filename: str, allowed_extensions: set) -> bool:
    return Path(filename).suffix.lower() in allowed_extensions
-# --- API Endpoints ---
+# --- Routes (only transcription route is modified) ---
@app.get("/")
async def get_index(request: Request):
    """Render index.html with the configured Whisper models and conversion tools.

    The model list is read from ``APP_CONFIG`` (the same source the
    transcription endpoint validates against) rather than from the removed
    ``settings`` object.
    """
    whisper_models = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}).get("allowed_models", [])
    conversion_tools = APP_CONFIG.get("conversion_tools", {})
    return templates.TemplateResponse("index.html", {
        "request": request,
        "whisper_models": sorted(whisper_models),
        "conversion_tools": conversion_tools
    })
@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Accept a PDF upload, record a job and queue the OCR task.

    Returns the new job's id and status.

    Raises:
        HTTPException: 400 when the file does not have a ``.pdf`` extension.
    """
    if not is_allowed_file(file.filename, PATHS.ALLOWED_PDF_EXTENSIONS):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PDF.")
    job_id = uuid.uuid4().hex
    safe_basename = secure_filename(file.filename)
    # The job id in the name keeps concurrent uploads of the same file apart.
    unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}"
    upload_path = PATHS.UPLOADS_DIR / unique_filename
    processed_path = PATHS.PROCESSED_DIR / unique_filename
    await save_upload_file_chunked(file, upload_path)
    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
    # run_pdf_ocr_task requires the OCR options as its fourth argument.
    ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})
    run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings)
    return {"job_id": new_job.id, "status": new_job.status}
@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Accept an image upload, record a job and queue the image OCR task.

    The recognised text is written to a ``.txt`` file in the processed
    directory. Returns the new job's id and status.

    Raises:
        HTTPException: 400 when the file extension is not an allowed image type.
    """
    if not is_allowed_file(file.filename, PATHS.ALLOWED_IMAGE_EXTENSIONS):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PNG, JPG, or TIFF.")
    job_id = uuid.uuid4().hex
    safe_basename = secure_filename(file.filename)
    file_ext = Path(safe_basename).suffix
    unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}"
    upload_path = PATHS.UPLOADS_DIR / unique_filename
    processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt"
    await save_upload_file_chunked(file, upload_path)
    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
    run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
    return {"job_id": new_job.id, "status": new_job.status}
 # MODIFICATION: Endpoint now accepts `model_size` as form data.
@app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED)
 async def submit_audio_transcription(
    file: UploadFile = File(...),
    model_size: str = Form("base"),
    db: Session = Depends(get_db)
 ):
-    if not is_allowed_file(file.filename, settings.ALLOWED_AUDIO_EXTENSIONS):
+    if not is_allowed_file(file.filename, {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus"}):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file type.")
-    # Validate the selected model size
+    whisper_config = APP_CONFIG.get("transcription_settings", {}).get("whisper", {})
-    if model_size not in settings.ALLOWED_WHISPER_MODELS:
+    if model_size not in whisper_config.get("allowed_models", []):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.")
    job_id = uuid.uuid4().hex
@@ -350,19 +464,145 @@ async def submit_audio_transcription(
    audio_filename = f"{stem}_{job_id}{suffix}"
    transcript_filename = f"{stem}_{job_id}.txt"
-    upload_path = settings.UPLOADS_DIR / audio_filename
+    upload_path = PATHS.UPLOADS_DIR / audio_filename
-    processed_path = settings.PROCESSED_DIR / transcript_filename
+    processed_path = PATHS.PROCESSED_DIR / transcript_filename
    await save_upload_file_chunked(file, upload_path)
    job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
-    # Pass the selected model size to the background task
+    # --- MODIFIED: Pass whisper_config to the task ---
-    run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size)
+    run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config)
    return {"job_id": new_job.id, "status": new_job.status}
-@app.post("/job/{job_id}/cancel", status_code=status.HTTP_200_OK)
+
 # --- Other routes remain unchanged ---
@app.get("/")
async def get_index(request: Request):
    """Render index.html with the configured Whisper models and conversion tools."""
    transcription_cfg = APP_CONFIG.get("transcription_settings", {})
    whisper_models = transcription_cfg.get("whisper", {}).get("allowed_models", [])
    context = {
        "request": request,
        # Sorted so the template receives the models in a stable order.
        "whisper_models": sorted(whisper_models),
        "conversion_tools": APP_CONFIG.get("conversion_tools", {}),
    }
    return templates.TemplateResponse("index.html", context)
@app.get("/settings")
async def get_settings_page(request: Request):
    """Render settings.html with the current contents of settings.yml.

    The file is re-read on every request. If it cannot be read or parsed,
    the error is logged and the template receives an empty config.
    """
    current_config = {}
    try:
        with open(PATHS.SETTINGS_FILE, 'r') as settings_file:
            current_config = yaml.safe_load(settings_file)
    except Exception as exc:
        logger.error(f"Could not load settings.yml for settings page: {exc}")
    context = {"request": request, "config": current_config}
    return templates.TemplateResponse("settings.html", context)
@app.post("/settings/save")
async def save_settings(new_config: Dict = Body(...)):
    """Replace settings.yml with ``new_config`` and reload the in-memory config.

    The YAML is written to a temp file and renamed over settings.yml, so a
    failed dump cannot leave a truncated settings file behind.

    Raises:
        HTTPException: 500 when writing or reloading the settings fails.
    """
    # NOTE(review): the saved config includes the conversion command templates
    # that worker tasks execute, and no authentication is visible on this
    # handler - confirm access is restricted elsewhere.
    tmp_path = PATHS.SETTINGS_FILE.with_suffix(PATHS.SETTINGS_FILE.suffix + f".{uuid.uuid4().hex}.tmp")
    try:
        with open(tmp_path, 'w') as f:
            yaml.dump(new_config, f, default_flow_style=False, sort_keys=False)
        tmp_path.replace(PATHS.SETTINGS_FILE)
        load_app_config()
        return JSONResponse({"message": "Settings saved successfully."})
    except Exception as e:
        tmp_path.unlink(missing_ok=True)
        logger.error(f"Failed to save settings: {e}")
        raise HTTPException(status_code=500, detail="Could not write to settings.yml.") from e
@app.post("/settings/clear-history")
async def clear_job_history(db: Session = Depends(get_db)):
    """Delete every row from the jobs table and report how many were removed.

    Raises:
        HTTPException: 500 when the delete fails; the transaction is rolled back.
    """
    try:
        all_jobs = db.query(Job)
        removed = all_jobs.delete()
        db.commit()
        logger.info(f"Cleared {removed} jobs from history.")
        return {"deleted_count": removed}
    except Exception as exc:
        # Undo the partial transaction before reporting the failure.
        db.rollback()
        logger.error(f"Failed to clear job history: {exc}")
        raise HTTPException(status_code=500, detail="Database error while clearing history.")
@app.post("/settings/delete-files")
async def delete_processed_files():
    """Delete every regular file directly inside the processed directory.

    Every entry is attempted even if an earlier one fails. Returns the
    number of files removed.

    Raises:
        HTTPException: 500 naming the files that could not be deleted.
    """
    removed = 0
    failed_names = []
    for entry in PATHS.PROCESSED_DIR.glob('*'):
        try:
            # Skip sub-directories and anything that is not a regular file.
            if not entry.is_file():
                continue
            entry.unlink()
            removed += 1
        except Exception as exc:
            failed_names.append(entry.name)
            logger.error(f"Could not delete processed file {entry.name}: {exc}")
    if failed_names:
        raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(failed_names)}")
    logger.info(f"Deleted {removed} files from processed directory.")
    return {"deleted_count": removed}
@app.post("/convert-file", status_code=status.HTTP_202_ACCEPTED)
async def submit_file_conversion(file: UploadFile = File(...), output_format: str = Form(...), db: Session = Depends(get_db)):
    """Accept an upload, record a job and queue a conversion task.

    ``output_format`` has the form ``<tool>_<task_key>``, where ``<tool>`` is
    a key of the ``conversion_tools`` config and ``<task_key>`` is one of
    that tool's ``formats``. Returns the new job's id and status.

    Raises:
        HTTPException: 400 when the file type or output format is not allowed.
    """
    filename = file.filename or ""
    allowed_exts = APP_CONFIG.get("app_settings", {}).get("allowed_all_extensions", set())
    if not is_allowed_file(filename, allowed_exts):
        raise HTTPException(status_code=400, detail=f"File type '{Path(filename).suffix}' not allowed.")
    conversion_tools = APP_CONFIG.get("conversion_tools", {})
    tool = task_key = None
    # Tool names may themselves contain underscores (e.g. "ghostscript_pdf"),
    # so splitting on the first "_" is not enough: try the configured tool
    # names, longest first, as prefixes of output_format.
    tool_names = sorted((name for name in conversion_tools if isinstance(name, str)), key=len, reverse=True)
    for candidate in tool_names:
        prefix = f"{candidate}_"
        if not output_format.startswith(prefix):
            continue
        candidate_cfg = conversion_tools[candidate]
        formats = candidate_cfg.get("formats") if isinstance(candidate_cfg, dict) else None
        remainder = output_format[len(prefix):]
        if formats and remainder in formats:
            tool, task_key = candidate, remainder
            break
    if tool is None:
        raise HTTPException(status_code=400, detail="Invalid output format selected.")
    job_id = uuid.uuid4().hex
    safe_basename = secure_filename(filename)
    original_stem = Path(safe_basename).stem
    # By default the first segment of task_key is the output extension.
    target_ext = task_key.split('_')[0]
    if tool == "ghostscript_pdf":
        target_ext = "pdf"
    upload_filename = f"{original_stem}_{job_id}{Path(safe_basename).suffix}"
    processed_filename = f"{original_stem}_{job_id}.{target_ext}"
    upload_path = PATHS.UPLOADS_DIR / upload_filename
    processed_path = PATHS.PROCESSED_DIR / processed_filename
    await save_upload_file_chunked(file, upload_path)
    job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename,
                         input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)
    run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools)
    return {"job_id": new_job.id, "status": new_job.status}
@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Accept a PDF upload, record a job and queue the OCR task.

    Returns the new job's id and status.

    Raises:
        HTTPException: 400 when the file does not have a ``.pdf`` extension.
    """
    if not is_allowed_file(file.filename, {".pdf"}):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PDF.")

    job_id = uuid.uuid4().hex
    safe_name = Path(secure_filename(file.filename))
    # The same unique name is used in both the uploads and processed directories.
    unique_filename = f"{safe_name.stem}_{job_id}{safe_name.suffix}"
    upload_path = PATHS.UPLOADS_DIR / unique_filename
    processed_path = PATHS.PROCESSED_DIR / unique_filename
    await save_upload_file_chunked(file, upload_path)

    job_data = JobCreate(
        id=job_id,
        task_type="ocr",
        original_filename=file.filename,
        input_filepath=str(upload_path),
        processed_filepath=str(processed_path),
    )
    new_job = create_job(db=db, job=job_data)
    ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})
    run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings)
    return {"job_id": new_job.id, "status": new_job.status}
@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Store an uploaded image and queue a text-extraction (OCR) job for it.

    Args:
        file: Uploaded file; must pass ``is_allowed_file`` for PNG/JPG/TIFF.
        db: Database session provided by the ``get_db`` dependency.

    Returns:
        Dict with the new job's ``job_id`` and its ``status``.

    Raises:
        HTTPException: 400 when the upload has a disallowed extension.
    """
    allowed_exts = {".png", ".jpg", ".jpeg", ".tiff", ".tif"}
    if not is_allowed_file(file.filename, allowed_exts):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid file type. Please upload a PNG, JPG, or TIFF.",
        )

    job_id = uuid.uuid4().hex
    sanitized = Path(secure_filename(file.filename))
    tagged_stem = f"{sanitized.stem}_{job_id}"

    # The upload keeps its sanitized extension; the result is always a .txt file.
    upload_path = PATHS.UPLOADS_DIR / f"{tagged_stem}{sanitized.suffix}"
    processed_path = PATHS.PROCESSED_DIR / f"{tagged_stem}.txt"
    await save_upload_file_chunked(file, upload_path)

    new_job = create_job(
        db=db,
        job=JobCreate(
            id=job_id,
            task_type="ocr-image",
            original_filename=file.filename,
            input_filepath=str(upload_path),
            processed_filepath=str(processed_path),
        ),
    )
    run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
    return {"job_id": new_job.id, "status": new_job.status}
@app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED)
 async def cancel_job(job_id: str, db: Session = Depends(get_db)):
    job = get_job(db, job_id)
    if not job:
@@ -386,12 +626,13 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)):
@app.get("/download/{filename}")
 async def download_file(filename: str):
    safe_filename = secure_filename(filename)
-    file_path = settings.PROCESSED_DIR / safe_filename
+    file_path = PATHS.PROCESSED_DIR / safe_filename
-    
+    file_path = file_path.resolve()
-    if not file_path.resolve().is_relative_to(settings.PROCESSED_DIR.resolve()):
+    base = PATHS.PROCESSED_DIR.resolve()
    try:
        file_path.relative_to(base)
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied.")
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found.")
    return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream")
--- a/run.sh
+++ b/run.sh
@@ -3,8 +3,8 @@
 echo "Starting DocProcessor with Gunicorn..."
-exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
+exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
 echo "Done"
 echo "Starting huey..."
-exec huey_consumer.py main.huey &
+exec huey_consumer.py main.huey -w 2 &
 echo "Done"
--- a/settings.yml
+++ b/settings.yml
@@ -0,0 +1,179 @@
 # settings.yml
 # General application settings
 app_settings:
  max_file_size_mb: 2000 # Maximum upload size in Megabytes
 # Settings for Optical Character Recognition (OCR) tasks
 ocr_settings:
  # The /ocr-pdf endpoint reads this mapping (falling back to an empty dict
  # when it is missing) and passes it to run_pdf_ocr_task along with the
  # input and output paths.
  # NOTE(review): the keys look like OCRmyPDF option names and are presumably
  # forwarded to ocrmypdf as keyword arguments -- confirm in run_pdf_ocr_task.
  ocrmypdf:
    deskew: true
    clean: true
    optimize: 1
    force_ocr: true
 # Settings for audio transcription tasks
 transcription_settings:
  whisper:
    # NOTE(review): presumably handed to faster_whisper's WhisperModel as its
    # compute_type -- confirm where the model is constructed.
    compute_type: "int8"
    # Model names a client may choose from.
    # NOTE(review): presumably the transcription endpoint rejects any model
    # not listed here -- confirm against the endpoint's validation.
    allowed_models:
      - "tiny"
      - "base"
      - "small"
      - "medium"
      - "large-v3"
      - "distil-large-v2"
 # --- Conversion Tool Definitions ---
 # Each tool's command is a single string. The backend uses shlex to parse it,
 # so you can use quotes for arguments with spaces.
 # Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc.
 conversion_tools:
  libreoffice:
    name: "LibreOffice"
    command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}'
    formats:
      pdf: "PDF"
      docx: "Word Document"
      odt: "OpenDocument Text"
      html: "HTML"
      rtf: "Rich Text Format"
      txt: "Plain Text"
      xml: "Word 2003 XML"
      epub: "EPUB"
      xlsx: "Excel Spreadsheet"
      ods: "OpenDocument Spreadsheet"
      csv: "CSV"
      pptx: "PowerPoint Presentation"
      odp: "OpenDocument Presentation"
      svg: "SVG"
  pandoc:
    name: "Pandoc"
    command_template: 'pandoc --standalone {input} -o {output}'
    formats:
      docx: "Word Document"
      odt: "OpenDocument Text"
      pdf: "PDF"
      rtf: "Rich Text Format"
      txt: "Plain Text"
      tex: "LaTeX"
      man: "Groff Man Page"
      epub: "EPUB v3 Book"
      epub2: "EPUB v2 Book"
      html: "HTML"
      html5: "HTML5"
      pptx: "PowerPoint Presentation"
      beamer: "Beamer PDF Slides"
      slidy: "Slidy HTML Slides"
      md: "Markdown"
      rst: "reStructuredText"
      jira: "Jira Wiki Markup"
      mediawiki: "MediaWiki Markup"
  ghostscript_pdf:
    name: "Ghostscript (PDF)"
    command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
    formats:
      screen: "PDF (Optimized for Screen)"
      ebook: "PDF (Optimized for Ebooks)"
      printer: "PDF (Optimized for Print)"
      archive: "PDF/A (for Archiving)"
  # Renders the input to an image with Ghostscript.
  # Each format key appears to be "<device>_<dpi>", lining up with the
  # {device} and {dpi} placeholders in the template -- TODO confirm how the
  # task code fills them.
  # NOTE(review): the convert endpoint computes the output extension as
  # task_key.split('_')[0] and only special-cases ghostscript_pdf. Assuming
  # task_key is one of the keys below, that gives "png16m" and "tiff24nc" as
  # file extensions -- verify this is intended.
  ghostscript_image:
    name: "Ghostscript (Image)"
    command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
    formats:
      jpeg_72: "JPEG Image (72 DPI)"
      jpeg_300: "JPEG Image (300 DPI)"
      png16m_150: "PNG Image (150 DPI)"
      png16m_300: "PNG Image (300 DPI)"
      tiff24nc_300: "TIFF Image (300 DPI)"
      tiff24nc_600: "TIFF Image (600 DPI)"
  calibre:
    name: "Calibre (ebook-convert)"
    command_template: 'ebook-convert {input} {output}'
    formats:
      epub: "EPUB"
      mobi: "MOBI"
      azw3: "Amazon Kindle"
      pdf: "PDF"
      docx: "Word Document"
  ffmpeg:
    name: "FFmpeg"
    command_template: 'ffmpeg -i {input} -y -preset medium {output}'
    formats:
      mp4: "MP4 Video"
      mkv: "MKV Video"
      mov: "MOV Video"
      webm: "WebM Video"
      mp3: "MP3 Audio"
      wav: "WAV Audio"
      flac: "FLAC Audio"
      gif: "Animated GIF"
  vips:
    name: "VIPS"
    command_template: 'vips copy {input} {output}[Q=90]'
    formats:
      jpg: "JPEG Image (Q90)"
      png: "PNG Image"
      webp: "WebP Image (Q90)"
      tiff: "TIFF Image"
      avif: "AVIF Image"
  graphicsmagick:
    name: "GraphicsMagick"
    command_template: 'gm convert {input} -quality 90 {output}'
    formats:
      jpg: "JPEG Image (Q90)"
      png: "PNG Image"
      webp: "WebP Image"
      tiff: "TIFF Image"
      pdf: "PDF from Images"
  inkscape:
    name: "Inkscape"
    command_template: 'inkscape {input} --export-filename={output}'
    formats:
      svg: "SVG (Plain)"
      png: "PNG Image (96 DPI)"
      pdf: "PDF Document"
  libjxl:
    name: "libjxl (cjxl)"
    command_template: 'cjxl {input} {output} -q 90'
    formats:
      jxl: "JPEG XL (Q90)"
  resvg:
    name: "resvg"
    command_template: 'resvg {input} {output}'
    formats:
      png: "PNG from SVG"
  potrace:
    name: "Potrace"
    command_template: 'potrace {input} --svg -o {output}'
    formats:
      svg: "SVG from Bitmap"
  markitdown:
    name: "Markitdown"
    command_template: 'markitdown {input} -o {output}'
    formats:
      md: "Markdown from Everything!"
  pngquant:
    name: "pngquant"
    command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
    formats:
      png_hq: "PNG (High Quality Compression)"
      png_mq: "PNG (Medium Quality Compression)"
      png_fast: "PNG (Fast Compression)"
  sox:
    name: "SoX Audio Converter"
    command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
    formats:
      wav_48k_24b: "WAV (48kHz, 24-bit)"
      wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
      flac_48k_24b: "FLAC (48kHz, 24-bit)"
      flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
      ogg_32k: "Ogg Vorbis (32kHz)"
      ogg_16k: "Ogg Vorbis (16kHz, Voice)"
  mozjpeg:
    name: "MozJPEG"
    command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
    formats:
      jpg_q85: "JPEG (High Quality)"
      jpg_q75: "JPEG (Web Quality)"
      jpg_q60: "JPEG (Aggressive Compression)"
--- a/static/css/settings.css
+++ b/static/css/settings.css
@@ -0,0 +1,194 @@
 /* static/css/settings.css */
 .settings-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 2rem;
    padding-bottom: 1rem;
    border-bottom: 1px solid var(--divider-color);
 }
 /* Page title inside the settings header: no top margin, small gap below.
    (A stray character after the opening brace previously invalidated the
    margin declaration.) */
 .settings-header h1 {
    margin: 0 0 0.25rem 0;
 }
 .settings-header p {
    margin: 0;
    color: var(--muted-text);
 }
 .back-button {
    background-color: var(--secondary-color);
    color: var(--text-color);
    padding: 0.5rem 1rem;
    border-radius: 5px;
    text-decoration: none;
    font-weight: 500;
    transition: background-color 0.15s ease;
    white-space: nowrap;
 }
 .back-button:hover {
    background-color: var(--primary-hover);
 }
 .settings-group {
    border: 1px solid var(--border-color);
    border-radius: 8px;
    padding: 1.5rem;
    margin-bottom: 2rem;
 }
 .settings-group legend {
    padding: 0 0.5rem;
    font-weight: 500;
    color: var(--primary-color);
 }
 .settings-group legend h2 {
    margin: 0;
    font-size: 1.25rem;
 }
 .form-input, .form-select, .form-textarea {
    width: 100%;
    background-color: rgba(255,255,255,0.05);
    color: var(--text-color);
    padding: 0.6rem 0.8rem;
    border-radius: 5px;
    border: 1px solid var(--border-color);
    font-family: inherit;
    font-size: 0.95rem;
    transition: border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out;
 }
 .form-input:focus, .form-select:focus, .form-textarea:focus {
    outline: none;
    border-color: var(--primary-color);
    box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2);
 }
 .form-textarea {
    resize: vertical;
    min-height: 60px;
    font-family: 'Courier New', Courier, monospace;
 }
 .field-description {
    font-size: 0.85rem;
    color: var(--muted-text);
    margin-top: -0.5rem;
    margin-bottom: 1rem;
 }
 .field-description code {
    background-color: rgba(255,255,255,0.1);
    padding: 0.1rem 0.3rem;
    border-radius: 3px;
    font-size: 0.8rem;
 }
 .checkbox-group {
    display: flex;
    align-items: center;
    gap: 0.75rem;
    margin-bottom: 0.5rem;
 }
 .checkbox-group input[type="checkbox"] {
    width: 1rem;
    height: 1rem;
 }
 .tool-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
    gap: 1rem;
 }
 .tool-card {
    border: 1px solid var(--divider-color);
    padding: 1rem;
    border-radius: 6px;
    background-color: rgba(0,0,0,0.2);
 }
 .tool-card h3 {
    margin-top: 0;
    margin-bottom: 1rem;
    font-size: 1.1rem;
 }
 .form-actions {
    display: flex;
    align-items: center;
    gap: 1rem;
    margin-top: 1.5rem;
 }
 .button-primary {
    display: inline-block;
    background: var(--primary-color);
    background-color: transparent;
    border-color: var(--border-color);
    border-width: 1px;
    color: #ffffff;
    padding: 0.65rem 1.5rem;
    font-size: 1rem;
    font-weight: 600;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s ease;
 }
 .button-primary:hover {
    background: var(--primary-hover);
 }
 .save-status {
    font-weight: 500;
 }
 .save-status.success {
    color: var(--success-color);
 }
 .save-status.error {
    color: var(--error-color);
 }
 .divider {
    border: none;
    height: 1px;
    background-color: var(--divider-color);
    margin: 3rem 0;
 }
 .danger-zone {
    border: 1px solid var(--error-color);
    border-radius: 8px;
    padding: 1rem;
    background-color: rgba(255, 107, 107, 0.05);
 }
 .danger-action {
    display: flex;
    justify-content: space-between;
    align-items: center;
    gap: 1rem;
 }
 .danger-action + .danger-action {
    margin-top: 1rem;
    padding-top: 1rem;
    border-top: 1px solid rgba(255, 107, 107, 0.2);
 }
 .danger-action p {
    margin: 0.25rem 0 0 0;
    font-size: 0.9rem;
    color: var(--muted-text);
 }
 .button-danger {
    background-color: var(--error-color);
    color: #1a0000;
    border: none;
    padding: 0.6rem 1.2rem;
    font-weight: 600;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s ease;
    white-space: nowrap;
 }
 .button-danger:hover {
    background-color: #ff8f8f;
 }
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -3,14 +3,15 @@
 :root {
    /* Core */
    --bg-color: #000000;
-    --surface: #111111;
+    --surface: #000000;
-    --card-bg: #0b0b0b;
+    --card-bg: #000000;
    --text-color: #e6eef6;
    --muted-text: #9aa4ad;
    /* Accent / interactive */
-    --primary-color: #00b4ff;
+    --primary-color: #ffffff;
-    --primary-hover: #00d0ff;
+    --secondary-color: #2b2b2b;
    --primary-hover: #ffffff3d;
    --success-color: #26c281;
    --error-color: #ff6b6b;
    --cancel-color: #f39c12; /* Orange for cancelled */
@@ -43,12 +44,14 @@ body {
 /* Container */
 .container {
    width: 100%;
-    max-width: 960px;
+    max-width: 1280px; /* Increased max-width for 3 columns */
    margin: 0 auto;
    background: var(--card-bg);
    border-radius: 10px;
    padding: 1.5rem;
    border: 1px solid var(--border-color);
    margin-top: 1em;
 }
@media (max-width: 768px) {
    .container {
@@ -69,8 +72,10 @@ header {
 header h1 {
    margin: 0 0 0.25rem 0;
-    font-size: 1.75rem;
+    font-size: 3rem;
    font-weight: 700;
    font-family: serif;
    font-weight: lighter;
 }
 header p {
@@ -79,18 +84,28 @@ header p {
    font-size: 1rem;
 }
 .header-actions {
    position: absolute;
    top: 1.5rem;
    right: 1.5rem;
 }
 .settings-link {
    font-size: 1.5rem;
    text-decoration: none;
    color: var(--muted-text);
    transition: color 0.2s ease;
 }
 .settings-link:hover {
    color: var(--text-color);
 }
 /* Form Layout */
 .form-grid {
    display: grid;
-    grid-template-columns: 1fr;
+    /* MODIFICATION: Responsive grid for 1, 2, or 3 items */
    grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
    gap: 1.5rem;
    margin-bottom: 2rem;
 }
@media (min-width: 768px) {
    .form-grid {
        grid-template-columns: 1fr 1fr;
    }
 }
 .upload-form fieldset {
    border: 1px solid var(--border-color);
@@ -130,7 +145,6 @@ input[type="file"] {
    width: 100%;
    height: 100%;
    cursor: pointer;
    text-overflow: ellipsis;
    inset: 0;
 }
@@ -142,8 +156,6 @@ input[type="file"] {
    cursor: pointer;
    transition: background-color 0.15s ease;
    font-weight: 500;
    text-overflow: ellipsis;
    border: 1px solid rgba(255,255,255,0.1);
    white-space: nowrap;
 }
@@ -159,10 +171,10 @@ input[type="file"] {
    text-overflow: ellipsis;
    white-space: nowrap;
    min-width: 0;
-    max-width: 10em;
+    max-width: 15em;
 }
-/* --- STYLES FOR DROPDOWN MENU --- */
+/* Form Controls (for dropdowns) */
 .form-control {
    margin-bottom: 1rem;
 }
@@ -200,22 +212,23 @@ input[type="file"] {
    border-color: var(--primary-color);
    box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2);
 }
 /* ---------------------------------- */
 /* Submit button */
 button[type="submit"] {
    display: block;
    width: 100%;
    background: var(--primary-color);
-    color: #00161b;
+    background-color: transparent;
-    border: none;
+    border-color: var(--border-color);
    border-width: 1px;
    color: #ffffff;
    padding: 0.65rem 1rem;
    font-size: 1rem;
    font-weight: 600;
    border-radius: 5px;
    cursor: pointer;
    transition: background-color 0.15s ease;
-    margin-top: auto; /* Pushes button to the bottom */
+    margin-top: auto;
 }
 button[type="submit"]:hover {
    background: var(--primary-hover);
@@ -268,16 +281,24 @@ button[type="submit"]:disabled {
    white-space: nowrap;
 }
-#job-table td[data-label="File"] {
+.cell-value {
-    max-width: 250px;
+    max-width: 10em;
-    overflow: hidden;
+    text-wrap: wrap;
    overflow: scroll;
 }
 #job-table td[data-label="File"],
 #job-table td[data-label="Task"] {
    overflow: scroll;
    text-overflow: ellipsis;
-    white-space: nowrap;
+    text-wrap: wrap;
    max-width: 15em;
 }
 .action-col {
    text-align: center;
-    width: 100px;
+    width: 120px;
 }
 #job-table td.action-col {
    text-align: center;
@@ -348,35 +369,87 @@ button[type="submit"]:disabled {
 }
 /* Action items */
-.download-button {
+.download-button, .cancel-button {
    background-color: var(--success-color);
    color: #00160b;
    padding: 0.3rem 0.8rem;
    text-decoration: none;
    border-radius: 5px;
    font-weight: 600;
    font-size: 0.85rem;
    display: inline-block;
-    transition: transform 0.1s ease;
+    transition: transform 0.1s ease, background-color 0.15s ease;
    border: none;
    cursor: pointer;
 }
 .download-button {
    background-color: var(--success-color);
    color: #00160b;
 }
 .download-button:hover { transform: scale(1.05); }
 .cancel-button {
    background-color: var(--error-color);
    color: #1a0000;
    padding: 0.3rem 0.8rem;
    border-radius: 5px;
    font-weight: 600;
    font-size: 0.85rem;
    display: inline-block;
    transition: transform 0.1s ease;
    border: none;
    cursor: pointer;
 }
 .cancel-button:hover { background-color: #ff8f8f; }
 /* --- MODIFICATION: Dark theme for Choices.js --- */
 .choices {
    font-size: 0.95rem;
 }
 .choices__inner {
    background-color: rgba(255, 255, 255, 0.05);
    border: 1px solid var(--border-color);
    border-radius: 5px;
    padding: 0.35rem 0.75rem;
    color: var(--text-color);
    min-height: auto;
 }
 .is-open .choices__inner {
    border-radius: 5px 5px 0 0;
    border-color: var(--primary-color);
 }
 .is-focused .choices__inner {
     border-color: var(--primary-color);
     box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2);
 }
 .choices__list--dropdown, .choices__list[aria-expanded] {
    background-color: var(--surface);
    border: 1px solid var(--primary-color);
    border-top: none;
    border-radius: 0 0 5px 5px;
 }
 /* Highlighted (hovered / keyboard-focused) option in the Choices.js dropdown. */
 .choices__list--dropdown .choices__item--selectable.is-highlighted,
 .choices__list[aria-expanded] .choices__item--selectable.is-highlighted {
    background-color: var(--secondary-color);
    /* Terminate the last declaration so a later addition cannot merge into it. */
    color: var(--text-color);
 }
 .choices__list--dropdown .choices__item, .choices__list[aria-expanded] .choices__item {
    padding: 0.5rem 0.8rem;
    font-size: 0.9rem;
 }
 .choices__group {
    background-color: rgba(0,0,0,0.2);
    border-bottom: 1px solid var(--divider-color);
 }
 .choices__group .choices__heading {
    color: var(--primary-hover);
    font-size: 0.75rem;
    font-weight: 700;
    border-bottom: none;
    padding: 0.5rem 0.8rem;
 }
 .choices__input {
    color: var(--text-color);
    background-color: #000000;
    font-size: 0.95rem;
 }
 .choices[data-type*="select-one"] .choices__input {
    background-color: #000000;
 }
 /* Spinner */
 .spinner-small {
    border: 3px solid rgba(255,255,255,0.1);
    border-top: 3px solid var(--primary-color);
@@ -392,7 +465,9 @@ button[type="submit"]:disabled {
    100% { transform: rotate(360deg); }
 }
 /* Mobile responsive table */
@media (max-width: 768px) {
    /* ... (no changes in this section) ... */
    .table-wrapper {
        border: none;
        background-color: transparent;
@@ -432,5 +507,23 @@ button[type="submit"]:disabled {
        text-align: right;
        min-width: 0;
        word-break: break-all;
        overflow: scroll;
        max-width: 100em;
    }
    .cell-value {
        min-width: 0;
    max-width: 20em;
    text-wrap: nowrap;
    overflow: scroll;
 }
 #job-table td[data-label="File"],
 #job-table td[data-label="Task"] {
    overflow: scroll;
    text-overflow: ellipsis;
    text-wrap: nowrap;
    max-width: 100em;
 }
 }
--- a/static/js/script.js
+++ b/static/js/script.js
@@ -1,21 +1,35 @@
 // static/js/script.js
 document.addEventListener('DOMContentLoaded', () => {
    // --- Element Selectors ---
    const jobListBody = document.getElementById('job-list-body');
    const pdfForm = document.getElementById('pdf-form');
    const pdfFileInput = document.getElementById('pdf-file-input');
    const pdfFileName = document.getElementById('pdf-file-name');
    const audioForm = document.getElementById('audio-form');
    const audioFileInput = document.getElementById('audio-file-input');
    const audioFileName = document.getElementById('audio-file-name');
    const conversionForm = document.getElementById('conversion-form');
    const conversionFileInput = document.getElementById('conversion-file-input');
    const conversionFileName = document.getElementById('conversion-file-name');
    const outputFormatSelect = document.getElementById('output-format-select');
    // MODIFICATION: Store the Choices.js instance in a variable
    let conversionChoices = null;
    const activePolls = new Map();
    // --- Main Event Listeners ---
    pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName));
    audioFileInput.addEventListener('change', () => updateFileName(audioFileInput, audioFileName));
-    pdfForm.addEventListener('submit', (e) => handleFormSubmit(e, '/ocr-pdf', pdfForm, pdfFileInput, pdfFileName));
+    conversionFileInput.addEventListener('change', () => updateFileName(conversionFileInput, conversionFileName));
-    audioForm.addEventListener('submit', (e) => handleFormSubmit(e, '/transcribe-audio', audioForm, audioFileInput, audioFileName));
+
    pdfForm.addEventListener('submit', (e) => handleFormSubmit(e, '/ocr-pdf', pdfForm));
    audioForm.addEventListener('submit', (e) => handleFormSubmit(e, '/transcribe-audio', audioForm));
    conversionForm.addEventListener('submit', (e) => handleFormSubmit(e, '/convert-file', conversionForm));
    jobListBody.addEventListener('click', (event) => {
        if (event.target.classList.contains('cancel-button')) {
@@ -24,19 +38,55 @@ document.addEventListener('DOMContentLoaded', () => {
        }
    });
-    function updateFileName(input, nameDisplay) {
+    function initializeConversionSelector() {
-        nameDisplay.textContent = input.files.length > 0 ? input.files[0].name : 'No file chosen';
+        // MODIFICATION: Destroy the old instance if it exists before creating a new one
-        nameDisplay.title = nameDisplay.textContent; // Add a tooltip for the full name
+        if (conversionChoices) {
            conversionChoices.destroy();
        }
        conversionChoices = new Choices(outputFormatSelect, {
            searchEnabled: true,
            itemSelectText: 'Select',
            shouldSort: false,
            placeholder: true,
            placeholderValue: 'Select a format...',
        });
        const tools = window.APP_CONFIG.conversionTools || {};
        const choicesArray = [];
        for (const toolKey in tools) {
            const tool = tools[toolKey];
            const group = {
                label: tool.name,
                id: toolKey,
                disabled: false,
                choices: []
            };
            for (const formatKey in tool.formats) {
                group.choices.push({
                    value: `${toolKey}_${formatKey}`,
                    label: `${formatKey.toUpperCase()} - ${tool.formats[formatKey]}`
                });
            }
            choicesArray.push(group);
        }
        conversionChoices.setChoices(choicesArray, 'value', 'label', true);
    }
-    async function handleFormSubmit(event, endpoint, form, fileInput, fileNameDisplay) {
+    // --- Helper Functions ---
    /**
     * Show the name of the selected file next to its file input.
     *
     * Writes the first selected file's name, or 'No file chosen' when nothing
     * is selected, to both the text and the tooltip of the display element.
     *
     * @param {{files: ArrayLike<{name: string}>}} input - File input to read from.
     * @param {{textContent: string, title: string}} nameDisplay - Element to update.
     */
    function updateFileName(input, nameDisplay) {
        let label = 'No file chosen';
        if (input.files.length > 0) {
            label = input.files[0].name;
        }
        nameDisplay.textContent = label;
        nameDisplay.title = label;
    }
    async function handleFormSubmit(event, endpoint, form) {
        event.preventDefault();
        const fileInput = form.querySelector('input[type="file"]');
        const fileNameDisplay = form.querySelector('.file-name');
        if (!fileInput.files[0]) return;
        // MODIFICATION: Use new FormData(form) to capture all form fields,
        // including the new model size dropdown for the audio form.
        const formData = new FormData(form);
        const submitButton = form.querySelector('button[type="submit"]');
        submitButton.disabled = true;
@@ -47,54 +97,49 @@ document.addEventListener('DOMContentLoaded', () => {
                throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`);
            }
            const result = await response.json();
            // To provide immediate feedback, create a placeholder job object
            const preliminaryJob = {
                id: result.job_id,
                status: 'pending',
                progress: 0,
                original_filename: fileInput.files[0].name,
-                task_type: endpoint.includes('ocr') ? 'ocr' : 'transcription',
+                task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
-                created_at: new Date().toISOString(),
+                created_at: new Date().toISOString()
                processed_filepath: null,
                error_message: null
            };
-            renderJobRow(preliminaryJob); // Render immediately
+            renderJobRow(preliminaryJob);
-            startPolling(result.job_id); // Start polling for updates
+            startPolling(result.job_id);
        } catch (error) {
            console.error('Error submitting job:', error);
            alert(`Submission failed: ${error.message}`);
        } finally {
            form.reset();
-            fileNameDisplay.textContent = 'No file chosen';
+            if (fileNameDisplay) fileNameDisplay.textContent = 'No file chosen';
-            fileNameDisplay.title = '';
+            
            // MODIFICATION: Use the stored instance to correctly reset the dropdown
            // without causing an error.
            if (form.id === 'conversion-form' && conversionChoices) {
                 conversionChoices.clearInput();
                 conversionChoices.setValue([]); // Clears the selected value
            }
            submitButton.disabled = false;
        }
    }
    async function handleCancelJob(jobId) {
        if (!confirm('Are you sure you want to cancel this job?')) return;
        try {
            const response = await fetch(`/job/${jobId}/cancel`, { method: 'POST' });
            if (!response.ok) {
                const errorData = await response.json();
                throw new Error(errorData.detail || 'Failed to cancel job.');
            }
            // The polling mechanism will update the UI to "cancelled" automatically.
            // We can stop polling immediately to be more efficient.
            stopPolling(jobId);
            // Optionally, force an immediate UI update
            const row = document.getElementById(`job-${jobId}`);
            if (row) {
                const statusCell = row.querySelector('td[data-label="Status"] .cell-value');
                const actionCell = row.querySelector('td[data-label="Action"] .cell-value');
-                if (statusCell) {
+                if (statusCell) statusCell.innerHTML = `<span class="job-status-badge status-cancelled">Cancelled</span>`;
-                    statusCell.innerHTML = `<span class="job-status-badge status-cancelled">cancelled</span>`;
+                if (actionCell) actionCell.innerHTML = `<span>-</span>`;
                }
                if (actionCell) {
                    actionCell.innerHTML = `<span>-</span>`;
                }
            }
        } catch (error) {
            console.error('Error cancelling job:', error);
@@ -107,7 +152,7 @@ document.addEventListener('DOMContentLoaded', () => {
            const response = await fetch('/jobs');
            if (!response.ok) throw new Error('Failed to fetch jobs.');
            const jobs = await response.json();
-            jobListBody.innerHTML = ''; // Clear existing
+            jobListBody.innerHTML = '';
            for (const job of jobs.reverse()) {
                renderJobRow(job);
                if (['pending', 'processing'].includes(job.status)) {
@@ -126,10 +171,7 @@ document.addEventListener('DOMContentLoaded', () => {
            try {
                const response = await fetch(`/job/${jobId}`);
                if (!response.ok) {
-                    if (response.status === 404) {
+                    if (response.status === 404) stopPolling(jobId);
                        console.warn(`Job ${jobId} not found. Stopping poll.`);
                        stopPolling(jobId);
                    }
                    return;
                }
                const job = await response.json();
@@ -139,9 +181,9 @@ document.addEventListener('DOMContentLoaded', () => {
                }
            } catch (error) {
                console.error(`Error polling for job ${jobId}:`, error);
-                stopPolling(jobId); // Stop polling on network or other errors
+                stopPolling(jobId);
            }
-        }, 2500); // Poll every 2.5 seconds
+        }, 2500);
        activePolls.set(jobId, intervalId);
    }
@@ -160,20 +202,27 @@ document.addEventListener('DOMContentLoaded', () => {
            jobListBody.prepend(row);
        }
-        const taskTypeLabel = job.task_type.includes('ocr') ? 'PDF/Image OCR' : 'Transcription';
+        let taskTypeLabel = 'Unknown';
-        const formattedDate = new Date(job.created_at).toLocaleString();
+        if (job.task_type === 'ocr' || job.task_type === 'ocr-image') {
-
+            taskTypeLabel = 'OCR';
-        let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
+        } else if (job.task_type === 'transcription') {
-        let actionHtml = `<span>-</span>`;
+            taskTypeLabel = 'Transcription';
-
+        } else if (job.task_type === 'conversion' && job.processed_filepath) {
-        if (job.status === 'processing') {
+            const extension = job.processed_filepath.split('.').pop();
-            // Show real progress for transcription, but an indeterminate one for OCR tasks
+            taskTypeLabel = `Convert to ${extension.toUpperCase()}`;
-            const progressClass = job.task_type === 'transcription' ? '' : 'indeterminate';
+        } else if (job.task_type === 'conversion') {
-            const progressWidth = job.task_type === 'transcription' ? job.progress : 100;
+            taskTypeLabel = 'Conversion';
            const progressBarHtml = `<div class="progress-bar-container"><div class="progress-bar ${progressClass}" style="width: ${progressWidth}%"></div></div>`;
            statusHtml += progressBarHtml;
        }
        const formattedDate = new Date(job.created_at).toLocaleString();
        let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
        if (job.status === 'processing') {
            const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
            const progressWidth = job.task_type === 'transcription' ? job.progress : 100;
            statusHtml += `<div class="progress-bar-container"><div class="progress-bar ${progressClass}" style="width: ${progressWidth}%"></div></div>`;
        }
        let actionHtml = `<span>-</span>`;
        if (job.status === 'pending' || job.status === 'processing') {
            actionHtml = `<button class="cancel-button" data-job-id="${job.id}">Cancel</button>`;
        } else if (job.status === 'completed' && job.processed_filepath) {
@@ -184,12 +233,10 @@ document.addEventListener('DOMContentLoaded', () => {
            actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`;
        }
-        // Use textContent for filename to prevent XSS and add a title for overflow
+        const escapedFilename = job.original_filename ? job.original_filename.replace(/</g, "&lt;").replace(/>/g, "&gt;") : "No filename";
        const escapedFilename = job.original_filename.replace(/</g, "&lt;").replace(/>/g, "&gt;");
        row.innerHTML = `
            <td data-label="File"><span class="cell-value" title="${escapedFilename}">${escapedFilename}</span></td>
-            <td data-label="Type"><span class="cell-value">${taskTypeLabel}</span></td>
+            <td data-label="Task"><span class="cell-value">${taskTypeLabel}</span></td>
            <td data-label="Submitted"><span class="cell-value">${formattedDate}</span></td>
            <td data-label="Status"><span class="cell-value">${statusHtml}</span></td>
            <td data-label="Action" class="action-col"><span class="cell-value">${actionHtml}</span></td>
@@ -197,5 +244,6 @@ document.addEventListener('DOMContentLoaded', () => {
    }
    // --- Initial Load ---
    initializeConversionSelector();
    loadInitialJobs();
 });
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -0,0 +1,108 @@
 // static/js/settings.js
 document.addEventListener('DOMContentLoaded', () => {
    // Settings page controller: saves the settings form as nested JSON and
    // wires up the two irreversible maintenance buttons.
    const settingsForm = document.getElementById('settings-form');
    const saveStatus = document.getElementById('save-status');
    const clearHistoryBtn = document.getElementById('clear-history-btn');
    const deleteFilesBtn = document.getElementById('delete-files-btn');

    // Handle of the pending "clear the status line" timer, if any.
    let statusResetTimer = null;

    /**
     * Assign `value` at a dot-separated `path` inside `target`, creating any
     * missing intermediate objects (e.g. "a.b.c" -> target.a.b.c = value).
     */
    const setNestedValue = (target, path, value) => {
        const keys = path.split('.');
        let current = target;
        keys.forEach((k, index) => {
            if (index === keys.length - 1) {
                current[k] = value;
            } else {
                current[k] = current[k] || {};
                current = current[k];
            }
        });
    };

    /**
     * Build the nested settings payload from the form.
     *
     * Field names are dot-separated paths. OCR checkboxes are always emitted
     * as booleans: FormData omits unchecked boxes entirely and reports checked
     * ones as the string "on", so they are read from the DOM instead.
     *
     * NOTE(review): FormData yields strings, so number inputs such as
     * app_settings.max_file_size_mb are sent as strings - confirm the
     * /settings/save handler coerces them.
     */
    const buildSettingsObject = () => {
        const settingsObject = {};
        new FormData(settingsForm).forEach((value, key) => {
            setNestedValue(settingsObject, key, value);
        });
        // setNestedValue creates missing parents, so this also works when
        // every OCR box is unchecked and no "ocr_settings" key exists yet.
        const ocrCheckboxes = settingsForm.querySelectorAll('input[type="checkbox"][name*="ocr_settings"]');
        ocrCheckboxes.forEach(cb => {
            setNestedValue(settingsObject, cb.name, cb.checked);
        });
        return settingsObject;
    };

    /**
     * Turn an error response body into a displayable message. `detail` may be
     * a string or a structured value (e.g. a list of validation errors).
     */
    const describeSaveError = (errorData) => {
        const detail = errorData && errorData.detail;
        if (!detail) {
            return 'Failed to save settings.';
        }
        return typeof detail === 'string' ? detail : JSON.stringify(detail);
    };

    // --- Save Settings ---
    settingsForm.addEventListener('submit', async (event) => {
        event.preventDefault();
        // Cancel a reset scheduled by an earlier save so it cannot wipe the
        // message belonging to this attempt.
        clearTimeout(statusResetTimer);
        saveStatus.textContent = 'Saving...';
        saveStatus.classList.remove('success', 'error');
        try {
            // Built inside the try so any failure is reported on the status line.
            const settingsObject = buildSettingsObject();
            const response = await fetch('/settings/save', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(settingsObject)
            });
            if (!response.ok) {
                // Error bodies are not guaranteed to be JSON; fall back to the
                // generic message instead of surfacing a parse error.
                const errorData = await response.json().catch(() => ({}));
                throw new Error(describeSaveError(errorData));
            }
            saveStatus.textContent = 'Settings saved successfully!';
            saveStatus.classList.add('success');
        } catch (error) {
            saveStatus.textContent = `Error: ${error.message}`;
            saveStatus.classList.add('error');
            console.error('Error saving settings:', error);
        } finally {
            statusResetTimer = setTimeout(() => {
                saveStatus.textContent = '';
                saveStatus.classList.remove('success', 'error');
            }, 5000);
        }
    });

    /**
     * Ask for confirmation, POST to `url`, then report the server's
     * `deleted_count` via `describeSuccess`, or `failureMessage` on any error.
     */
    const runDestructiveAction = async ({ url, confirmMessage, describeSuccess, failureMessage }) => {
        if (!confirm(confirmMessage)) {
            return;
        }
        try {
            const response = await fetch(url, { method: 'POST' });
            if (!response.ok) throw new Error('Server responded with an error.');
            const result = await response.json();
            alert(describeSuccess(result.deleted_count));
        } catch (error) {
            alert(failureMessage);
            console.error(error);
        }
    };

    // --- Clear History ---
    clearHistoryBtn.addEventListener('click', () => runDestructiveAction({
        url: '/settings/clear-history',
        confirmMessage: 'ARE YOU SURE?\n\nThis will permanently delete all job history records from the database.',
        describeSuccess: (count) => `Success: ${count} job records have been deleted.`,
        failureMessage: 'An error occurred while clearing history.'
    }));

    // --- Delete Files ---
    deleteFilesBtn.addEventListener('click', () => runDestructiveAction({
        url: '/settings/delete-files',
        confirmMessage: 'ARE YOU SURE?\n\nThis will permanently delete all files in the "processed" folder.',
        describeSuccess: (count) => `Success: ${count} files have been deleted.`,
        failureMessage: 'An error occurred while deleting files.'
    }));
 });
--- a/templates/index.html
+++ b/templates/index.html
@@ -3,7 +3,8 @@
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>DocProcessor</title>
+    <title>File Wizard</title>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/choices.js/public/assets/styles/choices.min.css"/>
    <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
@@ -11,17 +12,38 @@
 </head>
 <body>
    <div class="container">
        <div class="header-actions">
            <a href="/settings" title="Settings" class="settings-link">⚙️</a>
        </div>
        <header>
-            <h1>DocProcessor 🚀</h1>
+            <h1>File Wizard</h1>
            <p>PDF OCR & Audio Transcription</p>
        </header>
        <main>
            <div class="form-grid">
                <section class="processor-section">
                    <form id="conversion-form" class="upload-form" enctype="multipart/form-data">
                        <fieldset>
                            <legend><h2>File Conversion</h2></legend>
                            <div class="file-input-wrapper">
                                 <input type="file" name="file" id="conversion-file-input" required>
                                 <label for="conversion-file-input" class="file-input-label">Choose File...</label>
                                 <span id="conversion-file-name" class="file-name">No file chosen</span>
                            </div>
                            <div class="form-control">
                                <label for="output-format-select">Convert To</label>
                                <select name="output_format" id="output-format-select" required></select>
                            </div>
                            <button type="submit">Convert File</button>
                        </fieldset>
                    </form>
                </section>
                <section class="processor-section">
                    <form id="pdf-form" class="upload-form" enctype="multipart/form-data">
                        <fieldset>
-                            <legend><h2>📄 PDF OCR</h2></legend>
+                            <legend><h2>PDF OCR</h2></legend>
                            <div class="file-input-wrapper">
                                <input type="file" name="file" id="pdf-file-input" accept=".pdf" required>
                                <label for="pdf-file-input" class="file-input-label">Choose PDF...</label>
@@ -35,7 +57,7 @@
                <section class="processor-section">
                    <form id="audio-form" class="upload-form" enctype="multipart/form-data">
                        <fieldset>
-                            <legend><h2>🎤 Transcribe Audio</h2></legend>
+                            <legend><h2>Transcribe Audio</h2></legend>
                            <div class="file-input-wrapper">
                                 <input type="file" name="file" id="audio-file-input" accept="audio/*" required>
                                 <label for="audio-file-input" class="file-input-label">Choose Audio...</label>
@@ -45,11 +67,11 @@
                                <label for="model-size-select">Model Size</label>
                                <select name="model_size" id="model-size-select">
                                    <option value="tiny">Tiny (Fastest, lower accuracy)</option>
-                                    <option value="base" selected>Base (Good balance)</option>
+                                    <option value="base" selected>Base</option>
                                    <option value="small">Small (Better accuracy)</option>
                                    <option value="medium">Medium (High accuracy)</option>
                                    <option value="large-v3">Large v3 (Best accuracy, slow)</option>
-                                    <option value="distil-large-v2">Distilled Large v2 (Fast & Accurate)</option>
+                                    <option value="distil-large-v2">Distilled Large v2</option>
                                </select>
                            </div>
                            <button type="submit">Transcribe</button>
@@ -65,7 +87,7 @@
                        <thead>
                            <tr>
                                <th>File</th>
-                                <th>Type</th>
+                                <th>Task</th>
                                <th>Submitted</th>
                                <th>Status</th>
                                <th class="action-col">Action</th>
@@ -78,6 +100,14 @@
            </section>
        </main>
    </div>
    <script>
        window.APP_CONFIG = {
            conversionTools: {{ conversion_tools | tojson }}
        };
    </script>
    <script src="https://cdn.jsdelivr.net/npm/choices.js/public/assets/scripts/choices.min.js"></script>
    <script src="{{ url_for('static', path='/js/script.js') }}"></script>
 </body>
 </html>
--- a/templates/settings.html
+++ b/templates/settings.html
@@ -0,0 +1,113 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Settings - File Wizard</title>
    <link rel="stylesheet" href="{{ url_for('static', path='/css/style.css') }}">
    <link rel="stylesheet" href="{{ url_for('static', path='/css/settings.css') }}">
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap" rel="stylesheet">
 </head>
 <body>
    <!-- Settings page: an editable form pre-filled from the `config` template
         variable, followed by two irreversible maintenance actions. -->
    <div class="container">
        <header class="settings-header">
            <div class="header-content">
                <h1>Settings</h1>
            </div>
            <a href="/" class="back-button">← Back</a>
        </header>
        <main>
            <!-- Field names are dot-separated paths (e.g. "app_settings.max_file_size_mb").
                 static/js/settings.js splits each name on "." to build the nested JSON
                 object it POSTs to /settings/save, so the names define the payload shape. -->
            <form id="settings-form">
                <fieldset class="settings-group">
                    <legend><h2>General Settings</h2></legend>
                    <div class="form-control">
                        <label for="app-max-file-size">Max Upload Size (MB)</label>
                        <input type="number" id="app-max-file-size" name="app_settings.max_file_size_mb" value="{{ config.app_settings.max_file_size_mb }}" class="form-input">
                    </div>
                </fieldset>
                <!-- Each checkbox is rendered checked when the matching
                     config.ocr_settings.ocrmypdf value is truthy. -->
                <fieldset class="settings-group">
                    <legend><h2>OCR (ocrmypdf)</h2></legend>
                     <div class="form-control checkbox-group">
                        <input type="checkbox" id="ocr-deskew" name="ocr_settings.ocrmypdf.deskew" {% if config.ocr_settings.ocrmypdf.deskew %}checked{% endif %}>
                        <label for="ocr-deskew">Deskew (correct tilted pages)</label>
                    </div>
                    <div class="form-control checkbox-group">
                        <input type="checkbox" id="ocr-clean" name="ocr_settings.ocrmypdf.clean" {% if config.ocr_settings.ocrmypdf.clean %}checked{% endif %}>
                        <label for="ocr-clean">Clean (remove speckles/noise)</label>
                    </div>
                    <div class="form-control checkbox-group">
                        <input type="checkbox" id="ocr-force-ocr" name="ocr_settings.ocrmypdf.force_ocr" {% if config.ocr_settings.ocrmypdf.force_ocr %}checked{% endif %}>
                        <label for="ocr-force-ocr">Force OCR (re-process pages with existing text)</label>
                    </div>
                </fieldset>
                <!-- The option equal to the configured whisper compute_type is pre-selected. -->
                <fieldset class="settings-group">
                    <legend><h2>Transcription (Whisper)</h2></legend>
                    <div class="form-control">
                        <label for="whisper-compute-type">Compute Type</label>
                        <select id="whisper-compute-type" name="transcription_settings.whisper.compute_type" class="form-select">
                            {% for ctype in ["default", "int8", "int8_float16", "int16", "float16", "float32"] %}
                                <option value="{{ ctype }}" {% if config.transcription_settings.whisper.compute_type == ctype %}selected{% endif %}>{{ ctype }}</option>
                            {% endfor %}
                        </select>
                    </div>
                </fieldset>
                <!-- One card per entry in config.conversion_tools; only the tool's
                     command_template is editable, keyed by its tool_id in the field name. -->
                <fieldset class="settings-group">
                    <legend><h2>Conversion Tools</h2></legend>
                    <p class="field-description">
                        Edit the command line templates for each conversion tool. The following placeholders are available: <code>{input}</code>, <code>{output}</code>, <code>{output_dir}</code>, <code>{output_ext}</code>.
                        Some tools may have additional placeholders; refer to the sourcecode or documentation for details.
                    </p>
                    <div class="tool-grid">
                        {% for tool_id, tool in config.conversion_tools.items() %}
                        <div class="tool-card">
                            <h3>{{ tool.name }}</h3>
                            <div class="form-control">
                                <label for="tool-{{ tool_id }}-cmd">Command Template</label>
                                <textarea id="tool-{{ tool_id }}-cmd" name="conversion_tools.{{ tool_id }}.command_template" class="form-textarea" rows="3">{{ tool.command_template }}</textarea>
                            </div>
                        </div>
                        {% endfor %}
                    </div>
                </fieldset>
                <!-- #save-status is the element static/js/settings.js writes save progress and results into. -->
                <div class="form-actions">
                     <button type="submit" class="button-primary">Save Settings</button>
                     <div id="save-status" class="save-status"></div>
                </div>
            </form>
            <hr class="divider">
            <!-- These buttons sit outside the form, so clicking them does not submit it.
                 static/js/settings.js binds them by id and, after a confirm() prompt,
                 POSTs to /settings/clear-history and /settings/delete-files respectively. -->
            <div id="history-management" class="settings-group">
                <h2>History Management</h2>
                 <p class="field-description">These actions are irreversible. Please be certain before proceeding.</p>
                <div class="danger-zone">
                   <div class="danger-action">
                        <div>
                            <strong>Clear Job History</strong>
                            <p>Deletes all job records from the database. Processed files on disk will not be removed.</p>
                        </div>
                        <button id="clear-history-btn" class="button-danger">Clear History</button>
                   </div>
                   <div class="danger-action">
                        <div>
                            <strong>Delete Processed Files</strong>
                            <p>Deletes all files from the 'processed' directory. Database records will remain but download links will be broken.</p>
                        </div>
                        <button id="delete-files-btn" class="button-danger">Delete Files</button>
                   </div>
                </div>
            </div>
        </main>
    </div>
    <script src="{{ url_for('static', path='/js/settings.js') }}"></script>
 </body>
 </html>