From ed58453466a84e3c2dcd7627829dc8350f0a6c06 Mon Sep 17 00:00:00 2001
From: Manuel
Date: Sat, 20 Sep 2025 12:49:48 +0000
Subject: [PATCH] TTS and webhooks

---
 .env.example | 3 +-
 .gitignore | 5 +-
 docker-compose.yml | 1 +
 main.py | 1293 ++++++++++++++++++++++++++++++++++--------
 requirements.txt | 2 +
 settings.default.yml | 30 +-
 static/css/style.css | 2 +-
 static/js/script.js | 218 +++++--
 templates/index.html | 30 +-
 9 files changed, 1291 insertions(+), 293 deletions(-)

diff --git a/.env.example b/.env.example
index 713031f..f0207bb 100644
--- a/.env.example
+++ b/.env.example
@@ -1,4 +1,5 @@
 LOCAL_ONLY=True
 SECRET_KEY=
 UPLOADS_DIR=./uploads
-PROCESSED_DIR=./processed
\ No newline at end of file
+PROCESSED_DIR=./processed
+OMP_NUM_THREADS=1
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 13f2ffb..8a28d1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,7 @@ settings.yml
 app.log
 /config/*
 my.settings.yml
-jobs.db-*
\ No newline at end of file
+jobs.db-*
+venv312
+models
+config
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index c7f289d..4fc44f6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,6 +10,7 @@ services:
       - SECRET_KEY= # if using auth
       - UPLOADS_DIR=/app/uploads
       - PROCESSED_DIR=/app/processed
+      - OMP_NUM_THREADS=1
       #user: "1000:1000"
     ports:
       - "6969:8000"
diff --git a/main.py b/main.py
index 057f72f..9065391 100644
--- a/main.py
+++ b/main.py
@@ -8,14 +8,20 @@ import uuid
 import shlex
 import yaml
 import os
+import httpx
+import glob
 from contextlib import asynccontextmanager
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Dict, List, Any
+from typing import Dict, List, Any, Optional
 import resource
 from threading import Semaphore
 from logging.handlers import RotatingFileHandler
-from urllib.parse import urlencode
+from urllib.parse import urljoin, urlparse
+import sys
+import re
+import importlib
+import collections.abc
 
 import ocrmypdf
 import pypdf
@@ -27,6 +33,7 @@ from fastapi import (Depends, FastAPI, File, Form, HTTPException, Request,
 from fastapi.responses import FileResponse, JSONResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 from huey import SqliteHuey
 from pydantic import BaseModel, ConfigDict, field_serializer
 from sqlalchemy import (Column, DateTime, Integer, String, Text,
@@ -39,13 +46,48 @@ from typing import List as TypingList
 from starlette.middleware.sessions import SessionMiddleware
 from authlib.integrations.starlette_client import OAuth
 from dotenv import load_dotenv
+import wave
 
 load_dotenv()
 
+
+# --- Optional Dependency Handling for Piper TTS ---
+# PiperVoice is imported inside the guard as well: the rest of this module
+# checks `PiperVoice is None` to decide whether Piper features are available,
+# so an unconditional top-level import would crash startup when piper-tts is
+# not installed.
+try:
+    from piper import PiperVoice
+    from piper.synthesis import SynthesisConfig
+    # download helpers: some piper versions export download_voice, others expose ensure_voice_exists/find_voice
+    try:
+        # prefer the more explicit helpers if present
+        from piper.download import get_voices, ensure_voice_exists, find_voice, VoiceNotFoundError
+    except Exception:
+        # fall back to older API if available
+        try:
+            from piper.download import get_voices, download_voice, VoiceNotFoundError
+            ensure_voice_exists = None
+            find_voice = None
+        except Exception:
+            # partial import failed -> treat as piper-not-installed for download helpers
+            get_voices = None
+            download_voice = None
+            ensure_voice_exists = None
+            find_voice = None
+            VoiceNotFoundError = None
+except ImportError:
+    PiperVoice = None
+    
SynthesisConfig = None + get_voices = None + download_voice = None + ensure_voice_exists = None + find_voice = None + VoiceNotFoundError = None + + # Instantiate OAuth object (was referenced in code) oauth = OAuth() - # -------------------------------------------------------------------------------- # --- 1. CONFIGURATION & SECURITY HELPERS # -------------------------------------------------------------------------------- @@ -70,9 +108,9 @@ def ensure_path_is_safe(p: Path, allowed_bases: List[Path]): def _limit_resources_preexec(): """Set resource limits for child processes to prevent DoS attacks.""" try: - # 6000s CPU, 2GB address space, i dont know if thats too much tbh + # 6000s CPU, 4GB address space resource.setrlimit(resource.RLIMIT_CPU, (6000, 6000)) - resource.setrlimit(resource.RLIMIT_AS, (4 * 1024 * 1024 * 1024, 2 * 1024 * 1024 * 1024)) + resource.setrlimit(resource.RLIMIT_AS, (4 * 1024 * 1024 * 1024, 4 * 1024 * 1024 * 1024)) except Exception as e: # This may fail in some environments (e.g. Windows, some containers) logging.getLogger(__name__).warning(f"Could not set resource limits: {e}") @@ -100,6 +138,10 @@ class AppPaths(BaseModel): UPLOADS_DIR: Path = UPLOADS_BASE PROCESSED_DIR: Path = PROCESSED_BASE CHUNK_TMP_DIR: Path = CHUNK_TMP_BASE + TTS_MODELS_DIR: Path = BASE_DIR / "models" / "tts" + KOKORO_TTS_MODELS_DIR: Path = BASE_DIR / "models" / "tts" / "kokoro" + KOKORO_MODEL_FILE: Path = KOKORO_TTS_MODELS_DIR / "kokoro-v1.0.onnx" + KOKORO_VOICES_FILE: Path = KOKORO_TTS_MODELS_DIR / "voices-v1.0.bin" DATABASE_URL: str = f"sqlite:///{BASE_DIR / 'jobs.db'}" HUEY_DB_PATH: str = str(BASE_DIR / "huey.db") CONFIG_DIR: Path = BASE_DIR / "config" @@ -112,6 +154,9 @@ PATHS.UPLOADS_DIR.mkdir(exist_ok=True, parents=True) PATHS.PROCESSED_DIR.mkdir(exist_ok=True, parents=True) PATHS.CHUNK_TMP_DIR.mkdir(exist_ok=True, parents=True) PATHS.CONFIG_DIR.mkdir(exist_ok=True, parents=True) +PATHS.TTS_MODELS_DIR.mkdir(exist_ok=True, parents=True) +PATHS.KOKORO_TTS_MODELS_DIR.mkdir(exist_ok=True, parents=True) + def load_app_config(): """ @@ -123,14 +168,18 @@ def load_app_config(): # --- Primary Method: Attempt to load settings.yml --- with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f: cfg_raw = yaml.safe_load(f) or {} - - # This logic block is intentionally duplicated to maintain compatibility + defaults = { - "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []}, + "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": [], "app_public_url": ""}, "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8", "device": "cpu"}}, + "tts_settings": { + "piper": {"model_dir": str(PATHS.TTS_MODELS_DIR), "use_cuda": False, "synthesis_config": {"length_scale": 1.0, "noise_scale": 0.667, "noise_w": 0.8}}, + "kokoro": {"model_dir": str(PATHS.KOKORO_TTS_MODELS_DIR), "command_template": "kokoro-tts {input} {output} --model {model_path} --voices {voices_path} --lang {lang} --voice {model_name}"} + }, "conversion_tools": {}, "ocr_settings": {"ocrmypdf": {}}, - "auth_settings": {"oidc_client_id": "", "oidc_client_secret": "", "oidc_server_metadata_url": "", "admin_users": []} + "auth_settings": {"oidc_client_id": "", "oidc_client_secret": "", "oidc_server_metadata_url": "", "admin_users": []}, + "webhook_settings": {"enabled": False, "allow_chunked_api_uploads": False, "allowed_callback_urls": [], "callback_bearer_token": ""} } cfg = defaults.copy() cfg.update(cfg_raw) # Merge loaded settings into defaults @@ -152,13 +201,17 @@ 
def load_app_config(): with open(PATHS.DEFAULT_SETTINGS_FILE, 'r', encoding='utf8') as f: cfg_raw = yaml.safe_load(f) or {} - # The same processing logic is applied to the fallback file defaults = { - "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []}, + "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": [], "app_public_url": ""}, "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8", "device": "cpu"}}, + "tts_settings": { + "piper": {"model_dir": str(PATHS.TTS_MODELS_DIR), "use_cuda": False, "synthesis_config": {"length_scale": 1.0, "noise_scale": 0.667, "noise_w": 0.8}}, + "kokoro": {"model_dir": str(PATHS.KOKORO_TTS_MODELS_DIR), "command_template": "kokoro-tts {input} {output} --model {model_path} --voices {voices_path} --lang {lang} --voice {model_name}"} + }, "conversion_tools": {}, "ocr_settings": {"ocrmypdf": {}}, - "auth_settings": {"oidc_client_id": "", "oidc_client_secret": "", "oidc_server_metadata_url": "", "admin_users": []} + "auth_settings": {"oidc_client_id": "", "oidc_client_secret": "", "oidc_server_metadata_url": "", "admin_users": []}, + "webhook_settings": {"enabled": False, "allow_chunked_api_uploads": False, "allowed_callback_urls": [], "callback_bearer_token": ""} } cfg = defaults.copy() cfg.update(cfg_raw) # Merge loaded settings into defaults @@ -177,11 +230,16 @@ def load_app_config(): # --- Final Failsafe: Use hardcoded defaults --- logger.error(f"CRITICAL: Fallback file settings.default.yml also failed: {e_fallback}. Using hardcoded defaults.") APP_CONFIG = { - "app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set()}, + "app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set(), "app_public_url": ""}, "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8", "device": "cpu"}}, + "tts_settings": { + "piper": {"model_dir": str(PATHS.TTS_MODELS_DIR), "use_cuda": False, "synthesis_config": {"length_scale": 1.0, "noise_scale": 0.667, "noise_w": 0.8}}, + "kokoro": {"model_dir": str(PATHS.KOKORO_TTS_MODELS_DIR), "command_template": "kokoro-tts {input} {output} --model {model_path} --voices {voices_path} --lang {lang} --voice {model_name}"} + }, "conversion_tools": {}, "ocr_settings": {"ocrmypdf": {}}, - "auth_settings": {"oidc_client_id": "", "oidc_client_secret": "", "oidc_server_metadata_url": "", "admin_users": []} + "auth_settings": {"oidc_client_id": "", "oidc_client_secret": "", "oidc_server_metadata_url": "", "admin_users": []}, + "webhook_settings": {"enabled": False, "allow_chunked_api_uploads": False, "allowed_callback_urls": [], "callback_bearer_token": ""} } @@ -219,6 +277,7 @@ class Job(Base): output_filesize = Column(Integer, nullable=True) result_preview = Column(Text, nullable=True) error_message = Column(Text, nullable=True) + callback_url = Column(String, nullable=True) created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc)) updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc)) @@ -236,6 +295,7 @@ class JobCreate(BaseModel): original_filename: str input_filepath: str input_filesize: int | None = None + callback_url: str | None = None processed_filepath: str | None = None class JobSchema(BaseModel): @@ -262,10 +322,13 @@ class FinalizeUploadPayload(BaseModel): total_chunks: int task_type: str model_size: str = "" + 
model_name: str = "" output_format: str = "" + callback_url: Optional[str] = None # For API chunked uploads + # -------------------------------------------------------------------------------- -# --- 3. CRUD OPERATIONS +# --- 3. CRUD OPERATIONS & WEBHOOKS # -------------------------------------------------------------------------------- def get_job(db: Session, job_id: str): return db.query(Job).filter(Job.id == job_id).first() @@ -312,11 +375,67 @@ def mark_job_as_completed(db: Session, job_id: str, output_filepath_str: str | N db.commit() return db_job +def send_webhook_notification(job_id: str, app_config: Dict[str, Any], base_url: str): + """Sends a notification to the callback URL if one is configured for the job.""" + webhook_config = app_config.get("webhook_settings", {}) + if not webhook_config.get("enabled", False): + return + + db = SessionLocal() + try: + job = get_job(db, job_id) + if not job or not job.callback_url: + return + + download_url = None + if job.status == "completed" and job.processed_filepath: + filename = Path(job.processed_filepath).name + public_url = app_config.get("app_settings", {}).get("app_public_url", base_url) + if not public_url: + logger.warning(f"app_public_url is not set. Cannot generate a full download URL for job {job_id}.") + download_url = f"/download/{filename}" # Relative URL as fallback + else: + download_url = urljoin(public_url, f"/download/{filename}") + + payload = { + "job_id": job.id, + "status": job.status, + "original_filename": job.original_filename, + "download_url": download_url, + "error_message": job.error_message, + "created_at": job.created_at.isoformat() + "Z", + "updated_at": job.updated_at.isoformat() + "Z", + } + + headers = {"Content-Type": "application/json", "User-Agent": "FileProcessor-Webhook/1.0"} + token = webhook_config.get("callback_bearer_token") + if token: + headers["Authorization"] = f"Bearer {token}" + + try: + with httpx.Client() as client: + response = client.post(job.callback_url, json=payload, headers=headers, timeout=15) + response.raise_for_status() + logger.info(f"Sent webhook notification for job {job_id} to {job.callback_url} (Status: {response.status_code})") + except httpx.RequestError as e: + logger.error(f"Failed to send webhook for job {job_id} to {job.callback_url}: {e}") + except httpx.HTTPStatusError as e: + logger.error(f"Webhook for job {job_id} received non-2xx response {e.response.status_code} from {job.callback_url}") + + except Exception as e: + logger.exception(f"An unexpected error occurred in send_webhook_notification for job {job_id}: {e}") + finally: + db.close() + + # -------------------------------------------------------------------------------- # --- 4. 
BACKGROUND TASK SETUP # -------------------------------------------------------------------------------- huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH) WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {} +PIPER_VOICES_CACHE: Dict[str, "PiperVoice"] = {} +AVAILABLE_TTS_VOICES_CACHE: Dict[str, Any] | None = None + def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel: if model_size in WHISPER_MODELS_CACHE: @@ -331,6 +450,453 @@ def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel: logger.info(f"Model '{model_size}' loaded.") return model +def get_piper_voice(model_name: str, tts_settings: dict | None) -> "PiperVoice": + """ + Load (or download + load) a Piper voice in a robust way: + - Try Python API helpers (get_voices, ensure_voice_exists/find_voice, download_voice) + - On any failure, try CLI fallback (download_voice_cli) + - Attempt to locate model files after download (search subdirs) + - Try re-importing piper if bindings were previously unavailable + """ + # ----- Defensive normalization ----- + if tts_settings is None or not isinstance(tts_settings, dict): + logger.debug("get_piper_voice: normalizing tts_settings (was %r)", tts_settings) + tts_settings = {} + + model_dir_val = tts_settings.get("model_dir", None) + if model_dir_val is None: + model_dir = Path(str(PATHS.TTS_MODELS_DIR)) + else: + try: + model_dir = Path(model_dir_val) + except Exception: + logger.warning("Could not coerce tts_settings['model_dir']=%r to Path; using default.", model_dir_val) + model_dir = Path(str(PATHS.TTS_MODELS_DIR)) + model_dir.mkdir(parents=True, exist_ok=True) + + # If PiperVoice already cached, reuse + if model_name in PIPER_VOICES_CACHE: + logger.info("Reusing cached Piper voice '%s'.", model_name) + return PIPER_VOICES_CACHE[model_name] + + with _model_semaphore: + if model_name in PIPER_VOICES_CACHE: + return PIPER_VOICES_CACHE[model_name] + + # If Python bindings are missing, attempt CLI download first (and try re-import) + if PiperVoice is None: + logger.info("Piper Python bindings missing; attempting CLI download fallback for '%s' before failing import.", model_name) + cli_ok = False + try: + cli_ok = download_voice_cli(model_name, model_dir) + except Exception as e: + logger.warning("CLI download attempt raised: %s", e) + cli_ok = False + + if cli_ok: + # attempt to re-import piper package (maybe import issue was transient) + try: + importlib.invalidate_caches() + piper_mod = importlib.import_module("piper") + from piper import PiperVoice as _PiperVoice # noqa: F401 + from piper.synthesis import SynthesisConfig as _SynthesisConfig # noqa: F401 + globals().update({"PiperVoice": _PiperVoice, "SynthesisConfig": _SynthesisConfig}) + logger.info("Successfully re-imported piper after CLI download.") + except Exception: + logger.warning("Could not import piper after CLI download; bindings still unavailable.") + # If bindings still absent, we cannot load models; raise helpful error + if PiperVoice is None: + raise RuntimeError( + "Piper Python bindings are not installed or failed to import. " + "Tried CLI download fallback but python bindings are still unavailable. " + "Please install 'piper-tts' in the runtime used by this process." + ) + + # Now we have Piper bindings (or they were present to begin with). Attempt Python helpers. 
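+        # Resolution order, as a sketch (helper names are this module's own; the
+        # voice id "en_US-lessac-medium" is only an example):
+        #   1. ensure_voice_exists()/find_voice() from piper.download, when exported
+        #   2. the legacy download_voice() helper
+        #   3. the CLI fallback, roughly:
+        #        python -m piper.download_voices en_US-lessac-medium --data-dir models/tts
+        # Whichever route succeeds first supplies onnx_path/config_path below.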
+        onnx_path = None
+        config_path = None
+
+        # Prefer using get_voices to update the index if available
+        voices_info = None
+        try:
+            if get_voices:
+                try:
+                    voices_info = get_voices(str(model_dir), update_voices=True)
+                except TypeError:
+                    # some versions may not support update_voices kwarg
+                    voices_info = get_voices(str(model_dir))
+        except Exception as e:
+            logger.debug("get_voices failed or unavailable: %s", e)
+            voices_info = None
+
+        try:
+            # Preferred modern helpers
+            if ensure_voice_exists and find_voice:
+                try:
+                    ensure_voice_exists(model_name, [model_dir], model_dir, voices_info)
+                    onnx_path, config_path = find_voice(model_name, [model_dir])
+                except Exception as e:
+                    # Could be VoiceNotFoundError or other download error
+                    logger.warning("ensure/find voice failed for %s: %s", model_name, e)
+                    raise
+            elif download_voice:
+                # older API: call download helper directly
+                try:
+                    download_voice(model_name, model_dir)
+                    # attempt to locate files
+                    onnx_path = model_dir / f"{model_name}.onnx"
+                    config_path = model_dir / f"{model_name}.onnx.json"
+                except Exception:
+                    logger.warning("download_voice failed for %s", model_name)
+                    raise
+            else:
+                # No python download helper available
+                raise RuntimeError("No Python download helper available in installed piper package.")
+        except Exception as py_exc:
+            # Python helper route failed; try CLI fallback BEFORE giving up
+            logger.info("Python download route failed for '%s' (%s). Trying CLI fallback...", model_name, py_exc)
+            try:
+                cli_ok = download_voice_cli(model_name, model_dir)
+            except Exception as e:
+                logger.warning("CLI fallback attempt raised: %s", e)
+                cli_ok = False
+
+            if not cli_ok:
+                # If CLI also failed, re-raise the original python exception to preserve context
+                logger.error("Both Python download helpers and CLI fallback failed for '%s'.", model_name)
+                raise
+
+            # CLI succeeded (or at least returned success); _find_model_files already
+            # searches model_dir recursively, so a single pass also covers voices the
+            # CLI wrote into nested dirs or under slightly different names.
+            onnx_path, config_path = _find_model_files(model_name, model_dir)
+            if not (onnx_path and config_path):
+                logger.error("Model files still missing after CLI fallback for '%s'.", model_name)
+                raise RuntimeError(f"Piper voice files for '{model_name}' missing after CLI fallback.")
+            # continue to loading below
+
+        # Final safety check and last-resort search
+        if not (onnx_path and config_path):
+            onnx_path, config_path = _find_model_files(model_name, model_dir)
+
+        if not (onnx_path and config_path):
+            raise RuntimeError(f"Piper voice files for '{model_name}' are missing after attempts to download.")
+
+        # Load the PiperVoice
+        try:
+            use_cuda = bool(tts_settings.get("use_cuda", False))
+            voice = PiperVoice.load(str(onnx_path), config_path=str(config_path), use_cuda=use_cuda)
+            PIPER_VOICES_CACHE[model_name] = voice
+            logger.info("Loaded Piper voice '%s' from %s", model_name, onnx_path)
+            return voice
+        except Exception as e:
+            logger.exception("Failed to load Piper voice '%s' from files (%s, %s): %s", model_name, onnx_path, config_path, e)
+            raise
+
+
+def _find_model_files(model_name: str, model_dir: Path):
+    """
+    Try multiple strategies to find onnx and config files for a given model_name under model_dir.
+    Returns (onnx_path, config_path) or (None, None).
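+    For example (voice id illustrative): "en_US-lessac-medium" should resolve to the
+    pair en_US-lessac-medium.onnx / en_US-lessac-medium.onnx.json, found either
+    directly in model_dir or nested one level down after a CLI download.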
+ """ + # direct files in model_dir + onnx = model_dir / f"{model_name}.onnx" + cfg = model_dir / f"{model_name}.onnx.json" + if onnx.exists() and cfg.exists(): + return onnx, cfg + + # possible alternative names or nested directories: search recursively + matches_onnx = list(model_dir.rglob(f"{model_name}*.onnx")) + matches_cfg = list(model_dir.rglob(f"{model_name}*.onnx.json")) + if matches_onnx and matches_cfg: + # prefer same directory match + for o in matches_onnx: + for c in matches_cfg: + if o.parent == c.parent: + return o, c + # otherwise return first matches + return matches_onnx[0], matches_cfg[0] + + # last-resort: any onnx + any json in same subdir that contain model name token + for o in model_dir.rglob("*.onnx"): + if model_name in o.name: + # try find any matching json in same dir + cands = list(o.parent.glob("*.onnx.json")) + if cands: + return o, cands[0] + + return None, None + + +# --------------------------- +# CLI: list available voices +# --------------------------- +def list_voices_cli(timeout: int = 30, python_executables: Optional[List[str]] = None) -> List[str]: + """ + Run `python -m piper.download_voices` (no args) and parse output into a list of voice IDs. + Returns [] on failure. + """ + if python_executables is None: + python_executables = [sys.executable, "python3", "python"] + + # Regex: voice ids look like en_US-lessac-medium (letters/digits/._-) + voice_regex = re.compile(r'^([A-Za-z0-9_\-\.]+)') + + for py in python_executables: + cmd = [py, "-m", "piper.download_voices"] + try: + logger.debug("Trying Piper CLI list: %s", shlex.join(cmd)) + cp = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + timeout=timeout, + ) + out = cp.stdout.strip() + # If stdout empty, sometimes the script writes to stderr + if not out: + out = cp.stderr.strip() + + if not out: + logger.debug("Piper CLI listed nothing (empty output) for %s", py) + continue + + voices = [] + for line in out.splitlines(): + line = line.strip() + if not line: + continue + # Try to extract first token that matches voice id pattern + m = voice_regex.match(line) + if m: + v = m.group(1) + # basic sanity: avoid capturing words like 'Available' or headings + if re.search(r'\d', v) or '-' in v or '_' in v or '.' in v: + voices.append(v) + else: + # allow alphabetic tokens too (defensive) + voices.append(v) + else: + # Also handle lines like " - en_US-lessac-medium: description" + parts = re.split(r'[:\s]+', line) + if parts: + candidate = parts[0].lstrip('-').strip() + if candidate: + voices.append(candidate) + # Dedupe while preserving order + seen = set() + dedup = [] + for v in voices: + if v not in seen: + seen.add(v) + dedup.append(v) + logger.info("Piper CLI list returned %d voices via %s", len(dedup), py) + return dedup + except subprocess.CalledProcessError as e: + logger.debug("Piper CLI list (%s) non-zero exit. 
stdout=%s stderr=%s", py, e.stdout, e.stderr) + except FileNotFoundError: + logger.debug("Python executable not found: %s", py) + except subprocess.TimeoutExpired: + logger.warning("Piper CLI list timed out for %s", py) + except Exception as e: + logger.exception("Unexpected error running Piper CLI list with %s: %s", py, e) + + logger.error("All Piper CLI list attempts failed.") + return [] + +# --------------------------- +# CLI: download a voice +# --------------------------- +def download_voice_cli(model_name: str, model_dir: Path, python_executables: Optional[List[str]] = None, timeout: int = 300) -> bool: + """ + Try to download a Piper voice using CLI: + python -m piper.download_voices --data-dir + Returns True if the CLI ran and expected files exist afterwards (best effort). + """ + if python_executables is None: + python_executables = [sys.executable, "python3", "python"] + + model_dir = Path(model_dir) + model_dir.mkdir(parents=True, exist_ok=True) + + for py in python_executables: + cmd = [py, "-m", "piper.download_voices", model_name, "--data-dir", str(model_dir)] + try: + logger.info("Trying Piper CLI download: %s", shlex.join(cmd)) + cp = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + timeout=timeout, + ) + logger.debug("Piper CLI download stdout: %s", cp.stdout) + logger.debug("Piper CLI download stderr: %s", cp.stderr) + # Heuristic success check + onnx = model_dir / f"{model_name}.onnx" + cfg = model_dir / f"{model_name}.onnx.json" + if onnx.exists() and cfg.exists(): + logger.info("Piper CLI created expected files for %s", model_name) + return True + # Some versions might create nested dirs; treat non-error CLI execution as success (caller will re-check) + return True + except subprocess.CalledProcessError as e: + logger.warning("Piper CLI (%s) returned non-zero exit. stdout: %s; stderr: %s", py, e.stdout, e.stderr) + except FileNotFoundError: + logger.debug("Python executable %s not found.", py) + except subprocess.TimeoutExpired: + logger.warning("Piper CLI call timed out for python %s", py) + except Exception as e: + logger.exception("Unexpected error running Piper CLI download with %s: %s", py, e) + + logger.error("All Piper CLI attempts failed for model %s", model_name) + return False + +# --------------------------- +# Safe get_voices wrapper +# --------------------------- +def safe_get_voices(model_dir: Path) -> List[Dict]: + """ + Try to call the in-Python get_voices(..., update_voices=True) and return a list of dicts. + If that fails, fall back to list_voices_cli() and return a list of simple dicts: + [{"id": "en_US-lessac-medium", "name": "...", "local": False}, ...] + Keeps the shape flexible so your existing endpoint can use it with minimal changes. + """ + # Prefer Python API if available + try: + if get_voices: # get_voices imported earlier in your file + # Ensure up-to-date index (like CLI) + raw = get_voices(str(model_dir), update_voices=True) + # get_voices may already return the desired structure; normalise to a list of dicts + if isinstance(raw, dict): + # some versions return mapping id->meta + items = [] + for vid, meta in raw.items(): + d = {"id": vid} + if isinstance(meta, dict): + d.update(meta) + items.append(d) + return items + elif isinstance(raw, list): + return raw + else: + # unknown format -> fall back to CLI + logger.debug("get_voices returned unexpected type; falling back to CLI list.") + except Exception as e: + logger.warning("In-Python get_voices failed: %s. 
Falling back to CLI listing.", e) + + # CLI fallback: parse voice ids and create simple dicts + cli_list = list_voices_cli() + results = [{"id": vid, "name": vid, "local": False} for vid in cli_list] + return results + +def list_kokoro_voices_cli(timeout: int = 60) -> List[str]: + """ + Run `kokoro-tts --help-voices` and parse the output for available models. + Returns [] on failure. + """ + model_path = PATHS.KOKORO_MODEL_FILE + voices_path = PATHS.KOKORO_VOICES_FILE + if not (model_path.exists() and voices_path.exists()): + logger.warning("Cannot list Kokoro TTS voices because model/voices files are missing.") + return [] + + cmd = ["kokoro-tts", "--help-voices", "--model", str(model_path), "--voices", str(voices_path)] + try: + logger.debug("Trying Kokoro TTS CLI list: %s", shlex.join(cmd)) + cp = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + timeout=timeout, + ) + out = cp.stdout.strip() + if not out: + out = cp.stderr.strip() + + if not out: + logger.warning("Kokoro TTS CLI list returned no output.") + return [] + + voices = [] + voice_pattern = re.compile(r'^\s*\d+\.\s+([a-z]{2,3}_[a-zA-Z0-9]+)$') + for line in out.splitlines(): + line = line.strip() + match = voice_pattern.match(line) + if match: + voices.append(match.group(1)) + + logger.info("Kokoro TTS CLI list returned %d voices.", len(voices)) + return sorted(list(set(voices))) + except FileNotFoundError: + logger.info("Kokoro TTS ('kokoro-tts' command) not found in PATH. Kokoro TTS support disabled.") + return [] + except subprocess.CalledProcessError as e: + logger.error("Kokoro TTS CLI list command failed. stderr: %s", e.stderr[:1000]) + return [] + except subprocess.TimeoutExpired: + logger.warning("Kokoro TTS CLI list command timed out.") + return [] + except Exception as e: + logger.exception("Unexpected error running Kokoro TTS CLI list: %s", e) + return [] + +def list_kokoro_languages_cli(timeout: int = 60) -> List[str]: + """ + Run `kokoro-tts --help-languages` and parse the output for available languages. + Returns [] on failure. + """ + model_path = PATHS.KOKORO_MODEL_FILE + voices_path = PATHS.KOKORO_VOICES_FILE + if not (model_path.exists() and voices_path.exists()): + logger.warning("Cannot list Kokoro TTS languages because model/voices files are missing.") + return [] + + cmd = ["kokoro-tts", "--help-languages", "--model", str(model_path), "--voices", str(voices_path)] + try: + logger.debug("Trying Kokoro TTS language list: %s", shlex.join(cmd)) + cp = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True, timeout=timeout) + out = cp.stdout.strip() + if not out: + out = cp.stderr.strip() + + if not out: + logger.warning("Kokoro TTS language list returned no output.") + return [] + + languages = [] + lang_pattern = re.compile(r'^\s*([a-z]{2,3}(?:-[a-z]{2,3})?)$') + for line in out.splitlines(): + line = line.strip() + if line.lower().startswith("supported languages"): + continue + match = lang_pattern.match(line) + if match: + languages.append(match.group(1)) + + logger.info("Kokoro TTS language list returned %d languages.", len(languages)) + return sorted(list(set(languages))) + except FileNotFoundError: + logger.info("Kokoro TTS ('kokoro-tts' command) not found in PATH. Kokoro TTS support disabled.") + return [] + except subprocess.CalledProcessError as e: + logger.error("Kokoro TTS language list command failed. 
stderr: %s", e.stderr[:1000]) + return [] + except subprocess.TimeoutExpired: + logger.warning("Kokoro TTS language list command timed out.") + return [] + except Exception as e: + logger.exception("Unexpected error running Kokoro TTS language list: %s", e) + return [] + + def run_command(argv: TypingList[str], timeout: int = 300): try: res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout, preexec_fn=_limit_resources_preexec) @@ -343,7 +909,11 @@ def run_command(argv: TypingList[str], timeout: int = 300): def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> TypingList[str]: fmt = Formatter() used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname} - ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth", "filter"} + ALLOWED_VARS = { + "input", "output", "output_dir", "output_ext", "quality", "speed", "preset", + "device", "dpi", "samplerate", "bitdepth", "filter", "model_name", + "model_path", "voices_path", "lang" + } bad = used - ALLOWED_VARS if bad: raise ValueError(f"Command template contains disallowed placeholders: {bad}") @@ -354,8 +924,10 @@ def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> Ty formatted = template_str.format(**safe_mapping) return shlex.split(formatted) +# --- TASK RUNNERS --- +# Each task now accepts app_config and base_url to facilitate webhook notifications @huey.task() -def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict): +def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict, app_config: dict, base_url: str): db = SessionLocal() input_path = Path(input_path_str) try: @@ -391,12 +963,96 @@ def run_transcription_task(job_id: str, input_path_str: str, output_path_str: st ensure_path_is_safe(input_path, [PATHS.UPLOADS_DIR, PATHS.CHUNK_TMP_DIR]) input_path.unlink(missing_ok=True) except Exception: - # swallow cleanup errors but log logger.exception("Failed to cleanup input file after transcription.") db.close() + send_webhook_notification(job_id, app_config, base_url) @huey.task() -def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict): +def run_tts_task(job_id: str, input_path_str: str, output_path_str: str, model_name: str, tts_settings: dict, app_config: dict, base_url: str): + db = SessionLocal() + input_path = Path(input_path_str) + try: + job = get_job(db, job_id) + if not job or job.status == 'cancelled': + return + + update_job_status(db, job_id, "processing") + + engine, actual_model_name = "piper", model_name + if '/' in model_name: + parts = model_name.split('/', 1) + engine = parts[0] + actual_model_name = parts[1] + + + logger.info(f"Starting TTS for job {job_id} using engine '{engine}' with model '{actual_model_name}'") + out_path = Path(output_path_str) + tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}") + + if engine == "piper": + piper_settings = tts_settings.get("piper", {}) + voice = get_piper_voice(actual_model_name, piper_settings) + + with open(input_path, 'r', encoding='utf-8') as f: + text_to_speak = f.read() + + synthesis_params = piper_settings.get("synthesis_config", {}) + synthesis_config = SynthesisConfig(**synthesis_params) if SynthesisConfig else None + + with wave.open(str(tmp_out), "wb") as wav_file: + 
wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(voice.config.sample_rate) + voice.synthesize_wav(text_to_speak, wav_file, synthesis_config) + + elif engine == "kokoro": + kokoro_settings = tts_settings.get("kokoro", {}) + command_template_str = kokoro_settings.get("command_template") + if not command_template_str: + raise ValueError("Kokoro TTS command_template is not defined in settings.") + + try: + lang, voice_name = actual_model_name.split('/', 1) + except ValueError: + raise ValueError(f"Invalid Kokoro model format. Expected 'lang/voice', but got '{actual_model_name}'.") + + mapping = { + "input": str(input_path), + "output": str(tmp_out), + "lang": lang, + "model_name": voice_name, + "model_path": str(PATHS.KOKORO_MODEL_FILE), + "voices_path": str(PATHS.KOKORO_VOICES_FILE), + } + + command = validate_and_build_command(command_template_str, mapping) + logger.info(f"Executing Kokoro TTS command: {' '.join(command)}") + run_command(command, timeout=kokoro_settings.get("timeout", 300)) + + if not tmp_out.exists(): + raise FileNotFoundError("Kokoro TTS command did not produce an output file.") + + else: + raise ValueError(f"Unsupported TTS engine: {engine}") + + tmp_out.replace(out_path) + mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview="Successfully generated audio.") + logger.info(f"TTS for job {job_id} completed.") + + except Exception as e: + logger.exception(f"ERROR during TTS for job {job_id}") + update_job_status(db, job_id, "failed", error=f"TTS failed: {e}") + finally: + try: + ensure_path_is_safe(input_path, [PATHS.UPLOADS_DIR, PATHS.CHUNK_TMP_DIR]) + input_path.unlink(missing_ok=True) + except Exception: + logger.exception("Failed to cleanup input file after TTS.") + db.close() + send_webhook_notification(job_id, app_config, base_url) + +@huey.task() +def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict, app_config: dict, base_url: str): db = SessionLocal() input_path = Path(input_path_str) try: @@ -426,9 +1082,10 @@ def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr except Exception: logger.exception("Failed to cleanup input file after PDF OCR.") db.close() + send_webhook_notification(job_id, app_config, base_url) @huey.task() -def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str): +def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str, app_config: dict, base_url: str): db = SessionLocal() input_path = Path(input_path_str) try: @@ -455,10 +1112,11 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str): except Exception: logger.exception("Failed to cleanup input file after Image OCR.") db.close() + send_webhook_notification(job_id, app_config, base_url) @huey.task() -def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, tool: str, task_key: str, conversion_tools_config: dict): +def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, tool: str, task_key: str, conversion_tools_config: dict, app_config: dict, base_url: str): db = SessionLocal() input_path = Path(input_path_str) output_path = Path(output_path_str) @@ -476,7 +1134,6 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, current_input_path = input_path - # Pre-processing for specific tools if tool == "mozjpeg": temp_input_file = input_path.with_suffix('.temp.ppm') logger.info(f"Pre-converting for MozJPEG: {input_path} -> 
{temp_input_file}") @@ -489,7 +1146,6 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, update_job_status(db, job_id, "processing", progress=50) - # prepare temporary output and mapping temp_output_file = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}") mapping = { "input": str(current_input_path), @@ -498,9 +1154,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, "output_ext": output_path.suffix.lstrip('.'), } - # tool specific mapping adjustments if tool.startswith("ghostscript"): - # task_key form: "device_setting" parts = task_key.split('_', 1) device = parts[0] if parts else "" setting = parts[1] if len(parts) > 1 else "" @@ -518,7 +1172,6 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, except: _, rate = task_key.split('_') depth = '' - rate = rate.replace('k', '000') if 'k' in rate else rate mapping.update({"samplerate": rate, "bitdepth": depth}) elif tool == "mozjpeg": @@ -534,7 +1187,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, command = validate_and_build_command(command_template_str, mapping) logger.info(f"Executing command: {' '.join(command)}") - result = run_command(command, timeout=tool_config.get("timeout", 300)) + run_command(command, timeout=tool_config.get("timeout", 300)) if temp_output_file and temp_output_file.exists(): temp_output_file.replace(output_path) @@ -546,7 +1199,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, update_job_status(db, job_id, "failed", error=f"Conversion failed: {e}") finally: try: - ensure_path_is_safe(input_path, [PATHS.UPLOADS_DIR]) + ensure_path_is_safe(input_path, [PATHS.UPLOADS_DIR, PATHS.CHUNK_TMP_DIR]) input_path.unlink(missing_ok=True) except Exception: logger.exception("Failed to cleanup main input file after conversion.") @@ -565,16 +1218,51 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, except Exception: logger.exception("Failed to cleanup temp output file after conversion.") db.close() + send_webhook_notification(job_id, app_config, base_url) # -------------------------------------------------------------------------------- # --- 5. FASTAPI APPLICATION # -------------------------------------------------------------------------------- +async def download_kokoro_models_if_missing(): + """Checks for Kokoro TTS model files and downloads them if they don't exist.""" + files_to_download = { + "model": {"path": PATHS.KOKORO_MODEL_FILE, "url": "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/kokoro-v1.0.onnx"}, + "voices": {"path": PATHS.KOKORO_VOICES_FILE, "url": "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/voices-v1.0.bin"} + } + async with httpx.AsyncClient() as client: + for name, details in files_to_download.items(): + path, url = details["path"], details["url"] + if not path.exists(): + logger.info(f"Kokoro TTS {name} file missing. 
Downloading from {url}...")
+                try:
+                    with path.open("wb") as f:
+                        async with client.stream("GET", url, follow_redirects=True, timeout=300) as response:
+                            response.raise_for_status()
+                            async for chunk in response.aiter_bytes():
+                                f.write(chunk)
+                    logger.info(f"Successfully downloaded Kokoro TTS {name} file to {path}.")
+                except Exception as e:
+                    logger.error(f"Failed to download Kokoro TTS {name} file: {e}")
+                    if path.exists(): path.unlink(missing_ok=True)
+            else:
+                logger.info(f"Found existing Kokoro TTS {name} file at {path}.")
+
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     logger.info("Application starting up...")
     Base.metadata.create_all(bind=engine)
     load_app_config()
-    ENV = os.environ.get('ENV', 'dev').lower() # probably reduntant because I load the .env at the start but whatever
+
+    # Download required models on startup
+    if shutil.which("kokoro-tts"):
+        await download_kokoro_models_if_missing()
+
+    if PiperVoice is None:
+        logger.warning("piper-tts is not installed. Piper TTS features will be disabled. Install with: pip install piper-tts")
+    if not shutil.which("kokoro-tts"):
+        logger.warning("kokoro-tts command not found in PATH. Kokoro TTS features will be disabled.")
+
+    ENV = os.environ.get('ENV', 'dev').lower()
     ALLOW_LOCAL_ONLY = os.environ.get('ALLOW_LOCAL_ONLY', 'false').lower() == 'true'
     if LOCAL_ONLY_MODE and ENV != 'dev' and not ALLOW_LOCAL_ONLY:
         raise RuntimeError('LOCAL_ONLY_MODE may only be enabled in dev or when ALLOW_LOCAL_ONLY=true is set.')
@@ -605,13 +1293,12 @@ if not SECRET_KEY:
     logger.warning('SECRET_KEY is not set. Generating a temporary key. Sessions will not persist across restarts.')
     SECRET_KEY = os.urandom(24).hex()
 
-# Should probably set https_only=True in production behind HTTPS i guess
 app.add_middleware(
     SessionMiddleware,
     secret_key=SECRET_KEY,
-    https_only=False,
+    https_only=False,  # Set to True if behind HTTPS proxy
    same_site='lax',
-    max_age=14 * 24 * 60 * 60 # 14 days in seconds
+    max_age=14 * 24 * 60 * 60  # 14 days
 )
 
@@ -620,11 +1307,31 @@ app.mount("/static", StaticFiles(directory=str(PATHS.BASE_DIR / "static")), name="static")
 templates = Jinja2Templates(directory=str(PATHS.BASE_DIR / "templates"))
 
 # --- AUTH & USER HELPERS ---
+# auto_error=False so a request without an Authorization header still reaches
+# the handler; with the default, FastAPI would reject it before the
+# LOCAL_ONLY_MODE shortcut in require_api_user could run.
+http_bearer = HTTPBearer(auto_error=False)
+
 def get_current_user(request: Request):
     if LOCAL_ONLY_MODE:
         return {'sub': 'local_user', 'email': 'local@user.com', 'name': 'Local User'}
     return request.session.get('user')
 
+async def require_api_user(request: Request, creds: Optional[HTTPAuthorizationCredentials] = Depends(http_bearer)):
+    """Dependency for API routes requiring OIDC bearer token authentication."""
+    if LOCAL_ONLY_MODE:
+        return {'sub': 'local_api_user', 'email': 'local@api.user.com', 'name': 'Local API User'}
+
+    if not creds:
+        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Not authenticated")
+    token = creds.credentials
+    try:
+        user = await oauth.oidc.userinfo(token={'access_token': token})
+        return dict(user)
+    except Exception as e:
+        logger.error(f"API token validation failed: {e}")
+        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or expired token")
+
 def is_admin(request: Request) -> bool:
     if LOCAL_ONLY_MODE: return True
     user = get_current_user(request)
@@ -641,17 +1345,54 @@ def require_admin(request: Request):
     if not is_admin(request): raise HTTPException(status_code=403, detail="Administrator privileges required.")
     return True
 
-# --- CHUNKED UPLOADs ---
+# --- FILE SAVING UTILITY ---
+async def save_upload_file(upload_file: UploadFile, destination: Path) -> int:
+    """
+    Saves an uploaded file to a destination, handling size limits.
+    This function is used by both the simple API and the legacy direct-upload routes.
+    """
+    max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024)
+    tmp_path = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}")
+    size = 0
+    try:
+        with tmp_path.open("wb") as buffer:
+            while True:
+                chunk = await upload_file.read(1024 * 1024)
+                if not chunk:
+                    break
+                size += len(chunk)
+                if size > max_size:
+                    raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
+                buffer.write(chunk)
+        tmp_path.replace(destination)
+        return size
+    except Exception as e:
+        try:
+            # Ensure temp file is cleaned up on error
+            ensure_path_is_safe(tmp_path, [PATHS.UPLOADS_DIR, PATHS.CHUNK_TMP_DIR])
+            tmp_path.unlink(missing_ok=True)
+        except Exception:
+            logger.exception("Failed to remove temp upload file after error.")
+        # Re-raise the original exception
+        raise e
+    finally:
+        await upload_file.close()
+
+def is_allowed_file(filename: str, allowed_extensions: set) -> bool:
+    if not allowed_extensions:  # If set is empty, allow all
+        return True
+    return Path(filename).suffix.lower() in allowed_extensions
+
+# --- CHUNKED UPLOADS (for UI) ---
 @app.post("/upload/chunk")
 async def upload_chunk(
     chunk: UploadFile = File(...),
     upload_id: str = Form(...),
     chunk_number: int = Form(...),
-    user: dict = Depends(require_user) # AUTHENTICATION
+    user: dict = Depends(require_user)
 ):
     safe_upload_id = secure_filename(upload_id)
-    temp_dir = PATHS.CHUNK_TMP_DIR / safe_upload_id
-    temp_dir = ensure_path_is_safe(temp_dir, [PATHS.CHUNK_TMP_DIR])
+    temp_dir = ensure_path_is_safe(PATHS.CHUNK_TMP_DIR / safe_upload_id, [PATHS.CHUNK_TMP_DIR])
     temp_dir.mkdir(exist_ok=True)
     chunk_path = temp_dir / f"{chunk_number}.chunk"
@@ -677,36 +1418,56 @@ async def _stitch_chunks(temp_dir: Path, final_path: Path, total_chunks: int):
     shutil.rmtree(temp_dir, ignore_errors=True)
 
 @app.post("/upload/finalize", status_code=status.HTTP_202_ACCEPTED)
-async def finalize_upload(payload: FinalizeUploadPayload, user: dict = Depends(require_user), db: Session = Depends(get_db)):
+async def finalize_upload(request: Request, payload: FinalizeUploadPayload, user: dict = Depends(require_user), db: Session = Depends(get_db)):
     safe_upload_id = secure_filename(payload.upload_id)
-    temp_dir = PATHS.CHUNK_TMP_DIR / safe_upload_id
-    temp_dir = ensure_path_is_safe(temp_dir, [PATHS.CHUNK_TMP_DIR])
+    temp_dir = ensure_path_is_safe(PATHS.CHUNK_TMP_DIR / safe_upload_id, [PATHS.CHUNK_TMP_DIR])
     if not temp_dir.is_dir():
         raise HTTPException(status_code=404, detail="Upload session not found or already finalized.")
 
+    # Callback URLs on chunked uploads are an API feature: honour the
+    # allow_chunked_api_uploads switch and the allowlist before accepting one.
+    webhook_config = APP_CONFIG.get("webhook_settings", {})
+    if payload.callback_url:
+        if not (webhook_config.get("enabled", False) and webhook_config.get("allow_chunked_api_uploads", False)):
+            raise HTTPException(status_code=403, detail="Chunked uploads with a callback_url are disabled on this server.")
+        if not is_allowed_callback_url(payload.callback_url, webhook_config.get("allowed_callback_urls", [])):
+            raise HTTPException(status_code=400, detail="Provided callback_url is not allowed.")
+
     job_id = uuid.uuid4().hex
     safe_filename = secure_filename(payload.original_filename)
     final_path = PATHS.UPLOADS_DIR / f"{Path(safe_filename).stem}_{job_id}{Path(safe_filename).suffix}"
     await _stitch_chunks(temp_dir, final_path, payload.total_chunks)
 
+    base_url = str(request.base_url)
     job_data = JobCreate(
         id=job_id, user_id=user['sub'], task_type=payload.task_type,
         original_filename=payload.original_filename, input_filepath=str(final_path),
-        input_filesize=final_path.stat().st_size
+        input_filesize=final_path.stat().st_size,
+        # Persist the callback so send_webhook_notification can fire for this job.
+        callback_url=payload.callback_url
     )
 
+    # --- Task Dispatching for UI chunked uploads ---
    if 
payload.task_type == "transcription": stem = Path(safe_filename).stem processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.txt" job_data.processed_filepath = str(processed_path) create_job(db=db, job=job_data) - run_transcription_task(job_id, str(final_path), str(processed_path), payload.model_size, APP_CONFIG.get("transcription_settings", {}).get("whisper", {})) + run_transcription_task(job_data.id, str(final_path), str(processed_path), payload.model_size, APP_CONFIG.get("transcription_settings", {}).get("whisper", {}), APP_CONFIG, base_url) + elif payload.task_type == "tts": + tts_config = APP_CONFIG.get("tts_settings", {}) + stem = Path(safe_filename).stem + processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.wav" + job_data.processed_filepath = str(processed_path) + create_job(db=db, job=job_data) + run_tts_task(job_data.id, str(final_path), str(processed_path), payload.model_name, tts_config, APP_CONFIG, base_url) elif payload.task_type == "ocr": stem, suffix = Path(safe_filename).stem, Path(safe_filename).suffix processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}{suffix}" job_data.processed_filepath = str(processed_path) create_job(db=db, job=job_data) - run_pdf_ocr_task(job_id, str(final_path), str(processed_path), APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})) + run_pdf_ocr_task(job_data.id, str(final_path), str(processed_path), APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {}), APP_CONFIG, base_url) elif payload.task_type == "conversion": try: tool, task_key = payload.output_format.split('_', 1) @@ -719,52 +1473,18 @@ async def finalize_upload(payload: FinalizeUploadPayload, user: dict = Depends(r processed_path = PATHS.PROCESSED_DIR / f"{original_stem}_{job_id}.{target_ext}" job_data.processed_filepath = str(processed_path) create_job(db=db, job=job_data) - run_conversion_task(job_id, str(final_path), str(processed_path), tool, task_key, APP_CONFIG.get("conversion_tools", {})) + run_conversion_task(job_data.id, str(final_path), str(processed_path), tool, task_key, APP_CONFIG.get("conversion_tools", {}), APP_CONFIG, base_url) else: final_path.unlink(missing_ok=True) raise HTTPException(status_code=400, detail="Invalid task type.") return {"job_id": job_id, "status": "pending"} -# --- OLD DIRECT-UPLOAD ROUTES (kept for compatibility) --- -# These use the same task functions but accept direct file uploads (no chunking). -async def save_upload_file_chunked(upload_file: UploadFile, destination: Path) -> int: - """ - Write upload to a tmp file in chunks, then atomically move to final destination. - Returns the final size of the file in bytes. 
- """ - max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024) - tmp = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}") - size = 0 - try: - with tmp.open("wb") as buffer: - while True: - chunk = await upload_file.read(1024 * 1024) - if not chunk: - break - size += len(chunk) - if size > max_size: - raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit") - buffer.write(chunk) - tmp.replace(destination) - return size - except Exception: - try: - ensure_path_is_safe(tmp, [PATHS.UPLOADS_DIR, PATHS.CHUNK_TMP_DIR]) - tmp.unlink(missing_ok=True) - except Exception: - logger.exception("Failed to remove temp upload file after error.") - raise - -def is_allowed_file(filename: str, allowed_extensions: set) -> bool: - return Path(filename).suffix.lower() in allowed_extensions - +# --- LEGACY DIRECT-UPLOAD ROUTES (kept for compatibility) --- @app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED) async def submit_audio_transcription( - file: UploadFile = File(...), - model_size: str = Form("base"), - db: Session = Depends(get_db), - user: dict = Depends(require_user) + request: Request, file: UploadFile = File(...), model_size: str = Form("base"), + db: Session = Depends(get_db), user: dict = Depends(require_user) ): allowed_audio_exts = {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus"} if not is_allowed_file(file.filename, allowed_audio_exts): @@ -774,105 +1494,282 @@ async def submit_audio_transcription( if model_size not in whisper_config.get("allowed_models", []): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.") - job_id = uuid.uuid4().hex - safe_basename = secure_filename(file.filename) + job_id, safe_basename = uuid.uuid4().hex, secure_filename(file.filename) stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix + upload_path = PATHS.UPLOADS_DIR / f"{stem}_{job_id}{suffix}" + processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.txt" + input_size = await save_upload_file(file, upload_path) + base_url = str(request.base_url) - audio_filename = f"{stem}_{job_id}{suffix}" - transcript_filename = f"{stem}_{job_id}.txt" - upload_path = PATHS.UPLOADS_DIR / audio_filename - processed_path = PATHS.PROCESSED_DIR / transcript_filename - - input_size = await save_upload_file_chunked(file, upload_path) - - job_data = JobCreate( - id=job_id, - user_id=user['sub'], - task_type="transcription", - original_filename=file.filename, - input_filepath=str(upload_path), - input_filesize=input_size, - processed_filepath=str(processed_path) - ) + job_data = JobCreate(id=job_id, user_id=user['sub'], task_type="transcription", original_filename=file.filename, + input_filepath=str(upload_path), input_filesize=input_size, processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) - - run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size, whisper_settings=whisper_config) - + run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size, whisper_settings=whisper_config, app_config=APP_CONFIG, base_url=base_url) return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} @app.post("/convert-file", status_code=status.HTTP_202_ACCEPTED) -async def submit_file_conversion(file: UploadFile = File(...), output_format: str = Form(...), db: Session = Depends(get_db), user: dict = Depends(require_user)): +async def 
submit_file_conversion(request: Request, file: UploadFile = File(...), output_format: str = Form(...), db: Session = Depends(get_db), user: dict = Depends(require_user)): allowed_exts = APP_CONFIG.get("app_settings", {}).get("allowed_all_extensions", set()) if not is_allowed_file(file.filename, allowed_exts): raise HTTPException(status_code=400, detail=f"File type '{Path(file.filename).suffix}' not allowed.") conversion_tools = APP_CONFIG.get("conversion_tools", {}) try: tool, task_key = output_format.split('_', 1) - if tool not in conversion_tools or task_key not in conversion_tools[tool].get("formats", {}): - # fallback: allow tasks that exist but may not be in formats map (some configs only have commands) - if tool not in conversion_tools: - raise ValueError() + if tool not in conversion_tools: raise ValueError() except ValueError: raise HTTPException(status_code=400, detail="Invalid output format selected.") - job_id = uuid.uuid4().hex - safe_basename = secure_filename(file.filename) + + job_id, safe_basename = uuid.uuid4().hex, secure_filename(file.filename) original_stem = Path(safe_basename).stem target_ext = task_key.split('_')[0] - if tool == "ghostscript_pdf": - target_ext = "pdf" - upload_filename = f"{original_stem}_{job_id}{Path(safe_basename).suffix}" - processed_filename = f"{original_stem}_{job_id}.{target_ext}" - upload_path = PATHS.UPLOADS_DIR / upload_filename - processed_path = PATHS.PROCESSED_DIR / processed_filename - input_size = await save_upload_file_chunked(file, upload_path) + if tool == "ghostscript_pdf": target_ext = "pdf" + upload_path = PATHS.UPLOADS_DIR / f"{original_stem}_{job_id}{Path(safe_basename).suffix}" + processed_path = PATHS.PROCESSED_DIR / f"{original_stem}_{job_id}.{target_ext}" + input_size = await save_upload_file(file, upload_path) + base_url = str(request.base_url) + job_data = JobCreate(id=job_id, user_id=user['sub'], task_type="conversion", original_filename=file.filename, - input_filepath=str(upload_path), - input_filesize=input_size, - processed_filepath=str(processed_path)) + input_filepath=str(upload_path), input_filesize=input_size, processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) - run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools) + run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools, APP_CONFIG, base_url) return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} @app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED) -async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db), user: dict = Depends(require_user)): +async def submit_pdf_ocr(request: Request, file: UploadFile = File(...), db: Session = Depends(get_db), user: dict = Depends(require_user)): if not is_allowed_file(file.filename, {".pdf"}): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. 
Please upload a PDF.") - job_id = uuid.uuid4().hex - safe_basename = secure_filename(file.filename) + job_id, safe_basename = uuid.uuid4().hex, secure_filename(file.filename) unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}" upload_path = PATHS.UPLOADS_DIR / unique_filename processed_path = PATHS.PROCESSED_DIR / unique_filename - input_size = await save_upload_file_chunked(file, upload_path) + input_size = await save_upload_file(file, upload_path) + base_url = str(request.base_url) + job_data = JobCreate(id=job_id, user_id=user['sub'], task_type="ocr", original_filename=file.filename, - input_filepath=str(upload_path), - input_filesize=input_size, - processed_filepath=str(processed_path)) + input_filepath=str(upload_path), input_filesize=input_size, processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {}) - run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings) + run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings, APP_CONFIG, base_url) return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} @app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED) -async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db), user: dict = Depends(require_user)): +async def submit_image_ocr(request: Request, file: UploadFile = File(...), db: Session = Depends(get_db), user: dict = Depends(require_user)): allowed_exts = {".png", ".jpg", ".jpeg", ".tiff", ".tif"} if not is_allowed_file(file.filename, allowed_exts): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PNG, JPG, or TIFF.") - job_id = uuid.uuid4().hex - safe_basename = secure_filename(file.filename) + job_id, safe_basename = uuid.uuid4().hex, secure_filename(file.filename) file_ext = Path(safe_basename).suffix unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}" upload_path = PATHS.UPLOADS_DIR / unique_filename processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt" - input_size = await save_upload_file_chunked(file, upload_path) + input_size = await save_upload_file(file, upload_path) + base_url = str(request.base_url) + job_data = JobCreate(id=job_id, user_id=user['sub'], task_type="ocr-image", original_filename=file.filename, - input_filepath=str(upload_path), - input_filesize=input_size, - processed_filepath=str(processed_path)) + input_filepath=str(upload_path), input_filesize=input_size, processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) - run_image_ocr_task(new_job.id, str(upload_path), str(processed_path)) + run_image_ocr_task(new_job.id, str(upload_path), str(processed_path), APP_CONFIG, base_url) return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} -# --- Routes for auth and pages --- +# -------------------------------------------------------------------------------- +# --- API V1 ROUTES (for programmatic access) +# -------------------------------------------------------------------------------- +def is_allowed_callback_url(url: str, allowed: List[str]) -> bool: + if not allowed: + return False + try: + parsed = urlparse(url) + if not parsed.scheme or not parsed.netloc: + return False + for a in allowed: + ap = urlparse(a) + if ap.scheme and ap.netloc: + if parsed.scheme == ap.scheme and parsed.netloc == ap.netloc: + return True 
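+                    # Matching sketch (hostnames illustrative): an allowlist entry
+                    # "https://n8n.example.com" admits any callback on that exact
+                    # scheme+host, e.g. "https://n8n.example.com/webhook/abc", while
+                    # a bare entry without a scheme falls through to the legacy
+                    # startswith() prefix check below.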
+            else:
+                # support legacy prefix entries - keep fallback
+                if url.startswith(a):
+                    return True
+        return False
+    except Exception:
+        return False
+
+@app.get("/api/v1/tts-voices")
+async def get_tts_voices_list(user: dict = Depends(require_user)):
+    global AVAILABLE_TTS_VOICES_CACHE
+
+    kokoro_available = shutil.which("kokoro-tts") is not None
+    piper_available = PiperVoice is not None
+
+    if not piper_available and not kokoro_available:
+        return JSONResponse(content={"error": "TTS feature not configured on server (no TTS engines found)."}, status_code=501)
+
+    if AVAILABLE_TTS_VOICES_CACHE:
+        return AVAILABLE_TTS_VOICES_CACHE
+
+    all_voices = []
+    try:
+        if piper_available:
+            logger.info("Fetching available Piper voices list...")
+            piper_voices = safe_get_voices(PATHS.TTS_MODELS_DIR)
+            for voice in piper_voices:
+                voice['id'] = f"piper/{voice.get('id')}"
+                voice['name'] = f"Piper: {voice.get('name', voice.get('id'))}"
+            all_voices.extend(piper_voices)
+
+        if kokoro_available:
+            logger.info("Fetching available Kokoro TTS voices and languages...")
+            kokoro_voices = list_kokoro_voices_cli()
+            kokoro_langs = list_kokoro_languages_cli()
+            for lang in kokoro_langs:
+                for voice in kokoro_voices:
+                    all_voices.append({
+                        "id": f"kokoro/{lang}/{voice}",
+                        "name": f"Kokoro ({lang}): {voice}",
+                        "local": False
+                    })
+
+        AVAILABLE_TTS_VOICES_CACHE = sorted(all_voices, key=lambda x: x['name'])
+        return AVAILABLE_TTS_VOICES_CACHE
+    except Exception as e:
+        logger.exception("Could not fetch list of TTS voices.")
+        raise HTTPException(status_code=500, detail=f"Could not retrieve voices list: {e}")
+
+# --- Standard API endpoint (non-chunked) ---
+@app.post("/api/v1/process", status_code=status.HTTP_202_ACCEPTED, tags=["Webhook API"])
+async def api_process_file(
+    request: Request, file: UploadFile = File(...), task_type: str = Form(...), callback_url: str = Form(...),
+    model_size: Optional[str] = Form("base"), model_name: Optional[str] = Form(None),
+    output_format: Optional[str] = Form(None),
+    db: Session = Depends(get_db), user: dict = Depends(require_api_user)
+):
+    """
+    Programmatically submit a file for processing via a single HTTP request.
+    This is the recommended endpoint for services like n8n.
+    Requires bearer token authentication unless in LOCAL_ONLY_MODE.
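+
+    Illustrative example (the hostnames and token are placeholders for your
+    own deployment; httpx is already a project dependency):
+
+        import httpx
+        r = httpx.post(
+            "https://files.example.com/api/v1/process",
+            headers={"Authorization": "Bearer <api-token>"},
+            files={"file": open("scan.pdf", "rb")},
+            data={"task_type": "ocr", "callback_url": "https://n8n.example.com/webhook/ocr-done"},
+        )
+        r.raise_for_status()  # 202 -> {"job_id": "...", "status": "pending"}
+
+    The job result is delivered to callback_url once processing finishes.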
+ """ + webhook_config = APP_CONFIG.get("webhook_settings", {}) + if not webhook_config.get("enabled", False): + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Webhook processing is disabled on the server.") + + if not is_allowed_callback_url(callback_url, webhook_config.get("allowed_callback_urls", [])): + logger.warning(f"Rejected webhook from user '{user.get('email')}' with disallowed callback URL: {callback_url}") + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provided callback_url is not in the list of allowed URLs.") + + job_id = uuid.uuid4().hex + safe_basename = secure_filename(file.filename) + stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix + upload_filename = f"{stem}_{job_id}{suffix}" + upload_path = PATHS.UPLOADS_DIR / upload_filename + + try: + input_size = await save_upload_file(file, upload_path) + except HTTPException as e: + raise e # Re-raise exceptions from save_upload_file (e.g., file too large) + except Exception as e: + logger.exception("Failed to save uploaded file for webhook processing.") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Failed to save file: {e}") + + base_url = str(request.base_url) + job_data_args = { + "id": job_id, "user_id": user['sub'], "original_filename": file.filename, + "input_filepath": str(upload_path), "input_filesize": input_size, + "callback_url": callback_url, "task_type": task_type, + } + + # --- API Task Dispatching Logic --- + if task_type == "transcription": + whisper_config = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}) + if model_size not in whisper_config.get("allowed_models", []): + raise HTTPException(status_code=400, detail=f"Invalid model_size '{model_size}'") + processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.txt" + job_data_args["processed_filepath"] = str(processed_path) + create_job(db=db, job=JobCreate(**job_data_args)) + run_transcription_task(job_id, str(upload_path), str(processed_path), model_size, whisper_config, APP_CONFIG, base_url) + + elif task_type == "tts": + if not is_allowed_file(file.filename, {".txt"}): + raise HTTPException(status_code=400, detail="Invalid file type for TTS, requires .txt") + if not model_name: + raise HTTPException(status_code=400, detail="model_name is required for TTS task.") + tts_config = APP_CONFIG.get("tts_settings", {}) + processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.wav" + job_data_args["processed_filepath"] = str(processed_path) + create_job(db=db, job=JobCreate(**job_data_args)) + run_tts_task(job_id, str(upload_path), str(processed_path), model_name, tts_config, APP_CONFIG, base_url) + + elif task_type == "conversion": + if not output_format: + raise HTTPException(status_code=400, detail="output_format is required for conversion task.") + conversion_tools = APP_CONFIG.get("conversion_tools", {}) + try: + tool, task_key = output_format.split('_', 1) + if tool not in conversion_tools: raise ValueError("Invalid tool") + except ValueError: + raise HTTPException(status_code=400, detail="Invalid output_format selected.") + target_ext = task_key.split('_')[0] + if tool == "ghostscript_pdf": target_ext = "pdf" + processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.{target_ext}" + job_data_args["processed_filepath"] = str(processed_path) + create_job(db=db, job=JobCreate(**job_data_args)) + run_conversion_task(job_id, str(upload_path), str(processed_path), tool, task_key, conversion_tools, APP_CONFIG, base_url) + + elif task_type == "ocr": + if 
+            raise HTTPException(status_code=400, detail="Invalid file type for ocr, requires .pdf")
+        processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}{suffix}"
+        job_data_args["processed_filepath"] = str(processed_path)
+        create_job(db=db, job=JobCreate(**job_data_args))
+        run_pdf_ocr_task(job_id, str(upload_path), str(processed_path), APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {}), APP_CONFIG, base_url)
+
+    elif task_type == "ocr-image":
+        if not is_allowed_file(file.filename, {".png", ".jpg", ".jpeg", ".tiff", ".tif"}):
+            raise HTTPException(status_code=400, detail="Invalid file type for ocr-image.")
+        processed_path = PATHS.PROCESSED_DIR / f"{stem}_{job_id}.txt"
+        job_data_args["processed_filepath"] = str(processed_path)
+        create_job(db=db, job=JobCreate(**job_data_args))
+        run_image_ocr_task(job_id, str(upload_path), str(processed_path), APP_CONFIG, base_url)
+
+    else:
+        upload_path.unlink(missing_ok=True)  # Cleanup orphaned file
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid task_type: '{task_type}'")
+
+    return {"job_id": job_id, "status": "pending"}
+
+
+# --- Chunked API endpoints (optional) ---
+@app.post("/api/v1/upload/chunk", tags=["Webhook API"])
+async def api_upload_chunk(
+    chunk: UploadFile = File(...), upload_id: str = Form(...), chunk_number: int = Form(...),
+    user: dict = Depends(require_api_user)
+):
+    """API endpoint for uploading a single file chunk."""
+    webhook_config = APP_CONFIG.get("webhook_settings", {})
+    if not webhook_config.get("enabled", False) or not webhook_config.get("allow_chunked_api_uploads", False):
+        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Chunked API uploads are disabled.")
+
+    return await upload_chunk(chunk, upload_id, chunk_number, user)
+
+@app.post("/api/v1/upload/finalize", status_code=status.HTTP_202_ACCEPTED, tags=["Webhook API"])
+async def api_finalize_upload(
+    request: Request, payload: FinalizeUploadPayload, user: dict = Depends(require_api_user), db: Session = Depends(get_db)
+):
+    """API endpoint to finalize a chunked upload and start a processing job."""
+    webhook_config = APP_CONFIG.get("webhook_settings", {})
+    if not webhook_config.get("enabled", False) or not webhook_config.get("allow_chunked_api_uploads", False):
+        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Chunked API uploads are disabled.")
+
+    # Validate callback URL if provided for a webhook job
+    if payload.callback_url and not is_allowed_callback_url(payload.callback_url, webhook_config.get("allowed_callback_urls", [])):
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provided callback_url is not allowed.")
+
+    # Re-use the main finalization logic, but with API user context
+    return await finalize_upload(request, payload, user, db)
+
+
+# --------------------------------------------------------------------------------
+# --- AUTH & PAGE ROUTES
+# --------------------------------------------------------------------------------
 if not LOCAL_ONLY_MODE:
     @app.get('/login')
     async def login(request: Request):
         redirect_uri = request.url_for('auth')
@@ -885,7 +1782,6 @@ if not LOCAL_ONLY_MODE:
             token = await oauth.oidc.authorize_access_token(request)
             user = await oauth.oidc.userinfo(token=token)
             request.session['user'] = dict(user)
-            # Store id_token in session for logout
             request.session['id_token'] = token.get('id_token')
         except Exception as e:
             logger.error(f"Authentication failed: {e}")
@@ -895,29 +1791,18 @@ if not LOCAL_ONLY_MODE:
     @app.get("/logout")
     async def logout(request: Request):
        logout_endpoint = oauth.oidc.server_metadata.get("end_session_endpoint")
-        logger.info(f"OIDC end_session_endpoint: {logout_endpoint}")
-
-        # local-only logout if provider doesn't expose end_session_endpoint
         if not logout_endpoint:
             request.session.clear()
             logger.warning("OIDC 'end_session_endpoint' not found. Performing local-only logout.")
             return RedirectResponse(url="/", status_code=302)
-
-        # Prefer a single canonical / registered post-logout redirect URI from config
         post_logout_redirect_uri = str(request.url_for("get_index"))
-        logger.info(f"Post logout redirect URI: {post_logout_redirect_uri}")
-
         logout_url = f"{logout_endpoint}?post_logout_redirect_uri={post_logout_redirect_uri}"
-
-        logger.info(f"Redirecting to provider logout URL: {logout_url}")
-
         request.session.clear()
         return RedirectResponse(url=logout_url, status_code=302)
 
-#### TODO: Remove this weird forward authz endpoint, its needed if reverse proxy does foward auth
-
+# This is for reverse proxies that use forward auth
 @app.get("/api/authz/forward-auth")
 async def forward_auth(request: Request):
     redirect_uri = request.url_for('auth')
@@ -930,146 +1815,94 @@ async def get_index(request: Request):
     whisper_models = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}).get("allowed_models", [])
     conversion_tools = APP_CONFIG.get("conversion_tools", {})
     return templates.TemplateResponse("index.html", {
-        "request": request,
-        "user": user,
-        "is_admin": admin_status,
+        "request": request, "user": user, "is_admin": admin_status,
         "whisper_models": sorted(list(whisper_models)),
-        "conversion_tools": conversion_tools,
-        "local_only_mode": LOCAL_ONLY_MODE
+        "conversion_tools": conversion_tools, "local_only_mode": LOCAL_ONLY_MODE
     })
 
 @app.get("/settings")
 async def get_settings_page(request: Request):
-    """
-    Displays the contents of the currently active configuration file.
-    It prioritizes settings.yml and falls back to settings.default.yml.
-    """
+    """Displays the contents of the currently active configuration file."""
     user = get_current_user(request)
     admin_status = is_admin(request)
-    current_config = {}
-    config_source = "none" # A helper variable to track which file was loaded
-
+    current_config, config_source = {}, "none"
     try:
-        # 1. Attempt to load the primary, user-provided settings.yml
         with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
             current_config = yaml.safe_load(f) or {}
         config_source = str(PATHS.SETTINGS_FILE.name)
-        logger.info(f"Displaying configuration from '{config_source}' on settings page.")
-
     except FileNotFoundError:
-        logger.warning(f"'{PATHS.SETTINGS_FILE.name}' not found. Attempting to display fallback configuration.")
         try:
-            # 2. If it's not found, fall back to the default settings file
             with open(PATHS.DEFAULT_SETTINGS_FILE, 'r', encoding='utf8') as f:
                 current_config = yaml.safe_load(f) or {}
             config_source = str(PATHS.DEFAULT_SETTINGS_FILE.name)
-            logger.info(f"Displaying configuration from fallback '{config_source}' on settings page.")
-
-        except Exception as e_fallback:
-            # 3. If even the default file fails, log the error and use an empty config
-            logger.exception(f"CRITICAL: Could not load fallback '{PATHS.DEFAULT_SETTINGS_FILE.name}' for settings page: {e_fallback}")
-            current_config = {} # Failsafe
+        except Exception as e:
+            logger.exception(f"CRITICAL: Could not load fallback config: {e}")
             config_source = "error"
-
-    except Exception as e_primary:
-        # Handles other errors with the primary settings.yml (e.g., parsing errors, permissions)
-        logger.exception(f"Could not load '{PATHS.SETTINGS_FILE.name}' for settings page: {e_primary}")
-        current_config = {} # Failsafe
+    except Exception as e:
+        logger.exception(f"Could not load primary config: {e}")
        config_source = "error"
 
     return templates.TemplateResponse(
         "settings.html",
-        {
-            "request": request,
-            "config": current_config,
-            "config_source": config_source, # You can use this in the template!
-            "user": user,
-            "is_admin": admin_status,
-            "local_only_mode": LOCAL_ONLY_MODE,
-        }
+        {"request": request, "config": current_config, "config_source": config_source,
+         "user": user, "is_admin": admin_status, "local_only_mode": LOCAL_ONLY_MODE}
     )
 
-
-import collections.abc
-
 def deep_merge(source: dict, destination: dict) -> dict:
-    """
-    Recursively merges the `source` dictionary into the `destination` dictionary.
-
-    Values from `source` will overwrite values in `destination`.
-    """
+    """Recursively merge `source` into `destination`; values from `source` win."""
     for key, value in source.items():
         if isinstance(value, collections.abc.Mapping):
-            # If the value is a dictionary, recurse
             node = destination.setdefault(key, {})
             deep_merge(value, node)
         else:
-            # Otherwise, overwrite the value
             destination[key] = value
     return destination
 
 @app.post("/settings/save")
 async def save_settings(
-    request: Request,
-    new_config_from_ui: Dict = Body(...),
-    admin: bool = Depends(require_admin)
+    request: Request, new_config_from_ui: Dict = Body(...), admin: bool = Depends(require_admin)
 ):
-    """
-    Safely updates settings.yml by merging UI changes with the existing file,
-    preserving any settings not managed by the UI.
-    """
+    """Safely updates settings.yml by merging UI changes with the existing file."""
     tmp_path = PATHS.SETTINGS_FILE.with_suffix(".tmp")
     user = get_current_user(request)
-
     try:
-        # Handle the special case where the user wants to revert to defaults
         if not new_config_from_ui:
             if PATHS.SETTINGS_FILE.exists():
                 PATHS.SETTINGS_FILE.unlink()
-                logger.info(f"Admin '{user.get('email')}' reverted to default settings by deleting settings.yml.")
+                logger.info(f"Admin '{user.get('email')}' reverted to default settings.")
             load_app_config()
             return JSONResponse({"message": "Settings reverted to default."})
 
-        # --- Read-Modify-Write Cycle ---
-
-        # 1. READ: Load the current configuration from settings.yml on disk.
-        #    If the file doesn't exist, start with an empty dictionary.
         try:
             with PATHS.SETTINGS_FILE.open("r", encoding="utf8") as f:
                 current_config_on_disk = yaml.safe_load(f) or {}
         except FileNotFoundError:
             current_config_on_disk = {}
 
-        # 2. MODIFY: Deep merge the changes from the UI into the config from the disk.
-        #    The UI config (`source`) overwrites keys in the disk config (`destination`).
         merged_config = deep_merge(source=new_config_from_ui, destination=current_config_on_disk)
 
-        # 3. WRITE: Save the fully merged configuration back to the file.
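+        # Write-to-temp-then-replace keeps the update atomic: a crash mid-write
+        # can never leave a truncated settings.yml behind.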
         with tmp_path.open("w", encoding="utf8") as f:
             yaml.safe_dump(merged_config, f, default_flow_style=False, sort_keys=False)
         tmp_path.replace(PATHS.SETTINGS_FILE)
-        logger.info(f"Admin '{user.get('email')}' successfully updated settings.yml.")
-
-        # Reload the app config to apply changes immediately
+        logger.info(f"Admin '{user.get('email')}' updated settings.yml.")
         load_app_config()
-
-        return JSONResponse({"message": "Settings saved successfully. The new configuration is now active."})
+        return JSONResponse({"message": "Settings saved successfully."})
 
     except Exception as e:
         logger.exception(f"Failed to update settings for admin '{user.get('email')}'")
-        if tmp_path.exists():
-            tmp_path.unlink()
+        if tmp_path.exists(): tmp_path.unlink()
         raise HTTPException(status_code=500, detail=f"Could not save settings.yml: {e}")
 
-# job management endpoints
-
+# --------------------------------------------------------------------------------
+# --- JOB MANAGEMENT & UTILITY ROUTES
+# --------------------------------------------------------------------------------
 @app.post("/settings/clear-history")
 async def clear_job_history(db: Session = Depends(get_db), user: dict = Depends(require_user)):
     try:
         num_deleted = db.query(Job).filter(Job.user_id == user['sub']).delete()
         db.commit()
-        logger.info(f"Cleared {num_deleted} jobs from history for user {user['sub']}.")
+        logger.info(f"Cleared {num_deleted} jobs for user {user['sub']}.")
         return {"deleted_count": num_deleted}
     except Exception:
         db.rollback()
@@ -1078,22 +1911,20 @@ async def clear_job_history(db: Session = Depends(get_db), user: dict = Depends(
 @app.post("/settings/delete-files")
 async def delete_processed_files(db: Session = Depends(get_db), user: dict = Depends(require_user)):
-    deleted_count = 0
-    errors = []
-    user_jobs = get_jobs(db, user_id=user['sub'])
-    for job in user_jobs:
+    deleted_count, errors = 0, []
+    for job in get_jobs(db, user_id=user['sub']):
         if job.processed_filepath:
             try:
                 p = ensure_path_is_safe(Path(job.processed_filepath), [PATHS.PROCESSED_DIR])
                 if p.is_file():
                     p.unlink()
                     deleted_count += 1
-            except Exception as e:
+            except Exception:
                 errors.append(Path(job.processed_filepath).name)
-                logger.exception(f"Could not delete processed file {Path(job.processed_filepath).name}")
+                logger.exception(f"Could not delete file {Path(job.processed_filepath).name}")
     if errors:
         raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}")
-    logger.info(f"Deleted {deleted_count} files from processed directory for user {user['sub']}.")
+    logger.info(f"Deleted {deleted_count} files for user {user['sub']}.")
     return {"deleted_count": deleted_count}
 
 @app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED)
@@ -1123,7 +1954,9 @@ async def download_file(filename: str, db: Session = Depends(get_db), user: dict
     file_path = ensure_path_is_safe(PATHS.PROCESSED_DIR / safe_filename, [PATHS.PROCESSED_DIR])
     if not file_path.is_file():
         raise HTTPException(status_code=404, detail="File not found.")
-    job = db.query(Job).filter(Job.processed_filepath == str(file_path), Job.user_id == user['sub']).first()
+    # API users can download files they own via the webhook URL; UI users need a session.
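+    # (Assuming every job row stores a user_id, an unauthenticated request yields
+    # job_owner_id=None, matches no job, and falls through to the 403 below.)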
+    job_owner_id = user.get('sub') if user else None
+    job = db.query(Job).filter(Job.processed_filepath == str(file_path), Job.user_id == job_owner_id).first()
     if not job:
         raise HTTPException(status_code=403, detail="You do not have permission to download this file.")
     download_filename = Path(job.original_filename).stem + Path(job.processed_filepath).suffix
@@ -1133,7 +1966,7 @@ async def download_file(filename: str, db: Session = Depends(get_db), user: dict
 async def health():
     try:
         with engine.connect() as conn:
-            conn.execute("SELECT 1")
+            conn.execution_options(isolation_level="AUTOCOMMIT").execute("SELECT 1")
     except Exception:
         logger.exception("Health check failed")
         return JSONResponse({"ok": False}, status_code=500)
diff --git a/requirements.txt b/requirements.txt
index be5f40d..3dfab3e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,8 @@ faster-whisper
 ocrmypdf
 pytesseract
 pypdf
+piper-tts
+kokoro-tts
 
 # Configuration & Utilities
 werkzeug
diff --git a/settings.default.yml b/settings.default.yml
index 2906c10..5d8fc16 100644
--- a/settings.default.yml
+++ b/settings.default.yml
@@ -1,13 +1,31 @@
 auth_settings:
   oidc_client_id: filewiz
-  oidc_client_secret:
-  oidc_server_metadata_url: https://accounts.test.de/oidc/.well-known/openid-configuration
-  oidc_userinfo_endpoint: https://accounts.test.de/oidc/me
-  oidc_end_session_endpoint: https://accounts.test.de/oidc/session/end
+  oidc_client_secret:
+  oidc_server_metadata_url: https://accounts.example.com/oidc/.well-known/openid-configuration
+  oidc_userinfo_endpoint: https://accounts.example.com/oidc/me
+  oidc_end_session_endpoint: https://accounts.example.com/oidc/session/end
   admin_users:
-    - admin@local.com
+    - user@example.com
+webhook_settings:
+  enabled: False
+  allow_chunked_api_uploads: False
+  allowed_callback_urls: []   # e.g. ["https://n8n.example.com"]
+  callback_bearer_token: ""
+tts_settings:
+  piper:
+    model_dir: "./models/tts"
+    use_cuda: False
+    synthesis_config:
+      length_scale: 1.0
+      noise_scale: 0.667
+      noise_w: 0.8
+  kokoro:
+    model_dir: "./models/tts/kokoro"
+    command_template: "kokoro-tts {input} {output} --model {model_path} --voices {voices_path} --lang {lang} --voice {model_name}"
+
 app_settings:
   max_file_size_mb: '2000'
+  # app_public_url: "http://localhost:8000"  # Uncomment and set to your public URL if downloads don't work correctly
   allowed_all_extensions:
     - .aac
     - .aiff
@@ -352,4 +370,4 @@ conversion_tools:
       formats:
         jpg_q85: JPEG (High Quality)
         jpg_q75: JPEG (Web Quality)
-        jpg_q60: JPEG (Aggressive Compression)
\ No newline at end of file
+        jpg_q60: JPEG (Aggressive Compression)
diff --git a/static/css/style.css b/static/css/style.css
index 5b63c61..8c0466e 100644
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -278,7 +278,7 @@ input[type="file"] {
 
 .actions-group {
     display: grid;
-    grid-template-columns: repeat(3, 1fr); /* 3 columns for wide screens */
+    grid-template-columns: repeat(4, 1fr); /* 4 columns for wide screens */
     gap: 1.5rem;
     margin-top: 1.5rem;
 }
diff --git a/static/js/script.js b/static/js/script.js
index 7d23215..7b8da3a 100644
--- a/static/js/script.js
+++ b/static/js/script.js
@@ -2,6 +2,18 @@ document.addEventListener('DOMContentLoaded', () => {
     // --- Constants ---
     const CHUNK_SIZE = 5 * 1024 * 1024; // 5 MB chunks
 
+    // Allow server to provide API prefix (e.g. "/api/v1") via window.APP_CONFIG.api_base
+    const API_BASE = (window.APP_CONFIG && window.APP_CONFIG.api_base) ? window.APP_CONFIG.api_base.replace(/\/$/, '') : '';
+
+    function apiUrl(path) {
+        // path may start with or without a leading slash
+        if (!path) return API_BASE || '/';
+        if (path.startsWith('/')) {
+            return `${API_BASE}${path}`;
+        }
+        return `${API_BASE}/${path}`;
+    }
+
     // --- User Locale and Timezone Detection ---
     const USER_LOCALE = navigator.language || 'en-US';
     const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone;
@@ -24,9 +36,11 @@ document.addEventListener('DOMContentLoaded', () => {
     const mainFileName = document.getElementById('main-file-name');
     const mainOutputFormatSelect = document.getElementById('main-output-format-select');
     const mainModelSizeSelect = document.getElementById('main-model-size-select');
+    const mainTtsModelSelect = document.getElementById('main-tts-model-select');
     const startConversionBtn = document.getElementById('start-conversion-btn');
     const startOcrBtn = document.getElementById('start-ocr-btn');
     const startTranscriptionBtn = document.getElementById('start-transcription-btn');
+    const startTtsBtn = document.getElementById('start-tts-btn');
 
     const jobListBody = document.getElementById('job-list-body');
 
@@ -36,37 +50,50 @@ document.addEventListener('DOMContentLoaded', () => {
     const dialogFileCount = document.getElementById('dialog-file-count');
     const dialogInitialView = document.getElementById('dialog-initial-actions');
     const dialogConvertView = document.getElementById('dialog-convert-view');
+    const dialogTtsView = document.getElementById('dialog-tts-view');
     const dialogConvertBtn = document.getElementById('dialog-action-convert');
     const dialogOcrBtn = document.getElementById('dialog-action-ocr');
     const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe');
+    const dialogTtsBtn = document.getElementById('dialog-action-tts');
     const dialogCancelBtn = document.getElementById('dialog-action-cancel');
     const dialogStartConversionBtn = document.getElementById('dialog-start-conversion');
+    const dialogStartTtsBtn = document.getElementById('dialog-start-tts');
     const dialogBackBtn = document.getElementById('dialog-back');
+    const dialogBackTtsBtn = document.getElementById('dialog-back-tts');
     const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select');
+    const dialogTtsModelSelect = document.getElementById('dialog-tts-model-select');
 
     // --- State Variables ---
     let conversionChoices = null;
-    let modelChoices = null; // For the model dropdown instance
+    let transcriptionChoices = null;
+    let ttsChoices = null;
     let dialogConversionChoices = null;
+    let dialogTtsChoices = null;
+    let ttsModelsCache = []; // Cache for formatted TTS models list
     const activePolls = new Map();
     let stagedFiles = null;
 
     // --- Authentication-aware Fetch Wrapper ---
-    /**
-     * A wrapper around the native fetch API that handles 401 Unauthorized responses.
-     * If a 401 is received, it assumes the session has expired and redirects to the login page.
-     * @param {string} url - The URL to fetch.
-     * @param {object} options - The options for the fetch request.
-     * @returns {Promise} - A promise that resolves to the fetch Response.
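+    // authFetch: fetch() wrapper that resolves bare paths through apiUrl(),
+    // sends credentials, defaults Accept to JSON, and treats a 401 response
+    // as an expired session.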
-     */
-    async function authFetch(url, options) {
+    async function authFetch(url, options = {}) {
+        // Normalize URL through apiUrl() if a bare endpoint is provided
+        if (typeof url === 'string' && url.startsWith('/')) {
+            url = apiUrl(url);
+        }
+
+        // Add default options: include credentials and accept JSON by default
+        options = Object.assign({}, options);
+        if (!Object.prototype.hasOwnProperty.call(options, 'credentials')) {
+            options.credentials = 'include';
+        }
+        options.headers = options.headers || {};
+        if (!options.headers.Accept) options.headers.Accept = 'application/json';
+
         const response = await fetch(url, options);
         if (response.status === 401) {
-            // Use a simple alert for now. A more sophisticated modal could be used.
             alert('Your session has expired. You will be redirected to the login page.');
-            window.location.href = '/login';
-            // Throw an error to stop the promise chain of the calling function
+            window.location.href = '/login'; // page route, deliberately not routed through apiUrl()
             throw new Error('Session expired');
         }
         return response;
@@ -141,7 +168,8 @@ document.addEventListener('DOMContentLoaded', () => {
                 body: JSON.stringify(finalizePayload),
             });
             if (!finalizeResponse.ok) {
-                const errorData = await finalizeResponse.json();
+                let errorData = {};
+                try { errorData = await finalizeResponse.json(); } catch (e) {}
                 throw new Error(errorData.detail || 'Finalization failed');
             }
             const result = await finalizeResponse.json();
@@ -203,13 +231,23 @@ document.addEventListener('DOMContentLoaded', () => {
             }
             options.output_format = selectedFormat;
         } else if (taskType === 'transcription') {
-            options.model_size = mainModelSizeSelect.value;
+            const selectedModel = transcriptionChoices.getValue(true);
+            options.model_size = selectedModel;
+        } else if (taskType === 'tts') {
+            const selectedModel = ttsChoices.getValue(true);
+            if (!selectedModel) {
+                alert('Please select a voice model.');
+                return;
+            }
+            options.model_name = selectedModel;
         }
 
+        // Disable buttons during upload process
         startConversionBtn.disabled = true;
         startOcrBtn.disabled = true;
         startTranscriptionBtn.disabled = true;
+        startTtsBtn.disabled = true;
 
         const uploadPromises = files.map(file => uploadFileInChunks(file, taskType, options));
         await Promise.allSettled(uploadPromises);
@@ -220,6 +258,7 @@ document.addEventListener('DOMContentLoaded', () => {
         startConversionBtn.disabled = false;
         startOcrBtn.disabled = false;
         startTranscriptionBtn.disabled = false;
+        startTtsBtn.disabled = false;
     }
 
@@ -251,17 +290,25 @@ document.addEventListener('DOMContentLoaded', () => {
     function showActionDialog() {
         dialogFileCount.textContent = stagedFiles.length;
-        dialogOutputFormatSelect.innerHTML = mainOutputFormatSelect.innerHTML; // Use main select as template
+
+        // Setup Conversion Dropdown
+        dialogOutputFormatSelect.innerHTML = mainOutputFormatSelect.innerHTML;
         if (dialogConversionChoices) dialogConversionChoices.destroy();
         dialogConversionChoices = new Choices(dialogOutputFormatSelect, {
-            searchEnabled: true,
-            itemSelectText: 'Select',
-            shouldSort: false,
-            placeholder: true,
-            placeholderValue: 'Select a format...',
+            searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select a format...',
         });
+
+        // Setup TTS Dropdown
+        if (dialogTtsChoices) dialogTtsChoices.destroy();
+        dialogTtsChoices = new Choices(dialogTtsModelSelect, {
+            searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select a voice...',
+        });
+        dialogTtsChoices.setChoices(ttsModelsCache, 'value', 'label', true);
+
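+        // Note: the dropdown is filled from ttsModelsCache, which loadTtsModels()
+        // populates asynchronously at page load; if that fetch has not resolved
+        // yet, the voice list is simply empty when the dialog opens.
+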
         dialogInitialView.style.display = 'grid';
         dialogConvertView.style.display = 'none';
+        dialogTtsView.style.display = 'none';
         actionDialog.classList.add('visible');
     }
 
@@ -269,21 +316,34 @@ document.addEventListener('DOMContentLoaded', () => {
         actionDialog.classList.remove('visible');
         stagedFiles = null;
         if (dialogConversionChoices) {
-            dialogConversionChoices.hideDropdown();
             dialogConversionChoices.destroy();
             dialogConversionChoices = null;
         }
+        if (dialogTtsChoices) {
+            dialogTtsChoices.destroy();
+            dialogTtsChoices = null;
+        }
     }
-
+
+    // --- Dialog Button Listeners ---
     dialogConvertBtn.addEventListener('click', () => {
         dialogInitialView.style.display = 'none';
         dialogConvertView.style.display = 'block';
     });
+    dialogTtsBtn.addEventListener('click', () => {
+        dialogInitialView.style.display = 'none';
+        dialogTtsView.style.display = 'block';
+    });
     dialogBackBtn.addEventListener('click', () => {
         dialogInitialView.style.display = 'grid';
         dialogConvertView.style.display = 'none';
     });
+    dialogBackTtsBtn.addEventListener('click', () => {
+        dialogInitialView.style.display = 'grid';
+        dialogTtsView.style.display = 'none';
+    });
     dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('conversion'));
+    dialogStartTtsBtn.addEventListener('click', () => handleDialogAction('tts'));
     dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr'));
     dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcription'));
     dialogCancelBtn.addEventListener('click', closeActionDialog);
@@ -300,23 +360,81 @@ document.addEventListener('DOMContentLoaded', () => {
             options.output_format = selectedFormat;
         } else if (action === 'transcription') {
             options.model_size = mainModelSizeSelect.value;
+        } else if (action === 'tts') {
+            const selectedModel = dialogTtsChoices.getValue(true);
+            if (!selectedModel) {
+                alert('Please select a voice model.');
+                return;
+            }
+            options.model_name = selectedModel;
         }
 
         Array.from(stagedFiles).forEach(file => uploadFileInChunks(file, action, options));
         closeActionDialog();
     }
 
-    /**
-     * Initializes all Choices.js dropdowns on the page.
-     */
+    // -----------------------
+    // TTS models loader (robust)
+    // -----------------------
+    async function loadTtsModels() {
+        try {
+            const response = await authFetch('/api/v1/tts-voices');
+            if (!response.ok) throw new Error('Failed to fetch TTS voices.');
+            const voicesData = await response.json();
+
+            // voicesData might be an object map { id: meta } or an array [{ id, name, language, ... }]
+            const voicesArray = [];
+            if (Array.isArray(voicesData)) {
+                for (const v of voicesData) {
+                    // Accept either { id, name, language } or { voice_id, title, locale }
+                    const id = v.id || v.voice_id || v.voice || v.name || null;
+                    const name = v.name || v.title || v.display_name || id || 'Unknown';
+                    const lang = (v.language && (v.language.name_native || v.language.name)) || v.locale || (id ? id.split(/[_-]/)[0] : 'Unknown');
+                    if (id) voicesArray.push({ id, name, lang });
+                }
+            } else if (voicesData && typeof voicesData === 'object') {
+                for (const key in voicesData) {
+                    if (!Object.prototype.hasOwnProperty.call(voicesData, key)) continue;
+                    const v = voicesData[key];
+                    const id = v.id || key;
+                    const name = v.name || v.title || v.display_name || id;
+                    const lang = (v.language && (v.language.name_native || v.language.name)) || v.locale || (id ? id.split(/[_-]/)[0] : 'Unknown');
+                    voicesArray.push({ id, name, lang });
+                }
+            } else {
+                throw new Error('Unexpected voices payload');
+            }
+
+            // Group by language
+            const groups = {};
+            for (const v of voicesArray) {
+                const langLabel = v.lang || 'Unknown';
+                if (!groups[langLabel]) {
+                    groups[langLabel] = { label: langLabel, id: langLabel, disabled: false, choices: [] };
+                }
+                groups[langLabel].choices.push({
+                    value: v.id,
+                    label: `${v.name}`
+                });
+            }
+            ttsModelsCache = Object.values(groups).sort((a, b) => a.label.localeCompare(b.label));
+            // If ttsChoices exists, update it; otherwise the initializer will set choices
+            if (ttsChoices) {
+                ttsChoices.setChoices(ttsModelsCache, 'value', 'label', true);
+            }
+        } catch (error) {
+            console.error("Couldn't load TTS voices:", error);
+            if (error.message !== 'Session expired') {
+                if (ttsChoices) {
+                    ttsChoices.setChoices([{ value: '', label: 'Error loading voices', disabled: true }], 'value', 'label');
+                }
+            }
+        }
+    }
+
     function initializeSelectors() {
-        // --- Conversion Dropdown ---
         if (conversionChoices) conversionChoices.destroy();
         conversionChoices = new Choices(mainOutputFormatSelect, {
-            searchEnabled: true,
-            itemSelectText: 'Select',
-            shouldSort: false,
-            placeholder: true,
-            placeholderValue: 'Select a format...',
+            searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select a format...',
         });
         const tools = window.APP_CONFIG.conversionTools || {};
         const choicesArray = [];
@@ -324,22 +442,22 @@ document.addEventListener('DOMContentLoaded', () => {
             const tool = tools[toolKey];
             const group = { label: tool.name, id: toolKey, disabled: false, choices: [] };
             for (const formatKey in tool.formats) {
-                group.choices.push({
-                    value: `${toolKey}_${formatKey}`,
-                    label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})`
-                });
+                group.choices.push({ value: `${toolKey}_${formatKey}`, label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})` });
             }
             choicesArray.push(group);
         }
         conversionChoices.setChoices(choicesArray, 'value', 'label', true);
 
-        // --- Model Size Dropdown ---
-        if (modelChoices) modelChoices.destroy();
-        modelChoices = new Choices(mainModelSizeSelect, {
-            searchEnabled: false, // Disables the search box
-            shouldSort: false, // Keeps the original