From 20e41b67a7fb99429f47b5f9f85c70f3e386a6af Mon Sep 17 00:00:00 2001
From: Manuel
Date: Wed, 17 Sep 2025 18:45:55 +0000
Subject: [PATCH] Drag and Drop

---
 Dockerfile           |  42 +++++
 docker-compose.yml   |  10 ++
 main.py              | 401 +++++++++++++++++++++++++++---------------
 requirements.txt     | 167 +++++++++++++++---
 run.sh               |   4 +-
 settings.yml.default | 272 ++++++++++++++++++++++++++++
 settings.yml         | 335 +++++++++++++++++++++--------------
 static/css/style.css | 129 ++++++++++++--
 static/js/script.js  | 338 +++++++++++++++++++++++++++++------
 templates/index.html |  39 +++-
 10 files changed, 1358 insertions(+), 379 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 docker-compose.yml
 create mode 100644 settings.yml.default

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..cc2afd8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,42 @@
+# Dockerfile
+FROM python:3.13.7-slim
+
+# System tools used by the OCR/conversion tasks; clean the apt cache in the
+# same layer to keep the image smaller.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    tesseract-ocr \
+    ghostscript \
+    poppler-utils \
+    libreoffice \
+    imagemagick \
+    graphicsmagick \
+    libvips-tools \
+    ffmpeg \
+    libheif-examples \
+    inkscape \
+    calibre \
+    build-essential \
+    pkg-config \
+    git \
+    curl \
+    texlive \
+    texlive-latex-extra \
+    texlive-xetex \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory inside the container
+WORKDIR /app
+
+# Copy requirements and install dependencies
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the app
+COPY . .
+
+# Expose the app port
+EXPOSE 8000
+RUN chmod +x run.sh
+# Command to run when container starts
+CMD ["./run.sh"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..28f5839
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,10 @@
+version: "3.9"
+services:
+  web:
+    build: .
+    ports:
+      - "8000:8000"   # gunicorn binds 0.0.0.0:8000 (see run.sh / EXPOSE)
+    volumes:
+      - .:/app # optional: mount code for live changes
+    environment:
+      - PYTHONUNBUFFERED=1
diff --git a/main.py b/main.py
index 665b317..27b76de 100644
--- a/main.py
+++ b/main.py
@@ -6,7 +6,7 @@ import uuid
 import shlex
 import yaml
 from contextlib import asynccontextmanager
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Any
 
@@ -21,17 +21,21 @@ from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from huey import SqliteHuey
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, field_serializer # MODIFIED: Import field_serializer
-from sqlalchemy import (Column, DateTime, Integer, String, Text, create_engine,
-                        delete, event)
+from sqlalchemy import (Column, DateTime, Integer, String, Text, create_engine,
+                        delete, event, text)  # text() is required for raw SQL in the /health check
+from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from sqlalchemy.pool import NullPool
 from string import Formatter
-from sqlalchemy.orm import Session, declarative_base, sessionmaker
 from werkzeug.utils import secure_filename
+from typing import List as TypingList
 
 # --------------------------------------------------------------------------------
 # --- 1. 
CONFIGURATION # -------------------------------------------------------------------------------- +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + class AppPaths(BaseModel): BASE_DIR: Path = Path(__file__).resolve().parent @@ -43,30 +47,46 @@ class AppPaths(BaseModel): PATHS = AppPaths() APP_CONFIG: Dict[str, Any] = {} +PATHS.UPLOADS_DIR.mkdir(exist_ok=True) +PATHS.PROCESSED_DIR.mkdir(exist_ok=True) def load_app_config(): global APP_CONFIG try: - with open(PATHS.SETTINGS_FILE, 'r') as f: - APP_CONFIG = yaml.safe_load(f) - APP_CONFIG['app_settings']['max_file_size_bytes'] = APP_CONFIG['app_settings']['max_file_size_mb'] * 1024 * 1024 - allowed_extensions = { - ".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif", - ".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg", - ".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi", - ".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi", - ".azw3", ".pptx", ".xlsx" + with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f: + cfg_raw = yaml.safe_load(f) or {} + # basic defaults + defaults = { + "app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []}, + "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}}, + "conversion_tools": {}, + "ocr_settings": {"ocrmypdf": {}} } - APP_CONFIG['app_settings']['allowed_all_extensions'] = allowed_extensions + # shallow merge (safe for top-level keys) + cfg = defaults.copy() + cfg.update(cfg_raw) + # normalize app settings + app_settings = cfg.get("app_settings", {}) + max_mb = app_settings.get("max_file_size_mb", 100) + app_settings["max_file_size_bytes"] = int(max_mb) * 1024 * 1024 + allowed = app_settings.get("allowed_all_extensions", []) + if not isinstance(allowed, (list, set)): + allowed = list(allowed) + app_settings["allowed_all_extensions"] = set(allowed) + cfg["app_settings"] = app_settings + APP_CONFIG = cfg logger.info("Successfully loaded settings from settings.yml") except (FileNotFoundError, yaml.YAMLError) as e: - logger.error(f"Could not load settings.yml: {e}. App may not function correctly.") - APP_CONFIG = {} + logging.getLogger(__name__).exception(f"Could not load settings.yml: {e}. Using defaults.") + + APP_CONFIG = { + "app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set()}, + "transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}}, + "conversion_tools": {}, + "ocr_settings": {"ocrmypdf": {}} + } + -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) -PATHS.UPLOADS_DIR.mkdir(exist_ok=True) -PATHS.PROCESSED_DIR.mkdir(exist_ok=True) # -------------------------------------------------------------------------------- # --- 2. 
DATABASE & Schemas
# --------------------------------------------------------------------------------
@@ -77,8 +97,6 @@ engine = create_engine(
     poolclass=NullPool,
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-
-# THIS IS THE CRITICAL FIX
 Base = declarative_base()
 
 @event.listens_for(engine, "connect")
@@ -102,11 +120,13 @@ class Job(Base):
     progress = Column(Integer, default=0)
     original_filename = Column(String)
     input_filepath = Column(String)
+    input_filesize = Column(Integer, nullable=True)
     processed_filepath = Column(String, nullable=True)
+    output_filesize = Column(Integer, nullable=True)
     result_preview = Column(Text, nullable=True)
     error_message = Column(Text, nullable=True)
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
 
 def get_db():
     db = SessionLocal()
@@ -120,6 +140,7 @@ class JobCreate(BaseModel):
     task_type: str
     original_filename: str
     input_filepath: str
+    input_filesize: int | None = None
     processed_filepath: str | None = None
 
 class JobSchema(BaseModel):
@@ -128,6 +149,8 @@ class JobSchema(BaseModel):
     status: str
     progress: int
     original_filename: str
+    input_filesize: int | None = None
+    output_filesize: int | None = None
     processed_filepath: str | None = None
     result_preview: str | None = None
     error_message: str | None = None
@@ -135,8 +158,14 @@ class JobSchema(BaseModel):
     updated_at: datetime
     model_config = ConfigDict(from_attributes=True)
 
+    # NEW: This serializer ensures the datetime string sent to the frontend ALWAYS
+    # carries a UTC indicator, fixing the timezone bug.
+    @field_serializer('created_at', 'updated_at')
+    def serialize_dt(self, dt: datetime, _info):
+        # SQLite hands back naive UTC datetimes, so tag them with 'Z'. If a
+        # tz-aware value slips through (e.g. a freshly created row), normalize
+        # it instead of appending a second marker.
+        if dt.tzinfo is not None:
+            return dt.isoformat().replace("+00:00", "Z")
+        return dt.isoformat() + "Z"
+
 # --------------------------------------------------------------------------------
-# --- 3. CRUD OPERATIONS (No Changes)
+# --- 3. CRUD OPERATIONS
 # --------------------------------------------------------------------------------
 def get_job(db: Session, job_id: str):
     return db.query(Job).filter(Job.id == job_id).first()
@@ -163,80 +192,120 @@ def update_job_status(db: Session, job_id: str, status: str, progress: int = Non
         db.refresh(db_job)
     return db_job
 
-def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
+def mark_job_as_completed(db: Session, job_id: str, output_filepath_str: str | None = None, preview: str | None = None):
     db_job = get_job(db, job_id)
     if db_job and db_job.status != 'cancelled':
         db_job.status = "completed"
         db_job.progress = 100
         if preview:
             db_job.result_preview = preview.strip()[:2000]
+        if output_filepath_str:
+            try:
+                output_path = Path(output_filepath_str)
+                if output_path.exists():
+                    db_job.output_filesize = output_path.stat().st_size
+            except Exception:
+                logger.exception(f"Could not stat output file {output_filepath_str} for job {job_id}")
         db.commit()
     return db_job
 
 # --------------------------------------------------------------------------------
 # --- 4. BACKGROUND TASK SETUP
 # --------------------------------------------------------------------------------
 huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH)
 
-# --- START: NEW WHISPER MODEL CACHING ---
-# This dictionary will live in the memory of the Huey worker process,
-# allowing us to reuse loaded models across tasks. 
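+# A minimal sketch of the memoization get_whisper_model() below provides (the
+# model name and settings are examples taken from settings.yml, not the only
+# valid values):
+#
+#     m1 = get_whisper_model("base", {"compute_type": "int8"})  # loads the model
+#     m2 = get_whisper_model("base", {"compute_type": "int8"})  # cache hit
+#     assert m1 is m2  # same WhisperModel instance, no second load
+#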
+# Whisper model cache per worker process WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {} def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel: - """ - Loads a Whisper model into the cache if not present, and returns the model. - This ensures a model is only loaded into memory once per worker process. - """ - if model_size not in WHISPER_MODELS_CACHE: - compute_type = whisper_settings.get('compute_type', 'int8') - logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory...") - model = WhisperModel(model_size, device="cpu", compute_type=compute_type) - WHISPER_MODELS_CACHE[model_size] = model - logger.info(f"Model '{model_size}' loaded successfully.") - else: + if model_size in WHISPER_MODELS_CACHE: logger.info(f"Found model '{model_size}' in cache. Reusing.") - return WHISPER_MODELS_CACHE[model_size] -# --- END: NEW WHISPER MODEL CACHING --- + return WHISPER_MODELS_CACHE[model_size] + device = whisper_settings.get("device", "cpu") + compute_type = whisper_settings.get('compute_type', 'int8') + logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory on device={device}...") + try: + model = WhisperModel(model_size, device=device, compute_type=compute_type) + except Exception: + logger.exception("Failed to load whisper model") + raise + WHISPER_MODELS_CACHE[model_size] = model + logger.info(f"Model '{model_size}' loaded successfully.") + return model +# Helper: safe run_command (trimmed logs + timeout) +def run_command(argv: TypingList[str], timeout: int = 300): + try: + res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception(f"Command timed out after {timeout}s") + if res.returncode != 0: + stderr = (res.stderr or "")[:4000] + stdout = (res.stdout or "")[:4000] + raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}") + return res + +# Helper: validate and build command from template with allowlist +ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth", "filter"} + +def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> TypingList[str]: + """ + Validate placeholders against ALLOWED_VARS and build a safe argv list. + If a template uses allowed placeholders that are missing from `mapping`, + auto-fill sensible defaults: + - 'filter' -> mapping.get('output_ext', '') + - others -> empty string + This prevents KeyError while preserving the allowlist security check. 
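+
+    Illustrative example (the template is the mozjpeg entry from settings.yml;
+    the file names and quality value are hypothetical):
+
+        validate_and_build_command(
+            "cjpeg -quality {quality} -outfile {output} {input}",
+            {"input": "in.ppm", "output": "out.jpg", "quality": "85"},
+        )
+        # -> ["cjpeg", "-quality", "85", "-outfile", "out.jpg", "in.ppm"]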
+ """ + fmt = Formatter() + used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname} + bad = used - ALLOWED_VARS + if bad: + raise ValueError(f"Command template contains disallowed placeholders: {bad}") + + # auto-fill missing allowed placeholders with safe defaults + safe_mapping = dict(mapping) # shallow copy to avoid mutating caller mapping + for name in used: + if name not in safe_mapping: + if name == "filter": + safe_mapping[name] = safe_mapping.get("output_ext", "") + else: + safe_mapping[name] = "" + + formatted = template_str.format(**safe_mapping) + return shlex.split(formatted) @huey.task() def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict): db = SessionLocal() try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': return - + if not job or job.status == 'cancelled': + return update_job_status(db, job_id, "processing") - - # --- MODIFIED: Use the caching function to get the model --- model = get_whisper_model(model_size, whisper_settings) - logger.info(f"Starting transcription for job {job_id}") segments, info = model.transcribe(input_path_str, beam_size=5) - full_transcript = [] for segment in segments: - job_check = get_job(db, job_id) # Check for cancellation during long tasks + job_check = get_job(db, job_id) # Check for cancellation during long tasks if job_check.status == 'cancelled': logger.info(f"Job {job_id} cancelled during transcription.") return - if info.duration > 0: progress = int((segment.end / info.duration) * 100) update_job_status(db, job_id, "processing", progress=progress) - full_transcript.append(segment.text.strip()) - transcript_text = "\n".join(full_transcript) - # write atomically to avoid partial files + # atomic write of transcript — keep the real extension and mark tmp in the name out_path = Path(output_path_str) - tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp") + tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}") with tmp_out.open("w", encoding="utf-8") as f: f.write(transcript_text) tmp_out.replace(out_path) - - mark_job_as_completed(db, job_id, preview=transcript_text) + mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=transcript_text) logger.info(f"Transcription for job {job_id} completed.") except Exception: logger.exception(f"ERROR during transcription for job {job_id}") @@ -245,13 +314,13 @@ def run_transcription_task(job_id: str, input_path_str: str, output_path_str: st Path(input_path_str).unlink(missing_ok=True) db.close() -# Other tasks remain unchanged @huey.task() def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict): db = SessionLocal() try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': return + if not job or job.status == 'cancelled': + return update_job_status(db, job_id, "processing") logger.info(f"Starting PDF OCR for job {job_id}") ocrmypdf.ocr(input_path_str, output_path_str, @@ -263,7 +332,7 @@ def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr with open(output_path_str, "rb") as f: reader = pypdf.PdfReader(f) preview = "\n".join(page.extract_text() or "" for page in reader.pages) - mark_job_as_completed(db, job_id, preview=preview) + mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=preview) logger.info(f"PDF OCR for job {job_id} completed.") except Exception: logger.exception(f"ERROR during PDF OCR for job 
{job_id}") @@ -277,13 +346,18 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str): db = SessionLocal() try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': return + if not job or job.status == 'cancelled': + return update_job_status(db, job_id, "processing", progress=50) logger.info(f"Starting Image OCR for job {job_id}") text = pytesseract.image_to_string(Image.open(input_path_str)) - with open(output_path_str, "w", encoding="utf-8") as f: + # atomic write of OCR text + out_path = Path(output_path_str) + tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}") + with tmp_out.open("w", encoding="utf-8") as f: f.write(text) - mark_job_as_completed(db, job_id, preview=text) + tmp_out.replace(out_path) + mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=text) logger.info(f"Image OCR for job {job_id} completed.") except Exception: logger.exception(f"ERROR during Image OCR for job {job_id}") @@ -300,14 +374,18 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, temp_output_file = None try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': return + if not job or job.status == 'cancelled': + return update_job_status(db, job_id, "processing", progress=25) logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}") tool_config = conversion_tools_config.get(tool) - if not tool_config: raise ValueError(f"Unknown conversion tool: {tool}") + if not tool_config: + raise ValueError(f"Unknown conversion tool: {tool}") input_path = Path(input_path_str) output_path = Path(output_path_str) current_input_path = input_path + + # Pre-processing for specific tools if tool == "mozjpeg": temp_input_file = input_path.with_suffix('.temp.ppm') logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}") @@ -317,22 +395,12 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, err = (pre_conv_result.stderr or "")[:4000] raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}") current_input_path = temp_input_file + update_job_status(db, job_id, "processing", progress=50) - # Build safe mapping for formatting and validate placeholders - ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"} - def validate_and_build_command(template_str: str, mapping: dict): - fmt = Formatter() - used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname} - bad = used - ALLOWED_VARS - if bad: - raise ValueError(f"Command template contains disallowed placeholders: {bad}") - formatted = template_str.format(**mapping) - return shlex.split(formatted) - # Use a temporary output path and atomically move into place after success - temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp") - - # Prepare mapping + # prepare temporary output and mapping + # use a temp filename that preserves the real extension, e.g. 
file.tmp-.pdf
+        temp_output_file = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")
         mapping = {
             "input": str(current_input_path),
             "output": str(temp_output_file),
@@ -340,7 +408,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
             "output_ext": output_path.suffix.lstrip('.'),
         }
 
-        # Allow tool-specific adjustments to mapping
+        # tool specific mapping adjustments
         if tool.startswith("ghostscript"):
             device, setting = task_key.split('_')
             mapping.update({"device": device, "dpi": setting, "preset": setting})
@@ -358,38 +426,30 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
             _, quality = task_key.split('_')
             quality = quality.replace('q', '')
             mapping.update({"quality": quality})
+        elif tool == "libreoffice":
+            target_ext = output_path.suffix.lstrip('.')
+            # tool_config may include a 'filters' mapping (see settings.yml example)
+            filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)
+            mapping["filter"] = filter_val
 
         command_template_str = tool_config["command_template"]
         command = validate_and_build_command(command_template_str, mapping)
         logger.info(f"Executing command: {' '.join(command)}")
 
-        # run with timeout and capture output; run_command helper ensures trimmed logs on failure
-        def run_command(argv: List[str], timeout: int = 300):
-            try:
-                res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
-            except subprocess.TimeoutExpired:
-                raise Exception(f"Command timed out after {timeout}s")
-            if res.returncode != 0:
-                stderr = (res.stderr or "")[:4000]
-                stdout = (res.stdout or "")[:4000]
-                raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
-            return res
+        # execute command with timeout and trimmed logs on error
         result = run_command(command, timeout=tool_config.get("timeout", 300))
 
-        if tool == "libreoffice":
-            expected_output_filename = input_path.with_suffix(output_path.suffix).name
-            generated_file = output_path.parent / expected_output_filename
-            if generated_file.exists():
-                # move generated file into place
-                generated_file.replace(output_path)
-            else:
-                raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}")
+
+        # Special-case LibreOffice: it ignores {output} and writes
+        # <input stem>.<target ext> into --outdir, which here resolves to the
+        # final output path itself, so only verify the file was produced.
+        if tool == "libreoffice" and not output_path.exists():
+            raise Exception(f"LibreOffice did not create the expected file: {output_path.name}")
+
+        # move temp output into final location atomically
         if temp_output_file and temp_output_file.exists():
             temp_output_file.replace(output_path)
 
-        mark_job_as_completed(db, job_id, preview=f"Successfully converted file.")
+        mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview="Successfully converted file.")
         logger.info(f"Conversion for job {job_id} completed.")
-    except Exception as e:
+    except Exception:
         logger.exception(f"ERROR during conversion for job {job_id}")
         update_job_status(db, job_id, "failed", error="See server logs for details.")
     finally:
@@ -415,13 +475,14 @@ app = FastAPI(lifespan=lifespan)
 app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static")
 templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates")
 
-async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
+async def save_upload_file_chunked(upload_file: UploadFile, destination: Path) -> int:
     """
-    Streams the uploaded file in chunks directly to a file on disk.
-    This is memory-efficient and reliable for large files. 
+ Write upload to a tmp file in chunks, then atomically move to final destination. + Returns the final size of the file in bytes. """ max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024) - tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp") + # make a temp filename that keeps the real extension, e.g. file.tmp-.pdf + tmp = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}") size = 0 try: with tmp.open("wb") as buffer: @@ -433,17 +494,16 @@ async def save_upload_file_chunked(upload_file: UploadFile, destination: Path): if size > max_size: raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit") buffer.write(chunk) - # atomic move into place tmp.replace(destination) + return size except Exception: tmp.unlink(missing_ok=True) raise - def is_allowed_file(filename: str, allowed_extensions: set) -> bool: return Path(filename).suffix.lower() in allowed_extensions -# --- Routes (only transcription route is modified) --- +# --- Routes (transcription route uses Huey task enqueuing) --- @app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED) async def submit_audio_transcription( @@ -453,7 +513,7 @@ async def submit_audio_transcription( ): if not is_allowed_file(file.filename, {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus"}): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file type.") - + whisper_config = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}) if model_size not in whisper_config.get("allowed_models", []): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.") @@ -461,24 +521,29 @@ async def submit_audio_transcription( job_id = uuid.uuid4().hex safe_basename = secure_filename(file.filename) stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix - + audio_filename = f"{stem}_{job_id}{suffix}" transcript_filename = f"{stem}_{job_id}.txt" upload_path = PATHS.UPLOADS_DIR / audio_filename processed_path = PATHS.PROCESSED_DIR / transcript_filename - await save_upload_file_chunked(file, upload_path) - - job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) + input_size = await save_upload_file_chunked(file, upload_path) + + job_data = JobCreate( + id=job_id, + task_type="transcription", + original_filename=file.filename, + input_filepath=str(upload_path), + input_filesize=input_size, + processed_filepath=str(processed_path) + ) new_job = create_job(db=db, job=job_data) - - # --- MODIFIED: Pass whisper_config to the task --- + + # enqueue the Huey task (decorated function call enqueues when using huey) run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config) + + return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} - return {"job_id": new_job.id, "status": new_job.status} - - -# --- Other routes remain unchanged --- @app.get("/") async def get_index(request: Request): @@ -493,23 +558,55 @@ async def get_index(request: Request): @app.get("/settings") async def get_settings_page(request: Request): try: - with open(PATHS.SETTINGS_FILE, 'r') as f: - current_config = yaml.safe_load(f) - except Exception as e: - logger.error(f"Could not load settings.yml for settings page: {e}") + with open(PATHS.SETTINGS_FILE, 'r', 
encoding='utf8') as f: + current_config = yaml.safe_load(f) or {} + except Exception: + logger.exception("Could not load settings.yml for settings page") current_config = {} return templates.TemplateResponse("settings.html", {"request": request, "config": current_config}) +def deep_merge(base: dict, updates: dict) -> dict: + """ + Recursively merge `updates` into `base`. Lists and scalars are replaced. + """ + for key, value in updates.items(): + if ( + key in base + and isinstance(base[key], dict) + and isinstance(value, dict) + ): + base[key] = deep_merge(base[key], value) + else: + base[key] = value + return base + + @app.post("/settings/save") async def save_settings(new_config: Dict = Body(...)): + tmp = PATHS.SETTINGS_FILE.with_suffix(".tmp") try: - with open(PATHS.SETTINGS_FILE, 'w') as f: - yaml.dump(new_config, f, default_flow_style=False, sort_keys=False) + # load existing config if present + try: + with PATHS.SETTINGS_FILE.open("r", encoding="utf8") as f: + current_config = yaml.safe_load(f) or {} + except FileNotFoundError: + current_config = {} + + # deep merge new values + merged = deep_merge(current_config, new_config) + + # atomic write back + with tmp.open("w", encoding="utf8") as f: + yaml.safe_dump(merged, f, default_flow_style=False, sort_keys=False) + tmp.replace(PATHS.SETTINGS_FILE) + load_app_config() - return JSONResponse({"message": "Settings saved successfully."}) - except Exception as e: - logger.error(f"Failed to save settings: {e}") - raise HTTPException(status_code=500, detail="Could not write to settings.yml.") + return JSONResponse({"message": "Settings updated successfully."}) + except Exception: + logger.exception("Failed to update settings") + tmp.unlink(missing_ok=True) + raise HTTPException(status_code=500, detail="Could not update settings.yml.") + @app.post("/settings/clear-history") async def clear_job_history(db: Session = Depends(get_db)): @@ -518,9 +615,9 @@ async def clear_job_history(db: Session = Depends(get_db)): db.commit() logger.info(f"Cleared {num_deleted} jobs from history.") return {"deleted_count": num_deleted} - except Exception as e: + except Exception: db.rollback() - logger.error(f"Failed to clear job history: {e}") + logger.exception("Failed to clear job history") raise HTTPException(status_code=500, detail="Database error while clearing history.") @app.post("/settings/delete-files") @@ -532,9 +629,9 @@ async def delete_processed_files(): if f.is_file(): f.unlink() deleted_count += 1 - except Exception as e: + except Exception: errors.append(f.name) - logger.error(f"Could not delete processed file {f.name}: {e}") + logger.exception(f"Could not delete processed file {f.name}") if errors: raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}") logger.info(f"Deleted {deleted_count} files from processed directory.") @@ -562,12 +659,14 @@ async def submit_file_conversion(file: UploadFile = File(...), output_format: st processed_filename = f"{original_stem}_{job_id}.{target_ext}" upload_path = PATHS.UPLOADS_DIR / upload_filename processed_path = PATHS.PROCESSED_DIR / processed_filename - await save_upload_file_chunked(file, upload_path) + input_size = await save_upload_file_chunked(file, upload_path) job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename, - input_filepath=str(upload_path), processed_filepath=str(processed_path)) + input_filepath=str(upload_path), + input_filesize=input_size, + processed_filepath=str(processed_path)) new_job = create_job(db=db, 
job=job_data) run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools) - return {"job_id": new_job.id, "status": new_job.status} + return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} @app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED) async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)): @@ -578,12 +677,15 @@ async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}" upload_path = PATHS.UPLOADS_DIR / unique_filename processed_path = PATHS.PROCESSED_DIR / unique_filename - await save_upload_file_chunked(file, upload_path) - job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) + input_size = await save_upload_file_chunked(file, upload_path) + job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, + input_filepath=str(upload_path), + input_filesize=input_size, + processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {}) run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings) - return {"job_id": new_job.id, "status": new_job.status} + return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} @app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED) async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)): @@ -596,11 +698,14 @@ async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(g unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}" upload_path = PATHS.UPLOADS_DIR / unique_filename processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt" - await save_upload_file_chunked(file, upload_path) - job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) + input_size = await save_upload_file_chunked(file, upload_path) + job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, + input_filepath=str(upload_path), + input_filesize=input_size, + processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) run_image_ocr_task(new_job.id, str(upload_path), str(processed_path)) - return {"job_id": new_job.id, "status": new_job.status} + return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"} @app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED) async def cancel_job(job_id: str, db: Session = Depends(get_db)): @@ -626,8 +731,7 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)): @app.get("/download/{filename}") async def download_file(filename: str): safe_filename = secure_filename(filename) - file_path = PATHS.PROCESSED_DIR / safe_filename - file_path = file_path.resolve() + file_path = (PATHS.PROCESSED_DIR / safe_filename).resolve() base = PATHS.PROCESSED_DIR.resolve() try: file_path.relative_to(base) @@ -635,4 +739,15 @@ async def download_file(filename: str): raise HTTPException(status_code=403, detail="Access denied.") if not file_path.is_file(): raise HTTPException(status_code=404, detail="File not found.") - return FileResponse(path=file_path, filename=safe_filename, 
media_type="application/octet-stream") \ No newline at end of file + return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream") + +# Small health endpoint +@app.get("/health") +async def health(): + try: + with engine.connect() as conn: + conn.execute("SELECT 1") + except Exception: + logger.exception("Health check failed") + return JSONResponse({"ok": False}, status_code=500) + return {"ok": True} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 65dea7d..4cc7d6f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,145 @@ -# requirements.txt - -# Web framework -fastapi -uvicorn[standard] -python-multipart -jinja2 - -# PDF OCR -ocrmypdf -PyPDF2 - -# Audio Transcription -faster-whisper -# The following are core dependencies for faster-whisper, -# but it's good to list them explicitly. -# ctranslate2 -# transformers -# torch # Note: torch is a dependency of transformers - -# Utilities -werkzeug +annotated-types==0.7.0 +anyio==4.10.0 +audioop-lts==0.2.2 +av==15.1.0 +azure-ai-documentintelligence==1.0.2 +azure-core==1.35.1 +azure-identity==1.25.0 +beautifulsoup4==4.13.5 +certifi==2025.8.3 +cffi==2.0.0 +chardet==5.2.0 +charset-normalizer==3.4.3 +click==8.2.1 +cobble==0.1.4 +coloredlogs==15.0.1 +cryptography==45.0.7 +css-parser==1.0.10 +ctranslate2==4.6.0 +defusedxml==0.7.1 +Deprecated==1.2.18 +deprecation==2.1.0 +et_xmlfile==2.0.0 +fastapi==0.116.1 +faster-whisper==1.2.0 +filelock==3.19.1 +flatbuffers==25.2.10 +fsspec==2025.9.0 +greenlet==3.2.4 +gunicorn==23.0.0 +h11==0.16.0 +hf-xet==1.1.10 +html5-parser==0.4.12 +html5lib==1.1 +httptools==0.6.4 +huey==2.5.3 +huggingface-hub==0.34.4 +humanfriendly==10.0 +idna==3.10 +imageio==2.37.0 +img2pdf==0.6.1 +isodate==0.7.2 +Jinja2==3.1.6 +lazy_loader==0.4 +lxml==6.0.1 +magika==0.6.2 +mammoth==1.10.0 +markdown-it-py==4.0.0 +markdownify==1.2.0 +markitdown==0.1.3 +MarkupSafe==3.0.2 +mdurl==0.1.2 +mechanize==0.4.10 +mpmath==1.3.0 +msal==1.33.0 +msal-extensions==1.3.1 +msgpack==1.1.1 +networkx==3.5 +ninja==1.13.0 +numpy==2.2.6 +nvidia-cublas-cu12==12.8.4.1 +nvidia-cuda-cupti-cu12==12.8.90 +nvidia-cuda-nvrtc-cu12==12.8.93 +nvidia-cuda-runtime-cu12==12.8.90 +nvidia-cudnn-cu12==9.10.2.21 +nvidia-cufft-cu12==11.3.3.83 +nvidia-cufile-cu12==1.13.1.3 +nvidia-curand-cu12==10.3.9.90 +nvidia-cusolver-cu12==11.7.3.90 +nvidia-cusparse-cu12==12.5.8.93 +nvidia-cusparselt-cu12==0.7.1 +nvidia-nccl-cu12==2.27.3 +nvidia-nvjitlink-cu12==12.8.93 +nvidia-nvtx-cu12==12.8.90 +ocrmypdf==16.11.0 +olefile==0.47 +onnxruntime==1.22.1 +opencv-python-headless==4.12.0.88 +openpyxl==3.1.5 +packaging==25.0 +pandas==2.3.2 +pdfminer.six==20250506 +pi_heif==1.1.0 +pikepdf==9.11.0 +pillow==11.3.0 +pluggy==1.6.0 +protobuf==6.32.1 +pyclipper==1.3.0.post6 +pycparser==2.23 +pydantic==2.11.9 +pydantic-settings==2.10.1 +pydantic_core==2.33.2 +pydub==0.25.1 +Pygments==2.19.2 +PyJWT==2.10.1 +pypdf==6.0.0 +PyPDF2==3.0.1 +PyQt6==6.9.1 +PyQt6-Qt6==6.9.2 +PyQt6-WebEngine==6.9.0 +PyQt6-WebEngine-Qt6==6.9.2 +PyQt6_sip==13.10.2 +pytesseract==0.3.13 +python-bidi==0.6.6 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +python-multipart==0.0.20 +python-pptx==1.0.2 +pytz==2025.2 +PyYAML==6.0.2 +regex==2025.9.1 +requests==2.32.5 +rich==14.1.0 +scikit-image==0.25.2 +scipy==1.16.2 +setuptools==80.9.0 +shapely==2.1.1 +six==1.17.0 +sniffio==1.3.1 +soupsieve==2.8 +SpeechRecognition==3.14.3 +SQLAlchemy==2.0.43 +standard-aifc==3.13.0 +standard-chunk==3.13.0 +starlette==0.47.3 +sympy==1.14.0 +tifffile==2025.9.9 +tokenizers==0.22.0 
+torch==2.8.0
+torchvision==0.23.0
+tqdm==4.67.1
+triton==3.4.0
+typing-inspection==0.4.1
+typing_extensions==4.15.0
+tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.35.0
+uvloop==0.21.0
+watchfiles==1.1.0
+webencodings==0.5.1
+websockets==15.0.1
+Werkzeug==3.1.3
+wrapt==1.17.3
+xlrd==2.0.2
+xlsxwriter==3.2.9
+youtube-transcript-api==1.0.3
diff --git a/run.sh b/run.sh
index c59047d..0e27e9a 100755
--- a/run.sh
+++ b/run.sh
@@ -3,8 +3,8 @@
 
 echo "Starting DocProcessor with Gunicorn..."
-exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
+gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
 echo "Done"
 
 echo "Starting huey..."
-exec huey_consumer.py main.huey -w 2 &
+huey_consumer.py main.huey -w 4 &
 echo "Done"
+
+# Keep PID 1 alive: with both processes backgrounded the script (and thus the
+# container) would otherwise exit immediately; `exec` on a backgrounded
+# command only replaces the forked subshell.
+wait
diff --git a/settings.yml.default b/settings.yml.default
new file mode 100644
index 0000000..e139f6d
--- /dev/null
+++ b/settings.yml.default
@@ -0,0 +1,272 @@
+# settings.yml.default
+
+# General application settings
+app_settings:
+  max_file_size_mb: 2000 # Maximum upload size in Megabytes
+  # Allowed extensions (list will be normalized to a set by the server)
+  allowed_all_extensions:
+    - .pdf
+    - .ps
+    - .eps
+    - .png
+    - .jpg
+    - .jpeg
+    - .tiff
+    - .tif
+    - .gif
+    - .bmp
+    - .webp
+    - .svg
+    - .jxl
+    - .avif
+    - .ppm
+    - .mp3
+    - .m4a
+    - .ogg
+    - .flac
+    - .opus
+    - .wav
+    - .aac
+    - .mp4
+    - .mkv
+    - .mov
+    - .webm
+    - .avi
+    - .flv
+    - .md
+    - .txt
+    - .html
+    - .docx
+    - .odt
+    - .rst
+    - .epub
+    - .mobi
+    - .azw3
+    - .pptx
+    - .xlsx
+
+# Settings for Optical Character Recognition (OCR) tasks
+ocr_settings:
+  ocrmypdf:
+    deskew: true
+    clean: true
+    optimize: 1
+    force_ocr: true
+
+# Settings for audio transcription tasks
+transcription_settings:
+  whisper:
+    compute_type: "int8"
+    allowed_models:
+      - "tiny"
+      - "base"
+      - "small"
+      - "medium"
+      - "large-v3"
+      - "distil-large-v2"
+    # optional: specify device if workers have GPU (e.g. "cuda" or "cpu")
+    # device: "cpu"
+
+# --- Conversion Tool Definitions ---
+# The server validates placeholders against an allowlist:
+# {input}, {output}, {output_dir}, {output_ext}, {quality}, {speed},
+# {preset}, {device}, {dpi}, {samplerate}, {bitdepth}, {filter}
+conversion_tools:
+  libreoffice:
+    name: "LibreOffice"
+    # Use {filter} so we can supply LibreOffice export filters like "txt:Text"
+    command_template: 'libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}'
+    timeout: 120
+    # Optional: per-format export filter. If missing for a format, server falls back to the extension.
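+    # For example, with the filters below a request for .txt output expands
+    # the command template to (paths are illustrative):
+    #   libreoffice --headless --convert-to txt:Text --outdir /app/processed /app/uploads/report_1a2b.docx
+    # A format with no filter entry falls back to the bare extension.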
+ filters: + pdf: "pdf" + docx: "docx" + odt: "odt" + html: "html" + rtf: "rtf" + txt: "txt:Text" + xml: "xml" + epub: "epub" + xlsx: "xlsx" + ods: "ods" + csv: "csv:Text" + pptx: "pptx" + odp: "odp" + svg: "svg" + formats: + pdf: "PDF" + docx: "Word Document" + odt: "OpenDocument Text" + html: "HTML" + rtf: "Rich Text Format" + txt: "Plain Text" + xml: "Word 2003 XML" + epub: "EPUB" + xlsx: "Excel Spreadsheet" + ods: "OpenDocument Spreadsheet" + csv: "CSV" + pptx: "PowerPoint Presentation" + odp: "OpenDocument Presentation" + svg: "SVG" + + pandoc: + name: "Pandoc" + command_template: 'pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex' + timeout: 60 + formats: + docx: "Word Document" + odt: "OpenDocument Text" + pdf: "PDF" + rtf: "Rich Text Format" + txt: "Plain Text" + tex: "LaTeX" + man: "Groff Man Page" + epub: "EPUB v3 Book" + epub2: "EPUB v2 Book" + html: "HTML" + html5: "HTML5" + pptx: "PowerPoint Presentation" + beamer: "Beamer PDF Slides" + slidy: "Slidy HTML Slides" + md: "Markdown" + rst: "reStructuredText" + jira: "Jira Wiki Markup" + mediawiki: "MediaWiki Markup" + + ghostscript_pdf: + name: "Ghostscript (PDF)" + # placeholders used: {preset}, {output}, {input} + command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}' + timeout: 60 + formats: + screen: "PDF (Optimized for Screen)" + ebook: "PDF (Optimized for Ebooks)" + printer: "PDF (Optimized for Print)" + archive: "PDF/A (for Archiving)" + + ghostscript_image: + name: "Ghostscript (Image)" + # placeholders used: {device}, {dpi}, {output}, {input} + command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}' + timeout: 60 + formats: + jpeg_72: "JPEG Image (72 DPI)" + jpeg_300: "JPEG Image (300 DPI)" + png16m_150: "PNG Image (150 DPI)" + png16m_300: "PNG Image (300 DPI)" + tiff24nc_300: "TIFF Image (300 DPI)" + tiff24nc_600: "TIFF Image (600 DPI)" + + calibre: + name: "Calibre (ebook-convert)" + command_template: 'ebook-convert {input} {output}' + timeout: 60 + formats: + epub: "EPUB" + mobi: "MOBI" + azw3: "Amazon Kindle" + pdf: "PDF" + docx: "Word Document" + + ffmpeg: + name: "FFmpeg" + command_template: 'ffmpeg -i {input} -y -preset medium {output}' + timeout: 300 + formats: + mp4: "MP4 Video" + mkv: "MKV Video" + mov: "MOV Video" + webm: "WebM Video" + mp3: "MP3 Audio" + wav: "WAV Audio" + flac: "FLAC Audio" + gif: "Animated GIF" + + vips: + name: "VIPS" + command_template: 'vips copy {input} {output}[Q=90]' + timeout: 60 + formats: + jpg: "JPEG Image (Q90)" + png: "PNG Image" + webp: "WebP Image (Q90)" + tiff: "TIFF Image" + avif: "AVIF Image" + + graphicsmagick: + name: "GraphicsMagick" + command_template: 'gm convert {input} -quality 90 {output}' + timeout: 60 + formats: + jpg: "JPEG Image (Q90)" + png: "PNG Image" + webp: "WebP Image" + tiff: "TIFF Image" + pdf: "PDF from Images" + + inkscape: + name: "Inkscape" + command_template: 'inkscape {input} --export-filename={output}' + timeout: 30 + formats: + svg: "SVG (Plain)" + png: "PNG Image (96 DPI)" + pdf: "PDF Document" + + libjxl: + name: "libjxl (cjxl)" + command_template: 'cjxl {input} {output} -q 90' + timeout: 30 + formats: + jxl: "JPEG XL (Q90)" + + resvg: + name: "resvg" + command_template: 'resvg {input} {output}' + timeout: 30 + formats: + png: "PNG from SVG" + + potrace: + name: "Potrace" + command_template: 'potrace {input} --svg -o {output}' + timeout: 30 + formats: + svg: "SVG from Bitmap" + + markitdown: + name: 
"Markitdown" + command_template: 'markitdown {input} -o {output}' + timeout: 30 + formats: + md: "Markdown from Everything!" + + pngquant: + name: "pngquant" + command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}' + timeout: 30 + formats: + png_hq: "PNG (High Quality Compression)" + png_mq: "PNG (Medium Quality Compression)" + png_fast: "PNG (Fast Compression)" + + sox: + name: "SoX Audio Converter" + command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}' + timeout: 120 + formats: + wav_48k_24b: "WAV (48kHz, 24-bit)" + wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)" + flac_48k_24b: "FLAC (48kHz, 24-bit)" + flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)" + ogg_32k_16b: "Ogg Vorbis (32kHz)" + ogg_16k_16b: "Ogg Vorbis (16kHz, Voice)" + + mozjpeg: + name: "MozJPEG" + command_template: 'cjpeg -quality {quality} -outfile {output} {input}' + timeout: 30 + formats: + jpg_q85: "JPEG (High Quality)" + jpg_q75: "JPEG (Web Quality)" + jpg_q60: "JPEG (Aggressive Compression)" diff --git a/settings.yml b/settings.yml index ee50d23..8c17157 100644 --- a/settings.yml +++ b/settings.yml @@ -1,179 +1,242 @@ -# settings.yml - -# General application settings app_settings: - max_file_size_mb: 2000 # Maximum upload size in Megabytes - -# Settings for Optical Character Recognition (OCR) tasks + max_file_size_mb: '2000' + allowed_all_extensions: + - .pdf + - .ps + - .eps + - .png + - .jpg + - .jpeg + - .tiff + - .tif + - .gif + - .bmp + - .webp + - .svg + - .jxl + - .avif + - .ppm + - .mp3 + - .m4a + - .ogg + - .flac + - .opus + - .wav + - .aac + - .mp4 + - .mkv + - .mov + - .webm + - .avi + - .flv + - .md + - .txt + - .html + - .docx + - .odt + - .rst + - .epub + - .mobi + - .azw3 + - .pptx + - .xlsx ocr_settings: ocrmypdf: deskew: true clean: true optimize: 1 force_ocr: true - -# Settings for audio transcription tasks transcription_settings: whisper: - compute_type: "int8" + compute_type: int8 allowed_models: - - "tiny" - - "base" - - "small" - - "medium" - - "large-v3" - - "distil-large-v2" - -# --- Conversion Tool Definitions --- -# Each tool's command is a single string. The backend uses shlex to parse it, -# so you can use quotes for arguments with spaces. -# Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc. 
+ - tiny + - base + - small + - medium + - large-v3 + - distil-large-v2 conversion_tools: libreoffice: - name: "LibreOffice" - command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}' + name: LibreOffice + command_template: libreoffice --headless --convert-to {filter} --outdir {output_dir} + {input} + timeout: 300 + filters: + pdf: pdf + docx: docx + odt: odt + html: html + rtf: rtf + txt: txt:Text + xml: xml + epub: epub + xlsx: xlsx + ods: ods + csv: csv:Text + pptx: pptx + odp: odp + svg: svg formats: - pdf: "PDF" - docx: "Word Document" - odt: "OpenDocument Text" - html: "HTML" - rtf: "Rich Text Format" - txt: "Plain Text" - xml: "Word 2003 XML" - epub: "EPUB" - xlsx: "Excel Spreadsheet" - ods: "OpenDocument Spreadsheet" - csv: "CSV" - pptx: "PowerPoint Presentation" - odp: "OpenDocument Presentation" - svg: "SVG" + pdf: PDF + docx: Word Document + odt: OpenDocument Text + html: HTML + rtf: Rich Text Format + txt: Plain Text + xml: Word 2003 XML + epub: EPUB + xlsx: Excel Spreadsheet + ods: OpenDocument Spreadsheet + csv: CSV + pptx: PowerPoint Presentation + odp: OpenDocument Presentation + svg: SVG pandoc: - name: "Pandoc" - command_template: 'pandoc --standalone {input} -o {output}' + name: Pandoc + command_template: pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex + timeout: 300 formats: - docx: "Word Document" - odt: "OpenDocument Text" - pdf: "PDF" - rtf: "Rich Text Format" - txt: "Plain Text" - tex: "LaTeX" - man: "Groff Man Page" - epub: "EPUB v3 Book" - epub2: "EPUB v2 Book" - html: "HTML" - html5: "HTML5" - pptx: "PowerPoint Presentation" - beamer: "Beamer PDF Slides" - slidy: "Slidy HTML Slides" - md: "Markdown" - rst: "reStructuredText" - jira: "Jira Wiki Markup" - mediawiki: "MediaWiki Markup" + docx: Word Document + odt: OpenDocument Text + pdf: PDF + rtf: Rich Text Format + txt: Plain Text + tex: LaTeX + man: Groff Man Page + epub: EPUB v3 Book + epub2: EPUB v2 Book + html: HTML + html5: HTML5 + pptx: PowerPoint Presentation + beamer: Beamer PDF Slides + slidy: Slidy HTML Slides + md: Markdown + rst: reStructuredText + jira: Jira Wiki Markup + mediawiki: MediaWiki Markup ghostscript_pdf: - name: "Ghostscript (PDF)" - command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}' + name: Ghostscript (PDF) + command_template: gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET + -dBATCH {preset} -sOutputFile={output} {input} + timeout: 60 formats: - screen: "PDF (Optimized for Screen)" - ebook: "PDF (Optimized for Ebooks)" - printer: "PDF (Optimized for Print)" - archive: "PDF/A (for Archiving)" + screen: PDF (Optimized for Screen) + ebook: PDF (Optimized for Ebooks) + printer: PDF (Optimized for Print) + archive: PDF/A (for Archiving) ghostscript_image: - name: "Ghostscript (Image)" - command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}' + name: Ghostscript (Image) + command_template: gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} + {input} + timeout: 60 formats: - jpeg_72: "JPEG Image (72 DPI)" - jpeg_300: "JPEG Image (300 DPI)" - png16m_150: "PNG Image (150 DPI)" - png16m_300: "PNG Image (300 DPI)" - tiff24nc_300: "TIFF Image (300 DPI)" - tiff24nc_600: "TIFF Image (600 DPI)" + jpeg_72: JPEG Image (72 DPI) + jpeg_300: JPEG Image (300 DPI) + png16m_150: PNG Image (150 DPI) + png16m_300: PNG Image (300 DPI) + tiff24nc_300: TIFF Image (300 DPI) + tiff24nc_600: 
TIFF Image (600 DPI) calibre: - name: "Calibre (ebook-convert)" - command_template: 'ebook-convert {input} {output}' + name: Calibre (ebook-convert) + command_template: ebook-convert {input} {output} + timeout: 600 formats: - epub: "EPUB" - mobi: "MOBI" - azw3: "Amazon Kindle" - pdf: "PDF" - docx: "Word Document" + epub: EPUB + mobi: MOBI + azw3: Amazon Kindle + pdf: PDF + docx: Word Document ffmpeg: - name: "FFmpeg" - command_template: 'ffmpeg -i {input} -y -preset medium {output}' + name: FFmpeg + command_template: ffmpeg -i {input} -y -preset medium {output} + timeout: 600 formats: - mp4: "MP4 Video" - mkv: "MKV Video" - mov: "MOV Video" - webm: "WebM Video" - mp3: "MP3 Audio" - wav: "WAV Audio" - flac: "FLAC Audio" - gif: "Animated GIF" + mp4: MP4 Video + mkv: MKV Video + mov: MOV Video + webm: WebM Video + mp3: MP3 Audio + wav: WAV Audio + flac: FLAC Audio + gif: Animated GIF vips: - name: "VIPS" - command_template: 'vips copy {input} {output}[Q=90]' + name: VIPS + command_template: vips copy {input} {output}[Q=90] + timeout: 60 formats: - jpg: "JPEG Image (Q90)" - png: "PNG Image" - webp: "WebP Image (Q90)" - tiff: "TIFF Image" - avif: "AVIF Image" + jpg: JPEG Image (Q90) + png: PNG Image + webp: WebP Image (Q90) + tiff: TIFF Image + avif: AVIF Image graphicsmagick: - name: "GraphicsMagick" - command_template: 'gm convert {input} -quality 90 {output}' + name: GraphicsMagick + command_template: gm convert {input} -quality 90 {output} + timeout: 60 formats: - jpg: "JPEG Image (Q90)" - png: "PNG Image" - webp: "WebP Image" - tiff: "TIFF Image" - pdf: "PDF from Images" + jpg: JPEG Image (Q90) + png: PNG Image + webp: WebP Image + tiff: TIFF Image + pdf: PDF from Images inkscape: - name: "Inkscape" - command_template: 'inkscape {input} --export-filename={output}' + name: Inkscape + command_template: inkscape {input} --export-filename={output} + timeout: 30 formats: - svg: "SVG (Plain)" - png: "PNG Image (96 DPI)" - pdf: "PDF Document" + svg: SVG (Plain) + png: PNG Image (96 DPI) + pdf: PDF Document libjxl: - name: "libjxl (cjxl)" - command_template: 'cjxl {input} {output} -q 90' + name: libjxl (cjxl) + command_template: cjxl {input} {output} -q 90 + timeout: 30 formats: - jxl: "JPEG XL (Q90)" + jxl: JPEG XL (Q90) resvg: - name: "resvg" - command_template: 'resvg {input} {output}' + name: resvg + command_template: resvg {input} {output} + timeout: 30 formats: - png: "PNG from SVG" + png: PNG from SVG potrace: - name: "Potrace" - command_template: 'potrace {input} --svg -o {output}' + name: Potrace + command_template: potrace {input} --svg -o {output} + timeout: 30 formats: - svg: "SVG from Bitmap" + svg: SVG from Bitmap markitdown: - name: "Markitdown" - command_template: 'markitdown {input} -o {output}' + name: Markitdown + command_template: markitdown {input} -o {output} + timeout: 300 formats: - md: "Markdown from Everything!" + md: Markdown from Everything! 
pngquant: - name: "pngquant" - command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}' + name: pngquant + command_template: pngquant --quality={quality} --speed {speed} --force --output + {output} {input} + timeout: 300 formats: - png_hq: "PNG (High Quality Compression)" - png_mq: "PNG (Medium Quality Compression)" - png_fast: "PNG (Fast Compression)" + png_hq: PNG (High Quality Compression) + png_mq: PNG (Medium Quality Compression) + png_fast: PNG (Fast Compression) sox: - name: "SoX Audio Converter" - command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}' + name: SoX Audio Converter + command_template: sox {input} -r {samplerate} -b {bitdepth} {output} + timeout: 600 formats: - wav_48k_24b: "WAV (48kHz, 24-bit)" - wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)" - flac_48k_24b: "FLAC (48kHz, 24-bit)" - flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)" - ogg_32k: "Ogg Vorbis (32kHz)" - ogg_16k: "Ogg Vorbis (16kHz, Voice)" + wav_48k_24b: WAV (48kHz, 24-bit) + wav_44k_16b: WAV (CD, 44.1kHz, 16-bit) + flac_48k_24b: FLAC (48kHz, 24-bit) + flac_44k_16b: FLAC (CD, 44.1kHz, 16-bit) + ogg_32k_16b: Ogg Vorbis (32kHz) + ogg_16k_16b: Ogg Vorbis (16kHz, Voice) mozjpeg: - name: "MozJPEG" - command_template: 'cjpeg -quality {quality} -outfile {output} {input}' + name: MozJPEG + command_template: cjpeg -quality {quality} -outfile {output} {input} + timeout: 30 formats: - jpg_q85: "JPEG (High Quality)" - jpg_q75: "JPEG (Web Quality)" - jpg_q60: "JPEG (Aggressive Compression)" \ No newline at end of file + jpg_q85: JPEG (High Quality) + jpg_q75: JPEG (Web Quality) + jpg_q60: JPEG (Aggressive Compression) diff --git a/static/css/style.css b/static/css/style.css index 6d0b0f3..69a3381 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -1,5 +1,3 @@ -/* static/css/style.css */ - :root { /* Core */ --bg-color: #000000; @@ -449,6 +447,110 @@ button[type="submit"]:disabled { } +/* --- START: Drag and Drop and Dialog Styles --- */ +.drag-overlay { + position: fixed; + inset: 0; + z-index: 9999; + display: none; /* Hidden by default */ + justify-content: center; + align-items: center; + background-color: rgba(0, 0, 0, 0.7); + backdrop-filter: blur(5px); +} +body.dragging .drag-overlay { + display: flex; /* Shown when body has .dragging class */ +} +.drag-overlay-content { + border: 3px dashed var(--primary-color); + border-radius: 12px; + padding: 2rem 4rem; + text-align: center; + background-color: rgba(0, 0, 0, 0.2); +} +.drag-overlay-content p { + margin: 0; + font-size: 1.5rem; + font-weight: 500; + color: var(--primary-color); +} + +.dialog-overlay { + position: fixed; + inset: 0; + z-index: 10000; + display: none; /* Hidden by default */ + justify-content: center; + align-items: center; + background-color: rgba(0, 0, 0, 0.7); + backdrop-filter: blur(5px); +} +.dialog-overlay.visible { + display: flex; /* Show when .visible class is added */ +} + +.dialog-box { + background: var(--card-bg); + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 1.5rem; + width: 100%; + max-width: 450px; + text-align: center; + box-shadow: 0 10px 30px rgba(0,0,0,0.5); +} +.dialog-box h2 { + margin-top: 0; + font-size: 1.5rem; +} +.dialog-box p { + color: var(--muted-text); + margin-bottom: 1.5rem; +} + +.dialog-actions { + display: grid; + grid-template-columns: 1fr; + gap: 0.75rem; + margin-bottom: 1rem; +} +.dialog-actions button { + display: block; + width: 100%; + background: transparent; + border: 1px solid var(--border-color); + color: 
var(--text-color); + padding: 0.65rem 1rem; + font-size: 1rem; + font-weight: 600; + border-radius: 5px; + cursor: pointer; + transition: background-color 0.15s ease, border-color 0.15s ease; +} +.dialog-actions button:hover { + background: var(--primary-hover); + border-color: var(--primary-hover); +} +.dialog-secondary-action { + background-color: transparent !important; + border: 1px solid var(--border-color) !important; +} +.dialog-secondary-action:hover { + background-color: rgba(255, 255, 255, 0.05) !important; +} +.dialog-cancel { + background: none; + border: none; + color: var(--muted-text); + cursor: pointer; + font-size: 0.9rem; + padding: 0.5rem; +} +.dialog-cancel:hover { + color: var(--text-color); +} +/* --- END: Drag and Drop and Dialog Styles --- */ + /* Spinner */ .spinner-small { border: 3px solid rgba(255,255,255,0.1); @@ -467,7 +569,6 @@ button[type="submit"]:disabled { /* Mobile responsive table */ @media (max-width: 768px) { - /* ... (no changes in this section) ... */ .table-wrapper { border: none; background-color: transparent; @@ -513,17 +614,17 @@ button[type="submit"]:disabled { .cell-value { min-width: 0; - max-width: 20em; - text-wrap: nowrap; - overflow: scroll; -} + max-width: 20em; + text-wrap: nowrap; + overflow: scroll; + } -#job-table td[data-label="File"], -#job-table td[data-label="Task"] { - overflow: scroll; - text-overflow: ellipsis; - text-wrap: nowrap; - max-width: 100em; -} + #job-table td[data-label="File"], + #job-table td[data-label="Task"] { + overflow: scroll; + text-overflow: ellipsis; + text-wrap: nowrap; + max-width: 100em; + } } \ No newline at end of file diff --git a/static/js/script.js b/static/js/script.js index 49c0dd9..c9b54da 100644 --- a/static/js/script.js +++ b/static/js/script.js @@ -1,6 +1,17 @@ -// static/js/script.js - document.addEventListener('DOMContentLoaded', () => { + // --- User Locale and Timezone Detection (Corrected Implementation) --- + const USER_LOCALE = navigator.language || 'en-US'; // Fallback to en-US + const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone; + const DATETIME_FORMAT_OPTIONS = { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: 'numeric', + minute: '2-digit', + timeZone: USER_TIMEZONE, + }; + console.log(`Using locale: ${USER_LOCALE} and timezone: ${USER_TIMEZONE}`); + // --- Element Selectors --- const jobListBody = document.getElementById('job-list-body'); @@ -11,16 +22,35 @@ document.addEventListener('DOMContentLoaded', () => { const audioForm = document.getElementById('audio-form'); const audioFileInput = document.getElementById('audio-file-input'); const audioFileName = document.getElementById('audio-file-name'); + const modelSizeSelect = document.getElementById('model-size-select'); const conversionForm = document.getElementById('conversion-form'); const conversionFileInput = document.getElementById('conversion-file-input'); const conversionFileName = document.getElementById('conversion-file-name'); const outputFormatSelect = document.getElementById('output-format-select'); - // MODIFICATION: Store the Choices.js instance in a variable - let conversionChoices = null; + // START: Drag and Drop additions + const dragOverlay = document.getElementById('drag-overlay'); + const actionDialog = document.getElementById('action-dialog'); + const dialogFileCount = document.getElementById('dialog-file-count'); + // Dialog Views + const dialogInitialView = document.getElementById('dialog-initial-actions'); + const dialogConvertView = 
document.getElementById('dialog-convert-view'); + // Dialog Buttons + const dialogConvertBtn = document.getElementById('dialog-action-convert'); + const dialogOcrBtn = document.getElementById('dialog-action-ocr'); + const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe'); + const dialogCancelBtn = document.getElementById('dialog-action-cancel'); + const dialogStartConversionBtn = document.getElementById('dialog-start-conversion'); + const dialogBackBtn = document.getElementById('dialog-back'); + // Dialog Select + const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select'); + // END: Drag and Drop additions + let conversionChoices = null; + let dialogConversionChoices = null; // For the dialog's format selector const activePolls = new Map(); + let stagedFiles = null; // To hold files from a drop event // --- Main Event Listeners --- pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName)); @@ -37,13 +67,222 @@ document.addEventListener('DOMContentLoaded', () => { handleCancelJob(jobId); } }); - + + // --- Helper Functions --- + function formatBytes(bytes, decimals = 1) { + if (!+bytes) return '0 Bytes'; // Handles 0, null, undefined + const k = 1024; + const dm = decimals < 0 ? 0 : decimals; + const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`; + } + + // --- Core Job Submission Logic (Refactored for reuse) --- + async function submitJob(endpoint, formData, originalFilename) { + try { + const response = await fetch(endpoint, { method: 'POST', body: formData }); + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`); + } + const result = await response.json(); + const preliminaryJob = { + id: result.job_id, + status: 'pending', + progress: 0, + original_filename: originalFilename, + input_filesize: formData.get('file').size, + task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 
'transcription' : 'conversion'), + created_at: new Date().toISOString() // Create preliminary UTC timestamp + }; + renderJobRow(preliminaryJob); + startPolling(result.job_id); + } catch (error) { + console.error('Error submitting job:', error); + alert(`Submission failed for ${originalFilename}: ${error.message}`); + } + } + + // --- Original Form Submission Handler (Now uses submitJob) --- + async function handleFormSubmit(event, endpoint, form) { + event.preventDefault(); + const fileInput = form.querySelector('input[type="file"]'); + if (fileInput.files.length === 0) return; + + const submitButton = form.querySelector('button[type="submit"]'); + submitButton.disabled = true; + + // Convert FileList to an array to loop through it + const files = Array.from(fileInput.files); + + // Process each file as a separate job + for (const file of files) { + const formData = new FormData(); + formData.append('file', file); + + // Append other form data if it exists + const outputFormat = form.querySelector('select[name="output_format"]'); + if (outputFormat) { + formData.append('output_format', outputFormat.value); + } + const modelSize = form.querySelector('select[name="model_size"]'); + if (modelSize) { + formData.append('model_size', modelSize.value); + } + + // Await each job submission to process them sequentially + await submitJob(endpoint, formData, file.name); + } + + // Reset the form UI after all jobs have been submitted + const fileNameDisplay = form.querySelector('.file-name'); + form.reset(); + if (fileNameDisplay) { + fileNameDisplay.textContent = 'No file chosen'; + fileNameDisplay.title = 'No file chosen'; + } + if (form.id === 'conversion-form' && conversionChoices) { + conversionChoices.clearInput(); + conversionChoices.setValue([]); + } + submitButton.disabled = false; + } + + // --- START: Drag and Drop Implementation --- + function setupDragAndDropListeners() { + let dragCounter = 0; // Counter to manage enter/leave events reliably + + window.addEventListener('dragenter', (e) => { + e.preventDefault(); + dragCounter++; + document.body.classList.add('dragging'); + }); + + window.addEventListener('dragleave', (e) => { + e.preventDefault(); + dragCounter--; + if (dragCounter === 0) { + document.body.classList.remove('dragging'); + } + }); + + window.addEventListener('dragover', (e) => { + e.preventDefault(); // This is necessary to allow a drop + }); + + window.addEventListener('drop', (e) => { + e.preventDefault(); + dragCounter = 0; // Reset counter + document.body.classList.remove('dragging'); + + // Only handle the drop if it's on our designated overlay + if (e.target === dragOverlay || dragOverlay.contains(e.target)) { + const files = e.dataTransfer.files; + if (files && files.length > 0) { + stagedFiles = files; + showActionDialog(); + } + } + }); + } + + function showActionDialog() { + dialogFileCount.textContent = stagedFiles.length; + + // Clone options from main form's select to the dialog's select + dialogOutputFormatSelect.innerHTML = outputFormatSelect.innerHTML; + + // Clean up previous Choices.js instance if it exists + if (dialogConversionChoices) { + dialogConversionChoices.destroy(); + } + + // Initialize a new Choices.js instance for the dialog + dialogConversionChoices = new Choices(dialogOutputFormatSelect, { + searchEnabled: true, + itemSelectText: 'Select', + shouldSort: false, + placeholder: true, + placeholderValue: 'Select a format...', + }); + + // Ensure the initial view is shown + dialogInitialView.style.display = 'grid'; + 
dialogConvertView.style.display = 'none'; + actionDialog.classList.add('visible'); + } + + function closeActionDialog() { + actionDialog.classList.remove('visible'); + stagedFiles = null; + // Important: Destroy the Choices instance to prevent memory leaks + if (dialogConversionChoices) { + // Explicitly hide the dropdown before destroying + dialogConversionChoices.hideDropdown(); + dialogConversionChoices.destroy(); + dialogConversionChoices = null; + } + } + + // --- Dialog Button and Action Listeners --- + dialogConvertBtn.addEventListener('click', () => { + // Switch to the conversion view + dialogInitialView.style.display = 'none'; + dialogConvertView.style.display = 'block'; + }); + + dialogBackBtn.addEventListener('click', () => { + // Switch back to the initial view + dialogInitialView.style.display = 'grid'; + dialogConvertView.style.display = 'none'; + }); + + dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('convert')); + dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr')); + dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcribe')); + dialogCancelBtn.addEventListener('click', closeActionDialog); + + + function handleDialogAction(action) { + if (!stagedFiles) return; + + let endpoint = ''; + const formDataArray = []; + + for (const file of stagedFiles) { + const formData = new FormData(); + formData.append('file', file); + + if (action === 'convert') { + const selectedFormat = dialogConversionChoices.getValue(true); + if (!selectedFormat) { + alert('Please select a format to convert to.'); + return; + } + formData.append('output_format', selectedFormat); + endpoint = '/convert-file'; + } else if (action === 'ocr') { + endpoint = '/ocr-pdf'; + } else if (action === 'transcribe') { + formData.append('model_size', modelSizeSelect.value); + endpoint = '/transcribe-audio'; + } + formDataArray.push({ formData, name: file.name }); + } + + formDataArray.forEach(item => { + submitJob(endpoint, item.formData, item.name); + }); + + closeActionDialog(); + } + // --- END: Drag and Drop Implementation --- + function initializeConversionSelector() { - // MODIFICATION: Destroy the old instance if it exists before creating a new one if (conversionChoices) { conversionChoices.destroy(); } - conversionChoices = new Choices(outputFormatSelect, { searchEnabled: true, itemSelectText: 'Select', @@ -65,7 +304,7 @@ document.addEventListener('DOMContentLoaded', () => { for (const formatKey in tool.formats) { group.choices.push({ value: `${toolKey}_${formatKey}`, - label: `${formatKey.toUpperCase()} - ${tool.formats[formatKey]}` + label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})` }); } choicesArray.push(group); @@ -73,58 +312,23 @@ document.addEventListener('DOMContentLoaded', () => { conversionChoices.setChoices(choicesArray, 'value', 'label', true); } - // --- Helper Functions --- function updateFileName(input, nameDisplay) { - const fileName = input.files.length > 0 ? 
input.files[0].name : 'No file chosen';
-        nameDisplay.textContent = fileName;
-        nameDisplay.title = fileName;
-    }
+        const numFiles = input.files.length;
+        let displayText = 'No file chosen';
+        let displayTitle = 'No file chosen';
 
-    async function handleFormSubmit(event, endpoint, form) {
-        event.preventDefault();
-        const fileInput = form.querySelector('input[type="file"]');
-        const fileNameDisplay = form.querySelector('.file-name');
-        if (!fileInput.files[0]) return;
-
-        const formData = new FormData(form);
-        const submitButton = form.querySelector('button[type="submit"]');
-        submitButton.disabled = true;
-
-        try {
-            const response = await fetch(endpoint, { method: 'POST', body: formData });
-            if (!response.ok) {
-                const errorData = await response.json();
-                throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`);
-            }
-            const result = await response.json();
-            const preliminaryJob = {
-                id: result.job_id,
-                status: 'pending',
-                progress: 0,
-                original_filename: fileInput.files[0].name,
-                task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
-                created_at: new Date().toISOString()
-            };
-            renderJobRow(preliminaryJob);
-            startPolling(result.job_id);
-        } catch (error) {
-            console.error('Error submitting job:', error);
-            alert(`Submission failed: ${error.message}`);
-        } finally {
-            form.reset();
-            if (fileNameDisplay) fileNameDisplay.textContent = 'No file chosen';
-
-            // MODIFICATION: Use the stored instance to correctly reset the dropdown
-            // without causing an error.
-            if (form.id === 'conversion-form' && conversionChoices) {
-                conversionChoices.clearInput();
-                conversionChoices.setValue([]); // Clears the selected value
-            }
-
-            submitButton.disabled = false;
+        if (numFiles === 1) {
+            displayText = input.files[0].name;
+            displayTitle = input.files[0].name;
+        } else if (numFiles > 1) {
+            displayText = `${numFiles} files selected`;
+            // Create a title attribute to show all filenames on hover
+            displayTitle = Array.from(input.files).map(file => file.name).join(', ');
         }
+        nameDisplay.textContent = displayText;
+        nameDisplay.title = displayTitle;
     }
 
     async function handleCancelJob(jobId) {
         if (!confirm('Are you sure you want to cancel this job?')) return;
         try {
@@ -161,7 +365,7 @@ document.addEventListener('DOMContentLoaded', () => {
         } catch (error) {
             console.error("Couldn't load job history:", error);
-            jobListBody.innerHTML = '<tr><td colspan="5">Could not load job history.</td></tr>';
+            jobListBody.innerHTML = '<tr><td colspan="6">Could not load job history.</td></tr>';
         }
     }
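Aside: the formatBytes() helper added earlier in this patch feeds the File Size column introduced in the next hunk. A quick sanity check of its rounding, computed from the function as written:

    formatBytes(0);          // "0 Bytes"
    formatBytes(1536);       // "1.5 KB"  (1536 / 1024, one decimal by default)
    formatBytes(10485760);   // "10 MB"   (trailing ".0" dropped by parseFloat)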
@@ -214,7 +418,12 @@ document.addEventListener('DOMContentLoaded', () => {
             taskTypeLabel = 'Conversion';
         }
 
-        const formattedDate = new Date(job.created_at).toLocaleString();
+        // --- CORRECTED DATE FORMATTING ---
+        // Takes the UTC string from the server (or the preliminary job)
+        // and formats it using the user's detected locale and timezone.
+        const submittedDate = new Date(job.created_at);
+        const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS);
+
         let statusHtml = `${job.status}`;
         if (job.status === 'processing') {
             const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
@@ -233,9 +442,21 @@ document.addEventListener('DOMContentLoaded', () => {
             actionHtml = `Failed`;
         }
 
+        // --- File Size Logic ---
+        let fileSizeHtml = '-';
+        if (job.input_filesize) {
+            let sizeString = formatBytes(job.input_filesize);
+            if (job.status === 'completed' && job.output_filesize) {
+                sizeString += ` → ${formatBytes(job.output_filesize)}`;
+            }
+            fileSizeHtml = `${sizeString}`;
+        }
+
         const escapedFilename = job.original_filename ? job.original_filename.replace(/</g, "&lt;").replace(/>/g, "&gt;") : "No filename";
 
         row.innerHTML = `
             <td data-label="File">${escapedFilename}</td>
+            <td data-label="File Size">${fileSizeHtml}</td>
             <td data-label="Task">${taskTypeLabel}</td>
             <td data-label="Submitted">${formattedDate}</td>
             <td data-label="Status">${statusHtml}</td>
             <td data-label="Action">${actionHtml}</td>
         `;
@@ -246,4 +467,5 @@ document.addEventListener('DOMContentLoaded', () => {
     // --- Initial Load ---
     initializeConversionSelector();
     loadInitialJobs();
+    setupDragAndDropListeners();
 });
\ No newline at end of file
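The corrected date handling above reduces to a small pattern built on standard Date/Intl APIs; a self-contained sketch (the sample ISO timestamp is illustrative):

    const locale = navigator.language || 'en-US';
    const options = {
        year: 'numeric', month: 'short', day: 'numeric',
        hour: 'numeric', minute: '2-digit',
        timeZone: Intl.DateTimeFormat().resolvedOptions().timeZone,
    };
    // A UTC timestamp from the server renders in the viewer's locale and zone:
    new Date('2025-09-17T18:45:55Z').toLocaleString(locale, options);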

diff --git a/templates/index.html b/templates/index.html
index 0246b10..7fc5792 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -27,7 +27,7 @@
                 <h2>File Conversion</h2>
                 <form id="conversion-form">
                     <label>
-                        <input type="file" id="conversion-file-input" name="file">
+                        <input type="file" id="conversion-file-input" name="file" multiple>
                         <span class="file-name" id="conversion-file-name">No file chosen</span>
                     </label>
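Note on the hunk above: the `multiple` attribute is inferred from the reworked script.js, which now iterates fileInput.files and submits one job per file. A minimal sketch of that flow, assuming the /convert-file endpoint and the 'file'/'output_format' field names used in script.js (submitAll is an illustrative name):

    async function submitAll(files, outputFormat) {
        for (const file of files) {                       // FileList from an <input multiple>
            const formData = new FormData();
            formData.append('file', file);
            formData.append('output_format', outputFormat);
            const res = await fetch('/convert-file', { method: 'POST', body: formData });
            if (!res.ok) console.error(`Submission failed for ${file.name}`);
        }
    }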
@@ -45,7 +45,7 @@

                <h2>PDF OCR</h2>
                <form id="pdf-form">
                    <label>
-                        <input type="file" id="pdf-file-input" name="file">
+                        <input type="file" id="pdf-file-input" name="file" multiple>
                        <span class="file-name" id="pdf-file-name">No file chosen</span>
                    </label>
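The same multi-file pattern applies to OCR. A hedged sketch that filters to PDFs client-side before posting to the /ocr-pdf endpoint named in script.js (isPdf and ocrAll are illustrative helpers; the server still validates file types):

    const isPdf = (f) => f.type === 'application/pdf' || f.name.toLowerCase().endsWith('.pdf');
    async function ocrAll(files) {
        for (const file of Array.from(files).filter(isPdf)) {
            const formData = new FormData();
            formData.append('file', file);               // one OCR job per dropped PDF
            await fetch('/ocr-pdf', { method: 'POST', body: formData });
        }
    }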
@@ -59,7 +59,7 @@

                <h2>Transcribe Audio</h2>
                <form id="audio-form">
                    <label>
-                        <input type="file" id="audio-file-input" name="file">
+                        <input type="file" id="audio-file-input" name="file" multiple>
                        <span class="file-name" id="audio-file-name">No file chosen</span>
                    </label>
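For transcription, script.js additionally sends the selected model size. A sketch with the field and endpoint names taken from script.js (transcribeOne is an illustrative name):

    async function transcribeOne(file, modelSize) {
        const formData = new FormData();
        formData.append('file', file);
        formData.append('model_size', modelSize);         // e.g. the value of #model-size-select
        const res = await fetch('/transcribe-audio', { method: 'POST', body: formData });
        return (await res.json()).job_id;                 // submitJob() expects a job_id in the response
    }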
@@ -87,6 +87,7 @@
                 <thead>
                     <tr>
                         <th>File</th>
+                        <th>File Size</th>
                         <th>Task</th>
                         <th>Submitted</th>
                         <th>Status</th>
@@ -100,7 +101,37 @@
                 </tbody>
             </table>
         </div>
     </div>
+
+    <!-- Drag-and-drop overlay (shown while <body> carries the .dragging class) -->
+    <div id="drag-overlay" class="drag-overlay">
+        <div class="drag-overlay-content">
+            <p>Drop files anywhere to begin</p>
+        </div>
+    </div>
+
+    <!-- Action dialog for dropped files (shown via the .visible class) -->
+    <div id="action-dialog" class="dialog-overlay">
+        <div class="dialog-box">
+            <h2>Choose Action</h2>
+            <p><span id="dialog-file-count"></span> file(s) dropped. What would you like to do?</p>
+            <div id="dialog-initial-actions" class="dialog-actions">
+                <button id="dialog-action-convert">Convert</button>
+                <button id="dialog-action-ocr">OCR PDF</button>
+                <button id="dialog-action-transcribe">Transcribe Audio</button>
+            </div>
+            <div id="dialog-convert-view" style="display: none;">
+                <select id="dialog-output-format-select" name="output_format"></select>
+                <div class="dialog-actions">
+                    <button id="dialog-start-conversion">Start Conversion</button>
+                    <button id="dialog-back" class="dialog-secondary-action">Back</button>
+                </div>
+            </div>
+            <button id="dialog-action-cancel" class="dialog-cancel">Cancel</button>
+        </div>
+    </div>
 
 </body>
 </html>
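Finally, the command_template entries in settings.yml use {placeholder} tokens that are filled per output format. The real expansion happens server-side in Python, but the idea fits in a few lines of JavaScript; a sketch, with buildCommand as an illustrative name:

    // Expand a command_template into an argv array, refusing unknown tokens.
    function buildCommand(template, params) {
        return template.split(/\s+/).map(tok =>
            tok.replace(/\{(\w+)\}/g, (_, key) => {
                if (!(key in params)) throw new Error(`missing parameter: ${key}`);
                return String(params[key]);
            })
        );
    }
    // buildCommand('sox {input} -r {samplerate} -b {bitdepth} {output}',
    //              { input: 'in.wav', samplerate: 48000, bitdepth: 24, output: 'out.flac' })
    // yields ['sox', 'in.wav', '-r', '48000', '-b', '24', 'out.flac']

Building an argv array rather than a shell string is the safer design here, since filenames never pass through a shell.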