diff --git a/main.py b/main.py index cbe7900..665b317 100644 --- a/main.py +++ b/main.py @@ -1,64 +1,99 @@ import logging import shutil +import subprocess import traceback import uuid +import shlex +import yaml from contextlib import asynccontextmanager from datetime import datetime from pathlib import Path -from typing import List, Set +from typing import Dict, List, Any import ocrmypdf import pypdf import pytesseract from PIL import Image from faster_whisper import WhisperModel -# MODIFICATION: Added Form for model selection from fastapi import (Depends, FastAPI, File, Form, HTTPException, Request, - UploadFile, status) -from fastapi.responses import FileResponse + UploadFile, status, Body) +from fastapi.responses import FileResponse, JSONResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates from huey import SqliteHuey from pydantic import BaseModel, ConfigDict -from pydantic_settings import BaseSettings from sqlalchemy import (Column, DateTime, Integer, String, Text, - create_engine) + create_engine, delete, event) +from sqlalchemy.pool import NullPool +from string import Formatter from sqlalchemy.orm import Session, declarative_base, sessionmaker from werkzeug.utils import secure_filename # -------------------------------------------------------------------------------- # --- 1. CONFIGURATION # -------------------------------------------------------------------------------- -class Settings(BaseSettings): + +class AppPaths(BaseModel): BASE_DIR: Path = Path(__file__).resolve().parent UPLOADS_DIR: Path = BASE_DIR / "uploads" PROCESSED_DIR: Path = BASE_DIR / "processed" DATABASE_URL: str = f"sqlite:///{BASE_DIR / 'jobs.db'}" HUEY_DB_PATH: str = str(BASE_DIR / "huey.db") - # MODIFICATION: Removed hardcoded model size, added a set of allowed models - WHISPER_COMPUTE_TYPE: str = "int8" - ALLOWED_WHISPER_MODELS: Set[str] = {"tiny", "base", "small", "medium", "large-v3", "distil-large-v2"} - MAX_FILE_SIZE_BYTES: int = 500 * 1024 * 1024 # 500 MB - ALLOWED_PDF_EXTENSIONS: set = {".pdf"} - ALLOWED_IMAGE_EXTENSIONS: set = {".png", ".jpg", ".jpeg", ".tiff", ".tif"} - ALLOWED_AUDIO_EXTENSIONS: set = {".mp3", "m4a", ".ogg", ".flac", ".opus"} + SETTINGS_FILE: Path = BASE_DIR / "settings.yml" -settings = Settings() +PATHS = AppPaths() +APP_CONFIG: Dict[str, Any] = {} + +def load_app_config(): + global APP_CONFIG + try: + with open(PATHS.SETTINGS_FILE, 'r') as f: + APP_CONFIG = yaml.safe_load(f) + APP_CONFIG['app_settings']['max_file_size_bytes'] = APP_CONFIG['app_settings']['max_file_size_mb'] * 1024 * 1024 + allowed_extensions = { + ".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif", + ".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg", + ".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi", + ".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi", + ".azw3", ".pptx", ".xlsx" + } + APP_CONFIG['app_settings']['allowed_all_extensions'] = allowed_extensions + logger.info("Successfully loaded settings from settings.yml") + except (FileNotFoundError, yaml.YAMLError) as e: + logger.error(f"Could not load settings.yml: {e}. 
App may not function correctly.") + APP_CONFIG = {} logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) - -settings.UPLOADS_DIR.mkdir(exist_ok=True) -settings.PROCESSED_DIR.mkdir(exist_ok=True) - +PATHS.UPLOADS_DIR.mkdir(exist_ok=True) +PATHS.PROCESSED_DIR.mkdir(exist_ok=True) # -------------------------------------------------------------------------------- -# --- 2. DATABASE (for Job Tracking) - NO CHANGES +# --- 2. DATABASE & Schemas # -------------------------------------------------------------------------------- -engine = create_engine(settings.DATABASE_URL, connect_args={"check_same_thread": False}) +engine = create_engine( + PATHS.DATABASE_URL, + connect_args={"check_same_thread": False, "timeout": 30}, + poolclass=NullPool, +) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +# THIS IS THE CRITICAL FIX Base = declarative_base() +@event.listens_for(engine, "connect") +def _set_sqlite_pragmas(dbapi_connection, connection_record): + """ + Enable WAL mode and set sane synchronous for better concurrency + between the FastAPI process and Huey worker processes. + """ + c = dbapi_connection.cursor() + try: + c.execute("PRAGMA journal_mode=WAL;") + c.execute("PRAGMA synchronous=NORMAL;") + finally: + c.close() + class Job(Base): __tablename__ = "jobs" id = Column(String, primary_key=True, index=True) @@ -80,10 +115,6 @@ def get_db(): finally: db.close() - -# -------------------------------------------------------------------------------- -# --- 3. PYDANTIC SCHEMAS (Data Validation) - NO CHANGES -# -------------------------------------------------------------------------------- class JobCreate(BaseModel): id: str task_type: str @@ -104,9 +135,8 @@ class JobSchema(BaseModel): updated_at: datetime model_config = ConfigDict(from_attributes=True) - # -------------------------------------------------------------------------------- -# --- 4. CRUD OPERATIONS (Database Interactions) - NO CHANGES +# --- 3. CRUD OPERATIONS (No Changes) # -------------------------------------------------------------------------------- def get_job(db: Session, job_id: str): return db.query(Job).filter(Job.id == job_id).first() @@ -143,37 +173,101 @@ def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None): db.commit() return db_job - # -------------------------------------------------------------------------------- -# --- 5. BACKGROUND TASKS (Huey) +# --- 4. BACKGROUND TASK SETUP # -------------------------------------------------------------------------------- -huey = SqliteHuey(filename=settings.HUEY_DB_PATH) +huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH) + +# --- START: NEW WHISPER MODEL CACHING --- +# This dictionary will live in the memory of the Huey worker process, +# allowing us to reuse loaded models across tasks. +WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {} + +def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel: + """ + Loads a Whisper model into the cache if not present, and returns the model. + This ensures a model is only loaded into memory once per worker process. + """ + if model_size not in WHISPER_MODELS_CACHE: + compute_type = whisper_settings.get('compute_type', 'int8') + logger.info(f"Whisper model '{model_size}' not in cache. 
Loading into memory...") + model = WhisperModel(model_size, device="cpu", compute_type=compute_type) + WHISPER_MODELS_CACHE[model_size] = model + logger.info(f"Model '{model_size}' loaded successfully.") + else: + logger.info(f"Found model '{model_size}' in cache. Reusing.") + return WHISPER_MODELS_CACHE[model_size] +# --- END: NEW WHISPER MODEL CACHING --- -# MODIFICATION: Removed global whisper model and lazy loader. -# The model will now be loaded inside the task itself based on user selection. @huey.task() -def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str): +def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict): db = SessionLocal() try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': - logger.info(f"Job {job_id} was cancelled before starting.") - return + if not job or job.status == 'cancelled': return + update_job_status(db, job_id, "processing") + + # --- MODIFIED: Use the caching function to get the model --- + model = get_whisper_model(model_size, whisper_settings) + + logger.info(f"Starting transcription for job {job_id}") + segments, info = model.transcribe(input_path_str, beam_size=5) + + full_transcript = [] + for segment in segments: + job_check = get_job(db, job_id) # Check for cancellation during long tasks + if job_check.status == 'cancelled': + logger.info(f"Job {job_id} cancelled during transcription.") + return + + if info.duration > 0: + progress = int((segment.end / info.duration) * 100) + update_job_status(db, job_id, "processing", progress=progress) + + full_transcript.append(segment.text.strip()) + + transcript_text = "\n".join(full_transcript) + # write atomically to avoid partial files + out_path = Path(output_path_str) + tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp") + with tmp_out.open("w", encoding="utf-8") as f: + f.write(transcript_text) + tmp_out.replace(out_path) + + mark_job_as_completed(db, job_id, preview=transcript_text) + logger.info(f"Transcription for job {job_id} completed.") + except Exception: + logger.exception(f"ERROR during transcription for job {job_id}") + update_job_status(db, job_id, "failed", error="See server logs for details.") + finally: + Path(input_path_str).unlink(missing_ok=True) + db.close() + +# Other tasks remain unchanged +@huey.task() +def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict): + db = SessionLocal() + try: + job = get_job(db, job_id) + if not job or job.status == 'cancelled': return update_job_status(db, job_id, "processing") logger.info(f"Starting PDF OCR for job {job_id}") - - ocrmypdf.ocr(input_path_str, output_path_str, deskew=True, force_ocr=True, clean=True, optimize=1, progress_bar=False) - + ocrmypdf.ocr(input_path_str, output_path_str, + deskew=ocr_settings.get('deskew', True), + force_ocr=ocr_settings.get('force_ocr', True), + clean=ocr_settings.get('clean', True), + optimize=ocr_settings.get('optimize', 1), + progress_bar=False) with open(output_path_str, "rb") as f: reader = pypdf.PdfReader(f) preview = "\n".join(page.extract_text() or "" for page in reader.pages) mark_job_as_completed(db, job_id, preview=preview) logger.info(f"PDF OCR for job {job_id} completed.") - except Exception as e: - logger.error(f"ERROR during PDF OCR for job {job_id}: {e}\n{traceback.format_exc()}") - update_job_status(db, job_id, "failed", error=str(e)) + except Exception: + logger.exception(f"ERROR during PDF OCR for job {job_id}") + 
update_job_status(db, job_id, "failed", error="See server logs for details.") finally: Path(input_path_str).unlink(missing_ok=True) db.close() @@ -183,10 +277,7 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str): db = SessionLocal() try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': - logger.info(f"Job {job_id} was cancelled before starting.") - return - + if not job or job.status == 'cancelled': return update_job_status(db, job_id, "processing", progress=50) logger.info(f"Starting Image OCR for job {job_id}") text = pytesseract.image_to_string(Image.open(input_path_str)) @@ -194,175 +285,324 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str): f.write(text) mark_job_as_completed(db, job_id, preview=text) logger.info(f"Image OCR for job {job_id} completed.") - except Exception as e: - logger.error(f"ERROR during Image OCR for job {job_id}: {e}\n{traceback.format_exc()}") - update_job_status(db, job_id, "failed", error=str(e)) + except Exception: + logger.exception(f"ERROR during Image OCR for job {job_id}") + update_job_status(db, job_id, "failed", error="See server logs for details.") finally: Path(input_path_str).unlink(missing_ok=True) db.close() -# MODIFICATION: The task now accepts `model_size` and loads the model dynamically. + @huey.task() -def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str): +def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str, tool: str, task_key: str, conversion_tools_config: dict): db = SessionLocal() + temp_input_file = None + temp_output_file = None try: job = get_job(db, job_id) - if not job or job.status == 'cancelled': - logger.info(f"Job {job_id} was cancelled before starting.") - return + if not job or job.status == 'cancelled': return + update_job_status(db, job_id, "processing", progress=25) + logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}") + tool_config = conversion_tools_config.get(tool) + if not tool_config: raise ValueError(f"Unknown conversion tool: {tool}") + input_path = Path(input_path_str) + output_path = Path(output_path_str) + current_input_path = input_path + if tool == "mozjpeg": + temp_input_file = input_path.with_suffix('.temp.ppm') + logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}") + pre_conv_cmd = ["vips", "copy", str(input_path), str(temp_input_file)] + pre_conv_result = subprocess.run(pre_conv_cmd, capture_output=True, text=True, check=False, timeout=tool_config.get("timeout", 300)) + if pre_conv_result.returncode != 0: + err = (pre_conv_result.stderr or "")[:4000] + raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}") + current_input_path = temp_input_file + update_job_status(db, job_id, "processing", progress=50) + # Build safe mapping for formatting and validate placeholders + ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"} + def validate_and_build_command(template_str: str, mapping: dict): + fmt = Formatter() + used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname} + bad = used - ALLOWED_VARS + if bad: + raise ValueError(f"Command template contains disallowed placeholders: {bad}") + formatted = template_str.format(**mapping) + return shlex.split(formatted) - update_job_status(db, job_id, "processing") - - # Load the specified model for this task - logger.info(f"Loading faster-whisper model: 
{model_size} for job {job_id}...") - model = WhisperModel( - model_size, - device="cpu", - compute_type=settings.WHISPER_COMPUTE_TYPE - ) - logger.info(f"Whisper model '{model_size}' loaded successfully.") - - logger.info(f"Starting transcription for job {job_id}") - segments, info = model.transcribe(input_path_str, beam_size=5) - - full_transcript = [] - total_duration = info.duration - for segment in segments: - job_check = get_job(db, job_id) - if job_check.status == 'cancelled': - logger.info(f"Job {job_id} cancelled during transcription.") - return + # Use a temporary output path and atomically move into place after success + temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp") - # Update progress based on the segment's end time - if total_duration > 0: - progress = int((segment.end / total_duration) * 100) - update_job_status(db, job_id, "processing", progress=progress) - full_transcript.append(segment.text.strip()) + # Prepare mapping + mapping = { + "input": str(current_input_path), + "output": str(temp_output_file), + "output_dir": str(output_path.parent), + "output_ext": output_path.suffix.lstrip('.'), + } - transcript_text = "\n".join(full_transcript) - with open(output_path_str, "w", encoding="utf-8") as f: - f.write(transcript_text) - mark_job_as_completed(db, job_id, preview=transcript_text) - logger.info(f"Transcription for job {job_id} completed.") + # Allow tool-specific adjustments to mapping + if tool.startswith("ghostscript"): + device, setting = task_key.split('_') + mapping.update({"device": device, "dpi": setting, "preset": setting}) + elif tool == "pngquant": + _, quality_key = task_key.split('_') + quality_map = {"hq": "80-95", "mq": "65-80", "fast": "65-80"} + speed_map = {"hq": "1", "mq": "3", "fast": "11"} + mapping.update({"quality": quality_map.get(quality_key, "65-80"), "speed": speed_map.get(quality_key, "3")}) + elif tool == "sox": + _, rate, depth = task_key.split('_') + rate = rate.replace('k', '000') if 'k' in rate else rate + depth = depth.replace('b', '') if 'b' in depth else '16' + mapping.update({"samplerate": rate, "bitdepth": depth}) + elif tool == "mozjpeg": + _, quality = task_key.split('_') + quality = quality.replace('q', '') + mapping.update({"quality": quality}) + + command_template_str = tool_config["command_template"] + command = validate_and_build_command(command_template_str, mapping) + logger.info(f"Executing command: {' '.join(command)}") + # run with timeout and capture output; run_command helper ensures trimmed logs on failure + def run_command(argv: List[str], timeout: int = 300): + try: + res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout) + except subprocess.TimeoutExpired: + raise Exception(f"Command timed out after {timeout}s") + if res.returncode != 0: + stderr = (res.stderr or "")[:4000] + stdout = (res.stdout or "")[:4000] + raise Exception(f"Command failed exit {res.returncode}. 
stderr: {stderr}; stdout: {stdout}") + return res + + result = run_command(command, timeout=tool_config.get("timeout", 300)) + if tool == "libreoffice": + expected_output_filename = input_path.with_suffix(output_path.suffix).name + generated_file = output_path.parent / expected_output_filename + if generated_file.exists(): + # move generated file into place + generated_file.replace(output_path) + else: + raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}") + # move temp output into final location atomically + if temp_output_file and temp_output_file.exists(): + temp_output_file.replace(output_path) + + mark_job_as_completed(db, job_id, preview=f"Successfully converted file.") + logger.info(f"Conversion for job {job_id} completed.") except Exception as e: - logger.error(f"ERROR during transcription for job {job_id}: {e}\n{traceback.format_exc()}") - update_job_status(db, job_id, "failed", error=str(e)) + logger.exception(f"ERROR during conversion for job {job_id}") + update_job_status(db, job_id, "failed", error="See server logs for details.") finally: Path(input_path_str).unlink(missing_ok=True) + if temp_input_file: + temp_input_file.unlink(missing_ok=True) + if temp_output_file: + temp_output_file.unlink(missing_ok=True) db.close() - # -------------------------------------------------------------------------------- -# --- 6. FASTAPI APPLICATION +# --- 5. FASTAPI APPLICATION # -------------------------------------------------------------------------------- @asynccontextmanager async def lifespan(app: FastAPI): logger.info("Application starting up...") Base.metadata.create_all(bind=engine) + load_app_config() yield logger.info("Application shutting down...") app = FastAPI(lifespan=lifespan) -app.mount("/static", StaticFiles(directory=settings.BASE_DIR / "static"), name="static") -templates = Jinja2Templates(directory=settings.BASE_DIR / "templates") +app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static") +templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates") -# --- Helper Functions --- async def save_upload_file_chunked(upload_file: UploadFile, destination: Path): + """ + Streams the uploaded file in chunks directly to a file on disk. + This is memory-efficient and reliable for large files. 
+ """ + max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024) + tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp") size = 0 - with open(destination, "wb") as buffer: - while chunk := await upload_file.read(1024 * 1024): # 1MB chunks - if size + len(chunk) > settings.MAX_FILE_SIZE_BYTES: - raise HTTPException( - status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, - detail=f"File exceeds limit of {settings.MAX_FILE_SIZE_BYTES // 1024 // 1024} MB" - ) - buffer.write(chunk) - size += len(chunk) + try: + with tmp.open("wb") as buffer: + while True: + chunk = await upload_file.read(1024 * 1024) + if not chunk: + break + size += len(chunk) + if size > max_size: + raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit") + buffer.write(chunk) + # atomic move into place + tmp.replace(destination) + except Exception: + tmp.unlink(missing_ok=True) + raise + def is_allowed_file(filename: str, allowed_extensions: set) -> bool: return Path(filename).suffix.lower() in allowed_extensions -# --- API Endpoints --- -@app.get("/") -async def get_index(request: Request): - # MODIFICATION: Pass available models to the template - return templates.TemplateResponse("index.html", { - "request": request, - "whisper_models": sorted(list(settings.ALLOWED_WHISPER_MODELS)) - }) +# --- Routes (only transcription route is modified) --- -@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED) -async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)): - if not is_allowed_file(file.filename, settings.ALLOWED_PDF_EXTENSIONS): - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PDF.") - - job_id = uuid.uuid4().hex - safe_basename = secure_filename(file.filename) - unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}" - upload_path = settings.UPLOADS_DIR / unique_filename - processed_path = settings.PROCESSED_DIR / unique_filename - - await save_upload_file_chunked(file, upload_path) - - job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) - new_job = create_job(db=db, job=job_data) - - run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path)) - return {"job_id": new_job.id, "status": new_job.status} - -@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED) -async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)): - if not is_allowed_file(file.filename, settings.ALLOWED_IMAGE_EXTENSIONS): - raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. 
Please upload a PNG, JPG, or TIFF.") - - job_id = uuid.uuid4().hex - safe_basename = secure_filename(file.filename) - file_ext = Path(safe_basename).suffix - unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}" - upload_path = settings.UPLOADS_DIR / unique_filename - processed_path = settings.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt" - - await save_upload_file_chunked(file, upload_path) - - job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) - new_job = create_job(db=db, job=job_data) - - run_image_ocr_task(new_job.id, str(upload_path), str(processed_path)) - return {"job_id": new_job.id, "status": new_job.status} - -# MODIFICATION: Endpoint now accepts `model_size` as form data. @app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED) async def submit_audio_transcription( file: UploadFile = File(...), model_size: str = Form("base"), db: Session = Depends(get_db) ): - if not is_allowed_file(file.filename, settings.ALLOWED_AUDIO_EXTENSIONS): + if not is_allowed_file(file.filename, {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".opus"}): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file type.") - # Validate the selected model size - if model_size not in settings.ALLOWED_WHISPER_MODELS: + whisper_config = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}) + if model_size not in whisper_config.get("allowed_models", []): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.") job_id = uuid.uuid4().hex safe_basename = secure_filename(file.filename) stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix - + audio_filename = f"{stem}_{job_id}{suffix}" transcript_filename = f"{stem}_{job_id}.txt" - upload_path = settings.UPLOADS_DIR / audio_filename - processed_path = settings.PROCESSED_DIR / transcript_filename + upload_path = PATHS.UPLOADS_DIR / audio_filename + processed_path = PATHS.PROCESSED_DIR / transcript_filename await save_upload_file_chunked(file, upload_path) - + job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) new_job = create_job(db=db, job=job_data) - - # Pass the selected model size to the background task - run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size) + + # --- MODIFIED: Pass whisper_config to the task --- + run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config) + return {"job_id": new_job.id, "status": new_job.status} -@app.post("/job/{job_id}/cancel", status_code=status.HTTP_200_OK) + +# --- Other routes remain unchanged --- + +@app.get("/") +async def get_index(request: Request): + whisper_models = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}).get("allowed_models", []) + conversion_tools = APP_CONFIG.get("conversion_tools", {}) + return templates.TemplateResponse("index.html", { + "request": request, + "whisper_models": sorted(list(whisper_models)), + "conversion_tools": conversion_tools + }) + +@app.get("/settings") +async def get_settings_page(request: Request): + try: + with open(PATHS.SETTINGS_FILE, 'r') as f: + current_config = yaml.safe_load(f) + except Exception as e: + logger.error(f"Could not load settings.yml for settings page: {e}") + current_config = {} + 
return templates.TemplateResponse("settings.html", {"request": request, "config": current_config}) + +@app.post("/settings/save") +async def save_settings(new_config: Dict = Body(...)): + try: + with open(PATHS.SETTINGS_FILE, 'w') as f: + yaml.dump(new_config, f, default_flow_style=False, sort_keys=False) + load_app_config() + return JSONResponse({"message": "Settings saved successfully."}) + except Exception as e: + logger.error(f"Failed to save settings: {e}") + raise HTTPException(status_code=500, detail="Could not write to settings.yml.") + +@app.post("/settings/clear-history") +async def clear_job_history(db: Session = Depends(get_db)): + try: + num_deleted = db.query(Job).delete() + db.commit() + logger.info(f"Cleared {num_deleted} jobs from history.") + return {"deleted_count": num_deleted} + except Exception as e: + db.rollback() + logger.error(f"Failed to clear job history: {e}") + raise HTTPException(status_code=500, detail="Database error while clearing history.") + +@app.post("/settings/delete-files") +async def delete_processed_files(): + deleted_count = 0 + errors = [] + for f in PATHS.PROCESSED_DIR.glob('*'): + try: + if f.is_file(): + f.unlink() + deleted_count += 1 + except Exception as e: + errors.append(f.name) + logger.error(f"Could not delete processed file {f.name}: {e}") + if errors: + raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}") + logger.info(f"Deleted {deleted_count} files from processed directory.") + return {"deleted_count": deleted_count} + +@app.post("/convert-file", status_code=status.HTTP_202_ACCEPTED) +async def submit_file_conversion(file: UploadFile = File(...), output_format: str = Form(...), db: Session = Depends(get_db)): + allowed_exts = APP_CONFIG.get("app_settings", {}).get("allowed_all_extensions", set()) + if not is_allowed_file(file.filename, allowed_exts): + raise HTTPException(status_code=400, detail=f"File type '{Path(file.filename).suffix}' not allowed.") + conversion_tools = APP_CONFIG.get("conversion_tools", {}) + try: + tool, task_key = output_format.split('_', 1) + if tool not in conversion_tools or task_key not in conversion_tools[tool]["formats"]: + raise ValueError() + except ValueError: + raise HTTPException(status_code=400, detail="Invalid output format selected.") + job_id = uuid.uuid4().hex + safe_basename = secure_filename(file.filename) + original_stem = Path(safe_basename).stem + target_ext = task_key.split('_')[0] + if tool == "ghostscript_pdf": + target_ext = "pdf" + upload_filename = f"{original_stem}_{job_id}{Path(safe_basename).suffix}" + processed_filename = f"{original_stem}_{job_id}.{target_ext}" + upload_path = PATHS.UPLOADS_DIR / upload_filename + processed_path = PATHS.PROCESSED_DIR / processed_filename + await save_upload_file_chunked(file, upload_path) + job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename, + input_filepath=str(upload_path), processed_filepath=str(processed_path)) + new_job = create_job(db=db, job=job_data) + run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools) + return {"job_id": new_job.id, "status": new_job.status} + +@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED) +async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)): + if not is_allowed_file(file.filename, {".pdf"}): + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. 
Please upload a PDF.") + job_id = uuid.uuid4().hex + safe_basename = secure_filename(file.filename) + unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}" + upload_path = PATHS.UPLOADS_DIR / unique_filename + processed_path = PATHS.PROCESSED_DIR / unique_filename + await save_upload_file_chunked(file, upload_path) + job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) + new_job = create_job(db=db, job=job_data) + ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {}) + run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings) + return {"job_id": new_job.id, "status": new_job.status} + +@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED) +async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)): + allowed_exts = {".png", ".jpg", ".jpeg", ".tiff", ".tif"} + if not is_allowed_file(file.filename, allowed_exts): + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PNG, JPG, or TIFF.") + job_id = uuid.uuid4().hex + safe_basename = secure_filename(file.filename) + file_ext = Path(safe_basename).suffix + unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}" + upload_path = PATHS.UPLOADS_DIR / unique_filename + processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt" + await save_upload_file_chunked(file, upload_path) + job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path)) + new_job = create_job(db=db, job=job_data) + run_image_ocr_task(new_job.id, str(upload_path), str(processed_path)) + return {"job_id": new_job.id, "status": new_job.status} + +@app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED) async def cancel_job(job_id: str, db: Session = Depends(get_db)): job = get_job(db, job_id) if not job: @@ -386,12 +626,13 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)): @app.get("/download/{filename}") async def download_file(filename: str): safe_filename = secure_filename(filename) - file_path = settings.PROCESSED_DIR / safe_filename - - if not file_path.resolve().is_relative_to(settings.PROCESSED_DIR.resolve()): + file_path = PATHS.PROCESSED_DIR / safe_filename + file_path = file_path.resolve() + base = PATHS.PROCESSED_DIR.resolve() + try: + file_path.relative_to(base) + except ValueError: raise HTTPException(status_code=403, detail="Access denied.") - if not file_path.is_file(): raise HTTPException(status_code=404, detail="File not found.") - return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream") \ No newline at end of file diff --git a/run.sh b/run.sh index 3f9f0a9..c59047d 100755 --- a/run.sh +++ b/run.sh @@ -3,8 +3,8 @@ echo "Starting DocProcessor with Gunicorn..." -exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 & +exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 & echo "Done" echo "Starting huey..." 
-exec huey_consumer.py main.huey & +exec huey_consumer.py main.huey -w 2 & echo "Done" diff --git a/settings.yml b/settings.yml new file mode 100644 index 0000000..ee50d23 --- /dev/null +++ b/settings.yml @@ -0,0 +1,179 @@ +# settings.yml + +# General application settings +app_settings: + max_file_size_mb: 2000 # Maximum upload size in Megabytes + +# Settings for Optical Character Recognition (OCR) tasks +ocr_settings: + ocrmypdf: + deskew: true + clean: true + optimize: 1 + force_ocr: true + +# Settings for audio transcription tasks +transcription_settings: + whisper: + compute_type: "int8" + allowed_models: + - "tiny" + - "base" + - "small" + - "medium" + - "large-v3" + - "distil-large-v2" + +# --- Conversion Tool Definitions --- +# Each tool's command is a single string. The backend uses shlex to parse it, +# so you can use quotes for arguments with spaces. +# Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc. +conversion_tools: + libreoffice: + name: "LibreOffice" + command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}' + formats: + pdf: "PDF" + docx: "Word Document" + odt: "OpenDocument Text" + html: "HTML" + rtf: "Rich Text Format" + txt: "Plain Text" + xml: "Word 2003 XML" + epub: "EPUB" + xlsx: "Excel Spreadsheet" + ods: "OpenDocument Spreadsheet" + csv: "CSV" + pptx: "PowerPoint Presentation" + odp: "OpenDocument Presentation" + svg: "SVG" + pandoc: + name: "Pandoc" + command_template: 'pandoc --standalone {input} -o {output}' + formats: + docx: "Word Document" + odt: "OpenDocument Text" + pdf: "PDF" + rtf: "Rich Text Format" + txt: "Plain Text" + tex: "LaTeX" + man: "Groff Man Page" + epub: "EPUB v3 Book" + epub2: "EPUB v2 Book" + html: "HTML" + html5: "HTML5" + pptx: "PowerPoint Presentation" + beamer: "Beamer PDF Slides" + slidy: "Slidy HTML Slides" + md: "Markdown" + rst: "reStructuredText" + jira: "Jira Wiki Markup" + mediawiki: "MediaWiki Markup" + ghostscript_pdf: + name: "Ghostscript (PDF)" + command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}' + formats: + screen: "PDF (Optimized for Screen)" + ebook: "PDF (Optimized for Ebooks)" + printer: "PDF (Optimized for Print)" + archive: "PDF/A (for Archiving)" + ghostscript_image: + name: "Ghostscript (Image)" + command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}' + formats: + jpeg_72: "JPEG Image (72 DPI)" + jpeg_300: "JPEG Image (300 DPI)" + png16m_150: "PNG Image (150 DPI)" + png16m_300: "PNG Image (300 DPI)" + tiff24nc_300: "TIFF Image (300 DPI)" + tiff24nc_600: "TIFF Image (600 DPI)" + calibre: + name: "Calibre (ebook-convert)" + command_template: 'ebook-convert {input} {output}' + formats: + epub: "EPUB" + mobi: "MOBI" + azw3: "Amazon Kindle" + pdf: "PDF" + docx: "Word Document" + ffmpeg: + name: "FFmpeg" + command_template: 'ffmpeg -i {input} -y -preset medium {output}' + formats: + mp4: "MP4 Video" + mkv: "MKV Video" + mov: "MOV Video" + webm: "WebM Video" + mp3: "MP3 Audio" + wav: "WAV Audio" + flac: "FLAC Audio" + gif: "Animated GIF" + vips: + name: "VIPS" + command_template: 'vips copy {input} {output}[Q=90]' + formats: + jpg: "JPEG Image (Q90)" + png: "PNG Image" + webp: "WebP Image (Q90)" + tiff: "TIFF Image" + avif: "AVIF Image" + graphicsmagick: + name: "GraphicsMagick" + command_template: 'gm convert {input} -quality 90 {output}' + formats: + jpg: "JPEG Image (Q90)" + png: "PNG Image" + webp: "WebP Image" + tiff: "TIFF Image" + 
pdf: "PDF from Images" + inkscape: + name: "Inkscape" + command_template: 'inkscape {input} --export-filename={output}' + formats: + svg: "SVG (Plain)" + png: "PNG Image (96 DPI)" + pdf: "PDF Document" + libjxl: + name: "libjxl (cjxl)" + command_template: 'cjxl {input} {output} -q 90' + formats: + jxl: "JPEG XL (Q90)" + resvg: + name: "resvg" + command_template: 'resvg {input} {output}' + formats: + png: "PNG from SVG" + potrace: + name: "Potrace" + command_template: 'potrace {input} --svg -o {output}' + formats: + svg: "SVG from Bitmap" + markitdown: + name: "Markitdown" + command_template: 'markitdown {input} -o {output}' + formats: + md: "Markdown from Everything!" + pngquant: + name: "pngquant" + command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}' + formats: + png_hq: "PNG (High Quality Compression)" + png_mq: "PNG (Medium Quality Compression)" + png_fast: "PNG (Fast Compression)" + sox: + name: "SoX Audio Converter" + command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}' + formats: + wav_48k_24b: "WAV (48kHz, 24-bit)" + wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)" + flac_48k_24b: "FLAC (48kHz, 24-bit)" + flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)" + ogg_32k: "Ogg Vorbis (32kHz)" + ogg_16k: "Ogg Vorbis (16kHz, Voice)" + mozjpeg: + name: "MozJPEG" + command_template: 'cjpeg -quality {quality} -outfile {output} {input}' + formats: + jpg_q85: "JPEG (High Quality)" + jpg_q75: "JPEG (Web Quality)" + jpg_q60: "JPEG (Aggressive Compression)" \ No newline at end of file diff --git a/static/css/settings.css b/static/css/settings.css new file mode 100644 index 0000000..3225230 --- /dev/null +++ b/static/css/settings.css @@ -0,0 +1,194 @@ +/* static/css/settings.css */ + +.settings-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 2rem; + padding-bottom: 1rem; + border-bottom: 1px solid var(--divider-color); +} + +.settings-header h1 {e + margin: 0 0 0.25rem 0; +} + +.settings-header p { + margin: 0; + color: var(--muted-text); +} + +.back-button { + background-color: var(--secondary-color); + color: var(--text-color); + padding: 0.5rem 1rem; + border-radius: 5px; + text-decoration: none; + font-weight: 500; + transition: background-color 0.15s ease; + white-space: nowrap; +} +.back-button:hover { + background-color: var(--primary-hover); +} + +.settings-group { + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 1.5rem; + margin-bottom: 2rem; +} + +.settings-group legend { + padding: 0 0.5rem; + font-weight: 500; + color: var(--primary-color); +} + +.settings-group legend h2 { + margin: 0; + font-size: 1.25rem; +} + +.form-input, .form-select, .form-textarea { + width: 100%; + background-color: rgba(255,255,255,0.05); + color: var(--text-color); + padding: 0.6rem 0.8rem; + border-radius: 5px; + border: 1px solid var(--border-color); + font-family: inherit; + font-size: 0.95rem; + transition: border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out; +} +.form-input:focus, .form-select:focus, .form-textarea:focus { + outline: none; + border-color: var(--primary-color); + box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2); +} + +.form-textarea { + resize: vertical; + min-height: 60px; + font-family: 'Courier New', Courier, monospace; +} + +.field-description { + font-size: 0.85rem; + color: var(--muted-text); + margin-top: -0.5rem; + margin-bottom: 1rem; +} +.field-description code { + background-color: rgba(255,255,255,0.1); + padding: 0.1rem 0.3rem; + border-radius: 
3px; + font-size: 0.8rem; +} + +.checkbox-group { + display: flex; + align-items: center; + gap: 0.75rem; + margin-bottom: 0.5rem; +} +.checkbox-group input[type="checkbox"] { + width: 1rem; + height: 1rem; +} + +.tool-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(350px, 1fr)); + gap: 1rem; +} + +.tool-card { + border: 1px solid var(--divider-color); + padding: 1rem; + border-radius: 6px; + background-color: rgba(0,0,0,0.2); +} +.tool-card h3 { + margin-top: 0; + margin-bottom: 1rem; + font-size: 1.1rem; +} + +.form-actions { + display: flex; + align-items: center; + gap: 1rem; + margin-top: 1.5rem; +} +.button-primary { + display: inline-block; + background: var(--primary-color); + background-color: transparent; + border-color: var(--border-color); + border-width: 1px; + color: #ffffff; + padding: 0.65rem 1.5rem; + font-size: 1rem; + font-weight: 600; + border-radius: 5px; + cursor: pointer; + transition: background-color 0.15s ease; +} +.button-primary:hover { + background: var(--primary-hover); +} + +.save-status { + font-weight: 500; +} +.save-status.success { + color: var(--success-color); +} +.save-status.error { + color: var(--error-color); +} + +.divider { + border: none; + height: 1px; + background-color: var(--divider-color); + margin: 3rem 0; +} + +.danger-zone { + border: 1px solid var(--error-color); + border-radius: 8px; + padding: 1rem; + background-color: rgba(255, 107, 107, 0.05); +} +.danger-action { + display: flex; + justify-content: space-between; + align-items: center; + gap: 1rem; +} +.danger-action + .danger-action { + margin-top: 1rem; + padding-top: 1rem; + border-top: 1px solid rgba(255, 107, 107, 0.2); +} +.danger-action p { + margin: 0.25rem 0 0 0; + font-size: 0.9rem; + color: var(--muted-text); +} +.button-danger { + background-color: var(--error-color); + color: #1a0000; + border: none; + padding: 0.6rem 1.2rem; + font-weight: 600; + border-radius: 5px; + cursor: pointer; + transition: background-color 0.15s ease; + white-space: nowrap; +} +.button-danger:hover { + background-color: #ff8f8f; +} \ No newline at end of file diff --git a/static/css/style.css b/static/css/style.css index 9de15da..6d0b0f3 100644 --- a/static/css/style.css +++ b/static/css/style.css @@ -3,14 +3,15 @@ :root { /* Core */ --bg-color: #000000; - --surface: #111111; - --card-bg: #0b0b0b; + --surface: #000000; + --card-bg: #000000; --text-color: #e6eef6; --muted-text: #9aa4ad; /* Accent / interactive */ - --primary-color: #00b4ff; - --primary-hover: #00d0ff; + --primary-color: #ffffff; + --secondary-color: #2b2b2b; + --primary-hover: #ffffff3d; --success-color: #26c281; --error-color: #ff6b6b; --cancel-color: #f39c12; /* Orange for cancelled */ @@ -43,12 +44,14 @@ body { /* Container */ .container { width: 100%; - max-width: 960px; + max-width: 1280px; /* Increased max-width for 3 columns */ margin: 0 auto; background: var(--card-bg); border-radius: 10px; padding: 1.5rem; border: 1px solid var(--border-color); + margin-top: 1em; + } @media (max-width: 768px) { .container { @@ -69,8 +72,10 @@ header { header h1 { margin: 0 0 0.25rem 0; - font-size: 1.75rem; + font-size: 3rem; font-weight: 700; + font-family: serif; + font-weight: lighter; } header p { @@ -79,18 +84,28 @@ header p { font-size: 1rem; } +.header-actions { + position: absolute; + top: 1.5rem; + right: 1.5rem; +} +.settings-link { + font-size: 1.5rem; + text-decoration: none; + color: var(--muted-text); + transition: color 0.2s ease; +} +.settings-link:hover { + color: var(--text-color); +} /* Form 
Layout */ .form-grid { display: grid; - grid-template-columns: 1fr; + /* MODIFICATION: Responsive grid for 1, 2, or 3 items */ + grid-template-columns: repeat(auto-fit, minmax(320px, 1fr)); gap: 1.5rem; margin-bottom: 2rem; } -@media (min-width: 768px) { - .form-grid { - grid-template-columns: 1fr 1fr; - } -} .upload-form fieldset { border: 1px solid var(--border-color); @@ -130,7 +145,6 @@ input[type="file"] { width: 100%; height: 100%; cursor: pointer; - text-overflow: ellipsis; inset: 0; } @@ -142,8 +156,6 @@ input[type="file"] { cursor: pointer; transition: background-color 0.15s ease; font-weight: 500; - text-overflow: ellipsis; - border: 1px solid rgba(255,255,255,0.1); white-space: nowrap; } @@ -159,10 +171,10 @@ input[type="file"] { text-overflow: ellipsis; white-space: nowrap; min-width: 0; - max-width: 10em; + max-width: 15em; } -/* --- STYLES FOR DROPDOWN MENU --- */ +/* Form Controls (for dropdowns) */ .form-control { margin-bottom: 1rem; } @@ -200,22 +212,23 @@ input[type="file"] { border-color: var(--primary-color); box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2); } -/* ---------------------------------- */ /* Submit button */ button[type="submit"] { display: block; width: 100%; background: var(--primary-color); - color: #00161b; - border: none; + background-color: transparent; + border-color: var(--border-color); + border-width: 1px; + color: #ffffff; padding: 0.65rem 1rem; font-size: 1rem; font-weight: 600; border-radius: 5px; cursor: pointer; transition: background-color 0.15s ease; - margin-top: auto; /* Pushes button to the bottom */ + margin-top: auto; } button[type="submit"]:hover { background: var(--primary-hover); @@ -268,16 +281,24 @@ button[type="submit"]:disabled { white-space: nowrap; } -#job-table td[data-label="File"] { - max-width: 250px; - overflow: hidden; +.cell-value { + max-width: 10em; + text-wrap: wrap; + overflow: scroll; +} + + +#job-table td[data-label="File"], +#job-table td[data-label="Task"] { + overflow: scroll; text-overflow: ellipsis; - white-space: nowrap; + text-wrap: wrap; + max-width: 15em; } .action-col { text-align: center; - width: 100px; + width: 120px; } #job-table td.action-col { text-align: center; @@ -348,35 +369,87 @@ button[type="submit"]:disabled { } /* Action items */ -.download-button { - background-color: var(--success-color); - color: #00160b; +.download-button, .cancel-button { padding: 0.3rem 0.8rem; text-decoration: none; border-radius: 5px; font-weight: 600; font-size: 0.85rem; display: inline-block; - transition: transform 0.1s ease; + transition: transform 0.1s ease, background-color 0.15s ease; border: none; + cursor: pointer; +} +.download-button { + background-color: var(--success-color); + color: #00160b; } .download-button:hover { transform: scale(1.05); } .cancel-button { background-color: var(--error-color); color: #1a0000; - padding: 0.3rem 0.8rem; - border-radius: 5px; - font-weight: 600; - font-size: 0.85rem; - display: inline-block; - transition: transform 0.1s ease; - border: none; - cursor: pointer; } .cancel-button:hover { background-color: #ff8f8f; } +/* --- MODIFICATION: Dark theme for Choices.js --- */ +.choices { + font-size: 0.95rem; +} +.choices__inner { + background-color: rgba(255, 255, 255, 0.05); + border: 1px solid var(--border-color); + border-radius: 5px; + padding: 0.35rem 0.75rem; + color: var(--text-color); + min-height: auto; +} +.is-open .choices__inner { + border-radius: 5px 5px 0 0; + border-color: var(--primary-color); +} +.is-focused .choices__inner { + border-color: 
var(--primary-color); + box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2); +} +.choices__list--dropdown, .choices__list[aria-expanded] { + background-color: var(--surface); + border: 1px solid var(--primary-color); + border-top: none; + border-radius: 0 0 5px 5px; +} +.choices__list--dropdown .choices__item--selectable.is-highlighted, +.choices__list[aria-expanded] .choices__item--selectable.is-highlighted { + background-color: var(--secondary-color); + color: var(--text-color) +} +.choices__list--dropdown .choices__item, .choices__list[aria-expanded] .choices__item { + padding: 0.5rem 0.8rem; + font-size: 0.9rem; +} +.choices__group { + background-color: rgba(0,0,0,0.2); + border-bottom: 1px solid var(--divider-color); +} +.choices__group .choices__heading { + color: var(--primary-hover); + font-size: 0.75rem; + font-weight: 700; + border-bottom: none; + padding: 0.5rem 0.8rem; +} +.choices__input { + color: var(--text-color); + background-color: #000000; + font-size: 0.95rem; +} +.choices[data-type*="select-one"] .choices__input { + background-color: #000000; + +} + +/* Spinner */ .spinner-small { border: 3px solid rgba(255,255,255,0.1); border-top: 3px solid var(--primary-color); @@ -392,7 +465,9 @@ button[type="submit"]:disabled { 100% { transform: rotate(360deg); } } +/* Mobile responsive table */ @media (max-width: 768px) { + /* ... (no changes in this section) ... */ .table-wrapper { border: none; background-color: transparent; @@ -432,5 +507,23 @@ button[type="submit"]:disabled { text-align: right; min-width: 0; word-break: break-all; + overflow: scroll; + max-width: 100em; } + + .cell-value { + min-width: 0; + max-width: 20em; + text-wrap: nowrap; + overflow: scroll; +} + + +#job-table td[data-label="File"], +#job-table td[data-label="Task"] { + overflow: scroll; + text-overflow: ellipsis; + text-wrap: nowrap; + max-width: 100em; +} } \ No newline at end of file diff --git a/static/js/script.js b/static/js/script.js index da2eb4c..49c0dd9 100644 --- a/static/js/script.js +++ b/static/js/script.js @@ -1,21 +1,35 @@ // static/js/script.js document.addEventListener('DOMContentLoaded', () => { + // --- Element Selectors --- const jobListBody = document.getElementById('job-list-body'); + const pdfForm = document.getElementById('pdf-form'); const pdfFileInput = document.getElementById('pdf-file-input'); const pdfFileName = document.getElementById('pdf-file-name'); + const audioForm = document.getElementById('audio-form'); const audioFileInput = document.getElementById('audio-file-input'); const audioFileName = document.getElementById('audio-file-name'); + const conversionForm = document.getElementById('conversion-form'); + const conversionFileInput = document.getElementById('conversion-file-input'); + const conversionFileName = document.getElementById('conversion-file-name'); + const outputFormatSelect = document.getElementById('output-format-select'); + + // MODIFICATION: Store the Choices.js instance in a variable + let conversionChoices = null; + const activePolls = new Map(); // --- Main Event Listeners --- pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName)); audioFileInput.addEventListener('change', () => updateFileName(audioFileInput, audioFileName)); - pdfForm.addEventListener('submit', (e) => handleFormSubmit(e, '/ocr-pdf', pdfForm, pdfFileInput, pdfFileName)); - audioForm.addEventListener('submit', (e) => handleFormSubmit(e, '/transcribe-audio', audioForm, audioFileInput, audioFileName)); + conversionFileInput.addEventListener('change', () => 
updateFileName(conversionFileInput, conversionFileName)); + + pdfForm.addEventListener('submit', (e) => handleFormSubmit(e, '/ocr-pdf', pdfForm)); + audioForm.addEventListener('submit', (e) => handleFormSubmit(e, '/transcribe-audio', audioForm)); + conversionForm.addEventListener('submit', (e) => handleFormSubmit(e, '/convert-file', conversionForm)); jobListBody.addEventListener('click', (event) => { if (event.target.classList.contains('cancel-button')) { @@ -24,19 +38,55 @@ document.addEventListener('DOMContentLoaded', () => { } }); + function initializeConversionSelector() { + // MODIFICATION: Destroy the old instance if it exists before creating a new one + if (conversionChoices) { + conversionChoices.destroy(); + } + + conversionChoices = new Choices(outputFormatSelect, { + searchEnabled: true, + itemSelectText: 'Select', + shouldSort: false, + placeholder: true, + placeholderValue: 'Select a format...', + }); + + const tools = window.APP_CONFIG.conversionTools || {}; + const choicesArray = []; + for (const toolKey in tools) { + const tool = tools[toolKey]; + const group = { + label: tool.name, + id: toolKey, + disabled: false, + choices: [] + }; + for (const formatKey in tool.formats) { + group.choices.push({ + value: `${toolKey}_${formatKey}`, + label: `${formatKey.toUpperCase()} - ${tool.formats[formatKey]}` + }); + } + choicesArray.push(group); + } + conversionChoices.setChoices(choicesArray, 'value', 'label', true); + } + + // --- Helper Functions --- function updateFileName(input, nameDisplay) { - nameDisplay.textContent = input.files.length > 0 ? input.files[0].name : 'No file chosen'; - nameDisplay.title = nameDisplay.textContent; // Add a tooltip for the full name + const fileName = input.files.length > 0 ? input.files[0].name : 'No file chosen'; + nameDisplay.textContent = fileName; + nameDisplay.title = fileName; } - async function handleFormSubmit(event, endpoint, form, fileInput, fileNameDisplay) { + async function handleFormSubmit(event, endpoint, form) { event.preventDefault(); + const fileInput = form.querySelector('input[type="file"]'); + const fileNameDisplay = form.querySelector('.file-name'); if (!fileInput.files[0]) return; - // MODIFICATION: Use new FormData(form) to capture all form fields, - // including the new model size dropdown for the audio form. const formData = new FormData(form); - const submitButton = form.querySelector('button[type="submit"]'); submitButton.disabled = true; @@ -47,54 +97,49 @@ document.addEventListener('DOMContentLoaded', () => { throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`); } const result = await response.json(); - - // To provide immediate feedback, create a placeholder job object const preliminaryJob = { id: result.job_id, status: 'pending', progress: 0, original_filename: fileInput.files[0].name, - task_type: endpoint.includes('ocr') ? 'ocr' : 'transcription', - created_at: new Date().toISOString(), - processed_filepath: null, - error_message: null + task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 
'transcription' : 'conversion'), + created_at: new Date().toISOString() }; - renderJobRow(preliminaryJob); // Render immediately - startPolling(result.job_id); // Start polling for updates + renderJobRow(preliminaryJob); + startPolling(result.job_id); } catch (error) { console.error('Error submitting job:', error); alert(`Submission failed: ${error.message}`); } finally { form.reset(); - fileNameDisplay.textContent = 'No file chosen'; - fileNameDisplay.title = ''; + if (fileNameDisplay) fileNameDisplay.textContent = 'No file chosen'; + + // MODIFICATION: Use the stored instance to correctly reset the dropdown + // without causing an error. + if (form.id === 'conversion-form' && conversionChoices) { + conversionChoices.clearInput(); + conversionChoices.setValue([]); // Clears the selected value + } + submitButton.disabled = false; } } async function handleCancelJob(jobId) { if (!confirm('Are you sure you want to cancel this job?')) return; - try { const response = await fetch(`/job/${jobId}/cancel`, { method: 'POST' }); if (!response.ok) { const errorData = await response.json(); throw new Error(errorData.detail || 'Failed to cancel job.'); } - // The polling mechanism will update the UI to "cancelled" automatically. - // We can stop polling immediately to be more efficient. stopPolling(jobId); - // Optionally, force an immediate UI update const row = document.getElementById(`job-${jobId}`); if (row) { const statusCell = row.querySelector('td[data-label="Status"] .cell-value'); const actionCell = row.querySelector('td[data-label="Action"] .cell-value'); - if (statusCell) { - statusCell.innerHTML = `cancelled`; - } - if (actionCell) { - actionCell.innerHTML = `-`; - } + if (statusCell) statusCell.innerHTML = `Cancelled`; + if (actionCell) actionCell.innerHTML = `-`; } } catch (error) { console.error('Error cancelling job:', error); @@ -107,7 +152,7 @@ document.addEventListener('DOMContentLoaded', () => { const response = await fetch('/jobs'); if (!response.ok) throw new Error('Failed to fetch jobs.'); const jobs = await response.json(); - jobListBody.innerHTML = ''; // Clear existing + jobListBody.innerHTML = ''; for (const job of jobs.reverse()) { renderJobRow(job); if (['pending', 'processing'].includes(job.status)) { @@ -126,10 +171,7 @@ document.addEventListener('DOMContentLoaded', () => { try { const response = await fetch(`/job/${jobId}`); if (!response.ok) { - if (response.status === 404) { - console.warn(`Job ${jobId} not found. Stopping poll.`); - stopPolling(jobId); - } + if (response.status === 404) stopPolling(jobId); return; } const job = await response.json(); @@ -139,9 +181,9 @@ document.addEventListener('DOMContentLoaded', () => { } } catch (error) { console.error(`Error polling for job ${jobId}:`, error); - stopPolling(jobId); // Stop polling on network or other errors + stopPolling(jobId); } - }, 2500); // Poll every 2.5 seconds + }, 2500); activePolls.set(jobId, intervalId); } @@ -159,21 +201,28 @@ document.addEventListener('DOMContentLoaded', () => { row.id = `job-${job.id}`; jobListBody.prepend(row); } - - const taskTypeLabel = job.task_type.includes('ocr') ? 'PDF/Image OCR' : 'Transcription'; - const formattedDate = new Date(job.created_at).toLocaleString(); - - let statusHtml = `${job.status}`; - let actionHtml = `-`; - - if (job.status === 'processing') { - // Show real progress for transcription, but an indeterminate one for OCR tasks - const progressClass = job.task_type === 'transcription' ? 
'' : 'indeterminate'; - const progressWidth = job.task_type === 'transcription' ? job.progress : 100; - const progressBarHtml = `
`; - statusHtml += progressBarHtml; + + let taskTypeLabel = 'Unknown'; + if (job.task_type === 'ocr' || job.task_type === 'ocr-image') { + taskTypeLabel = 'OCR'; + } else if (job.task_type === 'transcription') { + taskTypeLabel = 'Transcription'; + } else if (job.task_type === 'conversion' && job.processed_filepath) { + const extension = job.processed_filepath.split('.').pop(); + taskTypeLabel = `Convert to ${extension.toUpperCase()}`; + } else if (job.task_type === 'conversion') { + taskTypeLabel = 'Conversion'; } + const formattedDate = new Date(job.created_at).toLocaleString(); + let statusHtml = `${job.status}`; + if (job.status === 'processing') { + const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate'; + const progressWidth = job.task_type === 'transcription' ? job.progress : 100; + statusHtml += `
`; + } + + let actionHtml = `-`; if (job.status === 'pending' || job.status === 'processing') { actionHtml = ``; } else if (job.status === 'completed' && job.processed_filepath) { @@ -184,12 +233,10 @@ document.addEventListener('DOMContentLoaded', () => { actionHtml = `Failed`; } - // Use textContent for filename to prevent XSS and add a title for overflow - const escapedFilename = job.original_filename.replace(//g, ">"); - + const escapedFilename = job.original_filename ? job.original_filename.replace(//g, ">") : "No filename"; row.innerHTML = ` ${escapedFilename} - ${taskTypeLabel} + ${taskTypeLabel} ${formattedDate} ${statusHtml} ${actionHtml} @@ -197,5 +244,6 @@ document.addEventListener('DOMContentLoaded', () => { } // --- Initial Load --- + initializeConversionSelector(); loadInitialJobs(); }); \ No newline at end of file diff --git a/static/js/settings.js b/static/js/settings.js new file mode 100644 index 0000000..4ce6e1c --- /dev/null +++ b/static/js/settings.js @@ -0,0 +1,108 @@ +// static/js/settings.js +document.addEventListener('DOMContentLoaded', () => { + const settingsForm = document.getElementById('settings-form'); + const saveStatus = document.getElementById('save-status'); + const clearHistoryBtn = document.getElementById('clear-history-btn'); + const deleteFilesBtn = document.getElementById('delete-files-btn'); + + // --- Save Settings --- + settingsForm.addEventListener('submit', async (event) => { + event.preventDefault(); + saveStatus.textContent = 'Saving...'; + saveStatus.classList.remove('success', 'error'); + + const formData = new FormData(settingsForm); + const settingsObject = {}; + + // Convert FormData to a nested object + formData.forEach((value, key) => { + // Handle checkboxes that might not be submitted if unchecked + if (key.includes('ocr_settings')) { + const checkbox = document.querySelector(`[name="${key}"]`); + if (checkbox && checkbox.type === 'checkbox') { + value = checkbox.checked; + } + } + + const keys = key.split('.'); + let current = settingsObject; + keys.forEach((k, index) => { + if (index === keys.length - 1) { + current[k] = value; + } else { + current[k] = current[k] || {}; + current = current[k]; + } + }); + }); + + // Ensure unchecked OCR boxes are sent as false + const ocrCheckboxes = settingsForm.querySelectorAll('input[type="checkbox"][name^="ocr_settings"]'); + ocrCheckboxes.forEach(cb => { + const keys = cb.name.split('.'); + if (!formData.has(cb.name)) { + // this is a bit of a hack but gets the job done for this specific form + settingsObject[keys[0]][keys[1]][keys[2]] = false; + } + }); + + + try { + const response = await fetch('/settings/save', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(settingsObject) + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || 'Failed to save settings.'); + } + + saveStatus.textContent = 'Settings saved successfully!'; + saveStatus.classList.add('success'); + + } catch (error) { + saveStatus.textContent = `Error: ${error.message}`; + saveStatus.classList.add('error'); + console.error('Error saving settings:', error); + } finally { + setTimeout(() => { + saveStatus.textContent = ''; + saveStatus.classList.remove('success', 'error'); + }, 5000); + } + }); + + // --- Clear History --- + clearHistoryBtn.addEventListener('click', async () => { + if (!confirm('ARE YOU SURE?\n\nThis will permanently delete all job history records from the database.')) { + return; + } + try { + const response = 
await fetch('/settings/clear-history', { method: 'POST' }); + if (!response.ok) throw new Error('Server responded with an error.'); + const result = await response.json(); + alert(`Success: ${result.deleted_count} job records have been deleted.`); + } catch (error) { + alert('An error occurred while clearing history.'); + console.error(error); + } + }); + + // --- Delete Files --- + deleteFilesBtn.addEventListener('click', async () => { + if (!confirm('ARE YOU SURE?\n\nThis will permanently delete all files in the "processed" folder.')) { + return; + } + try { + const response = await fetch('/settings/delete-files', { method: 'POST' }); + if (!response.ok) throw new Error('Server responded with an error.'); + const result = await response.json(); + alert(`Success: ${result.deleted_count} files have been deleted.`); + } catch (error) { + alert('An error occurred while deleting files.'); + console.error(error); + } + }); +}); \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index 07adcaf..0246b10 100644 --- a/templates/index.html +++ b/templates/index.html @@ -3,7 +3,8 @@ - DocProcessor + File Wizard + @@ -11,17 +12,38 @@
+
+ ⚙️ +
+
-

DocProcessor 🚀

-

PDF OCR & Audio Transcription

+

File Wizard

+
+
+
+

File Conversion

+
+ + + No file chosen +
+
+ + +
+ +
+
+
+
-

📄 PDF OCR

+

PDF OCR

@@ -35,7 +57,7 @@
-

🎤 Transcribe Audio

+

Transcribe Audio

@@ -45,11 +67,11 @@
@@ -65,7 +87,7 @@ File - Type + Task Submitted Status Action @@ -78,6 +100,14 @@
+ + + + \ No newline at end of file diff --git a/templates/settings.html b/templates/settings.html new file mode 100644 index 0000000..6c76277 --- /dev/null +++ b/templates/settings.html @@ -0,0 +1,113 @@ + + + + + + Settings - File Wizard + + + + + + + +
+
+
+

Settings

+
+ ← Back +
+ +
+ + +
+

General Settings

+
+ + +
+
+ +
+

OCR (ocrmypdf)

+
+ + +
+
+ + +
+
+ + +
+
+ +
+

Transcription (Whisper)

+
+ + +
+
+ +
+

Conversion Tools

+

+ Edit the command-line templates for each conversion tool. The following placeholders are available: {input}, {output}, {output_dir}, {output_ext}. + Some tools may have additional placeholders; refer to the source code or documentation for details. +

+
+ {% for tool_id, tool in config.conversion_tools.items() %} +
+

{{ tool.name }}

+
+ + +
+
+ {% endfor %} +
+
+ +
+ +
+
+ + +
+ +
+

History Management

+

These actions are irreversible. Please be certain before proceeding.

+
+
+
+ Clear Job History +

Deletes all job records from the database. Processed files on disk will not be removed.

+
+ +
+
+
+ Delete Processed Files +

Deletes all files from the 'processed' directory. Database records will remain, but download links will be broken.

+
+ +
+
+
+
+
+ + + + \ No newline at end of file
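
Note on the command-template mechanism: the conversion pipeline in this diff expands the command_template strings from settings.yml with str.format and then tokenizes the result with shlex.split, after first checking every placeholder against an allow-list using string.Formatter. Below is a minimal standalone sketch of that technique; it is a hypothetical helper script, not part of the diff itself, and the allow-list simply mirrors ALLOWED_VARS from run_conversion_task in main.py.

# validate_template.py -- sketch of the placeholder validation used in run_conversion_task
import shlex
from string import Formatter

# Placeholders the backend is willing to substitute (mirrors ALLOWED_VARS in main.py)
ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality",
                "speed", "preset", "device", "dpi", "samplerate", "bitdepth"}

def validate_and_build_command(template_str: str, mapping: dict) -> list:
    """Reject unknown placeholders, then expand the template and split it into an argv list."""
    used = {field for _, field, _, _ in Formatter().parse(template_str) if field}
    bad = used - ALLOWED_VARS
    if bad:
        raise ValueError(f"Command template contains disallowed placeholders: {bad}")
    return shlex.split(template_str.format(**mapping))

if __name__ == "__main__":
    # Template copied from the pandoc entry in settings.yml
    template = "pandoc --standalone {input} -o {output}"
    print(validate_and_build_command(
        template, {"input": "/tmp/report.docx", "output": "/tmp/report.pdf"}))
    # -> ['pandoc', '--standalone', '/tmp/report.docx', '-o', '/tmp/report.pdf']

As in the diff, formatting happens before shlex.split, so a path containing spaces is split into separate tokens unless the placeholder is quoted inside the template, which is what the settings.yml comment about using quotes for arguments with spaces refers to.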