diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..73e9146 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +__pycache__ +uploads +processed +venv +core +huey.* +*.db + diff --git a/main.py b/main.py new file mode 100644 index 0000000..cbe7900 --- /dev/null +++ b/main.py @@ -0,0 +1,397 @@ +import logging +import shutil +import traceback +import uuid +from contextlib import asynccontextmanager +from datetime import datetime +from pathlib import Path +from typing import List, Set + +import ocrmypdf +import pypdf +import pytesseract +from PIL import Image +from faster_whisper import WhisperModel +# MODIFICATION: Added Form for model selection +from fastapi import (Depends, FastAPI, File, Form, HTTPException, Request, + UploadFile, status) +from fastapi.responses import FileResponse +from fastapi.staticfiles import StaticFiles +from fastapi.templating import Jinja2Templates +from huey import SqliteHuey +from pydantic import BaseModel, ConfigDict +from pydantic_settings import BaseSettings +from sqlalchemy import (Column, DateTime, Integer, String, Text, + create_engine) +from sqlalchemy.orm import Session, declarative_base, sessionmaker +from werkzeug.utils import secure_filename + +# -------------------------------------------------------------------------------- +# --- 1. 
# CONFIGURATION
# --------------------------------------------------------------------------------
class Settings(BaseSettings):
    """Central configuration for paths, storage, Whisper options and upload limits.

    Extension sets hold lowercase suffixes *including* the leading dot, because
    they are compared against ``Path(filename).suffix.lower()``.
    """

    BASE_DIR: Path = Path(__file__).resolve().parent
    UPLOADS_DIR: Path = BASE_DIR / "uploads"
    PROCESSED_DIR: Path = BASE_DIR / "processed"
    DATABASE_URL: str = f"sqlite:///{BASE_DIR / 'jobs.db'}"
    HUEY_DB_PATH: str = str(BASE_DIR / "huey.db")
    # MODIFICATION: Removed hardcoded model size, added a set of allowed models
    WHISPER_COMPUTE_TYPE: str = "int8"
    ALLOWED_WHISPER_MODELS: Set[str] = {"tiny", "base", "small", "medium", "large-v3", "distil-large-v2"}
    MAX_FILE_SIZE_BYTES: int = 500 * 1024 * 1024  # 500 MB
    ALLOWED_PDF_EXTENSIONS: set = {".pdf"}
    ALLOWED_IMAGE_EXTENSIONS: set = {".png", ".jpg", ".jpeg", ".tiff", ".tif"}
    # Every entry needs its leading dot; a bare "m4a" can never equal the
    # ".m4a" that Path.suffix returns, which made .m4a uploads unacceptable.
    ALLOWED_AUDIO_EXTENSIONS: set = {".mp3", ".m4a", ".ogg", ".flac", ".opus"}


settings = Settings()

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# parents=True keeps start-up working when the directories are configured
# to a nested location that does not exist yet.
settings.UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
settings.PROCESSED_DIR.mkdir(parents=True, exist_ok=True)


# --------------------------------------------------------------------------------
# --- 2.
# DATABASE (for Job Tracking) - NO CHANGES
# --------------------------------------------------------------------------------
# check_same_thread=False is passed through to the SQLite driver.
# NOTE(review): presumably so one connection can be used from several threads — confirm.
engine = create_engine(settings.DATABASE_URL, connect_args={"check_same_thread": False})
# Sessions neither autocommit nor autoflush: every write path must call commit() itself.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()

# One row per submitted processing job (table "jobs").
class Job(Base):
    __tablename__ = "jobs"
    # Job identifier; the upload endpoints in this file pass uuid4().hex.
    id = Column(String, primary_key=True, index=True)
    # Kind of work; values written in this file: "ocr", "ocr-image", "transcription".
    task_type = Column(String, index=True)
    # Lifecycle state; values used in this file: "pending", "processing",
    # "completed", "failed", "cancelled".
    status = Column(String, default="pending")
    # Completion percentage (0 on creation, set to 100 on completion).
    progress = Column(Integer, default=0)
    # File name exactly as supplied by the client.
    original_filename = Column(String)
    # Where the upload was stored.
    input_filepath = Column(String)
    # Where the result is (or will be) written.
    processed_filepath = Column(String, nullable=True)
    # Leading part of the extracted text, filled in on completion.
    result_preview = Column(Text, nullable=True)
    # Error text recorded when a task fails.
    error_message = Column(Text, nullable=True)
    # Naive UTC timestamps produced by datetime.utcnow.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

def get_db():
    """Yield a new database session and always close it afterwards.

    Used as a FastAPI dependency (``Depends(get_db)``) by the endpoints.
    """
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


# --------------------------------------------------------------------------------
# --- 3. PYDANTIC SCHEMAS (Data Validation) - NO CHANGES
# --------------------------------------------------------------------------------
# Fields required to insert a new Job row; unpacked straight into Job(**...).
class JobCreate(BaseModel):
    id: str
    task_type: str
    original_filename: str
    input_filepath: str
    processed_filepath: str | None = None

# Job representation returned by the API; from_attributes=True lets it be
# built directly from a Job ORM instance.
class JobSchema(BaseModel):
    id: str
    task_type: str
    status: str
    progress: int
    original_filename: str
    processed_filepath: str | None = None
    result_preview: str | None = None
    error_message: str | None = None
    created_at: datetime
    updated_at: datetime
    model_config = ConfigDict(from_attributes=True)


# --------------------------------------------------------------------------------
# --- 4.
# CRUD OPERATIONS (Database Interactions)
# --------------------------------------------------------------------------------
def get_job(db: Session, job_id: str):
    """Return the job with id *job_id*, or ``None`` when no such row exists.

    ``populate_existing()`` forces the row just read to overwrite an instance
    the session has already loaded. Without it, a long-lived worker session
    keeps returning the attribute values it loaded earlier and never notices
    a cancellation written by the API process.
    """
    return db.query(Job).populate_existing().filter(Job.id == job_id).first()


def get_jobs(db: Session, skip: int = 0, limit: int = 100):
    """Return up to *limit* jobs, newest first, skipping the first *skip*."""
    return db.query(Job).order_by(Job.created_at.desc()).offset(skip).limit(limit).all()


def create_job(db: Session, job: JobCreate):
    """Insert a new job row built from *job* and return the persisted instance."""
    db_job = Job(**job.model_dump())
    db.add(db_job)
    db.commit()
    db.refresh(db_job)
    return db_job


def update_job_status(
    db: Session,
    job_id: str,
    status: str,
    progress: int | None = None,
    error: str | None = None,
):
    """Set the status (and optionally progress / error message) of a job.

    A job that is already ``"cancelled"`` is treated as final: any attempt to
    move it to another status is ignored, so a worker that is still running
    cannot undo a user's cancellation (mirrors ``mark_job_as_completed``).

    Returns the job instance, or ``None`` when the job does not exist.
    """
    db_job = get_job(db, job_id)
    if db_job is None:
        return None
    if db_job.status == "cancelled" and status != "cancelled":
        return db_job

    db_job.status = status
    if progress is not None:
        db_job.progress = progress
    if error:
        db_job.error_message = error
    db.commit()
    db.refresh(db_job)
    return db_job


def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
    """Mark a job completed (progress 100) unless it is missing or cancelled.

    *preview*, when given, is stripped and truncated to 2000 characters
    before being stored. Returns the job instance or ``None``.
    """
    db_job = get_job(db, job_id)
    if db_job and db_job.status != 'cancelled':
        db_job.status = "completed"
        db_job.progress = 100
        if preview:
            db_job.result_preview = preview.strip()[:2000]
        db.commit()
    return db_job


# --------------------------------------------------------------------------------
# --- 5. BACKGROUND TASKS (Huey)
# --------------------------------------------------------------------------------
huey = SqliteHuey(filename=settings.HUEY_DB_PATH)

# MODIFICATION: Removed global whisper model and lazy loader.
# The model will now be loaded inside the task itself based on user selection.

@huey.task()
def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
    """OCR the PDF at *input_path_str* into *output_path_str* and record the result.

    The job is skipped when it is missing or already cancelled. On success the
    text extracted from the output PDF is stored as the job preview; on any
    error the job is marked failed with the error text. The uploaded input
    file is always deleted afterwards.
    """
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
        if not job or job.status == 'cancelled':
            logger.info(f"Job {job_id} was cancelled before starting.")
            return

        update_job_status(db, job_id, "processing")
        logger.info(f"Starting PDF OCR for job {job_id}")

        ocrmypdf.ocr(input_path_str, output_path_str, deskew=True, force_ocr=True, clean=True, optimize=1, progress_bar=False)

        with open(output_path_str, "rb") as f:
            reader = pypdf.PdfReader(f)
            preview = "\n".join(page.extract_text() or "" for page in reader.pages)
        # OCR can run for a long time; drop cached ORM state so the
        # cancellation check inside mark_job_as_completed reads the current row.
        db.expire_all()
        mark_job_as_completed(db, job_id, preview=preview)
        logger.info(f"PDF OCR for job {job_id} completed.")
    except Exception as e:
        logger.error(f"ERROR during PDF OCR for job {job_id}: {e}\n{traceback.format_exc()}")
        update_job_status(db, job_id, "failed", error=str(e))
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        db.close()


@huey.task()
def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
    """Run Tesseract on the image at *input_path_str* and write the text to *output_path_str*.

    The job is skipped when it is missing or already cancelled. The recognised
    text is written as UTF-8 and also stored as the job preview; on any error
    the job is marked failed. The uploaded input file is always deleted
    afterwards.
    """
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
        if not job or job.status == 'cancelled':
            logger.info(f"Job {job_id} was cancelled before starting.")
            return

        update_job_status(db, job_id, "processing", progress=50)
        logger.info(f"Starting Image OCR for job {job_id}")
        # Open the image in a context manager so its file handle is released
        # before the input file is unlinked in the finally block.
        with Image.open(input_path_str) as image:
            text = pytesseract.image_to_string(image)
        with open(output_path_str, "w", encoding="utf-8") as f:
            f.write(text)
        # Re-read the row so a cancellation issued meanwhile is respected.
        db.expire_all()
        mark_job_as_completed(db, job_id, preview=text)
        logger.info(f"Image OCR for job {job_id} completed.")
    except Exception as e:
        logger.error(f"ERROR during Image OCR for job {job_id}: {e}\n{traceback.format_exc()}")
        update_job_status(db, job_id, "failed", error=str(e))
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        db.close()


# MODIFICATION: The task now accepts `model_size` and loads the model dynamically.
@huey.task()
def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str):
    """Transcribe the audio at *input_path_str* with the chosen Whisper model.

    Loads ``WhisperModel(model_size)`` on CPU, iterates over the segments,
    updates the job's progress from each segment's end time, and writes the
    newline-joined transcript to *output_path_str*. The job is abandoned as
    soon as it is found cancelled (or deleted). On any error the job is marked
    failed. The uploaded input file is always deleted afterwards.
    """
    db = SessionLocal()
    try:
        job = get_job(db, job_id)
        if not job or job.status == 'cancelled':
            logger.info(f"Job {job_id} was cancelled before starting.")
            return

        update_job_status(db, job_id, "processing")

        # Load the specified model for this task
        logger.info(f"Loading faster-whisper model: {model_size} for job {job_id}...")
        model = WhisperModel(
            model_size,
            device="cpu",
            compute_type=settings.WHISPER_COMPUTE_TYPE
        )
        logger.info(f"Whisper model '{model_size}' loaded successfully.")

        logger.info(f"Starting transcription for job {job_id}")
        segments, info = model.transcribe(input_path_str, beam_size=5)

        full_transcript = []
        total_duration = info.duration
        last_progress = None
        for segment in segments:
            # Discard cached ORM state so the status read below comes from the
            # database, not from what this session loaded earlier.
            db.expire_all()
            job_check = get_job(db, job_id)
            if job_check is None or job_check.status == 'cancelled':
                logger.info(f"Job {job_id} cancelled during transcription.")
                return

            # Update progress based on the segment's end time
            if total_duration > 0:
                progress = min(100, int((segment.end / total_duration) * 100))
                # Only touch the database when the percentage actually moved.
                if progress != last_progress:
                    update_job_status(db, job_id, "processing", progress=progress)
                    last_progress = progress
            full_transcript.append(segment.text.strip())

        transcript_text = "\n".join(full_transcript)
        with open(output_path_str, "w", encoding="utf-8") as f:
            f.write(transcript_text)
        # Final fresh read so a last-moment cancellation is not overwritten.
        db.expire_all()
        mark_job_as_completed(db, job_id, preview=transcript_text)
        logger.info(f"Transcription for job {job_id} completed.")
    except Exception as e:
        logger.error(f"ERROR during transcription for job {job_id}: {e}\n{traceback.format_exc()}")
        update_job_status(db, job_id, "failed", error=str(e))
    finally:
        Path(input_path_str).unlink(missing_ok=True)
        db.close()


# --------------------------------------------------------------------------------
# --- 6.
# FASTAPI APPLICATION
# --------------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the database tables on start-up and log start-up / shutdown."""
    logger.info("Application starting up...")
    Base.metadata.create_all(bind=engine)
    yield
    logger.info("Application shutting down...")


app = FastAPI(lifespan=lifespan)
app.mount("/static", StaticFiles(directory=settings.BASE_DIR / "static"), name="static")
templates = Jinja2Templates(directory=settings.BASE_DIR / "templates")


# --- Helper Functions ---
async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
    """Stream *upload_file* to *destination* in 1 MB chunks, enforcing the size limit.

    Raises:
        HTTPException: 413 when the upload exceeds ``settings.MAX_FILE_SIZE_BYTES``.

    Whenever the copy does not finish (limit exceeded, I/O error, request
    cancelled) the partially written file is removed before the exception
    propagates, so rejected uploads do not accumulate on disk.
    """
    size = 0
    try:
        with open(destination, "wb") as buffer:
            while chunk := await upload_file.read(1024 * 1024):  # 1MB chunks
                size += len(chunk)
                if size > settings.MAX_FILE_SIZE_BYTES:
                    raise HTTPException(
                        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                        detail=f"File exceeds limit of {settings.MAX_FILE_SIZE_BYTES // 1024 // 1024} MB"
                    )
                buffer.write(chunk)
    except BaseException:
        # BaseException so task cancellation is covered as well; always re-raised.
        Path(destination).unlink(missing_ok=True)
        raise


def is_allowed_file(filename: str | None, allowed_extensions: set) -> bool:
    """Return True when *filename* has a suffix (case-insensitive) in *allowed_extensions*.

    A missing or empty filename is simply not allowed.
    """
    if not filename:
        return False
    return Path(filename).suffix.lower() in allowed_extensions


# --- API Endpoints ---
@app.get("/")
async def get_index(request: Request):
    """Render the index page, passing the selectable Whisper models in sorted order."""
    # MODIFICATION: Pass available models to the template
    return templates.TemplateResponse("index.html", {
        "request": request,
        "whisper_models": sorted(settings.ALLOWED_WHISPER_MODELS)
    })


@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Accept a PDF upload, store it, create a job row and queue the OCR task."""
    if not is_allowed_file(file.filename, settings.ALLOWED_PDF_EXTENSIONS):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PDF.")

    job_id = uuid.uuid4().hex
    safe_basename = secure_filename(file.filename)
    # The job id in the name keeps files from different uploads apart.
    unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}"
    upload_path = settings.UPLOADS_DIR / unique_filename
    processed_path = settings.PROCESSED_DIR / unique_filename

    await save_upload_file_chunked(file, upload_path)

    job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)

    run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path))
    return {"job_id": new_job.id, "status": new_job.status}


@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
    """Accept an image upload, store it, create a job row and queue the OCR task.

    The result is a ``.txt`` file named after the upload's stem and the job id.
    """
    if not is_allowed_file(file.filename, settings.ALLOWED_IMAGE_EXTENSIONS):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file type. Please upload a PNG, JPG, or TIFF.")

    job_id = uuid.uuid4().hex
    safe_basename = secure_filename(file.filename)
    file_ext = Path(safe_basename).suffix
    unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}"
    upload_path = settings.UPLOADS_DIR / unique_filename
    processed_path = settings.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt"

    await save_upload_file_chunked(file, upload_path)

    job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)

    run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
    return {"job_id": new_job.id, "status": new_job.status}


# MODIFICATION: Endpoint now accepts `model_size` as form data.
# Accept an audio upload plus a Whisper model name (form field "model_size",
# default "base"), store the file, create a job row and queue transcription.
# Responds 400 for a disallowed file extension or an unknown model size.
@app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED)
async def submit_audio_transcription(
    file: UploadFile = File(...),
    model_size: str = Form("base"),
    db: Session = Depends(get_db)
):
    if not is_allowed_file(file.filename, settings.ALLOWED_AUDIO_EXTENSIONS):
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid audio file type.")

    # Validate the selected model size
    if model_size not in settings.ALLOWED_WHISPER_MODELS:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid model size: {model_size}.")

    job_id = uuid.uuid4().hex
    safe_basename = secure_filename(file.filename)
    stem, suffix = Path(safe_basename).stem, Path(safe_basename).suffix

    # Stored audio keeps its suffix; the transcript is a .txt with the same stem and job id.
    audio_filename = f"{stem}_{job_id}{suffix}"
    transcript_filename = f"{stem}_{job_id}.txt"
    upload_path = settings.UPLOADS_DIR / audio_filename
    processed_path = settings.PROCESSED_DIR / transcript_filename

    await save_upload_file_chunked(file, upload_path)

    job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
    new_job = create_job(db=db, job=job_data)

    # Pass the selected model size to the background task
    run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size)
    return {"job_id": new_job.id, "status": new_job.status}

# Request cancellation of a job. Only "pending" and "processing" jobs can be
# cancelled; unknown ids give 404 and jobs in any other state give 400.
# This only records the "cancelled" status; the background tasks in this file
# check that status themselves.
@app.post("/job/{job_id}/cancel", status_code=status.HTTP_200_OK)
async def cancel_job(job_id: str, db: Session = Depends(get_db)):
    job = get_job(db, job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found.")
    if job.status in ["pending", "processing"]:
        update_job_status(db, job_id, status="cancelled")
        return {"message": "Job cancellation requested."}
    raise HTTPException(status_code=400, detail=f"Job is already in a final state ({job.status}).")

# List jobs using get_jobs' defaults (newest first, at most 100).
@app.get("/jobs", response_model=List[JobSchema])
async def get_all_jobs(db: Session = Depends(get_db)):
    return get_jobs(db)

# Return a single job, or 404 when the id is unknown.
@app.get("/job/{job_id}", response_model=JobSchema)
async def get_job_status(job_id: str, db: Session = Depends(get_db)):
    job = get_job(db, job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found.")
    return job

# Serve a result file from PROCESSED_DIR as a download.
@app.get("/download/{filename}")
async def download_file(filename: str):
    # The name is sanitised with secure_filename first ...
    safe_filename = secure_filename(filename)
    file_path = settings.PROCESSED_DIR / safe_filename

    # ... and the resolved path must still lie inside PROCESSED_DIR.
    if not file_path.resolve().is_relative_to(settings.PROCESSED_DIR.resolve()):
        raise HTTPException(status_code=403, detail="Access denied.")

    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found.")

    return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream")
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..65dea7d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,22 @@ +# requirements.txt + +# Web framework +fastapi +uvicorn[standard] +python-multipart +jinja2 + +# PDF OCR +ocrmypdf +PyPDF2 + +# Audio Transcription +faster-whisper +# The following are core dependencies for faster-whisper, +# but it's good to list them explicitly. +# ctranslate2 +# transformers +# torch # Note: torch is a dependency of transformers + +# Utilities +werkzeug diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..3f9f0a9 --- /dev/null +++ b/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# This script starts the FastAPI application using Gunicorn. + +echo "Starting DocProcessor with Gunicorn..." + +exec gunicorn -w 1 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 & +echo "Done" +echo "Starting huey..." 
+exec huey_consumer.py main.huey & +echo "Done" diff --git a/static/css/style.css b/static/css/style.css new file mode 100644 index 0000000..9de15da --- /dev/null +++ b/static/css/style.css @@ -0,0 +1,436 @@ +/* static/css/style.css */ + +:root { + /* Core */ + --bg-color: #000000; + --surface: #111111; + --card-bg: #0b0b0b; + --text-color: #e6eef6; + --muted-text: #9aa4ad; + + /* Accent / interactive */ + --primary-color: #00b4ff; + --primary-hover: #00d0ff; + --success-color: #26c281; + --error-color: #ff6b6b; + --cancel-color: #f39c12; /* Orange for cancelled */ + + /* Borders / dividers */ + --border-color: rgba(255, 255, 255, 0.1); + --divider-color: rgba(255, 255, 255, 0.06); + + --font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; +} + +/* Page */ +html { + box-sizing: border-box; +} +*, *:before, *:after { + box-sizing: inherit; +} + +body { + font-family: var(--font-family); + background-color: var(--bg-color); + color: var(--text-color); + margin: 0; + line-height: 1.5; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +/* Container */ +.container { + width: 100%; + max-width: 960px; + margin: 0 auto; + background: var(--card-bg); + border-radius: 10px; + padding: 1.5rem; + border: 1px solid var(--border-color); +} +@media (max-width: 768px) { + .container { + padding: 1rem; + border: none; + border-radius: 0; + } +} + + +/* Header */ +header { + text-align: center; + margin-bottom: 1.5rem; + padding-bottom: 1rem; + border-bottom: 1px solid var(--divider-color); +} + +header h1 { + margin: 0 0 0.25rem 0; + font-size: 1.75rem; + font-weight: 700; +} + +header p { + margin: 0; + color: var(--muted-text); + font-size: 1rem; +} + +/* Form Layout */ +.form-grid { + display: grid; + grid-template-columns: 1fr; + gap: 1.5rem; + margin-bottom: 2rem; +} +@media (min-width: 768px) { + .form-grid { + grid-template-columns: 1fr 1fr; + } +} + +.upload-form fieldset { + 
border: 1px solid var(--border-color); + border-radius: 8px; + padding: 1rem; + margin: 0; + background: transparent; + height: 100%; + display: flex; + flex-direction: column; +} + +.upload-form legend { + padding: 0 0.5rem; + font-weight: 500; + color: var(--primary-color); +} + +.upload-form legend h2 { + margin: 0; + font-size: 1.25rem; +} + +/* File input */ +.file-input-wrapper { + position: relative; + display: flex; + align-items: center; + gap: 1rem; + margin-top: 1rem; + margin-bottom: 1rem; +} + +input[type="file"] { + opacity: 0; + position: absolute; + width: 100%; + height: 100%; + cursor: pointer; + text-overflow: ellipsis; + inset: 0; +} + +.file-input-label { + background-color: rgba(255,255,255,0.05); + color: var(--muted-text); + padding: 0.5rem 1rem; + border-radius: 5px; + cursor: pointer; + transition: background-color 0.15s ease; + font-weight: 500; + text-overflow: ellipsis; + + border: 1px solid rgba(255,255,255,0.1); + white-space: nowrap; +} +.file-input-label:hover { + background-color: rgba(255,255,255,0.1); +} + +.file-name { + font-size: 0.9rem; + font-style: italic; + color: var(--muted-text); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + min-width: 0; + max-width: 10em; +} + +/* --- STYLES FOR DROPDOWN MENU --- */ +.form-control { + margin-bottom: 1rem; +} + +.form-control label { + display: block; + font-size: 0.9rem; + font-weight: 500; + color: var(--muted-text); + margin-bottom: 0.4rem; +} + +.form-control select { + width: 100%; + background-color: rgba(255,255,255,0.05); + color: var(--text-color); + padding: 0.5rem 0.8rem; + border-radius: 5px; + border: 1px solid var(--border-color); + font-family: inherit; + font-size: 0.95rem; + -webkit-appearance: none; + -moz-appearance: none; + appearance: none; + background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 16 16'%3e%3cpath fill='none' stroke='%239aa4ad' stroke-linecap='round' stroke-linejoin='round' 
stroke-width='2' d='M2 5l6 6 6-6'/%3e%3c/svg%3e"); + background-repeat: no-repeat; + background-position: right 0.7rem center; + background-size: 1em; + cursor: pointer; + transition: border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out; +} + +.form-control select:focus { + outline: none; + border-color: var(--primary-color); + box-shadow: 0 0 0 2px rgba(0, 180, 255, 0.2); +} +/* ---------------------------------- */ + +/* Submit button */ +button[type="submit"] { + display: block; + width: 100%; + background: var(--primary-color); + color: #00161b; + border: none; + padding: 0.65rem 1rem; + font-size: 1rem; + font-weight: 600; + border-radius: 5px; + cursor: pointer; + transition: background-color 0.15s ease; + margin-top: auto; /* Pushes button to the bottom */ +} +button[type="submit"]:hover { + background: var(--primary-hover); +} +button[type="submit"]:disabled { + background-color: var(--muted-text); + cursor: not-allowed; +} + +/* History Section */ +#job-history h2 { + text-align: center; + color: var(--muted-text); + font-size: 1.25rem; + margin-bottom: 1rem; + border-top: 1px solid var(--divider-color); + padding-top: 2rem; +} + +.table-wrapper { + overflow-x: auto; + border: 1px solid var(--border-color); + border-radius: 8px; + background-color: var(--surface); +} + +#job-table { + width: 100%; + border-collapse: collapse; + font-size: 0.9rem; +} + +#job-table th, +#job-table td { + padding: 0.75rem; + text-align: left; + vertical-align: middle; + border-bottom: 1px solid var(--divider-color); +} +#job-table tbody tr:last-child td { + border-bottom: none; +} + +#job-table th { + color: var(--muted-text); + font-weight: 500; + text-transform: uppercase; + font-size: 0.75rem; + letter-spacing: 0.5px; + white-space: nowrap; +} + +#job-table td[data-label="File"] { + max-width: 250px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.action-col { + text-align: center; + width: 100px; +} +#job-table td.action-col { + 
text-align: center; +} + +/* Status Badge */ +.job-status-badge { + padding: 0.2rem 0.6rem; + border-radius: 12px; + font-size: 0.75rem; + font-weight: 600; + text-transform: capitalize; + display: inline-block; + white-space: nowrap; +} +.status-pending, .status-processing { + background-color: rgba(0,180,255,0.15); + color: var(--primary-hover); +} +.status-completed { + background-color: rgba(38,194,129,0.15); + color: var(--success-color); +} +.status-failed { + background-color: rgba(255,107,107,0.15); + color: var(--error-color); +} +.status-cancelled { + background-color: rgba(243, 156, 18, 0.15); + color: var(--cancel-color); +} +.error-text { + color: var(--error-color); + cursor: help; +} + +/* Progress Bar */ +.progress-bar-container { + height: 6px; + width: 100%; + background-color: rgba(255,255,255,0.1); + border-radius: 3px; + margin-top: 6px; + overflow: hidden; +} +.progress-bar { + height: 100%; + background-color: var(--primary-color); + border-radius: 3px; + transition: width 0.5s ease-in-out; +} + +.progress-bar.indeterminate { + width: 100%; + background: linear-gradient( + 90deg, + rgba(0,180,255,0.4) 25%, + rgba(0,180,255,1) 50%, + rgba(0,180,255,0.4) 75% + ); + background-size: 200% 100%; + animation: indeterminate-scroll 2s linear infinite; +} + +@keyframes indeterminate-scroll { + 0% { background-position: 200% 0; } + 100% { background-position: -200% 0; } +} + +/* Action items */ +.download-button { + background-color: var(--success-color); + color: #00160b; + padding: 0.3rem 0.8rem; + text-decoration: none; + border-radius: 5px; + font-weight: 600; + font-size: 0.85rem; + display: inline-block; + transition: transform 0.1s ease; + border: none; +} +.download-button:hover { transform: scale(1.05); } + +.cancel-button { + background-color: var(--error-color); + color: #1a0000; + padding: 0.3rem 0.8rem; + border-radius: 5px; + font-weight: 600; + font-size: 0.85rem; + display: inline-block; + transition: transform 0.1s ease; + border: 
none; + cursor: pointer; +} +.cancel-button:hover { background-color: #ff8f8f; } + + +.spinner-small { + border: 3px solid rgba(255,255,255,0.1); + border-top: 3px solid var(--primary-color); + border-radius: 50%; + width: 20px; + height: 20px; + animation: spin 1s linear infinite; + margin: 0 auto; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +@media (max-width: 768px) { + .table-wrapper { + border: none; + background-color: transparent; + } + #job-table thead { + display: none; + } + #job-table, #job-table tbody, #job-table tr, #job-table td { + display: block; + width: 100%; + } + #job-table tr { + margin-bottom: 1rem; + border: 1px solid var(--border-color); + border-radius: 8px; + background-color: var(--surface); + } + #job-table td { + display: flex; + justify-content: space-between; + align-items: center; + padding: 0.75rem 1rem; + border-bottom: 1px solid var(--divider-color); + white-space: normal; + } + #job-table tr td:last-child { + border-bottom: none; + } + #job-table td::before { + content: attr(data-label); + font-weight: 500; + color: var(--muted-text); + padding-right: 1rem; + flex-shrink: 0; + } + .cell-value { + text-align: right; + min-width: 0; + word-break: break-all; + } +} \ No newline at end of file diff --git a/static/css/style.old b/static/css/style.old new file mode 100644 index 0000000..b3851b5 --- /dev/null +++ b/static/css/style.old @@ -0,0 +1,345 @@ +/* static/css/style.css */ + +:root { + /* Core */ + --bg-color: #000000; /* true black for AMOLED */ + --surface: #070707; /* slightly off-black surfaces */ + --card-bg: #0b0b0b; /* card backgrounds */ + --muted-surface: #0f0f10; /* for subtle panels */ + --text-color: #e6eef6; /* primary text - very light */ + --muted-text: #9aa4ad; /* secondary text */ + --mono-text: #d6e6f0; /* for code/monospace previews */ + + /* Accent / interactive */ + --primary-color: #00b4ff; /* bright cyan-blue accent */ + --primary-hover: #00d0ff; + 
--primary-pressed: #0090cc; + --success-color: #26c281; /* success accent */ + --error-color: #ff6b6b; /* error accent */ + + /* Borders / dividers (kept very subtle or transparent to favor AMOLED) */ + --border-color: rgba(255,255,255,0.06); + --divider-color: rgba(255,255,255,0.04); + + --focus-glow: 0 0 20px rgba(0,180,255,0.14); + + --font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; +} + +/* Page */ +html, body { + height: 100%; +} + +body { + font-family: var(--font-family); + background-color: var(--bg-color); + color: var(--text-color); + margin: 0; + padding: 2rem 1rem; + line-height: 1.6; + display: flex; + justify-content: center; + align-items: flex-start; + min-height: 100vh; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +/* Container / card */ +.container { + width: 100%; + max-width: 800px; + background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01)) , var(--card-bg); + border-radius: 12px; + padding: 2rem; + border: 1px solid var(--border-color); + box-shadow: none; /* minimal shadow on true black backgrounds */ + transition: background-color .15s ease, border-color .15s ease; +} + +/* Header */ +header { + text-align: center; + margin-bottom: 2rem; + border-bottom: 1px solid var(--divider-color); + padding-bottom: 1.5rem; +} + +header h1 { + margin: 0; + font-size: 2.25rem; + font-weight: 700; + color: var(--text-color); +} + +header p { + color: var(--muted-text); + font-size: 1.1rem; +} + +/* Sections */ +.processor-section { + margin-bottom: 2rem; +} + +.upload-form fieldset { + border: 1px solid var(--border-color); + border-radius: 8px; + padding: 1.5rem; + margin: 0; + background: transparent; +} + +.upload-form legend { + padding: 0 0.5rem; + font-weight: 500; + color: var(--primary-color); +} + +.upload-form legend h2 { + margin: 0; + font-size: 1.5rem; +} + +.upload-form p { + margin-top: 0.5rem; + margin-bottom: 
1.5rem; + color: var(--muted-text); +} + +/* File input */ +.file-input-wrapper { + position: relative; + display: flex; + align-items: center; + gap: 1rem; + margin-bottom: 1.5rem; +} + +input[type="file"] { + opacity: 0; + position: absolute; + width: 100%; + height: 100%; + cursor: pointer; + inset: 0; +} + +.file-input-label { + background-color: rgba(255,255,255,0.02); + color: var(--muted-text); + padding: 0.75rem 1.25rem; + border-radius: 6px; + cursor: pointer; + transition: background-color 0.12s ease, transform 0.06s ease; + font-weight: 500; + border: 1px solid rgba(255,255,255,0.03); + backdrop-filter: blur(4px); +} + +.file-input-label:hover { + background-color: rgba(255,255,255,0.03); + transform: translateY(-1px); +} + +.file-name { + font-style: italic; + color: var(--muted-text); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +/* Primary submit button */ +button[type="submit"] { + display: block; + width: 100%; + background: linear-gradient(180deg, var(--primary-color), var(--primary-pressed)); + color: #00161b; + border: none; + padding: 0.85rem 1rem; + font-size: 1.1rem; + font-weight: 600; + border-radius: 8px; + cursor: pointer; + transition: transform 0.08s ease, box-shadow 0.12s ease; + box-shadow: var(--focus-glow); +} + +button[type="submit"]:hover { + background: linear-gradient(180deg, var(--primary-hover), var(--primary-color)); + transform: translateY(-2px); +} + +button[type="submit"]:active { + transform: translateY(0); +} + +/* Divider */ +.divider { + border: none; + border-top: 1px solid var(--divider-color); + margin: 3rem 0; +} + +/* --- NEW STYLES for Job History --- */ +#job-history h2 { + text-align: center; + color: var(--muted-text); + margin-bottom: 1.5rem; +} + +.job-list { + display: flex; + flex-direction: column; + gap: 1rem; +} + +.job-card { + padding: 1.5rem; + border-radius: 8px; + background: var(--muted-surface); + border: 1px solid var(--border-color); + transition: all 0.2s 
ease-in-out; +} + +.job-card-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1rem; +} + +.job-card-header h3 { + margin: 0; + font-size: 1.1rem; + word-break: break-all; +} + +.job-status-badge { + padding: 0.25rem 0.75rem; + border-radius: 12px; + font-size: 0.8rem; + font-weight: 600; + text-transform: uppercase; +} + +/* --- Job Status Variations --- */ +.job-card[data-status="pending"], +.job-card[data-status="processing"] { + border-left: 4px solid var(--primary-color); +} +.job-status-badge.status-pending, +.job-status-badge.status-processing { + background-color: rgba(0,180,255,0.1); + color: var(--primary-hover); +} + +.job-card[data-status="completed"] { + border-left: 4px solid var(--success-color); +} +.job-status-badge.status-completed { + background-color: rgba(38,194,129,0.1); + color: var(--success-color); +} + +.job-card[data-status="failed"] { + border-left: 4px solid var(--error-color); +} +.job-status-badge.status-failed { + background-color: rgba(255,107,107,0.1); + color: var(--error-color); +} +/* --- End Status Variations --- */ + +.job-card-body { + padding-top: 1rem; + border-top: 1px solid var(--divider-color); +} + +.job-card .processing-indicator { + display: flex; + align-items: center; + gap: 0.75rem; + color: var(--muted-text); +} + +.job-card .error-message { + background-color: rgba(255,107,107,0.05); + padding: 0.75rem; + border-radius: 4px; + color: var(--error-color); + font-family: ui-monospace, monospace; + font-size: 0.9rem; +} + +/* --- (Copied from old .result-area) --- */ +.download-button { + display: inline-block; + background: linear-gradient(180deg, var(--success-color), #1f9b63); + color: #00160b; + padding: 0.75rem 1.5rem; + text-decoration: none; + border-radius: 6px; + font-weight: 600; + margin-bottom: 1.5rem; + transition: transform 0.08s ease; +} +.download-button:hover { transform: translateY(-2px); } + +.text-preview { + background-color: 
rgba(255,255,255,0.01); + border: 1px solid var(--border-color); + border-radius: 6px; + padding: 1rem; + max-height: 300px; + overflow-y: auto; + white-space: pre-wrap; + word-wrap: break-word; + font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", "Courier New", monospace; + font-size: 0.9rem; + color: var(--mono-text); +} +/* --- End Copied Styles --- */ + + +/* Spinner animation (no longer in an overlay) */ +.spinner { + border: 3px solid rgba(255,255,255,0.06); + border-top: 3px solid var(--primary-color); + border-radius: 50%; + width: 24px; + height: 24px; + animation: spin 1s linear infinite; +} + +/* focus states for keyboard users */ +:focus { + outline: none; +} + +button:focus, a:focus, .file-input-label:focus, input:focus, textarea:focus { + box-shadow: var(--focus-glow); + border-color: rgba(0,180,255,0.24); + transform: none; +} + +/* small responsive tweaks */ +@media (max-width: 480px) { + body { padding: 1.25rem; } + header h1 { font-size: 1.6rem; } + .container { padding: 1.25rem; border-radius: 10px; } +} + +/* animation keyframes */ +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +/* Text selection (nice accent on black) */ +::selection { + background: rgba(0,180,255,0.18); + color: var(--text-color); +} diff --git a/static/js/script.js b/static/js/script.js new file mode 100644 index 0000000..da2eb4c --- /dev/null +++ b/static/js/script.js @@ -0,0 +1,201 @@ +// static/js/script.js + +document.addEventListener('DOMContentLoaded', () => { + const jobListBody = document.getElementById('job-list-body'); + const pdfForm = document.getElementById('pdf-form'); + const pdfFileInput = document.getElementById('pdf-file-input'); + const pdfFileName = document.getElementById('pdf-file-name'); + const audioForm = document.getElementById('audio-form'); + const audioFileInput = document.getElementById('audio-file-input'); + const audioFileName = document.getElementById('audio-file-name'); + 
+ const activePolls = new Map(); + + // --- Main Event Listeners --- + pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName)); + audioFileInput.addEventListener('change', () => updateFileName(audioFileInput, audioFileName)); + pdfForm.addEventListener('submit', (e) => handleFormSubmit(e, '/ocr-pdf', pdfForm, pdfFileInput, pdfFileName)); + audioForm.addEventListener('submit', (e) => handleFormSubmit(e, '/transcribe-audio', audioForm, audioFileInput, audioFileName)); + + jobListBody.addEventListener('click', (event) => { + if (event.target.classList.contains('cancel-button')) { + const jobId = event.target.dataset.jobId; + handleCancelJob(jobId); + } + }); + + function updateFileName(input, nameDisplay) { + nameDisplay.textContent = input.files.length > 0 ? input.files[0].name : 'No file chosen'; + nameDisplay.title = nameDisplay.textContent; // Add a tooltip for the full name + } + + async function handleFormSubmit(event, endpoint, form, fileInput, fileNameDisplay) { + event.preventDefault(); + if (!fileInput.files[0]) return; + + // MODIFICATION: Use new FormData(form) to capture all form fields, + // including the new model size dropdown for the audio form. + const formData = new FormData(form); + + const submitButton = form.querySelector('button[type="submit"]'); + submitButton.disabled = true; + + try { + const response = await fetch(endpoint, { method: 'POST', body: formData }); + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`); + } + const result = await response.json(); + + // To provide immediate feedback, create a placeholder job object + const preliminaryJob = { + id: result.job_id, + status: 'pending', + progress: 0, + original_filename: fileInput.files[0].name, + task_type: endpoint.includes('ocr') ? 
'ocr' : 'transcription', + created_at: new Date().toISOString(), + processed_filepath: null, + error_message: null + }; + renderJobRow(preliminaryJob); // Render immediately + startPolling(result.job_id); // Start polling for updates + } catch (error) { + console.error('Error submitting job:', error); + alert(`Submission failed: ${error.message}`); + } finally { + form.reset(); + fileNameDisplay.textContent = 'No file chosen'; + fileNameDisplay.title = ''; + submitButton.disabled = false; + } + } + + async function handleCancelJob(jobId) { + if (!confirm('Are you sure you want to cancel this job?')) return; + + try { + const response = await fetch(`/job/${jobId}/cancel`, { method: 'POST' }); + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || 'Failed to cancel job.'); + } + // The polling mechanism will update the UI to "cancelled" automatically. + // We can stop polling immediately to be more efficient. + stopPolling(jobId); + // Optionally, force an immediate UI update + const row = document.getElementById(`job-${jobId}`); + if (row) { + const statusCell = row.querySelector('td[data-label="Status"] .cell-value'); + const actionCell = row.querySelector('td[data-label="Action"] .cell-value'); + if (statusCell) { + statusCell.innerHTML = `cancelled`; + } + if (actionCell) { + actionCell.innerHTML = `-`; + } + } + } catch (error) { + console.error('Error cancelling job:', error); + alert(`Error: ${error.message}`); + } + } + + async function loadInitialJobs() { + try { + const response = await fetch('/jobs'); + if (!response.ok) throw new Error('Failed to fetch jobs.'); + const jobs = await response.json(); + jobListBody.innerHTML = ''; // Clear existing + for (const job of jobs.reverse()) { + renderJobRow(job); + if (['pending', 'processing'].includes(job.status)) { + startPolling(job.id); + } + } + } catch (error) { + console.error("Couldn't load job history:", error); + jobListBody.innerHTML = 'Could not load job 
history.'; + } + } + + function startPolling(jobId) { + if (activePolls.has(jobId)) return; + const intervalId = setInterval(async () => { + try { + const response = await fetch(`/job/${jobId}`); + if (!response.ok) { + if (response.status === 404) { + console.warn(`Job ${jobId} not found. Stopping poll.`); + stopPolling(jobId); + } + return; + } + const job = await response.json(); + renderJobRow(job); + if (['completed', 'failed', 'cancelled'].includes(job.status)) { + stopPolling(jobId); + } + } catch (error) { + console.error(`Error polling for job ${jobId}:`, error); + stopPolling(jobId); // Stop polling on network or other errors + } + }, 2500); // Poll every 2.5 seconds + activePolls.set(jobId, intervalId); + } + + function stopPolling(jobId) { + if (activePolls.has(jobId)) { + clearInterval(activePolls.get(jobId)); + activePolls.delete(jobId); + } + } + + function renderJobRow(job) { + let row = document.getElementById(`job-${job.id}`); + if (!row) { + row = document.createElement('tr'); + row.id = `job-${job.id}`; + jobListBody.prepend(row); + } + + const taskTypeLabel = job.task_type.includes('ocr') ? 'PDF/Image OCR' : 'Transcription'; + const formattedDate = new Date(job.created_at).toLocaleString(); + + let statusHtml = `${job.status}`; + let actionHtml = `-`; + + if (job.status === 'processing') { + // Show real progress for transcription, but an indeterminate one for OCR tasks + const progressClass = job.task_type === 'transcription' ? '' : 'indeterminate'; + const progressWidth = job.task_type === 'transcription' ? job.progress : 100; + const progressBarHtml = `
`; + statusHtml += progressBarHtml; + } + + if (job.status === 'pending' || job.status === 'processing') { + actionHtml = ``; + } else if (job.status === 'completed' && job.processed_filepath) { + const downloadFilename = job.processed_filepath.split(/[\\/]/).pop(); + actionHtml = `Download`; + } else if (job.status === 'failed') { + const errorTitle = job.error_message ? ` title="${job.error_message.replace(/"/g, '"')}"` : ''; + actionHtml = `Failed`; + } + + // Use textContent for filename to prevent XSS and add a title for overflow + const escapedFilename = job.original_filename.replace(//g, ">"); + + row.innerHTML = ` + ${escapedFilename} + ${taskTypeLabel} + ${formattedDate} + ${statusHtml} + ${actionHtml} + `; + } + + // --- Initial Load --- + loadInitialJobs(); +}); \ No newline at end of file diff --git a/static/js/script.old b/static/js/script.old new file mode 100644 index 0000000..80a088e --- /dev/null +++ b/static/js/script.old @@ -0,0 +1,214 @@ +// static/js/script.js + +document.addEventListener('DOMContentLoaded', () => { + // --- Global Elements --- + const jobListContainer = document.getElementById('job-list'); + + // --- PDF Form --- + const pdfForm = document.getElementById('pdf-form'); + const pdfFileInput = document.getElementById('pdf-file-input'); + const pdfFileName = document.getElementById('pdf-file-name'); + + // --- Audio Form --- + const audioForm = document.getElementById('audio-form'); + const audioFileInput = document.getElementById('audio-file-input'); + const audioFileName = document.getElementById('audio-file-name'); + + // --- State Management --- + let activePolls = new Map(); // Use a Map to store interval IDs for polling + + // --- Event Listeners --- + pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName)); + audioFileInput.addEventListener('change', () => updateFileName(audioFileInput, audioFileName)); + + pdfForm.addEventListener('submit', (e) => handleFormSubmit(e, '/ocr-pdf', pdfForm, 
pdfFileInput, pdfFileName)); + audioForm.addEventListener('submit', (e) => handleFormSubmit(e, '/transcribe-audio', audioForm, audioFileInput, audioFileName)); + + /** + * Updates the file name display. + */ + function updateFileName(input, nameDisplay) { + nameDisplay.textContent = input.files.length > 0 ? input.files[0].name : 'No file selected'; + } + + /** + * Generic handler for submitting a file processing form. + */ + async function handleFormSubmit(event, endpoint, form, fileInput, fileNameDisplay) { + event.preventDefault(); + if (!fileInput.files[0]) { + alert('Please select a file to upload.'); + return; + } + + const formData = new FormData(); + formData.append('file', fileInput.files[0]); + + // Disable the submit button + const submitButton = form.querySelector('button[type="submit"]'); + submitButton.disabled = true; + + try { + const response = await fetch(endpoint, { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`); + } + + const result = await response.json(); // Expects { job_id: "...", status: "pending" } + + // Create a preliminary job object to render immediately + const preliminaryJob = { + id: result.job_id, + status: 'pending', + original_filename: fileInput.files[0].name, + task_type: endpoint.includes('ocr') ? 
'ocr' : 'transcription', + created_at: new Date().toISOString() + }; + + renderJobCard(preliminaryJob); // Render in pending state + startPolling(result.job_id); // Start polling for updates + + } catch (error) { + console.error('Error submitting job:', error); + // In a real app, you'd show this error in a more user-friendly way + alert(`Submission failed: ${error.message}`); + } finally { + // Reset form and re-enable button + form.reset(); + fileNameDisplay.textContent = 'No file selected'; + submitButton.disabled = false; + } + } + + /** + * Fetches all existing jobs on page load and renders them. + */ + async function loadInitialJobs() { + try { + const response = await fetch('/jobs'); + if (!response.ok) throw new Error('Failed to fetch jobs.'); + + const jobs = await response.json(); + jobListContainer.innerHTML = ''; // Clear any existing content + + for (const job of jobs) { + renderJobCard(job); + // If a job is still processing from a previous session, resume polling + if (job.status === 'pending' || job.status === 'processing') { + startPolling(job.id); + } + } + } catch (error) { + console.error("Couldn't load job history:", error); + jobListContainer.innerHTML = '

Could not load job history.

'; + } + } + + /** + * Starts polling for a specific job's status. + */ + function startPolling(jobId) { + if (activePolls.has(jobId)) return; // Already polling this job + + const intervalId = setInterval(async () => { + try { + const response = await fetch(`/job/${jobId}`); + if (!response.ok) { + // Stop polling if job not found (e.g., cleaned up) + if (response.status === 404) stopPolling(jobId); + return; + } + + const job = await response.json(); + renderJobCard(job); // Re-render the card with new data + + if (job.status === 'completed' || job.status === 'failed') { + stopPolling(jobId); + } + } catch (error) { + console.error(`Error polling for job ${jobId}:`, error); + stopPolling(jobId); // Stop on network error + } + }, 3000); // Poll every 3 seconds + + activePolls.set(jobId, intervalId); + } + + /** + * Stops polling for a specific job. + */ + function stopPolling(jobId) { + if (activePolls.has(jobId)) { + clearInterval(activePolls.get(jobId)); + activePolls.delete(jobId); + } + } + + /** + * Creates or updates a job card in the UI. + */ + function renderJobCard(job) { + let card = document.getElementById(`job-${job.id}`); + // Create card if it doesn't exist + if (!card) { + card = document.createElement('div'); + card.id = `job-${job.id}`; + card.className = 'job-card'; + // Prepend new jobs to the top of the list + jobListContainer.prepend(card); + } + + // Update status for styling + card.dataset.status = job.status; + + const taskName = job.task_type === 'ocr' ? 'PDF OCR' : 'Audio Transcription'; + const formattedDate = new Date(job.created_at).toLocaleString(); + + let bodyHtml = ''; + switch(job.status) { + case 'pending': + case 'processing': + bodyHtml = ` +
+
+ Status: ${job.status}... +
`; + break; + case 'completed': + const downloadFilename = job.processed_filepath.split(/[\\/]/).pop(); + const downloadUrl = `/download/${downloadFilename}`; + const downloadButton = `Download Result`; + const previewHtml = job.task_type === 'ocr' && job.result_preview + ? `

Extracted Text Preview:

${job.result_preview}
` + : ''; + bodyHtml = `
${downloadButton}${previewHtml}
`; + break; + case 'failed': + bodyHtml = ` +

Processing Failed

+

${job.error_message || 'An unknown error occurred.'}

`; + break; + } + + card.innerHTML = ` +
+

${job.original_filename}

+ ${job.status} +
+

+ ${taskName} • Submitted: ${formattedDate} +

+
+ ${bodyHtml} +
+ `; + } + + // --- Initial Execution --- + loadInitialJobs(); +}); diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..07adcaf --- /dev/null +++ b/templates/index.html @@ -0,0 +1,83 @@ + + + + + + DocProcessor + + + + + + +
+
+

DocProcessor 🚀

+

PDF OCR & Audio Transcription

+
+ +
+
+
+
+
+

📄 PDF OCR

+
+ + + No file chosen +
+ +
+
+
+ +
+
+
+

🎤 Transcribe Audio

+
+ + + No file chosen +
+
+ + +
+ +
+
+
+
+ +
+

History

+
+ + + + + + + + + + + + +
FileTypeSubmittedStatusAction
+
+
+
+
+ + + \ No newline at end of file diff --git a/templates/index.old b/templates/index.old new file mode 100644 index 0000000..5d9a797 --- /dev/null +++ b/templates/index.old @@ -0,0 +1,63 @@ + + + + + + Document Processor AI + + + + + + +
+
+

Document Processor AI 🚀

+

High-speed OCR for PDFs and Transcription for Audio

+
+ +
+
+
+
+

📄 PDF OCR Processor

+

Upload a PDF to create a searchable version and extract its text.

+
+ + + No file selected +
+ +
+
+
+ +
+ +
+
+
+

🎤 Audio Transcription

+

Upload an audio file (MP3, WAV, M4A) to generate a text transcript.

+
+ + + No file selected +
+ +
+
+
+ +
+ +
+

Processing History

+
+
+
+
+
+ + +