Drag and Drop

This commit is contained in:
2025-09-17 18:45:55 +00:00
parent 2115238217
commit 20e41b67a7
10 changed files with 1358 additions and 379 deletions

42
Dockerfile Normal file
View File

@@ -0,0 +1,42 @@
# Dockerfile
FROM python:3.13.7-slim

# System-level converters, OCR engines and build tooling used by the app.
# Every package line must end with a backslash so that the trailing
# "&& rm -rf ..." cleanup stays part of this single RUN instruction.
RUN apt-get update && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        ghostscript \
        poppler-utils \
        libreoffice \
        imagemagick \
        graphicsmagick \
        libvips-tools \
        ffmpeg \
        libheif-examples \
        inkscape \
        calibre \
        build-essential \
        pkg-config \
        git \
        curl \
        texlive \
        texlive-latex-extra \
        texlive-xetex \
    && rm -rf /var/lib/apt/lists/*

# Set working directory inside the container
WORKDIR /app

# Copy requirements and install dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the app
COPY . .

# Expose the app port
EXPOSE 8000

RUN chmod +x run.sh

# Command to run when container starts
CMD ["./run.sh"]

10
docker-compose.yml Normal file
View File

@@ -0,0 +1,10 @@
version: "3.9"
services:
  web:
    build: .
    ports:
      # Publish the port the image declares with EXPOSE (8000).
      # NOTE(review): confirm run.sh binds the server to 8000.
      - "8000:8000"
    volumes:
      - .:/app # optional: mount code for live changes
    environment:
      # NOTE(review): the application imports FastAPI, not Flask; this
      # variable is kept as-is but presumably has no effect - confirm.
      - FLASK_ENV=development

385
main.py
View File

@@ -6,7 +6,7 @@ import uuid
import shlex
import yaml
from contextlib import asynccontextmanager
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Any
@@ -21,17 +21,21 @@ from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from huey import SqliteHuey
from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, field_serializer # MODIFIED: Import field_serializer
from sqlalchemy import (Column, DateTime, Integer, String, Text,
create_engine, delete, event)
from sqlalchemy.orm import Session, declarative_base, sessionmaker
from sqlalchemy.pool import NullPool
from string import Formatter
from sqlalchemy.orm import Session, declarative_base, sessionmaker
from werkzeug.utils import secure_filename
from typing import List as TypingList
# --------------------------------------------------------------------------------
# --- 1. CONFIGURATION
# --------------------------------------------------------------------------------
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class AppPaths(BaseModel):
BASE_DIR: Path = Path(__file__).resolve().parent
@@ -43,30 +47,46 @@ class AppPaths(BaseModel):
PATHS = AppPaths()
APP_CONFIG: Dict[str, Any] = {}
PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
PATHS.PROCESSED_DIR.mkdir(exist_ok=True)
def load_app_config():
global APP_CONFIG
try:
with open(PATHS.SETTINGS_FILE, 'r') as f:
APP_CONFIG = yaml.safe_load(f)
APP_CONFIG['app_settings']['max_file_size_bytes'] = APP_CONFIG['app_settings']['max_file_size_mb'] * 1024 * 1024
allowed_extensions = {
".pdf", ".ps", ".eps", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".gif",
".bmp", ".webp", ".svg", ".jxl", ".avif", ".ppm", ".mp3", ".m4a", ".ogg",
".flac", ".opus", ".wav", ".aac", ".mp4", ".mkv", ".mov", ".webm", ".avi",
".flv", ".md", ".txt", ".html", ".docx", ".odt", ".rst", ".epub", ".mobi",
".azw3", ".pptx", ".xlsx"
with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
cfg_raw = yaml.safe_load(f) or {}
# basic defaults
defaults = {
"app_settings": {"max_file_size_mb": 100, "allowed_all_extensions": []},
"transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
"conversion_tools": {},
"ocr_settings": {"ocrmypdf": {}}
}
APP_CONFIG['app_settings']['allowed_all_extensions'] = allowed_extensions
# shallow merge (safe for top-level keys)
cfg = defaults.copy()
cfg.update(cfg_raw)
# normalize app settings
app_settings = cfg.get("app_settings", {})
max_mb = app_settings.get("max_file_size_mb", 100)
app_settings["max_file_size_bytes"] = int(max_mb) * 1024 * 1024
allowed = app_settings.get("allowed_all_extensions", [])
if not isinstance(allowed, (list, set)):
allowed = list(allowed)
app_settings["allowed_all_extensions"] = set(allowed)
cfg["app_settings"] = app_settings
APP_CONFIG = cfg
logger.info("Successfully loaded settings from settings.yml")
except (FileNotFoundError, yaml.YAMLError) as e:
logger.error(f"Could not load settings.yml: {e}. App may not function correctly.")
APP_CONFIG = {}
logging.getLogger(__name__).exception(f"Could not load settings.yml: {e}. Using defaults.")
APP_CONFIG = {
"app_settings": {"max_file_size_mb": 100, "max_file_size_bytes": 100 * 1024 * 1024, "allowed_all_extensions": set()},
"transcription_settings": {"whisper": {"allowed_models": ["tiny", "base", "small"], "compute_type": "int8"}},
"conversion_tools": {},
"ocr_settings": {"ocrmypdf": {}}
}
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
PATHS.UPLOADS_DIR.mkdir(exist_ok=True)
PATHS.PROCESSED_DIR.mkdir(exist_ok=True)
# --------------------------------------------------------------------------------
# --- 2. DATABASE & Schemas
@@ -77,8 +97,6 @@ engine = create_engine(
poolclass=NullPool,
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# THIS IS THE CRITICAL FIX
Base = declarative_base()
@event.listens_for(engine, "connect")
@@ -102,11 +120,13 @@ class Job(Base):
progress = Column(Integer, default=0)
original_filename = Column(String)
input_filepath = Column(String)
input_filesize = Column(Integer, nullable=True)
processed_filepath = Column(String, nullable=True)
output_filesize = Column(Integer, nullable=True)
result_preview = Column(Text, nullable=True)
error_message = Column(Text, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc))
def get_db():
db = SessionLocal()
@@ -120,6 +140,7 @@ class JobCreate(BaseModel):
task_type: str
original_filename: str
input_filepath: str
input_filesize: int | None = None
processed_filepath: str | None = None
class JobSchema(BaseModel):
@@ -128,6 +149,8 @@ class JobSchema(BaseModel):
status: str
progress: int
original_filename: str
input_filesize: int | None = None
output_filesize: int | None = None
processed_filepath: str | None = None
result_preview: str | None = None
error_message: str | None = None
@@ -135,8 +158,14 @@ class JobSchema(BaseModel):
updated_at: datetime
model_config = ConfigDict(from_attributes=True)
# This serializer ensures the datetime string sent to the frontend ALWAYS
# ends with the 'Z' UTC indicator, regardless of whether the value loaded
# from the database is naive or timezone-aware.
@field_serializer('created_at', 'updated_at')
def serialize_dt(self, dt: datetime, _info):
    """Serialize ``dt`` as an ISO-8601 string with a trailing ``Z``.

    Naive datetimes are taken to already be in UTC and are formatted
    unchanged. Aware datetimes are converted to UTC and their offset is
    dropped before formatting; otherwise ``isoformat()`` would emit
    ``+00:00`` and the result would end in the invalid ``+00:00Z``.
    """
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt.isoformat() + "Z"
# --------------------------------------------------------------------------------
# --- 3. CRUD OPERATIONS (No Changes)
# --- 3. CRUD OPERATIONS
# --------------------------------------------------------------------------------
def get_job(db: Session, job_id: str):
    """Look up a job by its ``id`` column.

    Builds a query on ``Job`` filtered by ``Job.id == job_id`` and returns
    whatever ``.first()`` yields for it.
    """
    matching_jobs = db.query(Job).filter(Job.id == job_id)
    return matching_jobs.first()
@@ -163,80 +192,120 @@ def update_job_status(db: Session, job_id: str, status: str, progress: int = Non
db.refresh(db_job)
return db_job
def mark_job_as_completed(db: Session, job_id: str, preview: str | None = None):
def mark_job_as_completed(db: Session, job_id: str, output_filepath_str: str | None = None, preview: str | None = None):
db_job = get_job(db, job_id)
if db_job and db_job.status != 'cancelled':
db_job.status = "completed"
db_job.progress = 100
if preview:
db_job.result_preview = preview.strip()[:2000]
if output_filepath_str:
try:
output_path = Path(output_filepath_str)
if output_path.exists():
db_job.output_filesize = output_path.stat().st_size
except Exception:
logger.exception(f"Could not stat output file {output_filepath_str} for job {job_id}")
db.commit()
return db_job
# ... (The rest of the file is unchanged and remains the same) ...
# --------------------------------------------------------------------------------
# --- 4. BACKGROUND TASK SETUP
# --------------------------------------------------------------------------------
huey = SqliteHuey(filename=PATHS.HUEY_DB_PATH)
# --- START: NEW WHISPER MODEL CACHING ---
# This dictionary will live in the memory of the Huey worker process,
# allowing us to reuse loaded models across tasks.
# Whisper model cache per worker process
WHISPER_MODELS_CACHE: Dict[str, WhisperModel] = {}
def get_whisper_model(model_size: str, whisper_settings: dict) -> WhisperModel:
"""
Loads a Whisper model into the cache if not present, and returns the model.
This ensures a model is only loaded into memory once per worker process.
"""
if model_size not in WHISPER_MODELS_CACHE:
compute_type = whisper_settings.get('compute_type', 'int8')
logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory...")
model = WhisperModel(model_size, device="cpu", compute_type=compute_type)
WHISPER_MODELS_CACHE[model_size] = model
logger.info(f"Model '{model_size}' loaded successfully.")
else:
if model_size in WHISPER_MODELS_CACHE:
logger.info(f"Found model '{model_size}' in cache. Reusing.")
return WHISPER_MODELS_CACHE[model_size]
# --- END: NEW WHISPER MODEL CACHING ---
device = whisper_settings.get("device", "cpu")
compute_type = whisper_settings.get('compute_type', 'int8')
logger.info(f"Whisper model '{model_size}' not in cache. Loading into memory on device={device}...")
try:
model = WhisperModel(model_size, device=device, compute_type=compute_type)
except Exception:
logger.exception("Failed to load whisper model")
raise
WHISPER_MODELS_CACHE[model_size] = model
logger.info(f"Model '{model_size}' loaded successfully.")
return model
# Helper: safe run_command (trimmed logs + timeout)
def run_command(argv: TypingList[str], timeout: int = 300):
    """Run ``argv`` as a subprocess and return the completed process.

    stdout and stderr are captured as text. The command is passed as an
    argument list, so no shell is involved.

    Args:
        argv: Program and arguments to execute.
        timeout: Maximum run time in seconds before the call is aborted.

    Returns:
        The ``subprocess.CompletedProcess`` for a command that exited with 0.

    Raises:
        Exception: If the command times out (chained to the underlying
            ``subprocess.TimeoutExpired``) or exits with a non-zero status.
            The failure message carries at most 4000 characters each of
            stderr and stdout so that error text stays bounded.
    """
    try:
        res = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired as exc:
        # Chain the original error so the traceback keeps the real cause.
        raise Exception(f"Command timed out after {timeout}s") from exc
    if res.returncode != 0:
        stderr = (res.stderr or "")[:4000]
        stdout = (res.stdout or "")[:4000]
        raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
    return res
# Helper: validate and build command from template with allowlist
ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth", "filter"}


def validate_and_build_command(template_str: str, mapping: Dict[str, str]) -> TypingList[str]:
    """Validate a command template and expand it into an argv list.

    The template is split into shell-style tokens FIRST and each token is
    formatted afterwards. Substituted values therefore always stay inside
    the token they were written in: a path containing spaces or quote
    characters remains one argument instead of being re-split.

    Allowed placeholders that are missing from ``mapping`` are auto-filled:
      - 'filter' -> mapping.get('output_ext', '')
      - others   -> empty string
    A token that becomes empty purely because of such substitution is
    dropped, matching how an empty expansion disappears when a formatted
    command line is split.

    Args:
        template_str: Command template using ``{name}`` placeholders.
        mapping: Placeholder values; it is not mutated.

    Returns:
        The argument list to execute.

    Raises:
        ValueError: If the template uses a placeholder outside ALLOWED_VARS.
    """
    used = {fname for _, fname, _, _ in Formatter().parse(template_str) if fname}
    bad = used - ALLOWED_VARS
    if bad:
        raise ValueError(f"Command template contains disallowed placeholders: {bad}")

    # auto-fill missing allowed placeholders with safe defaults
    safe_mapping = dict(mapping)  # shallow copy to avoid mutating caller mapping
    for name in used:
        if name not in safe_mapping:
            if name == "filter":
                safe_mapping[name] = safe_mapping.get("output_ext", "")
            else:
                safe_mapping[name] = ""

    argv = []
    for token in shlex.split(template_str):
        rendered = token.format(**safe_mapping)
        # Keep explicitly-empty template tokens (e.g. ''), but drop tokens
        # that only vanished because their placeholder expanded to "".
        if rendered or not token:
            argv.append(rendered)
    return argv
@huey.task()
def run_transcription_task(job_id: str, input_path_str: str, output_path_str: str, model_size: str, whisper_settings: dict):
db = SessionLocal()
try:
job = get_job(db, job_id)
if not job or job.status == 'cancelled': return
if not job or job.status == 'cancelled':
return
update_job_status(db, job_id, "processing")
# --- MODIFIED: Use the caching function to get the model ---
model = get_whisper_model(model_size, whisper_settings)
logger.info(f"Starting transcription for job {job_id}")
segments, info = model.transcribe(input_path_str, beam_size=5)
full_transcript = []
for segment in segments:
job_check = get_job(db, job_id) # Check for cancellation during long tasks
if job_check.status == 'cancelled':
logger.info(f"Job {job_id} cancelled during transcription.")
return
if info.duration > 0:
progress = int((segment.end / info.duration) * 100)
update_job_status(db, job_id, "processing", progress=progress)
full_transcript.append(segment.text.strip())
transcript_text = "\n".join(full_transcript)
# write atomically to avoid partial files
# atomic write of transcript — keep the real extension and mark tmp in the name
out_path = Path(output_path_str)
tmp_out = out_path.with_suffix(out_path.suffix + f".{uuid.uuid4().hex}.tmp")
tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
with tmp_out.open("w", encoding="utf-8") as f:
f.write(transcript_text)
tmp_out.replace(out_path)
mark_job_as_completed(db, job_id, preview=transcript_text)
mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=transcript_text)
logger.info(f"Transcription for job {job_id} completed.")
except Exception:
logger.exception(f"ERROR during transcription for job {job_id}")
@@ -245,13 +314,13 @@ def run_transcription_task(job_id: str, input_path_str: str, output_path_str: st
Path(input_path_str).unlink(missing_ok=True)
db.close()
# Other tasks remain unchanged
@huey.task()
def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr_settings: dict):
db = SessionLocal()
try:
job = get_job(db, job_id)
if not job or job.status == 'cancelled': return
if not job or job.status == 'cancelled':
return
update_job_status(db, job_id, "processing")
logger.info(f"Starting PDF OCR for job {job_id}")
ocrmypdf.ocr(input_path_str, output_path_str,
@@ -263,7 +332,7 @@ def run_pdf_ocr_task(job_id: str, input_path_str: str, output_path_str: str, ocr
with open(output_path_str, "rb") as f:
reader = pypdf.PdfReader(f)
preview = "\n".join(page.extract_text() or "" for page in reader.pages)
mark_job_as_completed(db, job_id, preview=preview)
mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=preview)
logger.info(f"PDF OCR for job {job_id} completed.")
except Exception:
logger.exception(f"ERROR during PDF OCR for job {job_id}")
@@ -277,13 +346,18 @@ def run_image_ocr_task(job_id: str, input_path_str: str, output_path_str: str):
db = SessionLocal()
try:
job = get_job(db, job_id)
if not job or job.status == 'cancelled': return
if not job or job.status == 'cancelled':
return
update_job_status(db, job_id, "processing", progress=50)
logger.info(f"Starting Image OCR for job {job_id}")
text = pytesseract.image_to_string(Image.open(input_path_str))
with open(output_path_str, "w", encoding="utf-8") as f:
# atomic write of OCR text
out_path = Path(output_path_str)
tmp_out = out_path.with_name(f"{out_path.stem}.tmp-{uuid.uuid4().hex}{out_path.suffix}")
with tmp_out.open("w", encoding="utf-8") as f:
f.write(text)
mark_job_as_completed(db, job_id, preview=text)
tmp_out.replace(out_path)
mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=text)
logger.info(f"Image OCR for job {job_id} completed.")
except Exception:
logger.exception(f"ERROR during Image OCR for job {job_id}")
@@ -300,14 +374,18 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
temp_output_file = None
try:
job = get_job(db, job_id)
if not job or job.status == 'cancelled': return
if not job or job.status == 'cancelled':
return
update_job_status(db, job_id, "processing", progress=25)
logger.info(f"Starting conversion for job {job_id} using {tool} with task {task_key}")
tool_config = conversion_tools_config.get(tool)
if not tool_config: raise ValueError(f"Unknown conversion tool: {tool}")
if not tool_config:
raise ValueError(f"Unknown conversion tool: {tool}")
input_path = Path(input_path_str)
output_path = Path(output_path_str)
current_input_path = input_path
# Pre-processing for specific tools
if tool == "mozjpeg":
temp_input_file = input_path.with_suffix('.temp.ppm')
logger.info(f"Pre-converting for MozJPEG: {input_path} -> {temp_input_file}")
@@ -317,22 +395,12 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
err = (pre_conv_result.stderr or "")[:4000]
raise Exception(f"MozJPEG pre-conversion to PPM failed: {err}")
current_input_path = temp_input_file
update_job_status(db, job_id, "processing", progress=50)
# Build safe mapping for formatting and validate placeholders
ALLOWED_VARS = {"input", "output", "output_dir", "output_ext", "quality", "speed", "preset", "device", "dpi", "samplerate", "bitdepth"}
def validate_and_build_command(template_str: str, mapping: dict):
fmt = Formatter()
used = {fname for _, fname, _, _ in fmt.parse(template_str) if fname}
bad = used - ALLOWED_VARS
if bad:
raise ValueError(f"Command template contains disallowed placeholders: {bad}")
formatted = template_str.format(**mapping)
return shlex.split(formatted)
# Use a temporary output path and atomically move into place after success
temp_output_file = output_path.with_suffix(output_path.suffix + f".{uuid.uuid4().hex}.tmp")
# Prepare mapping
# prepare temporary output and mapping
# use a temp filename that preserves the real extension, e.g. file.tmp-<uuid>.pdf
temp_output_file = output_path.with_name(f"{output_path.stem}.tmp-{uuid.uuid4().hex}{output_path.suffix}")
mapping = {
"input": str(current_input_path),
"output": str(temp_output_file),
@@ -340,7 +408,7 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
"output_ext": output_path.suffix.lstrip('.'),
}
# Allow tool-specific adjustments to mapping
# tool specific mapping adjustments
if tool.startswith("ghostscript"):
device, setting = task_key.split('_')
mapping.update({"device": device, "dpi": setting, "preset": setting})
@@ -358,38 +426,30 @@ def run_conversion_task(job_id: str, input_path_str: str, output_path_str: str,
_, quality = task_key.split('_')
quality = quality.replace('q', '')
mapping.update({"quality": quality})
elif tool == "libreoffice":
target_ext = output_path.suffix.lstrip('.')
# tool_config may include a 'filters' mapping (see settings.yml example)
filter_val = tool_config.get("filters", {}).get(target_ext, target_ext)
mapping["filter"] = filter_val
command_template_str = tool_config["command_template"]
command = validate_and_build_command(command_template_str, mapping)
logger.info(f"Executing command: {' '.join(command)}")
# run with timeout and capture output; run_command helper ensures trimmed logs on failure
def run_command(argv: List[str], timeout: int = 300):
try:
res = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, timeout=timeout)
except subprocess.TimeoutExpired:
raise Exception(f"Command timed out after {timeout}s")
if res.returncode != 0:
stderr = (res.stderr or "")[:4000]
stdout = (res.stdout or "")[:4000]
raise Exception(f"Command failed exit {res.returncode}. stderr: {stderr}; stdout: {stdout}")
return res
# execute command with timeout and trimmed logs on error
result = run_command(command, timeout=tool_config.get("timeout", 300))
if tool == "libreoffice":
expected_output_filename = input_path.with_suffix(output_path.suffix).name
generated_file = output_path.parent / expected_output_filename
if generated_file.exists():
# move generated file into place
generated_file.replace(output_path)
else:
raise Exception(f"LibreOffice did not create the expected file: {expected_output_filename}")
# handle LibreOffice special case: sometimes it writes differently
# Special-case LibreOffice: support per-format export filters via settings.yml
# move temp output into final location atomically
if temp_output_file and temp_output_file.exists():
temp_output_file.replace(output_path)
mark_job_as_completed(db, job_id, preview=f"Successfully converted file.")
mark_job_as_completed(db, job_id, output_filepath_str=output_path_str, preview=f"Successfully converted file.")
logger.info(f"Conversion for job {job_id} completed.")
except Exception as e:
except Exception:
logger.exception(f"ERROR during conversion for job {job_id}")
update_job_status(db, job_id, "failed", error="See server logs for details.")
finally:
@@ -415,13 +475,14 @@ app = FastAPI(lifespan=lifespan)
app.mount("/static", StaticFiles(directory=PATHS.BASE_DIR / "static"), name="static")
templates = Jinja2Templates(directory=PATHS.BASE_DIR / "templates")
async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
async def save_upload_file_chunked(upload_file: UploadFile, destination: Path) -> int:
"""
Streams the uploaded file in chunks directly to a file on disk.
This is memory-efficient and reliable for large files.
Write upload to a tmp file in chunks, then atomically move to final destination.
Returns the final size of the file in bytes.
"""
max_size = APP_CONFIG.get("app_settings", {}).get("max_file_size_bytes", 100 * 1024 * 1024)
tmp = destination.with_suffix(destination.suffix + f".{uuid.uuid4().hex}.tmp")
# make a temp filename that keeps the real extension, e.g. file.tmp-<uuid>.pdf
tmp = destination.with_name(f"{destination.stem}.tmp-{uuid.uuid4().hex}{destination.suffix}")
size = 0
try:
with tmp.open("wb") as buffer:
@@ -433,17 +494,16 @@ async def save_upload_file_chunked(upload_file: UploadFile, destination: Path):
if size > max_size:
raise HTTPException(status_code=413, detail=f"File exceeds {max_size / 1024 / 1024} MB limit")
buffer.write(chunk)
# atomic move into place
tmp.replace(destination)
return size
except Exception:
tmp.unlink(missing_ok=True)
raise
def is_allowed_file(filename: str, allowed_extensions: set) -> bool:
    """Tell whether ``filename``'s extension is in ``allowed_extensions``.

    Only the final suffix of the name is considered, and it is lower-cased
    before the membership test.
    """
    extension = Path(filename).suffix.lower()
    return extension in allowed_extensions
# --- Routes (only transcription route is modified) ---
# --- Routes (transcription route uses Huey task enqueuing) ---
@app.post("/transcribe-audio", status_code=status.HTTP_202_ACCEPTED)
async def submit_audio_transcription(
@@ -467,19 +527,24 @@ async def submit_audio_transcription(
upload_path = PATHS.UPLOADS_DIR / audio_filename
processed_path = PATHS.PROCESSED_DIR / transcript_filename
await save_upload_file_chunked(file, upload_path)
input_size = await save_upload_file_chunked(file, upload_path)
job_data = JobCreate(id=job_id, task_type="transcription", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
job_data = JobCreate(
id=job_id,
task_type="transcription",
original_filename=file.filename,
input_filepath=str(upload_path),
input_filesize=input_size,
processed_filepath=str(processed_path)
)
new_job = create_job(db=db, job=job_data)
# --- MODIFIED: Pass whisper_config to the task ---
# enqueue the Huey task (decorated function call enqueues when using huey)
run_transcription_task(new_job.id, str(upload_path), str(processed_path), model_size=model_size, whisper_settings=whisper_config)
return {"job_id": new_job.id, "status": new_job.status}
return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}
# --- Other routes remain unchanged ---
@app.get("/")
async def get_index(request: Request):
whisper_models = APP_CONFIG.get("transcription_settings", {}).get("whisper", {}).get("allowed_models", [])
@@ -493,23 +558,55 @@ async def get_index(request: Request):
@app.get("/settings")
async def get_settings_page(request: Request):
try:
with open(PATHS.SETTINGS_FILE, 'r') as f:
current_config = yaml.safe_load(f)
except Exception as e:
logger.error(f"Could not load settings.yml for settings page: {e}")
with open(PATHS.SETTINGS_FILE, 'r', encoding='utf8') as f:
current_config = yaml.safe_load(f) or {}
except Exception:
logger.exception("Could not load settings.yml for settings page")
current_config = {}
return templates.TemplateResponse("settings.html", {"request": request, "config": current_config})
def deep_merge(base: dict, updates: dict) -> dict:
    """Fold ``updates`` into ``base`` in place and return ``base``.

    When a key exists in both and both values are dicts, the two dicts are
    merged recursively. In every other case (new key, list, scalar, or a
    dict meeting a non-dict) the value from ``updates`` replaces whatever
    ``base`` held.
    """
    for name, incoming in updates.items():
        if name not in base or not isinstance(base[name], dict) or not isinstance(incoming, dict):
            # Nothing to descend into: take the incoming value as-is.
            base[name] = incoming
            continue
        base[name] = deep_merge(base[name], incoming)
    return base
@app.post("/settings/save")
async def save_settings(new_config: Dict = Body(...)):
tmp = PATHS.SETTINGS_FILE.with_suffix(".tmp")
try:
with open(PATHS.SETTINGS_FILE, 'w') as f:
yaml.dump(new_config, f, default_flow_style=False, sort_keys=False)
# load existing config if present
try:
with PATHS.SETTINGS_FILE.open("r", encoding="utf8") as f:
current_config = yaml.safe_load(f) or {}
except FileNotFoundError:
current_config = {}
# deep merge new values
merged = deep_merge(current_config, new_config)
# atomic write back
with tmp.open("w", encoding="utf8") as f:
yaml.safe_dump(merged, f, default_flow_style=False, sort_keys=False)
tmp.replace(PATHS.SETTINGS_FILE)
load_app_config()
return JSONResponse({"message": "Settings saved successfully."})
except Exception as e:
logger.error(f"Failed to save settings: {e}")
raise HTTPException(status_code=500, detail="Could not write to settings.yml.")
return JSONResponse({"message": "Settings updated successfully."})
except Exception:
logger.exception("Failed to update settings")
tmp.unlink(missing_ok=True)
raise HTTPException(status_code=500, detail="Could not update settings.yml.")
@app.post("/settings/clear-history")
async def clear_job_history(db: Session = Depends(get_db)):
@@ -518,9 +615,9 @@ async def clear_job_history(db: Session = Depends(get_db)):
db.commit()
logger.info(f"Cleared {num_deleted} jobs from history.")
return {"deleted_count": num_deleted}
except Exception as e:
except Exception:
db.rollback()
logger.error(f"Failed to clear job history: {e}")
logger.exception("Failed to clear job history")
raise HTTPException(status_code=500, detail="Database error while clearing history.")
@app.post("/settings/delete-files")
@@ -532,9 +629,9 @@ async def delete_processed_files():
if f.is_file():
f.unlink()
deleted_count += 1
except Exception as e:
except Exception:
errors.append(f.name)
logger.error(f"Could not delete processed file {f.name}: {e}")
logger.exception(f"Could not delete processed file {f.name}")
if errors:
raise HTTPException(status_code=500, detail=f"Could not delete some files: {', '.join(errors)}")
logger.info(f"Deleted {deleted_count} files from processed directory.")
@@ -562,12 +659,14 @@ async def submit_file_conversion(file: UploadFile = File(...), output_format: st
processed_filename = f"{original_stem}_{job_id}.{target_ext}"
upload_path = PATHS.UPLOADS_DIR / upload_filename
processed_path = PATHS.PROCESSED_DIR / processed_filename
await save_upload_file_chunked(file, upload_path)
input_size = await save_upload_file_chunked(file, upload_path)
job_data = JobCreate(id=job_id, task_type="conversion", original_filename=file.filename,
input_filepath=str(upload_path), processed_filepath=str(processed_path))
input_filepath=str(upload_path),
input_filesize=input_size,
processed_filepath=str(processed_path))
new_job = create_job(db=db, job=job_data)
run_conversion_task(new_job.id, str(upload_path), str(processed_path), tool, task_key, conversion_tools)
return {"job_id": new_job.id, "status": new_job.status}
return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}
@app.post("/ocr-pdf", status_code=status.HTTP_202_ACCEPTED)
async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -578,12 +677,15 @@ async def submit_pdf_ocr(file: UploadFile = File(...), db: Session = Depends(get
unique_filename = f"{Path(safe_basename).stem}_{job_id}{Path(safe_basename).suffix}"
upload_path = PATHS.UPLOADS_DIR / unique_filename
processed_path = PATHS.PROCESSED_DIR / unique_filename
await save_upload_file_chunked(file, upload_path)
job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
input_size = await save_upload_file_chunked(file, upload_path)
job_data = JobCreate(id=job_id, task_type="ocr", original_filename=file.filename,
input_filepath=str(upload_path),
input_filesize=input_size,
processed_filepath=str(processed_path))
new_job = create_job(db=db, job=job_data)
ocr_settings = APP_CONFIG.get("ocr_settings", {}).get("ocrmypdf", {})
run_pdf_ocr_task(new_job.id, str(upload_path), str(processed_path), ocr_settings)
return {"job_id": new_job.id, "status": new_job.status}
return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}
@app.post("/ocr-image", status_code=status.HTTP_202_ACCEPTED)
async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(get_db)):
@@ -596,11 +698,14 @@ async def submit_image_ocr(file: UploadFile = File(...), db: Session = Depends(g
unique_filename = f"{Path(safe_basename).stem}_{job_id}{file_ext}"
upload_path = PATHS.UPLOADS_DIR / unique_filename
processed_path = PATHS.PROCESSED_DIR / f"{Path(safe_basename).stem}_{job_id}.txt"
await save_upload_file_chunked(file, upload_path)
job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename, input_filepath=str(upload_path), processed_filepath=str(processed_path))
input_size = await save_upload_file_chunked(file, upload_path)
job_data = JobCreate(id=job_id, task_type="ocr-image", original_filename=file.filename,
input_filepath=str(upload_path),
input_filesize=input_size,
processed_filepath=str(processed_path))
new_job = create_job(db=db, job=job_data)
run_image_ocr_task(new_job.id, str(upload_path), str(processed_path))
return {"job_id": new_job.id, "status": new_job.status}
return {"job_id": new_job.id, "status": new_job.status, "status_url": f"/job/{new_job.id}"}
@app.post("/job/{job_id}/cancel", status_code=status.HTTP_202_ACCEPTED)
async def cancel_job(job_id: str, db: Session = Depends(get_db)):
@@ -626,8 +731,7 @@ async def get_job_status(job_id: str, db: Session = Depends(get_db)):
@app.get("/download/{filename}")
async def download_file(filename: str):
safe_filename = secure_filename(filename)
file_path = PATHS.PROCESSED_DIR / safe_filename
file_path = file_path.resolve()
file_path = (PATHS.PROCESSED_DIR / safe_filename).resolve()
base = PATHS.PROCESSED_DIR.resolve()
try:
file_path.relative_to(base)
@@ -636,3 +740,14 @@ async def download_file(filename: str):
if not file_path.is_file():
raise HTTPException(status_code=404, detail="File not found.")
return FileResponse(path=file_path, filename=safe_filename, media_type="application/octet-stream")
# Small health endpoint
@app.get("/health")
async def health():
    """Report whether the database can be reached.

    Opens a connection on the module-level engine and runs ``SELECT 1``.
    Returns ``{"ok": True}`` on success; on any failure the exception is
    logged and a 500 response with ``{"ok": False}`` is returned.
    """
    try:
        with engine.connect() as conn:
            # SQLAlchemy 2.x rejects bare SQL strings passed to
            # Connection.execute(); exec_driver_sql() accepts a raw string
            # on both 1.4 and 2.x and needs no extra import.
            conn.exec_driver_sql("SELECT 1")
    except Exception:
        logger.exception("Health check failed")
        return JSONResponse({"ok": False}, status_code=500)
    return {"ok": True}

View File

@@ -1,22 +1,145 @@
# requirements.txt
# Web framework
fastapi
uvicorn[standard]
python-multipart
jinja2
# PDF OCR
ocrmypdf
PyPDF2
# Audio Transcription
faster-whisper
# The following are core dependencies for faster-whisper,
# but it's good to list them explicitly.
# ctranslate2
# transformers
# torch # Note: torch is a dependency of transformers
# Utilities
werkzeug
annotated-types==0.7.0
anyio==4.10.0
audioop-lts==0.2.2
av==15.1.0
azure-ai-documentintelligence==1.0.2
azure-core==1.35.1
azure-identity==1.25.0
beautifulsoup4==4.13.5
certifi==2025.8.3
cffi==2.0.0
chardet==5.2.0
charset-normalizer==3.4.3
click==8.2.1
cobble==0.1.4
coloredlogs==15.0.1
cryptography==45.0.7
css-parser==1.0.10
ctranslate2==4.6.0
defusedxml==0.7.1
Deprecated==1.2.18
deprecation==2.1.0
et_xmlfile==2.0.0
fastapi==0.116.1
faster-whisper==1.2.0
filelock==3.19.1
flatbuffers==25.2.10
fsspec==2025.9.0
greenlet==3.2.4
gunicorn==23.0.0
h11==0.16.0
hf-xet==1.1.10
html5-parser==0.4.12
html5lib==1.1
httptools==0.6.4
huey==2.5.3
huggingface-hub==0.34.4
humanfriendly==10.0
idna==3.10
imageio==2.37.0
img2pdf==0.6.1
isodate==0.7.2
Jinja2==3.1.6
lazy_loader==0.4
lxml==6.0.1
magika==0.6.2
mammoth==1.10.0
markdown-it-py==4.0.0
markdownify==1.2.0
markitdown==0.1.3
MarkupSafe==3.0.2
mdurl==0.1.2
mechanize==0.4.10
mpmath==1.3.0
msal==1.33.0
msal-extensions==1.3.1
msgpack==1.1.1
networkx==3.5
ninja==1.13.0
numpy==2.2.6
nvidia-cublas-cu12==12.8.4.1
nvidia-cuda-cupti-cu12==12.8.90
nvidia-cuda-nvrtc-cu12==12.8.93
nvidia-cuda-runtime-cu12==12.8.90
nvidia-cudnn-cu12==9.10.2.21
nvidia-cufft-cu12==11.3.3.83
nvidia-cufile-cu12==1.13.1.3
nvidia-curand-cu12==10.3.9.90
nvidia-cusolver-cu12==11.7.3.90
nvidia-cusparse-cu12==12.5.8.93
nvidia-cusparselt-cu12==0.7.1
nvidia-nccl-cu12==2.27.3
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvtx-cu12==12.8.90
ocrmypdf==16.11.0
olefile==0.47
onnxruntime==1.22.1
opencv-python-headless==4.12.0.88
openpyxl==3.1.5
packaging==25.0
pandas==2.3.2
pdfminer.six==20250506
pi_heif==1.1.0
pikepdf==9.11.0
pillow==11.3.0
pluggy==1.6.0
protobuf==6.32.1
pyclipper==1.3.0.post6
pycparser==2.23
pydantic==2.11.9
pydantic-settings==2.10.1
pydantic_core==2.33.2
pydub==0.25.1
Pygments==2.19.2
PyJWT==2.10.1
pypdf==6.0.0
PyPDF2==3.0.1
PyQt6==6.9.1
PyQt6-Qt6==6.9.2
PyQt6-WebEngine==6.9.0
PyQt6-WebEngine-Qt6==6.9.2
PyQt6_sip==13.10.2
pytesseract==0.3.13
python-bidi==0.6.6
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
python-multipart==0.0.20
python-pptx==1.0.2
pytz==2025.2
PyYAML==6.0.2
regex==2025.9.1
requests==2.32.5
rich==14.1.0
scikit-image==0.25.2
scipy==1.16.2
setuptools==80.9.0
shapely==2.1.1
six==1.17.0
sniffio==1.3.1
soupsieve==2.8
SpeechRecognition==3.14.3
SQLAlchemy==2.0.43
standard-aifc==3.13.0
standard-chunk==3.13.0
starlette==0.47.3
sympy==1.14.0
tifffile==2025.9.9
tokenizers==0.22.0
torch==2.8.0
torchvision==0.23.0
tqdm==4.67.1
triton==3.4.0
typing-inspection==0.4.1
typing_extensions==4.15.0
tzdata==2025.2
urllib3==2.5.0
uvicorn==0.35.0
uvloop==0.21.0
watchfiles==1.1.0
webencodings==0.5.1
websockets==15.0.1
Werkzeug==3.1.3
wrapt==1.17.3
xlrd==2.0.2
xlsxwriter==3.2.9
youtube-transcript-api==1.0.3

4
run.sh
View File

@@ -3,8 +3,8 @@
echo "Starting DocProcessor with Gunicorn..."
exec gunicorn -w 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
exec gunicorn -w 4 --threads 2 -k uvicorn.workers.UvicornWorker --forwarded-allow-ips='*' main:app -b 0.0.0.0:8000 &
echo "Done"
echo "Starting huey..."
exec huey_consumer.py main.huey -w 2 &
exec huey_consumer.py main.huey -w 4 &
echo "Done"

272
settings .yml.default Normal file
View File

@@ -0,0 +1,272 @@
# settings.yml
# General application settings
app_settings:
max_file_size_mb: 2000 # Maximum upload size in Megabytes
# Allowed extensions (list will be normalized to a set by the server)
allowed_all_extensions:
- .pdf
- .ps
- .eps
- .png
- .jpg
- .jpeg
- .tiff
- .tif
- .gif
- .bmp
- .webp
- .svg
- .jxl
- .avif
- .ppm
- .mp3
- .m4a
- .ogg
- .flac
- .opus
- .wav
- .aac
- .mp4
- .mkv
- .mov
- .webm
- .avi
- .flv
- .md
- .txt
- .html
- .docx
- .odt
- .rst
- .epub
- .mobi
- .azw3
- .pptx
- .xlsx
# Settings for Optical Character Recognition (OCR) tasks
ocr_settings:
ocrmypdf:
deskew: true
clean: true
optimize: 1
force_ocr: true
# Settings for audio transcription tasks
transcription_settings:
whisper:
compute_type: "int8"
allowed_models:
- "tiny"
- "base"
- "small"
- "medium"
- "large-v3"
- "distil-large-v2"
# optional: specify device if workers have GPU (e.g. "cuda" or "cpu")
# device: "cpu"
# --- Conversion Tool Definitions ---
# The server validates placeholders against an allowlist:
# {input}, {output}, {output_dir}, {output_ext}, {quality}, {speed},
# {preset}, {device}, {dpi}, {samplerate}, {bitdepth}
conversion_tools:
libreoffice:
name: "LibreOffice"
# Use {filter} so we can supply LibreOffice export filters like "txt:Text"
command_template: 'libreoffice --headless --convert-to {filter} --outdir {output_dir} {input}'
timeout: 120
# Optional: per-format export filter. If missing for a format, server falls back to the extension.
filters:
pdf: "pdf"
docx: "docx"
odt: "odt"
html: "html"
rtf: "rtf"
txt: "txt:Text"
xml: "xml"
epub: "epub"
xlsx: "xlsx"
ods: "ods"
csv: "csv:Text"
pptx: "pptx"
odp: "odp"
svg: "svg"
formats:
pdf: "PDF"
docx: "Word Document"
odt: "OpenDocument Text"
html: "HTML"
rtf: "Rich Text Format"
txt: "Plain Text"
xml: "Word 2003 XML"
epub: "EPUB"
xlsx: "Excel Spreadsheet"
ods: "OpenDocument Spreadsheet"
csv: "CSV"
pptx: "PowerPoint Presentation"
odp: "OpenDocument Presentation"
svg: "SVG"
pandoc:
name: "Pandoc"
command_template: 'pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex'
timeout: 60
formats:
docx: "Word Document"
odt: "OpenDocument Text"
pdf: "PDF"
rtf: "Rich Text Format"
txt: "Plain Text"
tex: "LaTeX"
man: "Groff Man Page"
epub: "EPUB v3 Book"
epub2: "EPUB v2 Book"
html: "HTML"
html5: "HTML5"
pptx: "PowerPoint Presentation"
beamer: "Beamer PDF Slides"
slidy: "Slidy HTML Slides"
md: "Markdown"
rst: "reStructuredText"
jira: "Jira Wiki Markup"
mediawiki: "MediaWiki Markup"
ghostscript_pdf:
name: "Ghostscript (PDF)"
# placeholders used: {preset}, {output}, {input}
command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
timeout: 60
formats:
screen: "PDF (Optimized for Screen)"
ebook: "PDF (Optimized for Ebooks)"
printer: "PDF (Optimized for Print)"
archive: "PDF/A (for Archiving)"
ghostscript_image:
name: "Ghostscript (Image)"
# placeholders used: {device}, {dpi}, {output}, {input}
command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
timeout: 60
formats:
jpeg_72: "JPEG Image (72 DPI)"
jpeg_300: "JPEG Image (300 DPI)"
png16m_150: "PNG Image (150 DPI)"
png16m_300: "PNG Image (300 DPI)"
tiff24nc_300: "TIFF Image (300 DPI)"
tiff24nc_600: "TIFF Image (600 DPI)"
calibre:
name: "Calibre (ebook-convert)"
command_template: 'ebook-convert {input} {output}'
timeout: 60
formats:
epub: "EPUB"
mobi: "MOBI"
azw3: "Amazon Kindle"
pdf: "PDF"
docx: "Word Document"
ffmpeg:
name: "FFmpeg"
command_template: 'ffmpeg -i {input} -y -preset medium {output}'
timeout: 300
formats:
mp4: "MP4 Video"
mkv: "MKV Video"
mov: "MOV Video"
webm: "WebM Video"
mp3: "MP3 Audio"
wav: "WAV Audio"
flac: "FLAC Audio"
gif: "Animated GIF"
vips:
name: "VIPS"
command_template: 'vips copy {input} {output}[Q=90]'
timeout: 60
formats:
jpg: "JPEG Image (Q90)"
png: "PNG Image"
webp: "WebP Image (Q90)"
tiff: "TIFF Image"
avif: "AVIF Image"
graphicsmagick:
name: "GraphicsMagick"
command_template: 'gm convert {input} -quality 90 {output}'
timeout: 60
formats:
jpg: "JPEG Image (Q90)"
png: "PNG Image"
webp: "WebP Image"
tiff: "TIFF Image"
pdf: "PDF from Images"
inkscape:
name: "Inkscape"
command_template: 'inkscape {input} --export-filename={output}'
timeout: 30
formats:
svg: "SVG (Plain)"
png: "PNG Image (96 DPI)"
pdf: "PDF Document"
libjxl:
name: "libjxl (cjxl)"
command_template: 'cjxl {input} {output} -q 90'
timeout: 30
formats:
jxl: "JPEG XL (Q90)"
resvg:
name: "resvg"
command_template: 'resvg {input} {output}'
timeout: 30
formats:
png: "PNG from SVG"
potrace:
name: "Potrace"
command_template: 'potrace {input} --svg -o {output}'
timeout: 30
formats:
svg: "SVG from Bitmap"
markitdown:
name: "Markitdown"
command_template: 'markitdown {input} -o {output}'
timeout: 30
formats:
md: "Markdown from Everything!"
pngquant:
name: "pngquant"
command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
timeout: 30
formats:
png_hq: "PNG (High Quality Compression)"
png_mq: "PNG (Medium Quality Compression)"
png_fast: "PNG (Fast Compression)"
sox:
name: "SoX Audio Converter"
command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
timeout: 120
formats:
wav_48k_24b: "WAV (48kHz, 24-bit)"
wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
flac_48k_24b: "FLAC (48kHz, 24-bit)"
flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
ogg_32k_16b: "Ogg Vorbis (32kHz)"
ogg_16k_16b: "Ogg Vorbis (16kHz, Voice)"
mozjpeg:
name: "MozJPEG"
command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
timeout: 30
formats:
jpg_q85: "JPEG (High Quality)"
jpg_q75: "JPEG (Web Quality)"
jpg_q60: "JPEG (Aggressive Compression)"

View File

@@ -1,179 +1,242 @@
# settings.yml
# General application settings
app_settings:
max_file_size_mb: 2000 # Maximum upload size in Megabytes
# Settings for Optical Character Recognition (OCR) tasks
max_file_size_mb: '2000'
allowed_all_extensions:
- .pdf
- .ps
- .eps
- .png
- .jpg
- .jpeg
- .tiff
- .tif
- .gif
- .bmp
- .webp
- .svg
- .jxl
- .avif
- .ppm
- .mp3
- .m4a
- .ogg
- .flac
- .opus
- .wav
- .aac
- .mp4
- .mkv
- .mov
- .webm
- .avi
- .flv
- .md
- .txt
- .html
- .docx
- .odt
- .rst
- .epub
- .mobi
- .azw3
- .pptx
- .xlsx
ocr_settings:
ocrmypdf:
deskew: true
clean: true
optimize: 1
force_ocr: true
# Settings for audio transcription tasks
transcription_settings:
whisper:
compute_type: "int8"
compute_type: int8
allowed_models:
- "tiny"
- "base"
- "small"
- "medium"
- "large-v3"
- "distil-large-v2"
# --- Conversion Tool Definitions ---
# Each tool's command is a single string. The backend uses shlex to parse it,
# so you can use quotes for arguments with spaces.
# Use placeholders like: {input}, {output}, {output_dir}, {output_ext}, etc.
- tiny
- base
- small
- medium
- large-v3
- distil-large-v2
conversion_tools:
libreoffice:
name: "LibreOffice"
command_template: 'libreoffice --headless --convert-to {output_ext} --outdir {output_dir} {input}'
name: LibreOffice
command_template: libreoffice --headless --convert-to {filter} --outdir {output_dir}
{input}
timeout: 300
filters:
pdf: pdf
docx: docx
odt: odt
html: html
rtf: rtf
txt: txt:Text
xml: xml
epub: epub
xlsx: xlsx
ods: ods
csv: csv:Text
pptx: pptx
odp: odp
svg: svg
formats:
pdf: "PDF"
docx: "Word Document"
odt: "OpenDocument Text"
html: "HTML"
rtf: "Rich Text Format"
txt: "Plain Text"
xml: "Word 2003 XML"
epub: "EPUB"
xlsx: "Excel Spreadsheet"
ods: "OpenDocument Spreadsheet"
csv: "CSV"
pptx: "PowerPoint Presentation"
odp: "OpenDocument Presentation"
svg: "SVG"
pdf: PDF
docx: Word Document
odt: OpenDocument Text
html: HTML
rtf: Rich Text Format
txt: Plain Text
xml: Word 2003 XML
epub: EPUB
xlsx: Excel Spreadsheet
ods: OpenDocument Spreadsheet
csv: CSV
pptx: PowerPoint Presentation
odp: OpenDocument Presentation
svg: SVG
pandoc:
name: "Pandoc"
command_template: 'pandoc --standalone {input} -o {output}'
name: Pandoc
command_template: pandoc --standalone {input} -o {output} --to={output_ext} --pdf-engine=xelatex
timeout: 300
formats:
docx: "Word Document"
odt: "OpenDocument Text"
pdf: "PDF"
rtf: "Rich Text Format"
txt: "Plain Text"
tex: "LaTeX"
man: "Groff Man Page"
epub: "EPUB v3 Book"
epub2: "EPUB v2 Book"
html: "HTML"
html5: "HTML5"
pptx: "PowerPoint Presentation"
beamer: "Beamer PDF Slides"
slidy: "Slidy HTML Slides"
md: "Markdown"
rst: "reStructuredText"
jira: "Jira Wiki Markup"
mediawiki: "MediaWiki Markup"
docx: Word Document
odt: OpenDocument Text
pdf: PDF
rtf: Rich Text Format
txt: Plain Text
tex: LaTeX
man: Groff Man Page
epub: EPUB v3 Book
epub2: EPUB v2 Book
html: HTML
html5: HTML5
pptx: PowerPoint Presentation
beamer: Beamer PDF Slides
slidy: Slidy HTML Slides
md: Markdown
rst: reStructuredText
jira: Jira Wiki Markup
mediawiki: MediaWiki Markup
ghostscript_pdf:
name: "Ghostscript (PDF)"
command_template: 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET -dBATCH {preset} -sOutputFile={output} {input}'
name: Ghostscript (PDF)
command_template: gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dNOPAUSE -dQUIET
-dBATCH {preset} -sOutputFile={output} {input}
timeout: 60
formats:
screen: "PDF (Optimized for Screen)"
ebook: "PDF (Optimized for Ebooks)"
printer: "PDF (Optimized for Print)"
archive: "PDF/A (for Archiving)"
screen: PDF (Optimized for Screen)
ebook: PDF (Optimized for Ebooks)
printer: PDF (Optimized for Print)
archive: PDF/A (for Archiving)
ghostscript_image:
name: "Ghostscript (Image)"
command_template: 'gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output} {input}'
name: Ghostscript (Image)
command_template: gs -dNOPAUSE -dBATCH -sDEVICE={device} -r{dpi} -sOutputFile={output}
{input}
timeout: 60
formats:
jpeg_72: "JPEG Image (72 DPI)"
jpeg_300: "JPEG Image (300 DPI)"
png16m_150: "PNG Image (150 DPI)"
png16m_300: "PNG Image (300 DPI)"
tiff24nc_300: "TIFF Image (300 DPI)"
tiff24nc_600: "TIFF Image (600 DPI)"
jpeg_72: JPEG Image (72 DPI)
jpeg_300: JPEG Image (300 DPI)
png16m_150: PNG Image (150 DPI)
png16m_300: PNG Image (300 DPI)
tiff24nc_300: TIFF Image (300 DPI)
tiff24nc_600: TIFF Image (600 DPI)
calibre:
name: "Calibre (ebook-convert)"
command_template: 'ebook-convert {input} {output}'
name: Calibre (ebook-convert)
command_template: ebook-convert {input} {output}
timeout: 600
formats:
epub: "EPUB"
mobi: "MOBI"
azw3: "Amazon Kindle"
pdf: "PDF"
docx: "Word Document"
epub: EPUB
mobi: MOBI
azw3: Amazon Kindle
pdf: PDF
docx: Word Document
ffmpeg:
name: "FFmpeg"
command_template: 'ffmpeg -i {input} -y -preset medium {output}'
name: FFmpeg
command_template: ffmpeg -i {input} -y -preset medium {output}
timeout: 600
formats:
mp4: "MP4 Video"
mkv: "MKV Video"
mov: "MOV Video"
webm: "WebM Video"
mp3: "MP3 Audio"
wav: "WAV Audio"
flac: "FLAC Audio"
gif: "Animated GIF"
mp4: MP4 Video
mkv: MKV Video
mov: MOV Video
webm: WebM Video
mp3: MP3 Audio
wav: WAV Audio
flac: FLAC Audio
gif: Animated GIF
vips:
name: "VIPS"
command_template: 'vips copy {input} {output}[Q=90]'
name: VIPS
command_template: vips copy {input} {output}[Q=90]
timeout: 60
formats:
jpg: "JPEG Image (Q90)"
png: "PNG Image"
webp: "WebP Image (Q90)"
tiff: "TIFF Image"
avif: "AVIF Image"
jpg: JPEG Image (Q90)
png: PNG Image
webp: WebP Image (Q90)
tiff: TIFF Image
avif: AVIF Image
graphicsmagick:
name: "GraphicsMagick"
command_template: 'gm convert {input} -quality 90 {output}'
name: GraphicsMagick
command_template: gm convert {input} -quality 90 {output}
timeout: 60
formats:
jpg: "JPEG Image (Q90)"
png: "PNG Image"
webp: "WebP Image"
tiff: "TIFF Image"
pdf: "PDF from Images"
jpg: JPEG Image (Q90)
png: PNG Image
webp: WebP Image
tiff: TIFF Image
pdf: PDF from Images
inkscape:
name: "Inkscape"
command_template: 'inkscape {input} --export-filename={output}'
name: Inkscape
command_template: inkscape {input} --export-filename={output}
timeout: 30
formats:
svg: "SVG (Plain)"
png: "PNG Image (96 DPI)"
pdf: "PDF Document"
svg: SVG (Plain)
png: PNG Image (96 DPI)
pdf: PDF Document
libjxl:
name: "libjxl (cjxl)"
command_template: 'cjxl {input} {output} -q 90'
name: libjxl (cjxl)
command_template: cjxl {input} {output} -q 90
timeout: 30
formats:
jxl: "JPEG XL (Q90)"
jxl: JPEG XL (Q90)
resvg:
name: "resvg"
command_template: 'resvg {input} {output}'
name: resvg
command_template: resvg {input} {output}
timeout: 30
formats:
png: "PNG from SVG"
png: PNG from SVG
potrace:
name: "Potrace"
command_template: 'potrace {input} --svg -o {output}'
name: Potrace
command_template: potrace {input} --svg -o {output}
timeout: 30
formats:
svg: "SVG from Bitmap"
svg: SVG from Bitmap
markitdown:
name: "Markitdown"
command_template: 'markitdown {input} -o {output}'
name: Markitdown
command_template: markitdown {input} -o {output}
timeout: 300
formats:
md: "Markdown from Everything!"
md: Markdown from Everything!
pngquant:
name: "pngquant"
command_template: 'pngquant --quality={quality} --speed {speed} --force --output {output} {input}'
name: pngquant
command_template: pngquant --quality={quality} --speed {speed} --force --output
{output} {input}
timeout: 300
formats:
png_hq: "PNG (High Quality Compression)"
png_mq: "PNG (Medium Quality Compression)"
png_fast: "PNG (Fast Compression)"
png_hq: PNG (High Quality Compression)
png_mq: PNG (Medium Quality Compression)
png_fast: PNG (Fast Compression)
sox:
name: "SoX Audio Converter"
command_template: 'sox {input} -r {samplerate} -b {bitdepth} {output}'
name: SoX Audio Converter
command_template: sox {input} -r {samplerate} -b {bitdepth} {output}
timeout: 600
formats:
wav_48k_24b: "WAV (48kHz, 24-bit)"
wav_44k_16b: "WAV (CD, 44.1kHz, 16-bit)"
flac_48k_24b: "FLAC (48kHz, 24-bit)"
flac_44k_16b: "FLAC (CD, 44.1kHz, 16-bit)"
ogg_32k: "Ogg Vorbis (32kHz)"
ogg_16k: "Ogg Vorbis (16kHz, Voice)"
wav_48k_24b: WAV (48kHz, 24-bit)
wav_44k_16b: WAV (CD, 44.1kHz, 16-bit)
flac_48k_24b: FLAC (48kHz, 24-bit)
flac_44k_16b: FLAC (CD, 44.1kHz, 16-bit)
ogg_32k_16b: Ogg Vorbis (32kHz)
ogg_16k_16b: Ogg Vorbis (16kHz, Voice)
mozjpeg:
name: "MozJPEG"
command_template: 'cjpeg -quality {quality} -outfile {output} {input}'
name: MozJPEG
command_template: cjpeg -quality {quality} -outfile {output} {input}
timeout: 30
formats:
jpg_q85: "JPEG (High Quality)"
jpg_q75: "JPEG (Web Quality)"
jpg_q60: "JPEG (Aggressive Compression)"
jpg_q85: JPEG (High Quality)
jpg_q75: JPEG (Web Quality)
jpg_q60: JPEG (Aggressive Compression)

View File

@@ -1,5 +1,3 @@
/* static/css/style.css */
:root {
/* Core */
--bg-color: #000000;
@@ -449,6 +447,110 @@ button[type="submit"]:disabled {
}
/* --- START: Drag and Drop and Dialog Styles --- */
.drag-overlay {
position: fixed;
inset: 0;
z-index: 9999;
display: none; /* Hidden by default */
justify-content: center;
align-items: center;
background-color: rgba(0, 0, 0, 0.7);
backdrop-filter: blur(5px);
}
body.dragging .drag-overlay {
display: flex; /* Shown when body has .dragging class */
}
.drag-overlay-content {
border: 3px dashed var(--primary-color);
border-radius: 12px;
padding: 2rem 4rem;
text-align: center;
background-color: rgba(0, 0, 0, 0.2);
}
.drag-overlay-content p {
margin: 0;
font-size: 1.5rem;
font-weight: 500;
color: var(--primary-color);
}
.dialog-overlay {
position: fixed;
inset: 0;
z-index: 10000;
display: none; /* Hidden by default */
justify-content: center;
align-items: center;
background-color: rgba(0, 0, 0, 0.7);
backdrop-filter: blur(5px);
}
.dialog-overlay.visible {
display: flex; /* Show when .visible class is added */
}
.dialog-box {
background: var(--card-bg);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 1.5rem;
width: 100%;
max-width: 450px;
text-align: center;
box-shadow: 0 10px 30px rgba(0,0,0,0.5);
}
.dialog-box h2 {
margin-top: 0;
font-size: 1.5rem;
}
.dialog-box p {
color: var(--muted-text);
margin-bottom: 1.5rem;
}
.dialog-actions {
display: grid;
grid-template-columns: 1fr;
gap: 0.75rem;
margin-bottom: 1rem;
}
.dialog-actions button {
display: block;
width: 100%;
background: transparent;
border: 1px solid var(--border-color);
color: var(--text-color);
padding: 0.65rem 1rem;
font-size: 1rem;
font-weight: 600;
border-radius: 5px;
cursor: pointer;
transition: background-color 0.15s ease, border-color 0.15s ease;
}
.dialog-actions button:hover {
background: var(--primary-hover);
border-color: var(--primary-hover);
}
.dialog-secondary-action {
background-color: transparent !important;
border: 1px solid var(--border-color) !important;
}
.dialog-secondary-action:hover {
background-color: rgba(255, 255, 255, 0.05) !important;
}
.dialog-cancel {
background: none;
border: none;
color: var(--muted-text);
cursor: pointer;
font-size: 0.9rem;
padding: 0.5rem;
}
.dialog-cancel:hover {
color: var(--text-color);
}
/* --- END: Drag and Drop and Dialog Styles --- */
/* Spinner */
.spinner-small {
border: 3px solid rgba(255,255,255,0.1);
@@ -467,7 +569,6 @@ button[type="submit"]:disabled {
/* Mobile responsive table */
@media (max-width: 768px) {
/* ... (no changes in this section) ... */
.table-wrapper {
border: none;
background-color: transparent;

View File

@@ -1,6 +1,17 @@
// static/js/script.js
document.addEventListener('DOMContentLoaded', () => {
// --- User Locale and Timezone Detection (Corrected Implementation) ---
const USER_LOCALE = navigator.language || 'en-US'; // Fallback to en-US
const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone;
const DATETIME_FORMAT_OPTIONS = {
year: 'numeric',
month: 'short',
day: 'numeric',
hour: 'numeric',
minute: '2-digit',
timeZone: USER_TIMEZONE,
};
console.log(`Using locale: ${USER_LOCALE} and timezone: ${USER_TIMEZONE}`);
// --- Element Selectors ---
const jobListBody = document.getElementById('job-list-body');
@@ -11,16 +22,35 @@ document.addEventListener('DOMContentLoaded', () => {
const audioForm = document.getElementById('audio-form');
const audioFileInput = document.getElementById('audio-file-input');
const audioFileName = document.getElementById('audio-file-name');
const modelSizeSelect = document.getElementById('model-size-select');
const conversionForm = document.getElementById('conversion-form');
const conversionFileInput = document.getElementById('conversion-file-input');
const conversionFileName = document.getElementById('conversion-file-name');
const outputFormatSelect = document.getElementById('output-format-select');
// MODIFICATION: Store the Choices.js instance in a variable
let conversionChoices = null;
// START: Drag and Drop additions
const dragOverlay = document.getElementById('drag-overlay');
const actionDialog = document.getElementById('action-dialog');
const dialogFileCount = document.getElementById('dialog-file-count');
// Dialog Views
const dialogInitialView = document.getElementById('dialog-initial-actions');
const dialogConvertView = document.getElementById('dialog-convert-view');
// Dialog Buttons
const dialogConvertBtn = document.getElementById('dialog-action-convert');
const dialogOcrBtn = document.getElementById('dialog-action-ocr');
const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe');
const dialogCancelBtn = document.getElementById('dialog-action-cancel');
const dialogStartConversionBtn = document.getElementById('dialog-start-conversion');
const dialogBackBtn = document.getElementById('dialog-back');
// Dialog Select
const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select');
// END: Drag and Drop additions
let conversionChoices = null;
let dialogConversionChoices = null; // For the dialog's format selector
const activePolls = new Map();
let stagedFiles = null; // To hold files from a drop event
// --- Main Event Listeners ---
pdfFileInput.addEventListener('change', () => updateFileName(pdfFileInput, pdfFileName));
@@ -38,12 +68,221 @@ document.addEventListener('DOMContentLoaded', () => {
}
});
// --- Helper Functions ---
/**
 * Format a byte count as a short human-readable string (base 1024).
 *
 * @param {number|string|null|undefined} bytes - Size in bytes.
 * @param {number} [decimals=1] - Maximum fraction digits; negative values are treated as 0.
 * @returns {string} For example "1.5 KB". Zero, missing, non-numeric,
 *     non-finite and negative input all yield "0 Bytes".
 */
function formatBytes(bytes, decimals = 1) {
    const value = Number(bytes);
    // Zero, null/undefined, NaN, Infinity and negative sizes have no sensible unit.
    if (!Number.isFinite(value) || value <= 0) return '0 Bytes';

    const k = 1024;
    const dm = decimals < 0 ? 0 : decimals;
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];

    // Clamp the unit index: a sub-byte value would give a negative index and
    // anything of 1024 TB or more would run past the table and print "undefined".
    const rawIndex = Math.floor(Math.log(value) / Math.log(k));
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

    // parseFloat drops trailing zeros left by toFixed ("1.0" -> "1").
    return `${parseFloat((value / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`;
}
// --- Core Job Submission Logic (Refactored for reuse) ---
/**
 * POST one upload to a job endpoint, then show a provisional row for it and
 * begin polling its status.
 *
 * Failures are logged to the console and reported to the user with alert();
 * the returned promise does not reject.
 *
 * @param {string} endpoint - URL the form data is POSTed to.
 * @param {FormData} formData - Request body; its "file" entry supplies the displayed size.
 * @param {string} originalFilename - Name shown in the job row and in error messages.
 * @returns {Promise<void>}
 */
async function submitJob(endpoint, formData, originalFilename) {
    try {
        const response = await fetch(endpoint, { method: 'POST', body: formData });
        if (!response.ok) {
            // An error body is not guaranteed to be JSON (e.g. an HTML error
            // page from a proxy); a parse failure must not hide the HTTP status.
            let detail = null;
            try {
                const errorData = await response.json();
                detail = errorData ? errorData.detail : null;
            } catch (parseError) {
                detail = null;
            }
            // A structured detail (object or array) would otherwise be shown
            // to the user as "[object Object]".
            if (detail && typeof detail !== 'string') {
                detail = JSON.stringify(detail);
            }
            throw new Error(detail || `HTTP error! Status: ${response.status}`);
        }

        const result = await response.json();
        // The job already exists at this point, so a missing "file" entry
        // must not be reported as a failed submission.
        const uploadedFile = formData.get('file');
        const preliminaryJob = {
            id: result.job_id,
            status: 'pending',
            progress: 0,
            original_filename: originalFilename,
            input_filesize: uploadedFile ? uploadedFile.size : undefined,
            task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
            created_at: new Date().toISOString() // Provisional UTC timestamp for the new row
        };
        renderJobRow(preliminaryJob);
        startPolling(result.job_id);
    } catch (error) {
        console.error('Error submitting job:', error);
        alert(`Submission failed for ${originalFilename}: ${error.message}`);
    }
}
// --- Original Form Submission Handler (Now uses submitJob) ---
/**
 * Submit handler shared by the upload forms.
 *
 * Every selected file becomes its own job. Jobs are submitted one after
 * another through submitJob(), and the form UI is reset once all of them
 * have been sent.
 *
 * @param {Event} event - The form's submit event (default action is cancelled).
 * @param {string} endpoint - URL each file is POSTed to.
 * @param {HTMLFormElement} form - The form being submitted.
 * @returns {Promise<void>}
 */
async function handleFormSubmit(event, endpoint, form) {
    event.preventDefault();

    const fileInput = form.querySelector('input[type="file"]');
    if (fileInput.files.length === 0) return;

    const submitButton = form.querySelector('button[type="submit"]');
    submitButton.disabled = true;

    // Extra fields that only some forms have: [form-data key, selector].
    const optionalSelects = [
        ['output_format', 'select[name="output_format"]'],
        ['model_size', 'select[name="model_size"]'],
    ];

    // Snapshot the FileList once, then send the files strictly in sequence.
    for (const file of Array.from(fileInput.files)) {
        const payload = new FormData();
        payload.append('file', file);

        for (const [key, selector] of optionalSelects) {
            const select = form.querySelector(selector);
            if (select) {
                payload.append(key, select.value);
            }
        }

        await submitJob(endpoint, payload, file.name);
    }

    // All jobs sent: put the form back into its empty state.
    const fileNameDisplay = form.querySelector('.file-name');
    form.reset();
    if (fileNameDisplay) {
        fileNameDisplay.textContent = 'No file chosen';
        fileNameDisplay.title = 'No file chosen';
    }

    const isConversionForm = form.id === 'conversion-form';
    if (isConversionForm && conversionChoices) {
        conversionChoices.clearInput();
        conversionChoices.setValue([]);
    }

    submitButton.disabled = false;
}
// --- START: Drag and Drop Implementation ---
/**
 * Install the window-level drag-and-drop handlers.
 *
 * While something is dragged over the window, <body> carries the "dragging"
 * class. A drop that lands on the drag overlay stages the dropped files in
 * `stagedFiles` and opens the action dialog; drops elsewhere are ignored.
 */
function setupDragAndDropListeners() {
    // dragenter/dragleave fire for every element the pointer crosses, so a
    // depth counter is used to tell when the drag has really left the window.
    let dragCounter = 0;

    window.addEventListener('dragenter', (e) => {
        e.preventDefault();
        dragCounter++;
        document.body.classList.add('dragging');
    });

    window.addEventListener('dragleave', (e) => {
        e.preventDefault();
        // Never let the counter go negative: an unmatched dragleave would
        // otherwise leave it off by one and the overlay stuck on later drags.
        dragCounter = Math.max(0, dragCounter - 1);
        if (dragCounter === 0) {
            document.body.classList.remove('dragging');
        }
    });

    window.addEventListener('dragover', (e) => {
        e.preventDefault(); // Required, otherwise the browser will not allow a drop
    });

    window.addEventListener('drop', (e) => {
        e.preventDefault();
        dragCounter = 0; // Drag is over; reset for the next one
        document.body.classList.remove('dragging');

        // Only handle the drop if it landed on our designated overlay
        if (e.target === dragOverlay || dragOverlay.contains(e.target)) {
            const files = e.dataTransfer.files;
            if (files && files.length > 0) {
                // dataTransfer.files is a live list tied to the drag data
                // store, which goes away once this event finishes. The files
                // are consumed later (after the user picks an action), so
                // take a snapshot array now.
                stagedFiles = Array.from(files);
                showActionDialog();
            }
        }
    });
}
/**
 * Open the action dialog for the files currently held in `stagedFiles`.
 *
 * Shows the staged file count, rebuilds the dialog's format selector from the
 * main conversion form's <select>, resets the dialog to its initial view and
 * makes it visible. Expects `stagedFiles` to be set before it is called.
 */
function showActionDialog() {
dialogFileCount.textContent = stagedFiles.length;
// Clone options from main form's select to the dialog's select
// NOTE(review): the markup is copied before the previous Choices.js instance
// is destroyed below; confirm destroy() does not reset the select's options.
dialogOutputFormatSelect.innerHTML = outputFormatSelect.innerHTML;
// Clean up previous Choices.js instance if it exists
if (dialogConversionChoices) {
dialogConversionChoices.destroy();
}
// Initialize a new Choices.js instance for the dialog
dialogConversionChoices = new Choices(dialogOutputFormatSelect, {
searchEnabled: true,
itemSelectText: 'Select',
shouldSort: false,
placeholder: true,
placeholderValue: 'Select a format...',
});
// Ensure the initial view is shown (action buttons visible, convert view hidden)
dialogInitialView.style.display = 'grid';
dialogConvertView.style.display = 'none';
// The .visible class is what makes the dialog appear
actionDialog.classList.add('visible');
}
/**
 * Hide the action dialog, drop the staged files, and tear down the dialog's
 * Choices.js instance if one exists.
 */
function closeActionDialog() {
    actionDialog.classList.remove('visible');
    stagedFiles = null;

    const choices = dialogConversionChoices;
    if (!choices) return;

    // Close the dropdown first, then dispose of the instance so that a fresh
    // one is created the next time the dialog opens.
    choices.hideDropdown();
    choices.destroy();
    dialogConversionChoices = null;
}
// --- Dialog Button and Action Listeners ---
dialogConvertBtn.addEventListener('click', () => {
// Switch to the conversion view
dialogInitialView.style.display = 'none';
dialogConvertView.style.display = 'block';
});
dialogBackBtn.addEventListener('click', () => {
// Switch back to the initial view
dialogInitialView.style.display = 'grid';
dialogConvertView.style.display = 'none';
});
dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('convert'));
dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr'));
dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcribe'));
dialogCancelBtn.addEventListener('click', closeActionDialog);
/**
 * Run the action chosen in the dialog against every staged file.
 *
 * One FormData payload is built per file. For "convert" the selected output
 * format is required; if none is chosen the user is alerted and nothing is
 * submitted. All payloads are then handed to submitJob() without awaiting
 * them, and the dialog is closed.
 *
 * @param {string} action - "convert", "ocr" or "transcribe".
 */
function handleDialogAction(action) {
    if (!stagedFiles) return;

    let endpoint = '';
    const queuedJobs = [];

    for (const file of stagedFiles) {
        const payload = new FormData();
        payload.append('file', file);

        switch (action) {
            case 'convert': {
                const selectedFormat = dialogConversionChoices.getValue(true);
                if (!selectedFormat) {
                    // Bail out before anything has been submitted.
                    alert('Please select a format to convert to.');
                    return;
                }
                payload.append('output_format', selectedFormat);
                endpoint = '/convert-file';
                break;
            }
            case 'ocr':
                endpoint = '/ocr-pdf';
                break;
            case 'transcribe':
                payload.append('model_size', modelSizeSelect.value);
                endpoint = '/transcribe-audio';
                break;
            default:
                break;
        }

        queuedJobs.push({ formData: payload, name: file.name });
    }

    // Submissions are started together; none of them is awaited here.
    for (const job of queuedJobs) {
        submitJob(endpoint, job.formData, job.name);
    }

    closeActionDialog();
}
// --- END: Drag and Drop Implementation ---
function initializeConversionSelector() {
// MODIFICATION: Destroy the old instance if it exists before creating a new one
if (conversionChoices) {
conversionChoices.destroy();
}
conversionChoices = new Choices(outputFormatSelect, {
searchEnabled: true,
itemSelectText: 'Select',
@@ -65,7 +304,7 @@ document.addEventListener('DOMContentLoaded', () => {
for (const formatKey in tool.formats) {
group.choices.push({
value: `${toolKey}_${formatKey}`,
label: `${formatKey.toUpperCase()} - ${tool.formats[formatKey]}`
label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})`
});
}
choicesArray.push(group);
@@ -73,56 +312,21 @@ document.addEventListener('DOMContentLoaded', () => {
conversionChoices.setChoices(choicesArray, 'value', 'label', true);
}
// --- Helper Functions ---
function updateFileName(input, nameDisplay) {
const fileName = input.files.length > 0 ? input.files[0].name : 'No file chosen';
nameDisplay.textContent = fileName;
nameDisplay.title = fileName;
}
async function handleFormSubmit(event, endpoint, form) {
event.preventDefault();
const fileInput = form.querySelector('input[type="file"]');
const fileNameDisplay = form.querySelector('.file-name');
if (!fileInput.files[0]) return;
const formData = new FormData(form);
const submitButton = form.querySelector('button[type="submit"]');
submitButton.disabled = true;
try {
const response = await fetch(endpoint, { method: 'POST', body: formData });
if (!response.ok) {
const errorData = await response.json();
throw new Error(errorData.detail || `HTTP error! Status: ${response.status}`);
}
const result = await response.json();
const preliminaryJob = {
id: result.job_id,
status: 'pending',
progress: 0,
original_filename: fileInput.files[0].name,
task_type: endpoint.includes('ocr') ? 'ocr' : (endpoint.includes('transcribe') ? 'transcription' : 'conversion'),
created_at: new Date().toISOString()
};
renderJobRow(preliminaryJob);
startPolling(result.job_id);
} catch (error) {
console.error('Error submitting job:', error);
alert(`Submission failed: ${error.message}`);
} finally {
form.reset();
if (fileNameDisplay) fileNameDisplay.textContent = 'No file chosen';
// MODIFICATION: Use the stored instance to correctly reset the dropdown
// without causing an error.
if (form.id === 'conversion-form' && conversionChoices) {
conversionChoices.clearInput();
conversionChoices.setValue([]); // Clears the selected value
}
submitButton.disabled = false;
// Summarise the current selection of the file input for its label element:
// nothing chosen, a single filename, or a count of the selected files.
const pickedFiles = input.files;
const pickedCount = pickedFiles.length;
let labelText = 'No file chosen';
let hoverText = 'No file chosen';
if (pickedCount > 1) {
    labelText = `${pickedCount} files selected`;
    // The hover tooltip lists every selected filename, comma-separated.
    hoverText = Array.from(pickedFiles, (entry) => entry.name).join(', ');
} else if (pickedCount === 1) {
    labelText = pickedFiles[0].name;
    hoverText = pickedFiles[0].name;
}
nameDisplay.textContent = labelText;
nameDisplay.title = hoverText;
}
async function handleCancelJob(jobId) {
@@ -161,7 +365,7 @@ document.addEventListener('DOMContentLoaded', () => {
}
} catch (error) {
console.error("Couldn't load job history:", error);
jobListBody.innerHTML = '<tr><td colspan="5" style="text-align: center;">Could not load job history.</td></tr>';
jobListBody.innerHTML = '<tr><td colspan="6" style="text-align: center;">Could not load job history.</td></tr>';
}
}
@@ -214,7 +418,12 @@ document.addEventListener('DOMContentLoaded', () => {
taskTypeLabel = 'Conversion';
}
const formattedDate = new Date(job.created_at).toLocaleString();
// --- Date formatting ---
// Parse the job's creation timestamp and render it with the shared
// USER_LOCALE / DATETIME_FORMAT_OPTIONS settings.
// NOTE(review): presumably job.created_at is an ISO-8601 UTC string — confirm against the API.
const submittedDate = new Date(job.created_at);
const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS);
let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
if (job.status === 'processing') {
const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
@@ -233,9 +442,21 @@ document.addEventListener('DOMContentLoaded', () => {
actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`;
}
// --- File Size Logic ---
// Builds the "File Size" cell: the input size alone, or "input → output" once a
// completed job reports an output size. Falls back to "-" when no input size is set.
let fileSizeHtml = '<span>-</span>';
if (job.input_filesize) {
    let sizeString = formatBytes(job.input_filesize);
    if (job.status === 'completed' && job.output_filesize) {
        // Separate the two sizes; without a separator they render fused together.
        // \u2192 is the right-arrow, written as an escape so it survives re-encoding.
        sizeString += ` \u2192 ${formatBytes(job.output_filesize)}`;
    }
    fileSizeHtml = `<span class="cell-value">${sizeString}</span>`;
}
// The filename is user-supplied and is interpolated into innerHTML, both as element
// text and inside a double-quoted title attribute, so every HTML-significant
// character must be escaped. "&" goes first so later entities are not double-escaped.
const escapedFilename = job.original_filename
    ? job.original_filename
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;")
    : "No filename";
row.innerHTML = `
<td data-label="File"><span class="cell-value" title="${escapedFilename}">${escapedFilename}</span></td>
<td data-label="File Size">${fileSizeHtml}</td>
<td data-label="Task"><span class="cell-value">${taskTypeLabel}</span></td>
<td data-label="Submitted"><span class="cell-value">${formattedDate}</span></td>
<td data-label="Status"><span class="cell-value">${statusHtml}</span></td>
@@ -246,4 +467,5 @@ document.addEventListener('DOMContentLoaded', () => {
// --- Initial Load ---
initializeConversionSelector();
loadInitialJobs();
setupDragAndDropListeners();
});

View File

@@ -27,7 +27,7 @@
<fieldset>
<legend><h2>File Conversion</h2></legend>
<div class="file-input-wrapper">
<input type="file" name="file" id="conversion-file-input" required>
<input type="file" name="file" id="conversion-file-input" required multiple>
<label for="conversion-file-input" class="file-input-label">Choose File...</label>
<span id="conversion-file-name" class="file-name">No file chosen</span>
</div>
@@ -45,7 +45,7 @@
<fieldset>
<legend><h2>PDF OCR</h2></legend>
<div class="file-input-wrapper">
<input type="file" name="file" id="pdf-file-input" accept=".pdf" required>
<input type="file" name="file" id="pdf-file-input" accept=".pdf" required multiple>
<label for="pdf-file-input" class="file-input-label">Choose PDF...</label>
<span id="pdf-file-name" class="file-name">No file chosen</span>
</div>
@@ -59,7 +59,7 @@
<fieldset>
<legend><h2>Transcribe Audio</h2></legend>
<div class="file-input-wrapper">
<input type="file" name="file" id="audio-file-input" accept="audio/*" required>
<input type="file" name="file" id="audio-file-input" accept="audio/*" required multiple>
<label for="audio-file-input" class="file-input-label">Choose Audio...</label>
<span id="audio-file-name" class="file-name">No file chosen</span>
</div>
@@ -87,6 +87,7 @@
<thead>
<tr>
<th>File</th>
<th>File Size</th>
<th>Task</th>
<th>Submitted</th>
<th>Status</th>
@@ -101,6 +102,36 @@
</main>
</div>
<!-- Full-page overlay carrying the drop hint text. -->
<div id="drag-overlay" class="drag-overlay">
    <div class="drag-overlay-content">
        <p>Drop files anywhere to begin</p>
    </div>
</div>
<!-- Action dialog for dropped files: the initial view offers the three task types;
     the convert view (hidden by default) asks for the output format. The dialog box
     carries role="dialog"/aria-modal so assistive technology announces it as a modal,
     and every button is type="button" so none can act as a form submit. -->
<div id="action-dialog" class="dialog-overlay">
    <div class="dialog-box" role="dialog" aria-modal="true" aria-labelledby="action-dialog-title">
        <h2 id="action-dialog-title">Choose Action</h2>
        <p><span id="dialog-file-count"></span> file(s) dropped. What would you like to do?</p>
        <div id="dialog-initial-actions" class="dialog-actions">
            <button type="button" id="dialog-action-convert">Convert</button>
            <button type="button" id="dialog-action-ocr">OCR</button>
            <button type="button" id="dialog-action-transcribe">Transcribe</button>
        </div>
        <div id="dialog-convert-view" style="display: none;">
            <div class="form-control" style="text-align: left; margin-bottom: 1rem;">
                <label for="dialog-output-format-select">Convert To</label>
                <select id="dialog-output-format-select" required></select>
            </div>
            <div class="dialog-actions">
                <button type="button" id="dialog-start-conversion">Start Conversion</button>
                <button type="button" id="dialog-back" class="dialog-secondary-action">Back</button>
            </div>
        </div>
        <button type="button" id="dialog-action-cancel" class="dialog-cancel">Cancel</button>
    </div>
</div>
<script>
window.APP_CONFIG = {
conversionTools: {{ conversion_tools | tojson }}