TTS and webhooks

This commit is contained in:
2025-09-20 12:49:48 +00:00
parent 33705559b0
commit ed58453466
9 changed files with 1291 additions and 293 deletions

View File

@@ -2,3 +2,4 @@ LOCAL_ONLY=True
SECRET_KEY= SECRET_KEY=
UPLOADS_DIR=./uploads UPLOADS_DIR=./uploads
PROCESSED_DIR=./processed PROCESSED_DIR=./processed
OMP_NUM_THREADS=1

3
.gitignore vendored
View File

@@ -12,3 +12,6 @@ app.log
/config/* /config/*
my.settings.yml my.settings.yml
jobs.db-* jobs.db-*
venv312
models
config

View File

@@ -10,6 +10,7 @@ services:
- SECRET_KEY= # if using auth - SECRET_KEY= # if using auth
- UPLOADS_DIR=/app/uploads - UPLOADS_DIR=/app/uploads
- PROCESSED_DIR=/app/processed - PROCESSED_DIR=/app/processed
- OMP_NUM_THREADS=1
#user: "1000:1000" #user: "1000:1000"
ports: ports:
- "6969:8000" - "6969:8000"

1291
main.py

File diff suppressed because it is too large Load Diff

View File

@@ -13,6 +13,8 @@ faster-whisper
ocrmypdf ocrmypdf
pytesseract pytesseract
pypdf pypdf
piper-tts
kokoro-tts
# Configuration & Utilities # Configuration & Utilities
werkzeug werkzeug

View File

@@ -1,13 +1,31 @@
auth_settings: auth_settings:
oidc_client_id: filewiz oidc_client_id: filewiz
oidc_client_secret: oidc_client_secret:
oidc_server_metadata_url: https://accounts.test.de/oidc/.well-known/openid-configuration oidc_server_metadata_url: https://accounts.example.com/oidc/.well-known/openid-configuration
oidc_userinfo_endpoint: https://accounts.test.de/oidc/me oidc_userinfo_endpoint: https://accounts.example.com/oidc/me
oidc_end_session_endpoint: https://accounts.test.de/oidc/session/end oidc_end_session_endpoint: https://accounts.example.com/oidc/session/end
admin_users: admin_users:
- admin@local.com - user@example.com
web_hook_settings:
enabled: False
allow_chunked_api_uploads: False
allowed_callback_urls:
callback_bearer_token:
tts_settings:
piper:
model_dir: "./models/tts"
use_cuda: False
synthesis_config:
length_scale: 1.0
noise_scale: 0.667
noise_w: 0.8
kokoro:
model_dir: "./models/tts/kokoro"
command_template: "kokoro-tts {input} {output} --model {model_path} --voices {voices_path} --voice {model_name}"
app_settings: app_settings:
max_file_size_mb: '2000' max_file_size_mb: '2000'
# app_public_url: "http://localhost:8000" # Uncomment and set to your public URL if downloads don't work correctly
allowed_all_extensions: allowed_all_extensions:
- .aac - .aac
- .aiff - .aiff

View File

@@ -278,7 +278,7 @@ input[type="file"] {
.actions-group { .actions-group {
display: grid; display: grid;
grid-template-columns: repeat(3, 1fr); /* 3 columns for wide screens */ grid-template-columns: repeat(4, 1fr); /* 4 columns for wide screens */
gap: 1.5rem; gap: 1.5rem;
margin-top: 1.5rem; margin-top: 1.5rem;
} }

View File

@@ -2,6 +2,18 @@ document.addEventListener('DOMContentLoaded', () => {
// --- Constants --- // --- Constants ---
const CHUNK_SIZE = 5 * 1024 * 1024; // 5 MB chunks const CHUNK_SIZE = 5 * 1024 * 1024; // 5 MB chunks
// Allow the server to provide an API prefix (e.g. "/api/v1") via window.APP_CONFIG.api_base.
// A single trailing slash is stripped so that joining never produces "//".
// The `typeof window` guard keeps this expression from throwing where no `window` global exists.
const API_BASE = (typeof window !== 'undefined' && window.APP_CONFIG && window.APP_CONFIG.api_base)
    ? window.APP_CONFIG.api_base.replace(/\/$/, '')
    : '';

/**
 * Builds a request URL by prepending API_BASE to an endpoint path.
 *
 * The join is idempotent: a path that already starts with API_BASE is returned
 * unchanged. Callers that pass an already-prefixed path (or pass the result of
 * apiUrl() through another helper that calls apiUrl() again) therefore do not
 * end up with the prefix applied twice.
 *
 * @param {string} path - Endpoint path, with or without a leading slash.
 * @returns {string} API_BASE + path; API_BASE (or "/" when it is empty) for an empty path.
 */
function apiUrl(path) {
    if (!path) return API_BASE || '/';

    // Normalize to exactly one leading slash before joining.
    const normalized = path.startsWith('/') ? path : `/${path}`;

    // Already carries the prefix: return as-is instead of doubling it.
    if (API_BASE && (normalized === API_BASE || normalized.startsWith(`${API_BASE}/`))) {
        return normalized;
    }
    return `${API_BASE}${normalized}`;
}
// --- User Locale and Timezone Detection --- // --- User Locale and Timezone Detection ---
const USER_LOCALE = navigator.language || 'en-US'; const USER_LOCALE = navigator.language || 'en-US';
const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone; const USER_TIMEZONE = Intl.DateTimeFormat().resolvedOptions().timeZone;
@@ -24,9 +36,11 @@ document.addEventListener('DOMContentLoaded', () => {
const mainFileName = document.getElementById('main-file-name'); const mainFileName = document.getElementById('main-file-name');
const mainOutputFormatSelect = document.getElementById('main-output-format-select'); const mainOutputFormatSelect = document.getElementById('main-output-format-select');
const mainModelSizeSelect = document.getElementById('main-model-size-select'); const mainModelSizeSelect = document.getElementById('main-model-size-select');
const mainTtsModelSelect = document.getElementById('main-tts-model-select');
const startConversionBtn = document.getElementById('start-conversion-btn'); const startConversionBtn = document.getElementById('start-conversion-btn');
const startOcrBtn = document.getElementById('start-ocr-btn'); const startOcrBtn = document.getElementById('start-ocr-btn');
const startTranscriptionBtn = document.getElementById('start-transcription-btn'); const startTranscriptionBtn = document.getElementById('start-transcription-btn');
const startTtsBtn = document.getElementById('start-tts-btn');
const jobListBody = document.getElementById('job-list-body'); const jobListBody = document.getElementById('job-list-body');
@@ -36,37 +50,50 @@ document.addEventListener('DOMContentLoaded', () => {
const dialogFileCount = document.getElementById('dialog-file-count'); const dialogFileCount = document.getElementById('dialog-file-count');
const dialogInitialView = document.getElementById('dialog-initial-actions'); const dialogInitialView = document.getElementById('dialog-initial-actions');
const dialogConvertView = document.getElementById('dialog-convert-view'); const dialogConvertView = document.getElementById('dialog-convert-view');
const dialogTtsView = document.getElementById('dialog-tts-view');
const dialogConvertBtn = document.getElementById('dialog-action-convert'); const dialogConvertBtn = document.getElementById('dialog-action-convert');
const dialogOcrBtn = document.getElementById('dialog-action-ocr'); const dialogOcrBtn = document.getElementById('dialog-action-ocr');
const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe'); const dialogTranscribeBtn = document.getElementById('dialog-action-transcribe');
const dialogTtsBtn = document.getElementById('dialog-action-tts');
const dialogCancelBtn = document.getElementById('dialog-action-cancel'); const dialogCancelBtn = document.getElementById('dialog-action-cancel');
const dialogStartConversionBtn = document.getElementById('dialog-start-conversion'); const dialogStartConversionBtn = document.getElementById('dialog-start-conversion');
const dialogStartTtsBtn = document.getElementById('dialog-start-tts');
const dialogBackBtn = document.getElementById('dialog-back'); const dialogBackBtn = document.getElementById('dialog-back');
const dialogBackTtsBtn = document.getElementById('dialog-back-tts');
const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select'); const dialogOutputFormatSelect = document.getElementById('dialog-output-format-select');
const dialogTtsModelSelect = document.getElementById('dialog-tts-model-select');
// --- State Variables --- // --- State Variables ---
let conversionChoices = null; let conversionChoices = null;
let modelChoices = null; // For the model dropdown instance let transcriptionChoices = null;
let ttsChoices = null;
let dialogConversionChoices = null; let dialogConversionChoices = null;
let dialogTtsChoices = null;
let ttsModelsCache = []; // Cache for formatted TTS models list
const activePolls = new Map(); const activePolls = new Map();
let stagedFiles = null; let stagedFiles = null;
// --- Authentication-aware Fetch Wrapper --- // --- Authentication-aware Fetch Wrapper ---
/** async function authFetch(url, options = {}) {
* A wrapper around the native fetch API that handles 401 Unauthorized responses. // Normalize URL through apiUrl() if a bare endpoint is provided
* If a 401 is received, it assumes the session has expired and redirects to the login page. if (typeof url === 'string' && url.startsWith('/')) {
* @param {string} url - The URL to fetch. url = apiUrl(url);
* @param {object} options - The options for the fetch request. }
* @returns {Promise<Response>} - A promise that resolves to the fetch Response.
*/ // Add default options: include credentials and accept JSON by default
async function authFetch(url, options) { options = Object.assign({}, options);
if (!Object.prototype.hasOwnProperty.call(options, 'credentials')) {
options.credentials = 'include';
}
options.headers = options.headers || {};
if (!options.headers.Accept) options.headers.Accept = 'application/json';
const response = await fetch(url, options); const response = await fetch(url, options);
if (response.status === 401) { if (response.status === 401) {
// Use a simple alert for now. A more sophisticated modal could be used.
alert('Your session has expired. You will be redirected to the login page.'); alert('Your session has expired. You will be redirected to the login page.');
window.location.href = '/login'; window.location.href = apiUrl('/login');
// Throw an error to stop the promise chain of the calling function
throw new Error('Session expired'); throw new Error('Session expired');
} }
return response; return response;
@@ -141,7 +168,8 @@ document.addEventListener('DOMContentLoaded', () => {
body: JSON.stringify(finalizePayload), body: JSON.stringify(finalizePayload),
}); });
if (!finalizeResponse.ok) { if (!finalizeResponse.ok) {
const errorData = await finalizeResponse.json(); let errorData = {};
try { errorData = await finalizeResponse.json(); } catch (e) {}
throw new Error(errorData.detail || 'Finalization failed'); throw new Error(errorData.detail || 'Finalization failed');
} }
const result = await finalizeResponse.json(); const result = await finalizeResponse.json();
@@ -203,13 +231,23 @@ document.addEventListener('DOMContentLoaded', () => {
} }
options.output_format = selectedFormat; options.output_format = selectedFormat;
} else if (taskType === 'transcription') { } else if (taskType === 'transcription') {
options.model_size = mainModelSizeSelect.value; const selectedModel = transcriptionChoices.getValue(true);
options.model_size = selectedModel;
} else if (taskType === 'tts') {
const selectedModel = ttsChoices.getValue(true);
if (!selectedModel) {
alert('Please select a voice model.');
return;
}
options.model_name = selectedModel;
} }
// Disable buttons during upload process // Disable buttons during upload process
startConversionBtn.disabled = true; startConversionBtn.disabled = true;
startOcrBtn.disabled = true; startOcrBtn.disabled = true;
startTranscriptionBtn.disabled = true; startTranscriptionBtn.disabled = true;
startTtsBtn.disabled = true;
const uploadPromises = files.map(file => uploadFileInChunks(file, taskType, options)); const uploadPromises = files.map(file => uploadFileInChunks(file, taskType, options));
await Promise.allSettled(uploadPromises); await Promise.allSettled(uploadPromises);
@@ -220,6 +258,7 @@ document.addEventListener('DOMContentLoaded', () => {
startConversionBtn.disabled = false; startConversionBtn.disabled = false;
startOcrBtn.disabled = false; startOcrBtn.disabled = false;
startTranscriptionBtn.disabled = false; startTranscriptionBtn.disabled = false;
startTtsBtn.disabled = false;
} }
@@ -251,17 +290,25 @@ document.addEventListener('DOMContentLoaded', () => {
function showActionDialog() { function showActionDialog() {
dialogFileCount.textContent = stagedFiles.length; dialogFileCount.textContent = stagedFiles.length;
dialogOutputFormatSelect.innerHTML = mainOutputFormatSelect.innerHTML; // Use main select as template
// Setup Conversion Dropdown
dialogOutputFormatSelect.innerHTML = mainOutputFormatSelect.innerHTML;
if (dialogConversionChoices) dialogConversionChoices.destroy(); if (dialogConversionChoices) dialogConversionChoices.destroy();
dialogConversionChoices = new Choices(dialogOutputFormatSelect, { dialogConversionChoices = new Choices(dialogOutputFormatSelect, {
searchEnabled: true, searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select a format...',
itemSelectText: 'Select',
shouldSort: false,
placeholder: true,
placeholderValue: 'Select a format...',
}); });
// Setup TTS Dropdown
if (dialogTtsChoices) dialogTtsChoices.destroy();
dialogTtsChoices = new Choices(dialogTtsModelSelect, {
searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select a voice...',
});
dialogTtsChoices.setChoices(ttsModelsCache, 'value', 'label', true);
dialogInitialView.style.display = 'grid'; dialogInitialView.style.display = 'grid';
dialogConvertView.style.display = 'none'; dialogConvertView.style.display = 'none';
dialogTtsView.style.display = 'none';
actionDialog.classList.add('visible'); actionDialog.classList.add('visible');
} }
@@ -269,21 +316,34 @@ document.addEventListener('DOMContentLoaded', () => {
actionDialog.classList.remove('visible'); actionDialog.classList.remove('visible');
stagedFiles = null; stagedFiles = null;
if (dialogConversionChoices) { if (dialogConversionChoices) {
dialogConversionChoices.hideDropdown();
dialogConversionChoices.destroy(); dialogConversionChoices.destroy();
dialogConversionChoices = null; dialogConversionChoices = null;
} }
if (dialogTtsChoices) {
dialogTtsChoices.destroy();
dialogTtsChoices = null;
}
} }
// --- Dialog Button Listeners ---
dialogConvertBtn.addEventListener('click', () => { dialogConvertBtn.addEventListener('click', () => {
dialogInitialView.style.display = 'none'; dialogInitialView.style.display = 'none';
dialogConvertView.style.display = 'block'; dialogConvertView.style.display = 'block';
}); });
dialogTtsBtn.addEventListener('click', () => {
dialogInitialView.style.display = 'none';
dialogTtsView.style.display = 'block';
});
dialogBackBtn.addEventListener('click', () => { dialogBackBtn.addEventListener('click', () => {
dialogInitialView.style.display = 'grid'; dialogInitialView.style.display = 'grid';
dialogConvertView.style.display = 'none'; dialogConvertView.style.display = 'none';
}); });
dialogBackTtsBtn.addEventListener('click', () => {
dialogInitialView.style.display = 'grid';
dialogTtsView.style.display = 'none';
});
dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('conversion')); dialogStartConversionBtn.addEventListener('click', () => handleDialogAction('conversion'));
dialogStartTtsBtn.addEventListener('click', () => handleDialogAction('tts'));
dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr')); dialogOcrBtn.addEventListener('click', () => handleDialogAction('ocr'));
dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcription')); dialogTranscribeBtn.addEventListener('click', () => handleDialogAction('transcription'));
dialogCancelBtn.addEventListener('click', closeActionDialog); dialogCancelBtn.addEventListener('click', closeActionDialog);
@@ -300,23 +360,81 @@ document.addEventListener('DOMContentLoaded', () => {
options.output_format = selectedFormat; options.output_format = selectedFormat;
} else if (action === 'transcription') { } else if (action === 'transcription') {
options.model_size = mainModelSizeSelect.value; options.model_size = mainModelSizeSelect.value;
} else if (action === 'tts') {
const selectedModel = dialogTtsChoices.getValue(true);
if (!selectedModel) {
alert('Please select a voice model.');
return;
}
options.model_name = selectedModel;
} }
Array.from(stagedFiles).forEach(file => uploadFileInChunks(file, action, options)); Array.from(stagedFiles).forEach(file => uploadFileInChunks(file, action, options));
closeActionDialog(); closeActionDialog();
} }
/** // -----------------------
* Initializes all Choices.js dropdowns on the page. // TTS models loader (robust)
*/ // -----------------------
/**
 * Normalizes one voice entry from the TTS voices payload.
 *
 * @param {*} voice - Raw entry; anything that is not an object is treated as carrying no fields.
 * @param {string} [keyId] - Map key to fall back on when the entry has no `id` (map-shaped payloads).
 * @returns {{id: *, name: *, lang: string}|null} Normalized voice, or null when no id can be determined.
 */
function normalizeTtsVoice(voice, keyId) {
    // null / primitive entries must not abort the whole load; treat them as empty metadata.
    const v = (voice && typeof voice === 'object') ? voice : {};

    // Accept either { id, name, language } or { voice_id, title, locale } style entries.
    const id = v.id || keyId || v.voice_id || v.voice || v.name || null;
    if (!id) return null;

    const name = v.name || v.title || v.display_name || id;
    // String(id) so a numeric id cannot throw on .split(); the prefix before the
    // first "_" or "-" of the id is used as a last-resort language label.
    const lang = (v.language && (v.language.name_native || v.language.name))
        || v.locale
        || String(id).split(/[_-]/)[0];
    return { id, name, lang };
}

/**
 * Fetches the available TTS voices, groups them by language and stores the
 * grouped list in `ttsModelsCache`; also pushes it into `ttsChoices` when that
 * dropdown instance exists.
 *
 * The payload may be an array of voice entries (objects or bare id strings) or
 * an object map of `{ id: meta }`. Errors are logged, never rethrown; unless the
 * error is the 'Session expired' one, the dropdown is replaced with a disabled
 * "Error loading voices" entry.
 *
 * @returns {Promise<void>}
 */
async function loadTtsModels() {
    try {
        // NOTE(review): authFetch() passes paths starting with "/" through apiUrl(); if
        // APP_CONFIG.api_base is itself "/api/v1" this path may be prefixed twice — confirm.
        const response = await authFetch('/api/v1/tts-voices');
        if (!response.ok) throw new Error('Failed to fetch TTS voices.');
        const voicesData = await response.json();

        let normalized;
        if (Array.isArray(voicesData)) {
            normalized = voicesData.map(entry =>
                normalizeTtsVoice(typeof entry === 'string' ? { id: entry } : entry));
        } else if (voicesData && typeof voicesData === 'object') {
            // Object.keys yields own enumerable keys only, so inherited properties are skipped.
            normalized = Object.keys(voicesData).map(key => normalizeTtsVoice(voicesData[key], key));
        } else {
            throw new Error('Unexpected voices payload');
        }

        // Group by language. A Map is used so that labels such as "constructor"
        // cannot collide with inherited properties of a plain object.
        const groups = new Map();
        for (const voice of normalized) {
            if (!voice) continue; // entry without a usable id
            const langLabel = voice.lang || 'Unknown';
            if (!groups.has(langLabel)) {
                groups.set(langLabel, { label: langLabel, id: langLabel, disabled: false, choices: [] });
            }
            groups.get(langLabel).choices.push({
                value: voice.id,
                label: `${voice.name}`
            });
        }
        ttsModelsCache = Array.from(groups.values()).sort((a, b) => a.label.localeCompare(b.label));

        // If ttsChoices exists, update it; otherwise the initializer will set choices.
        if (ttsChoices) {
            ttsChoices.setChoices(ttsModelsCache, 'value', 'label', true);
        }
    } catch (error) {
        console.error("Couldn't load TTS voices:", error);
        if (error.message !== 'Session expired' && ttsChoices) {
            // Replace (not append) so the error entry is the only thing shown.
            ttsChoices.setChoices([{ value: '', label: 'Error loading voices', disabled: true }], 'value', 'label', true);
        }
    }
}
function initializeSelectors() { function initializeSelectors() {
// --- Conversion Dropdown ---
if (conversionChoices) conversionChoices.destroy(); if (conversionChoices) conversionChoices.destroy();
conversionChoices = new Choices(mainOutputFormatSelect, { conversionChoices = new Choices(mainOutputFormatSelect, {
searchEnabled: true, searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select a format...',
itemSelectText: 'Select',
shouldSort: false,
placeholder: true,
placeholderValue: 'Select a format...',
}); });
const tools = window.APP_CONFIG.conversionTools || {}; const tools = window.APP_CONFIG.conversionTools || {};
const choicesArray = []; const choicesArray = [];
@@ -324,22 +442,22 @@ document.addEventListener('DOMContentLoaded', () => {
const tool = tools[toolKey]; const tool = tools[toolKey];
const group = { label: tool.name, id: toolKey, disabled: false, choices: [] }; const group = { label: tool.name, id: toolKey, disabled: false, choices: [] };
for (const formatKey in tool.formats) { for (const formatKey in tool.formats) {
group.choices.push({ group.choices.push({ value: `${toolKey}_${formatKey}`, label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})` });
value: `${toolKey}_${formatKey}`,
label: `${tool.name} - ${formatKey.toUpperCase()} (${tool.formats[formatKey]})`
});
} }
choicesArray.push(group); choicesArray.push(group);
} }
conversionChoices.setChoices(choicesArray, 'value', 'label', true); conversionChoices.setChoices(choicesArray, 'value', 'label', true);
// --- Model Size Dropdown --- if (transcriptionChoices) transcriptionChoices.destroy();
if (modelChoices) modelChoices.destroy(); transcriptionChoices = new Choices(mainModelSizeSelect, {
modelChoices = new Choices(mainModelSizeSelect, { searchEnabled: false, shouldSort: false, itemSelectText: '',
searchEnabled: false, // Disables the search box
shouldSort: false, // Keeps the original <option> order
itemSelectText: '', // Hides the "Press to select" tooltip
}); });
if (ttsChoices) ttsChoices.destroy();
ttsChoices = new Choices(mainTtsModelSelect, {
searchEnabled: true, itemSelectText: 'Select', shouldSort: false, placeholder: true, placeholderValue: 'Select voice...',
});
loadTtsModels();
} }
function updateFileName(input, nameDisplay) { function updateFileName(input, nameDisplay) {
@@ -359,7 +477,8 @@ document.addEventListener('DOMContentLoaded', () => {
try { try {
const response = await authFetch(`/job/${jobId}/cancel`, { method: 'POST' }); const response = await authFetch(`/job/${jobId}/cancel`, { method: 'POST' });
if (!response.ok) { if (!response.ok) {
const errorData = await response.json(); let errorData = {};
try { errorData = await response.json(); } catch (e) {}
throw new Error(errorData.detail || 'Failed to cancel job.'); throw new Error(errorData.detail || 'Failed to cancel job.');
} }
stopPolling(jobId); stopPolling(jobId);
@@ -422,7 +541,7 @@ document.addEventListener('DOMContentLoaded', () => {
} }
function renderJobRow(job) { function renderJobRow(job) {
const rowId = job.id.startsWith('upload-') ? job.id : `job-${job.id}`; const rowId = job.id && String(job.id).startsWith('upload-') ? job.id : `job-${job.id}`;
let row = document.getElementById(rowId); let row = document.getElementById(rowId);
if (!row) { if (!row) {
row = document.createElement('tr'); row = document.createElement('tr');
@@ -434,9 +553,11 @@ document.addEventListener('DOMContentLoaded', () => {
if (job.task_type === 'conversion' && job.processed_filepath) { if (job.task_type === 'conversion' && job.processed_filepath) {
const extension = job.processed_filepath.split('.').pop(); const extension = job.processed_filepath.split('.').pop();
taskTypeLabel = `Convert to ${extension.toUpperCase()}`; taskTypeLabel = `Convert to ${extension.toUpperCase()}`;
} else if (job.task_type === 'tts') {
taskTypeLabel = 'Synthesize Speech';
} }
const submittedDate = new Date(job.created_at); const submittedDate = job.created_at ? new Date(job.created_at) : new Date();
const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS); const formattedDate = submittedDate.toLocaleString(USER_LOCALE, DATETIME_FORMAT_OPTIONS);
let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`; let statusHtml = `<span class="job-status-badge status-${job.status}">${job.status}</span>`;
@@ -445,7 +566,7 @@ document.addEventListener('DOMContentLoaded', () => {
statusHtml += `<div class="progress-bar-container"><div class="progress-bar" style="width: ${job.progress || 0}%"></div></div>`; statusHtml += `<div class="progress-bar-container"><div class="progress-bar" style="width: ${job.progress || 0}%"></div></div>`;
} else if (job.status === 'processing') { } else if (job.status === 'processing') {
const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate'; const progressClass = (job.task_type === 'transcription' && job.progress > 0) ? '' : 'indeterminate';
const progressWidth = job.task_type === 'transcription' ? job.progress : 100; const progressWidth = (job.task_type === 'transcription' && job.progress > 0) ? job.progress : 100;
statusHtml += `<div class="progress-bar-container"><div class="progress-bar ${progressClass}" style="width: ${progressWidth}%"></div></div>`; statusHtml += `<div class="progress-bar-container"><div class="progress-bar ${progressClass}" style="width: ${progressWidth}%"></div></div>`;
} }
@@ -454,7 +575,7 @@ document.addEventListener('DOMContentLoaded', () => {
actionHtml = `<button class="cancel-button" data-job-id="${job.id}">Cancel</button>`; actionHtml = `<button class="cancel-button" data-job-id="${job.id}">Cancel</button>`;
} else if (job.status === 'completed' && job.processed_filepath) { } else if (job.status === 'completed' && job.processed_filepath) {
const downloadFilename = job.processed_filepath.split(/[\\/]/).pop(); const downloadFilename = job.processed_filepath.split(/[\\/]/).pop();
actionHtml = `<a href="/download/${downloadFilename}" class="download-button" download>Download</a>`; actionHtml = `<a href="${apiUrl('/download')}/${encodeURIComponent(downloadFilename)}" class="download-button" download>Download</a>`;
} else if (job.status === 'failed') { } else if (job.status === 'failed') {
const errorTitle = job.error_message ? ` title="${job.error_message.replace(/"/g, '&quot;')}"` : ''; const errorTitle = job.error_message ? ` title="${job.error_message.replace(/"/g, '&quot;')}"` : '';
actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`; actionHtml = `<span class="error-text"${errorTitle}>Failed</span>`;
@@ -485,6 +606,7 @@ document.addEventListener('DOMContentLoaded', () => {
startConversionBtn.addEventListener('click', () => handleTaskRequest('conversion')); startConversionBtn.addEventListener('click', () => handleTaskRequest('conversion'));
startOcrBtn.addEventListener('click', () => handleTaskRequest('ocr')); startOcrBtn.addEventListener('click', () => handleTaskRequest('ocr'));
startTranscriptionBtn.addEventListener('click', () => handleTaskRequest('transcription')); startTranscriptionBtn.addEventListener('click', () => handleTaskRequest('transcription'));
startTtsBtn.addEventListener('click', () => handleTaskRequest('tts'));
mainFileInput.addEventListener('change', () => updateFileName(mainFileInput, mainFileName)); mainFileInput.addEventListener('change', () => updateFileName(mainFileInput, mainFileName));
jobListBody.addEventListener('click', (event) => { jobListBody.addEventListener('click', (event) => {
@@ -505,7 +627,7 @@ document.addEventListener('DOMContentLoaded', () => {
if (loginContainer) loginContainer.style.display = 'flex'; if (loginContainer) loginContainer.style.display = 'flex';
if (loginButton) { if (loginButton) {
loginButton.addEventListener('click', () => { loginButton.addEventListener('click', () => {
window.location.href = '/login'; window.location.href = apiUrl('/login');
}); });
} }
} }

View File

@@ -69,16 +69,22 @@
<div class="form-control"> <div class="form-control">
<label for="main-model-size-select">Model Size</label> <label for="main-model-size-select">Model Size</label>
<select name="model_size" id="main-model-size-select"> <select name="model_size" id="main-model-size-select">
<option value="tiny">Tiny (Fastest, lower accuracy)</option> {% for model in whisper_models %}
<option value="base" selected>Base</option> <option value="{{ model }}">{{ model }}</option>
<option value="small">Small (Better accuracy)</option> {% endfor %}
<option value="medium">Medium (High accuracy)</option>
<option value="large-v3">Large v3 (Best accuracy, slow)</option>
<option value="distil-large-v2">Distilled Large v2</option>
</select> </select>
</div> </div>
<button type="button" id="start-transcription-btn" class="main-action-button">Start Transcription</button> <button type="button" id="start-transcription-btn" class="main-action-button">Start Transcription</button>
</fieldset> </fieldset>
<fieldset class="action-fieldset">
<legend><h2>Text-to-Speech</h2></legend>
<div class="form-control">
<label for="main-tts-model-select">Voice Model</label>
<select name="model_name" id="main-tts-model-select" required></select>
</div>
<button type="button" id="start-tts-btn" class="main-action-button">Start Synthesis</button>
</fieldset>
</div> </div>
</form> </form>
</section> </section>
@@ -120,6 +126,7 @@
<button id="dialog-action-convert">Convert</button> <button id="dialog-action-convert">Convert</button>
<button id="dialog-action-ocr">OCR</button> <button id="dialog-action-ocr">OCR</button>
<button id="dialog-action-transcribe">Transcribe</button> <button id="dialog-action-transcribe">Transcribe</button>
<button id="dialog-action-tts">Synthesize Speech</button>
</div> </div>
<div id="dialog-convert-view" style="display: none;"> <div id="dialog-convert-view" style="display: none;">
@@ -133,6 +140,17 @@
</div> </div>
</div> </div>
<div id="dialog-tts-view" style="display: none;">
<div class="form-control" style="text-align: left; margin-bottom: 1rem;">
<label for="dialog-tts-model-select">Voice Model</label>
<select id="dialog-tts-model-select" required></select>
</div>
<div class="dialog-actions">
<button id="dialog-start-tts">Start Synthesis</button>
<button id="dialog-back-tts" class="dialog-secondary-action">Back</button>
</div>
</div>
<button id="dialog-action-cancel" class="dialog-cancel">Cancel</button> <button id="dialog-action-cancel" class="dialog-cancel">Cancel</button>
</div> </div>
</div> </div>