<!DOCTYPE html>
|
|
<html lang="vi">
|
|
|
|
<head>
|
|
<meta charset="UTF-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>MarkItDown vs Docling — LLM Input Processing</title>
|
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" />
|
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css" rel="stylesheet" />
|
|
<style>
|
|
body {
|
|
background: #f8f9fa;
|
|
}
|
|
|
|
/* Upload zone */
|
|
#UploadZone {
|
|
border: 2px dashed #dee2e6;
|
|
border-radius: .5rem;
|
|
padding: 3rem 1.5rem;
|
|
text-align: center;
|
|
cursor: pointer;
|
|
transition: border-color .2s, background .2s;
|
|
}
|
|
|
|
#UploadZone:hover,
|
|
#UploadZone.dragover {
|
|
border-color: #0d6efd;
|
|
background: #f0f6ff;
|
|
}
|
|
|
|
#UploadZone input[type="file"] {
|
|
display: none;
|
|
}
|
|
|
|
/* Result pane */
|
|
.ResultPre {
|
|
max-height: 460px;
|
|
overflow: auto;
|
|
white-space: pre-wrap;
|
|
word-break: break-word;
|
|
font-size: .78rem;
|
|
background: #f8f9fa;
|
|
}
|
|
|
|
.PreviewPane {
|
|
max-height: 460px;
|
|
overflow: auto;
|
|
font-size: .85rem;
|
|
padding: 1rem;
|
|
line-height: 1.7;
|
|
}
|
|
|
|
.PreviewPane table {
|
|
border-collapse: collapse;
|
|
width: 100%;
|
|
margin: .5rem 0;
|
|
}
|
|
|
|
.PreviewPane th,
|
|
.PreviewPane td {
|
|
border: 1px solid #dee2e6;
|
|
padding: .3rem .6rem;
|
|
font-size: .8rem;
|
|
}
|
|
|
|
.PreviewPane th {
|
|
background: #f1f3f5;
|
|
}
|
|
|
|
.PreviewPane code {
|
|
background: #f1f3f5;
|
|
padding: 1px 4px;
|
|
border-radius: 3px;
|
|
font-size: .85em;
|
|
}
|
|
|
|
.PreviewPane blockquote {
|
|
border-left: 3px solid #dee2e6;
|
|
padding-left: .75rem;
|
|
color: #6c757d;
|
|
}
|
|
</style>
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<nav class="navbar navbar-light bg-white border-bottom px-4 py-2">
|
|
<span class="navbar-brand fw-bold mb-0">MarkItDown <span class="text-muted fw-normal">vs</span> Docling</span>
|
|
<span class="badge bg-primary-subtle text-primary">Demo — LLM Input Processing</span>
|
|
</nav>
|
|
|
|
<div class="container-xl py-4">
|
|
|
|
<!-- Upload card -->
|
|
<div class="card shadow-sm mb-4">
|
|
<div class="card-body">
|
|
<h6 class="card-title fw-semibold mb-3">Tải lên tài liệu để so sánh</h6>
|
|
|
|
<!-- Input mode tabs -->
|
|
<ul class="nav nav-tabs mb-3" id="InputModeTabs">
|
|
<li class="nav-item">
|
|
<a class="nav-link active py-1 px-3 small" href="#" onclick="SwitchInputMode('file',this);return false">
|
|
<i class="bi bi-file-earmark-text me-1"></i>File
|
|
</a>
|
|
</li>
|
|
<li class="nav-item">
|
|
<a class="nav-link py-1 px-3 small" href="#" onclick="SwitchInputMode('youtube',this);return false">
|
|
<i class="bi bi-youtube me-1 text-danger"></i>YouTube
|
|
</a>
|
|
</li>
|
|
</ul>
|
|
|
|
<!-- File upload pane -->
|
|
<div id="FilePane">
|
|
<div id="UploadZone">
|
|
<input type="file" id="FileInput"
|
|
accept=".pdf,.docx,.xlsx,.pptx,.html,.htm,.csv,.txt,.jpg,.jpeg,.png,.tiff,.tif,.bmp,.md,.epub,.zip,.asciidoc,.adoc" />
|
|
<i class="bi bi-file-earmark-text fs-1 text-secondary"></i>
|
|
<p class="text-muted mt-2 mb-1">Kéo thả hoặc click để chọn file</p>
|
|
<div id="FileName" class="fw-semibold text-primary small"></div>
|
|
</div>
|
|
|
|
<div class="d-flex flex-wrap gap-1 mt-2">
|
|
<span class="badge bg-secondary-subtle text-secondary">PDF</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">DOCX</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">XLSX</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">PPTX</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">HTML</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">CSV</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">TXT</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">JPG/PNG</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">EPUB</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">TIFF</span>
|
|
<span class="badge bg-secondary-subtle text-secondary">ASCIIDoc</span>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- YouTube URL pane -->
|
|
<div id="YoutubePane" class="d-none">
|
|
<div class="input-group">
|
|
<span class="input-group-text bg-danger text-white"><i class="bi bi-youtube"></i></span>
|
|
<input type="url" class="form-control" id="YoutubeUrl"
|
|
placeholder="https://www.youtube.com/watch?v=..."
|
|
oninput="OnYoutubeInput()" />
|
|
<button class="btn btn-outline-secondary" onclick="document.getElementById('YoutubeUrl').value='';OnYoutubeInput()">
|
|
<i class="bi bi-x"></i>
|
|
</button>
|
|
</div>
|
|
<div class="form-text mt-1">
|
|
<i class="bi bi-info-circle me-1"></i>
|
|
<b>MarkItDown</b>: dùng <code>yt-dlp</code> native |
|
|
<b>Docling</b>: trích transcript → convert markdown
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Controls row -->
|
|
<div class="d-flex flex-wrap align-items-center gap-3 mt-3">
|
|
<div class="d-flex align-items-center gap-2" id="DoclingFmtWrap">
|
|
<label class="form-label mb-0 small fw-medium" for="DoclingFormat">Docling format</label>
|
|
<select class="form-select form-select-sm" id="DoclingFormat" style="width:auto">
|
|
<option value="markdown">Markdown</option>
|
|
<option value="json">JSON</option>
|
|
<option value="html">HTML</option>
|
|
<option value="text">Plain Text</option>
|
|
</select>
|
|
</div>
|
|
|
|
<div class="form-check form-switch mb-0">
|
|
<input class="form-check-input" type="checkbox" id="LlmToggle" checked onchange="OnLlmToggle()" />
|
|
<label class="form-check-label small fw-medium" for="LlmToggle" id="LlmToggleLabel">LLM bật</label>
|
|
</div>
|
|
|
|
<button class="btn btn-primary btn-sm" id="ConvertBtn" disabled onclick="RunConversion()">
|
|
<i class="bi bi-play-fill me-1"></i>Chuyển đổi & So sánh
|
|
</button>
|
|
<button class="btn btn-outline-secondary btn-sm" onclick="ClearResults()">
|
|
<i class="bi bi-x-circle me-1"></i>Xoá
|
|
</button>
|
|
</div>
|
|
|
|
<!-- LLM Prompt panel -->
|
|
<div id="LlmPanel" class="mt-3 d-none">
|
|
<div class="row g-3">
|
|
<!-- Col 1: Custom prompt -->
|
|
<div class="col-md-6">
|
|
<div class="h-100 p-3 rounded border bg-primary-subtle">
|
|
<div class="d-flex justify-content-between align-items-center mb-2">
|
|
<span class="small fw-semibold text-primary"><i class="bi bi-pencil-square me-1"></i>Custom LLM
|
|
Prompt</span>
|
|
<button class="btn btn-link btn-sm p-0 text-primary text-decoration-underline"
|
|
onclick="ResetPrompt()">Xoá</button>
|
|
</div>
|
|
<textarea class="form-control form-control-sm" id="LlmPrompt" rows="6"
|
|
placeholder="Để trống = dùng Default prompt bên phải Nhập custom system prompt để override..."></textarea>
|
|
<div class="form-text mt-1 text-primary small">
|
|
<b>MarkItDown</b>: vision prompt | <b>Docling</b>: enrich prompt
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<!-- Col 2: Default prompts with tabs -->
|
|
<div class="col-md-6">
|
|
<div class="h-100 p-3 rounded border bg-light">
|
|
<ul class="nav nav-tabs nav-sm mb-2" id="DefaultPromptTabs">
|
|
<li class="nav-item">
|
|
<a class="nav-link active py-1 px-2 small" href="#"
|
|
onclick="SwitchDefaultTab('Md',this);return false">
|
|
<span class="badge bg-primary rounded-circle p-1 me-1"> </span>MarkItDown
|
|
</a>
|
|
</li>
|
|
<li class="nav-item">
|
|
<a class="nav-link py-1 px-2 small" href="#" onclick="SwitchDefaultTab('Dl',this);return false">
|
|
<span class="badge bg-success rounded-circle p-1 me-1"> </span>Docling
|
|
</a>
|
|
</li>
|
|
</ul>
|
|
<div id="DefaultPrompt-Md">
|
|
<textarea class="form-control form-control-sm font-monospace" id="MdDefaultPrompt" rows="5" readonly
|
|
style="font-size:.7rem;resize:none;background:#fff"></textarea>
|
|
<button class="btn btn-outline-primary btn-sm mt-2 w-100" onclick="UsePrompt('Md')">
|
|
<i class="bi bi-arrow-left me-1"></i>Dùng prompt này
|
|
</button>
|
|
</div>
|
|
<div id="DefaultPrompt-Dl" class="d-none">
|
|
<textarea class="form-control form-control-sm font-monospace" id="DlDefaultPrompt" rows="5" readonly
|
|
style="font-size:.7rem;resize:none;background:#fff"></textarea>
|
|
<button class="btn btn-outline-success btn-sm mt-2 w-100" onclick="UsePrompt('Dl')">
|
|
<i class="bi bi-arrow-left me-1"></i>Dùng prompt này
|
|
</button>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Settings card -->
|
|
<div class="card shadow-sm mb-4">
|
|
<div class="card-header py-2">
|
|
<span class="fw-semibold small"><i class="bi bi-gear me-2"></i>Cài đặt LLM (Ollama)</span>
|
|
</div>
|
|
<div class="card-body" id="SettingsBody">
|
|
<div class="row g-3 align-items-end">
|
|
<div class="col-md-5">
|
|
<label class="form-label fw-medium small">Ollama Base URL</label>
|
|
<div class="input-group input-group-sm">
|
|
<input type="url" class="form-control" id="SettingUrl" placeholder="https://your-ollama-server/v1" />
|
|
<button class="btn btn-outline-secondary" onclick="FetchModels()" title="Tải danh sách model">
|
|
<i class="bi bi-arrow-clockwise"></i>
|
|
</button>
|
|
</div>
|
|
<div class="form-text">OpenAI-compatible endpoint</div>
|
|
</div>
|
|
<div class="col-md-3">
|
|
<label class="form-label fw-medium small">Model mặc định</label>
|
|
<select class="form-select form-select-sm" id="SettingModel">
|
|
<option value="">-- chọn model --</option>
|
|
</select>
|
|
<div class="form-text">Convert + Docling enrich</div>
|
|
</div>
|
|
<div class="col-md-4">
|
|
<label class="form-label fw-medium small">Model Format</label>
|
|
<select class="form-select form-select-sm" id="CleanupModel">
|
|
<option value="">-- dùng model trên --</option>
|
|
</select>
|
|
<div class="form-text">Để trống = dùng model mặc định</div>
|
|
</div>
|
|
</div>
|
|
<div class="row g-2 mt-3 text-center">
|
|
<div class="col-6">
|
|
<div class="border rounded py-2 px-3">
|
|
<div class="small text-muted mb-1"><span
|
|
class="badge bg-primary rounded-circle p-1 me-1"> </span>MarkItDown LLM</div>
|
|
<div id="MdLlmStatus"><span class="spinner-border spinner-border-sm"></span></div>
|
|
</div>
|
|
</div>
|
|
<div class="col-6">
|
|
<div class="border rounded py-2 px-3">
|
|
<div class="small text-muted mb-1"><span
|
|
class="badge bg-success rounded-circle p-1 me-1"> </span>Docling LLM</div>
|
|
<div id="DlLlmStatus"><span class="spinner-border spinner-border-sm"></span></div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="mt-3 text-end">
|
|
<button class="btn btn-primary btn-sm px-4" id="SaveSettingsBtn" onclick="SaveSettings()">
|
|
<span id="SaveSpinner" class="spinner-border spinner-border-sm d-none"></span>
|
|
<i class="bi bi-check-lg me-1"></i>Lưu & Áp dụng
|
|
</button>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Progress row -->
|
|
<div class="row g-3 mb-4 d-none" id="ProgressRow">
|
|
<div class="col-md-6">
|
|
<div class="card h-100">
|
|
<div class="card-body">
|
|
<h6 class="card-title d-flex align-items-center gap-2">
|
|
<span class="badge bg-primary rounded-circle p-1"> </span>MarkItDown
|
|
</h6>
|
|
<div class="d-flex align-items-center gap-2" id="MdStatusLine">
|
|
<div class="spinner-border spinner-border-sm text-primary d-none" id="MdSpinner"></div>
|
|
<small id="MdStatus" class="text-muted">Đang chờ...</small>
|
|
</div>
|
|
<div class="row g-2 mt-2 d-none" id="MdMetrics">
|
|
<div class="col-4">
|
|
<div class="border rounded text-center py-2" id="MdTimeCard">
|
|
<div class="fw-bold" id="MdTimeVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">ms</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-4">
|
|
<div class="border rounded text-center py-2">
|
|
<div class="fw-bold" id="MdCharsVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">ký tự</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-4">
|
|
<div class="border rounded text-center py-2">
|
|
<div class="fw-bold" id="MdLinesVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">dòng</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-6">
|
|
<div class="card h-100">
|
|
<div class="card-body">
|
|
<h6 class="card-title d-flex align-items-center gap-2">
|
|
<span class="badge bg-success rounded-circle p-1"> </span>Docling
|
|
</h6>
|
|
<div class="d-flex align-items-center gap-2" id="DlStatusLine">
|
|
<div class="spinner-border spinner-border-sm text-success d-none" id="DlSpinner"></div>
|
|
<small id="DlStatus" class="text-muted">Đang chờ...</small>
|
|
</div>
|
|
<div class="row g-2 mt-2 d-none" id="DlMetrics">
|
|
<div class="col-3">
|
|
<div class="border rounded text-center py-2" id="DlTimeCard">
|
|
<div class="fw-bold" id="DlTimeVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">ms</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-3">
|
|
<div class="border rounded text-center py-2">
|
|
<div class="fw-bold" id="DlCharsVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">ký tự</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-3">
|
|
<div class="border rounded text-center py-2">
|
|
<div class="fw-bold" id="DlLinesVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">dòng</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-3 d-none" id="DlPagesCard">
|
|
<div class="border rounded text-center py-2">
|
|
<div class="fw-bold" id="DlPagesVal">-</div>
|
|
<div class="text-muted" style="font-size:.7rem">trang</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Compare banner -->
|
|
<div class="card shadow-sm mb-4 d-none" id="CompareBanner">
|
|
<div class="card-body">
|
|
<h6 class="card-title fw-semibold mb-3"><i class="bi bi-bar-chart-line me-1"></i>Tóm tắt so sánh</h6>
|
|
<table class="table table-sm table-bordered mb-0">
|
|
<thead class="table-light">
|
|
<tr>
|
|
<th>Tiêu chí</th>
|
|
<th>MarkItDown</th>
|
|
<th>Docling</th>
|
|
<th class="text-muted">Ghi chú</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody id="CompareRows"></tbody>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Results -->
|
|
<div class="row g-3 mb-4">
|
|
<div class="col-md-6">
|
|
<div class="card shadow-sm h-100" id="MdResultCard">
|
|
<div class="card-header d-flex justify-content-between align-items-center py-2">
|
|
<span class="fw-semibold small d-flex align-items-center gap-2">
|
|
<span class="badge bg-primary rounded-circle p-1"> </span>MarkItDown
|
|
</span>
|
|
<div class="d-flex align-items-center gap-2">
|
|
<button class="btn btn-outline-warning btn-sm py-0 px-2 d-none" id="CleanupBtn"
|
|
onclick="CleanupMarkItDown()" title="Dùng LLM tái cấu trúc output">
|
|
<span id="CleanupSpinner" class="spinner-border spinner-border-sm d-none"></span>
|
|
✨ Làm đẹp
|
|
</button>
|
|
<button class="btn btn-outline-secondary btn-sm py-0 px-2 d-none" id="MdDownloadBtn"
|
|
onclick="DownloadMd('Md')" title="Tải xuống .md">
|
|
<i class="bi bi-download"></i> .md
|
|
</button>
|
|
<ul class="nav nav-tabs card-header-tabs border-0" id="MdTabs">
|
|
<li class="nav-item"><a class="nav-link active py-1 px-2 small" href="#"
|
|
onclick="SwitchTab('Md','Raw',this);return false">Raw</a></li>
|
|
<li class="nav-item"><a class="nav-link py-1 px-2 small" href="#"
|
|
onclick="SwitchTab('Md','Preview',this);return false">Preview</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<div class="card-body p-0" id="MdBody">
|
|
<div class="text-center text-muted py-5 small"><i class="bi bi-upload fs-3 d-block mb-2"></i>Tải file lên để
|
|
xem kết quả</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-6">
|
|
<div class="card shadow-sm h-100" id="DlResultCard">
|
|
<div class="card-header d-flex justify-content-between align-items-center py-2">
|
|
<span class="fw-semibold small d-flex align-items-center gap-2">
|
|
<span class="badge bg-success rounded-circle p-1"> </span>Docling
|
|
</span>
|
|
<div class="d-flex align-items-center gap-2">
|
|
<button class="btn btn-outline-secondary btn-sm py-0 px-2 d-none" id="DlDownloadBtn"
|
|
onclick="DownloadMd('Dl')" title="Tải xuống .md">
|
|
<i class="bi bi-download"></i> .md
|
|
</button>
|
|
<ul class="nav nav-tabs card-header-tabs border-0" id="DlTabs">
|
|
<li class="nav-item"><a class="nav-link active py-1 px-2 small" href="#"
|
|
onclick="SwitchTab('Dl','Raw',this);return false">Raw</a></li>
|
|
<li class="nav-item"><a class="nav-link py-1 px-2 small" href="#"
|
|
onclick="SwitchTab('Dl','Preview',this);return false">Preview</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<div class="card-body p-0" id="DlBody">
|
|
<div class="text-center text-muted py-5 small"><i class="bi bi-upload fs-3 d-block mb-2"></i>Tải file lên để
|
|
xem kết quả</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- History -->
|
|
<h6 class="fw-semibold mb-3">Lịch sử chuyển đổi gần đây</h6>
|
|
<div class="row g-3">
|
|
<div class="col-md-6">
|
|
<div class="card shadow-sm">
|
|
<div class="card-header py-2 small fw-semibold d-flex align-items-center gap-2">
|
|
<span class="badge bg-primary rounded-circle p-1"> </span>MarkItDown
|
|
</div>
|
|
<div id="MdHistory">
|
|
<div class="text-center text-muted py-3 small">Chưa có lịch sử</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-6">
|
|
<div class="card shadow-sm">
|
|
<div class="card-header py-2 small fw-semibold d-flex align-items-center gap-2">
|
|
<span class="badge bg-success rounded-circle p-1"> </span>Docling
|
|
</div>
|
|
<div id="DlHistory">
|
|
<div class="text-center text-muted py-3 small">Chưa có lịch sử</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</div><!-- /container -->
|
|
|
|
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
|
|
<script src="https://cdn.jsdelivr.net/npm/marked@13/marked.min.js"></script>
|
|
<script>
|
|
const MarkItDownUrl = '/api/markitdown';
|
|
const DoclingUrl = '/api/docling';
|
|
|
|
let CurrentFile = null;
|
|
let CurrentYoutubeUrl = '';
|
|
let InputMode = 'file'; // 'file' | 'youtube'
|
|
let MdContent = '';
|
|
let DlContent = '';
|
|
|
|
// ── Input mode toggle ─────────────────────────────────────────
|
|
function SwitchInputMode(Mode, Link) {
|
|
InputMode = Mode;
|
|
document.querySelectorAll('#InputModeTabs .nav-link').forEach(L => L.classList.remove('active'));
|
|
Link.classList.add('active');
|
|
SetDisplay('FilePane', Mode === 'file');
|
|
SetDisplay('YoutubePane', Mode === 'youtube');
|
|
// Re-evaluate button state
|
|
if (Mode === 'file') {
|
|
document.getElementById('ConvertBtn').disabled = !CurrentFile;
|
|
} else {
|
|
document.getElementById('ConvertBtn').disabled = !CurrentYoutubeUrl;
|
|
}
|
|
}
|
|
|
|
function OnYoutubeInput() {
|
|
CurrentYoutubeUrl = document.getElementById('YoutubeUrl').value.trim();
|
|
document.getElementById('ConvertBtn').disabled = !CurrentYoutubeUrl;
|
|
}
|
|
|
|
// ── File input ────────────────────────────────────────────────
|
|
const UploadZone = document.getElementById('UploadZone');
|
|
const FileInput = document.getElementById('FileInput');
|
|
|
|
UploadZone.addEventListener('click', () => FileInput.click());
|
|
UploadZone.addEventListener('dragover', e => { e.preventDefault(); UploadZone.classList.add('dragover'); });
|
|
UploadZone.addEventListener('dragleave', () => UploadZone.classList.remove('dragover'));
|
|
UploadZone.addEventListener('drop', e => {
|
|
e.preventDefault();
|
|
UploadZone.classList.remove('dragover');
|
|
if (e.dataTransfer.files[0]) SelectFile(e.dataTransfer.files[0]);
|
|
});
|
|
FileInput.addEventListener('change', () => { if (FileInput.files[0]) SelectFile(FileInput.files[0]); });
|
|
|
|
function SelectFile(File_) {
|
|
CurrentFile = File_;
|
|
document.getElementById('FileName').textContent = File_.name + ' (' + FormatBytes(File_.size) + ')';
|
|
document.getElementById('ConvertBtn').disabled = false;
|
|
}
|
|
|
|
function FormatBytes(Bytes) {
|
|
if (Bytes < 1024) return Bytes + ' B';
|
|
if (Bytes < 1024 * 1024) return (Bytes / 1024).toFixed(1) + ' KB';
|
|
return (Bytes / (1024 * 1024)).toFixed(1) + ' MB';
|
|
}
|
|
|
|
// ── LLM toggle ────────────────────────────────────────────────
|
|
function OnLlmToggle() {
|
|
const IsOn = document.getElementById('LlmToggle').checked;
|
|
document.getElementById('LlmToggleLabel').textContent = IsOn ? 'LLM bật' : 'LLM tắt';
|
|
document.getElementById('LlmPanel').classList.toggle('d-none', !IsOn);
|
|
localStorage.setItem('llm_enabled', IsOn ? '1' : '0');
|
|
}
|
|
|
|
function ResetPrompt() {
|
|
document.getElementById('LlmPrompt').value = '';
|
|
localStorage.removeItem('llm_prompt');
|
|
}
|
|
|
|
document.getElementById('LlmPrompt').addEventListener('input', () => {
|
|
localStorage.setItem('llm_prompt', document.getElementById('LlmPrompt').value);
|
|
});
|
|
|
|
// ── Conversion ────────────────────────────────────────────────
|
|
async function RunConversion() {
|
|
if (InputMode === 'file' && !CurrentFile) return;
|
|
if (InputMode === 'youtube' && !CurrentYoutubeUrl) return;
|
|
|
|
document.getElementById('ConvertBtn').disabled = true;
|
|
SetDisplay('ProgressRow', true);
|
|
SetDisplay('CompareBanner', false);
|
|
document.getElementById('MdBody').innerHTML = '<div class="text-center text-muted py-5 small"><div class="spinner-border spinner-border-sm mb-2"></div><br>Đang xử lý...</div>';
|
|
document.getElementById('DlBody').innerHTML = '<div class="text-center text-muted py-5 small"><div class="spinner-border spinner-border-sm text-success mb-2"></div><br>Đang xử lý...</div>';
|
|
SetDisplay('MdMetrics', false);
|
|
SetDisplay('DlMetrics', false);
|
|
|
|
SetSpinner('Md', true, 'Đang chuyển đổi...');
|
|
SetSpinner('Dl', true, 'Đang chuyển đổi...');
|
|
|
|
const DoclingFmt = document.getElementById('DoclingFormat').value;
|
|
const UseLlm = document.getElementById('LlmToggle').checked;
|
|
const CustomPrompt = document.getElementById('LlmPrompt').value.trim();
|
|
|
|
let MdPromise, DlPromise;
|
|
if (InputMode === 'youtube') {
|
|
MdPromise = ConvertMarkItDownUrl(CurrentYoutubeUrl, UseLlm, CustomPrompt);
|
|
DlPromise = ConvertDoclingUrl(CurrentYoutubeUrl, DoclingFmt, UseLlm, CustomPrompt);
|
|
} else {
|
|
MdPromise = ConvertMarkItDown(CurrentFile, UseLlm, CustomPrompt);
|
|
DlPromise = ConvertDocling(CurrentFile, DoclingFmt, UseLlm, CustomPrompt);
|
|
}
|
|
|
|
const [MdResult, DlResult] = await Promise.allSettled([MdPromise, DlPromise]);
|
|
|
|
document.getElementById('ConvertBtn').disabled = false;
|
|
ShowCompare(MdResult, DlResult);
|
|
LoadHistory();
|
|
}
|
|
|
|
async function ConvertMarkItDown(File_, UseLlm, CustomPrompt) {
|
|
const Form = new FormData();
|
|
Form.append('file', File_);
|
|
const Params = new URLSearchParams({ use_llm: UseLlm });
|
|
if (CustomPrompt) Params.set('llm_prompt', CustomPrompt);
|
|
const T0 = performance.now();
|
|
const Res = await fetch(MarkItDownUrl + '/convert?' + Params, { method: 'POST', body: Form });
|
|
const Ms = Math.round(performance.now() - T0);
|
|
if (!Res.ok) throw new Error((await Res.json()).detail || Res.statusText);
|
|
const Data = await Res.json();
|
|
return { Content: Data.markdown, Ms, LlmEnabled: Data.llm_enabled };
|
|
}
|
|
|
|
async function ConvertMarkItDownUrl(Url, UseLlm, CustomPrompt) {
|
|
const T0 = performance.now();
|
|
const Res = await fetch(MarkItDownUrl + '/convert-url', {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({ url: Url, use_llm: UseLlm, llm_prompt: CustomPrompt || null }),
|
|
});
|
|
const Ms = Math.round(performance.now() - T0);
|
|
if (!Res.ok) throw new Error((await Res.json()).detail || Res.statusText);
|
|
const Data = await Res.json();
|
|
return { Content: Data.markdown, Ms, LlmEnabled: Data.llm_enabled };
|
|
}
|
|
|
|
async function ConvertDocling(File_, Fmt, UseLlm, CustomPrompt) {
|
|
const Form = new FormData();
|
|
Form.append('file', File_);
|
|
const Params = new URLSearchParams({ output_format: Fmt, use_llm: UseLlm });
|
|
if (CustomPrompt) Params.set('llm_prompt', CustomPrompt);
|
|
const T0 = performance.now();
|
|
const Res = await fetch(DoclingUrl + '/convert?' + Params, { method: 'POST', body: Form });
|
|
const Ms = Math.round(performance.now() - T0);
|
|
if (!Res.ok) throw new Error((await Res.json()).detail || Res.statusText);
|
|
const Data = await Res.json();
|
|
return { Content: Data.content, Ms, Pages: Data.page_count, LlmEnabled: Data.llm_enabled };
|
|
}
|
|
|
|
async function ConvertDoclingUrl(Url, Fmt, UseLlm, CustomPrompt) {
|
|
const T0 = performance.now();
|
|
const Res = await fetch(DoclingUrl + '/convert-url', {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({ url: Url, output_format: Fmt, use_llm: UseLlm, llm_prompt: CustomPrompt || null }),
|
|
});
|
|
const Ms = Math.round(performance.now() - T0);
|
|
if (!Res.ok) throw new Error((await Res.json()).detail || Res.statusText);
|
|
const Data = await Res.json();
|
|
return { Content: Data.content, Ms, Pages: Data.page_count, LlmEnabled: Data.llm_enabled };
|
|
}
|
|
|
|
function SetDisplay(Id, Visible) {
|
|
document.getElementById(Id).classList.toggle('d-none', !Visible);
|
|
}
|
|
|
|
function SetSpinner(Prefix, Active, Msg) {
|
|
document.getElementById(Prefix + 'Spinner').classList.toggle('d-none', !Active);
|
|
document.getElementById(Prefix + 'Status').textContent = Msg;
|
|
}
|
|
|
|
function ShowCompare(MdResult, DlResult) {
|
|
const MdOk = MdResult.status === 'fulfilled';
|
|
const DlOk = DlResult.status === 'fulfilled';
|
|
|
|
if (MdOk) {
|
|
MdContent = MdResult.value.Content;
|
|
const LlmTag = MdResult.value.LlmEnabled ? ' 🤖' : '';
|
|
SetSpinner('Md', false, '✅ Hoàn tất (' + MdResult.value.Ms + ' ms)' + LlmTag);
|
|
ShowMetrics('Md', MdResult.value);
|
|
RenderResult('Md', MdContent);
|
|
document.getElementById('CleanupBtn').classList.remove('d-none');
|
|
document.getElementById('MdDownloadBtn').classList.remove('d-none');
|
|
} else {
|
|
MdContent = '';
|
|
SetSpinner('Md', false, '❌ ' + MdResult.reason.message);
|
|
document.getElementById('MdBody').innerHTML = '<div class="alert alert-danger m-3 small">' + EscHtml(MdResult.reason.message) + '</div>';
|
|
}
|
|
|
|
if (DlOk) {
|
|
DlContent = DlResult.value.Content;
|
|
document.getElementById('DlDownloadBtn').classList.remove('d-none');
|
|
const LlmTag = DlResult.value.LlmEnabled ? ' 🤖' : '';
|
|
SetSpinner('Dl', false, '✅ Hoàn tất (' + DlResult.value.Ms + ' ms)' + LlmTag);
|
|
ShowMetrics('Dl', DlResult.value);
|
|
RenderResult('Dl', DlContent);
|
|
} else {
|
|
DlContent = '';
|
|
SetSpinner('Dl', false, '❌ ' + DlResult.reason.message);
|
|
document.getElementById('DlBody').innerHTML = '<div class="alert alert-danger m-3 small">' + EscHtml(DlResult.reason.message) + '</div>';
|
|
}
|
|
|
|
if (MdOk && DlOk) BuildCompareTable(MdResult.value, DlResult.value);
|
|
}
|
|
|
|
function ShowMetrics(Prefix, Data) {
|
|
const Lines = Data.Content.split('\n').length;
|
|
const Chars = Data.Content.length;
|
|
document.getElementById(Prefix + 'TimeVal').textContent = Data.Ms.toLocaleString();
|
|
document.getElementById(Prefix + 'CharsVal').textContent = Chars.toLocaleString();
|
|
document.getElementById(Prefix + 'LinesVal').textContent = Lines.toLocaleString();
|
|
if (Prefix === 'Dl' && Data.Pages) {
|
|
document.getElementById('DlPagesVal').textContent = Data.Pages;
|
|
document.getElementById('DlPagesCard').classList.remove('d-none');
|
|
}
|
|
SetDisplay(Prefix + 'Metrics', true);
|
|
}
|
|
|
|
function BuildCompareTable(Md, Dl) {
|
|
const Rows = [
|
|
{ Label: 'Thời gian xử lý', MdVal: Md.Ms + ' ms', DlVal: Dl.Ms + ' ms', MdWin: Md.Ms < Dl.Ms, Note: 'Thấp hơn = nhanh hơn' },
|
|
{ Label: 'Độ dài output', MdVal: Md.Content.length + ' ký tự', DlVal: Dl.Content.length + ' ký tự', MdWin: Md.Content.length >= Dl.Content.length, Note: 'Nhiều hơn = giữ được nội dung hơn' },
|
|
{ Label: 'Số dòng', MdVal: Md.Content.split('\n').length, DlVal: Dl.Content.split('\n').length, MdWin: Md.Content.split('\n').length >= Dl.Content.split('\n').length, Note: '' },
|
|
];
|
|
document.getElementById('CompareRows').innerHTML = Rows.map(R => `
|
|
<tr>
|
|
<td>${R.Label}</td>
|
|
<td class="${R.MdWin ? 'text-success fw-semibold' : 'text-muted'}">${R.MdWin ? '🏆 ' : ''}${R.MdVal}</td>
|
|
<td class="${!R.MdWin ? 'text-success fw-semibold' : 'text-muted'}">${!R.MdWin ? '🏆 ' : ''}${R.DlVal}</td>
|
|
<td class="text-muted small">${R.Note}</td>
|
|
</tr>`).join('');
|
|
SetDisplay('CompareBanner', true);
|
|
}
|
|
|
|
// ── Render result ─────────────────────────────────────────────
|
|
function RenderResult(Prefix, Content_) {
|
|
document.getElementById(Prefix + 'Body').innerHTML = `
|
|
<div id="${Prefix}-RawPane"><pre class="ResultPre p-3 m-0">${EscHtml(Content_)}</pre></div>
|
|
<div id="${Prefix}-PreviewPane" class="d-none"><div class="PreviewPane">${marked.parse(Content_)}</div></div>`;
|
|
}
|
|
|
|
function SwitchTab(Prefix, Tab, Link) {
|
|
document.querySelectorAll('#' + Prefix + 'Tabs .nav-link').forEach(L => L.classList.remove('active'));
|
|
Link.classList.add('active');
|
|
document.getElementById(Prefix + '-RawPane').classList.toggle('d-none', Tab !== 'Raw');
|
|
document.getElementById(Prefix + '-PreviewPane').classList.toggle('d-none', Tab !== 'Preview');
|
|
}
|
|
|
|
function EscHtml(Str) {
|
|
return String(Str).replace(/&/g, '&').replace(/</g, '<').replace(/>/g, '>').replace(/"/g, '"');
|
|
}
|
|
|
|
async function CleanupMarkItDown() {
|
|
if (!MdContent) return;
|
|
const Btn = document.getElementById('CleanupBtn');
|
|
const Spinner = document.getElementById('CleanupSpinner');
|
|
Btn.disabled = true;
|
|
Spinner.classList.remove('d-none');
|
|
try {
|
|
const CustomPrompt = document.getElementById('LlmPrompt').value.trim();
|
|
const Res = await fetch(MarkItDownUrl + '/cleanup', {
|
|
method: 'POST',
|
|
headers: { 'Content-Type': 'application/json' },
|
|
body: JSON.stringify({ text: MdContent, prompt: CustomPrompt || null, model: document.getElementById('CleanupModel').value.trim() || null }),
|
|
});
|
|
if (!Res.ok) throw new Error((await Res.json()).detail || Res.statusText);
|
|
const Data = await Res.json();
|
|
MdContent = Data.text;
|
|
RenderResult('Md', MdContent);
|
|
document.querySelector('#MdTabs .nav-link:last-child').click();
|
|
} catch (E) {
|
|
alert('Cleanup failed: ' + E.message);
|
|
} finally {
|
|
Btn.disabled = false;
|
|
Spinner.classList.add('d-none');
|
|
}
|
|
}
|
|
|
|
function ClearResults() {
|
|
MdContent = ''; DlContent = '';
|
|
CurrentFile = null;
|
|
CurrentYoutubeUrl = '';
|
|
SetDisplay('ProgressRow', false);
|
|
SetDisplay('CompareBanner', false);
|
|
document.getElementById('MdBody').innerHTML = '<div class="text-center text-muted py-5 small"><i class="bi bi-upload fs-3 d-block mb-2"></i>Tải file lên để xem kết quả</div>';
|
|
document.getElementById('DlBody').innerHTML = '<div class="text-center text-muted py-5 small"><i class="bi bi-upload fs-3 d-block mb-2"></i>Tải file lên để xem kết quả</div>';
|
|
document.getElementById('FileName').textContent = '';
|
|
document.getElementById('YoutubeUrl').value = '';
|
|
document.getElementById('ConvertBtn').disabled = true;
|
|
document.getElementById('CleanupBtn').classList.add('d-none');
|
|
document.getElementById('MdDownloadBtn').classList.add('d-none');
|
|
document.getElementById('DlDownloadBtn').classList.add('d-none');
|
|
FileInput.value = '';
|
|
}
|
|
|
|
// ── History ───────────────────────────────────────────────────
|
|
async function LoadHistory() {
|
|
try {
|
|
const [MdData, DlData] = await Promise.all([
|
|
fetch(MarkItDownUrl + '/history?limit=8').then(R => R.json()),
|
|
fetch(DoclingUrl + '/history?limit=8').then(R => R.json()),
|
|
]);
|
|
RenderHistory('MdHistory', MdData);
|
|
RenderHistory('DlHistory', DlData);
|
|
} catch (E) {
|
|
console.warn('History load failed', E);
|
|
}
|
|
}
|
|
|
|
/**
 * Render a list of history entries into the element with id `ElId`.
 *
 * Each item is read for `filename`, `file_type`, `llm_enabled` and
 * `created_at`. All server-provided text is escaped with EscHtml before
 * being placed into markup. Anything that is not a non-empty array
 * (null, an error object, an empty list) shows the "no history" message.
 *
 * @param {string} ElId  id of the container element
 * @param {Array<Object>} Items  history entries returned by a backend
 */
function RenderHistory(ElId, Items) {
  const El = document.getElementById(ElId);

  if (!Array.isArray(Items) || !Items.length) {
    El.innerHTML = '<div class="text-center text-muted py-3 small">Chưa có lịch sử</div>';
    return;
  }

  const Rows = Items.map(Item => `
    <li class="list-group-item d-flex justify-content-between align-items-center py-2 px-3">
      <span class="small fw-medium text-truncate me-2" style="max-width:60%">${EscHtml(Item.filename)}</span>
      <span class="d-flex gap-1 align-items-center flex-shrink-0">
        ${Item.file_type ? `<span class="badge bg-secondary-subtle text-secondary">${EscHtml(Item.file_type)}</span>` : ''}
        ${Item.llm_enabled ? '<span class="badge bg-primary-subtle text-primary">🤖 LLM</span>' : ''}
        <span class="text-muted" style="font-size:.7rem">${EscHtml(FormatDate(Item.created_at))}</span>
      </span>
    </li>`);

  El.innerHTML = '<ul class="list-group list-group-flush">' + Rows.join('') + '</ul>';
}
|
|
|
|
/**
 * Format a timestamp as a Vietnamese-locale time-of-day string.
 *
 * `Date#toLocaleTimeString` does not throw for an unparseable value (it
 * yields the text "Invalid Date"), so validity is checked explicitly.
 * A missing or unparseable timestamp returns an empty string rather than
 * the raw input, because the result is interpolated into HTML by the
 * history renderer.
 *
 * @param {string|number|Date|null|undefined} Str  value accepted by `new Date()`
 * @returns {string} localized time, or '' when there is no valid timestamp
 */
function FormatDate(Str) {
  if (Str == null) return '';
  const Parsed = new Date(Str);
  if (Number.isNaN(Parsed.getTime())) return '';
  return Parsed.toLocaleTimeString('vi-VN');
}
|
|
|
|
// ── Settings modal ───────────────────────────────────────────
|
|
/**
 * Load the settings of both backends and fill the settings modal.
 *
 * The Ollama base URL field is taken from the MarkItDown response; each
 * backend supplies its own default prompt and LLM status badge. Afterwards
 * the model dropdowns are populated with MarkItDown's saved selections.
 * Any failure is logged and otherwise ignored.
 *
 * @returns {Promise<void>}
 */
async function LoadSettings() {
  // Write a form value, falling back to an empty string for missing fields.
  const SetValue = (Id, Value) => {
    document.getElementById(Id).value = Value || '';
  };

  try {
    const MdRequest = fetch(MarkItDownUrl + '/settings').then(R => R.json());
    const DlRequest = fetch(DoclingUrl + '/settings').then(R => R.json());
    const [Md, Dl] = await Promise.all([MdRequest, DlRequest]);

    SetValue('SettingUrl', Md.ollama_base_url);
    SetValue('MdDefaultPrompt', Md.default_prompt);
    SetValue('DlDefaultPrompt', Dl.default_prompt);

    RenderLlmStatus('MdLlmStatus', Md);
    RenderLlmStatus('DlLlmStatus', Dl);

    await FetchModels(Md.ollama_model, Md.cleanup_model);
  } catch (E) {
    console.warn('Settings load failed', E);
  }
}
|
|
|
|
/**
 * Fetch the model list from the MarkItDown backend and fill both model
 * dropdowns (main model and cleanup model).
 *
 * @param {string} [SelectModel]    model to preselect in 'SettingModel'
 * @param {string} [SelectCleanup]  model to preselect in 'CleanupModel'
 * @returns {Promise<void>} never rejects; failures are only logged
 */
async function FetchModels(SelectModel, SelectCleanup) {
  try {
    const Res = await fetch(MarkItDownUrl + '/models');
    const Payload = await Res.json();
    // A response without a `models` field is treated as an empty list.
    const Names = Payload.models || [];

    PopulateSelect('SettingModel', Names, SelectModel || null, '-- chọn model --');
    PopulateSelect('CleanupModel', Names, SelectCleanup || null, '-- dùng model trên --');
  } catch (Err) {
    console.warn('FetchModels failed', Err);
  }
}
|
|
|
|
/**
 * Rebuild the options of the <select> with id `ElId`.
 *
 * The first option is always an empty-valued entry labelled `EmptyLabel`,
 * followed by one option per model name. The option matching the value
 * the user currently has selected wins; if nothing is selected yet,
 * `Selected` is used instead.
 *
 * Options are created through the DOM rather than by concatenating
 * markup, so model names or labels containing `<`, `&` or `"` cannot
 * break the markup or inject HTML. `EmptyLabel` is therefore treated as
 * plain text.
 *
 * @param {string} ElId            id of the <select> element
 * @param {string[]} Models        model names to list
 * @param {string|null} Selected   fallback selection
 * @param {string} EmptyLabel      text of the leading empty option
 */
function PopulateSelect(ElId, Models, Selected, EmptyLabel) {
  const Sel = document.getElementById(ElId);
  // Read the current choice before the existing options are removed.
  const Prev = Sel.value || Selected;

  Sel.innerHTML = '';
  Sel.add(new Option(EmptyLabel, ''));
  for (const M of Models) {
    const IsChosen = M === Prev;
    Sel.add(new Option(M, M, IsChosen, IsChosen));
  }
}
|
|
|
|
/**
 * Copy one backend's default prompt into the active prompt field and
 * persist it in localStorage under 'llm_prompt'.
 *
 * @param {string} Prefix  'Md' selects the MarkItDown default prompt;
 *                         any other value selects the Docling one
 */
function UsePrompt(Prefix) {
  const SourceId = (Prefix === 'Md' ? 'Md' : 'Dl') + 'DefaultPrompt';
  const PromptText = document.getElementById(SourceId).value;

  document.getElementById('LlmPrompt').value = PromptText;
  localStorage.setItem('llm_prompt', PromptText);
}
|
|
|
|
/**
 * Download the converted Markdown of one backend as a `.md` file.
 *
 * The file name is the uploaded file's name without its extension (or
 * 'output' when no file is set) plus '_markitdown' or '_docling'.
 * Does nothing when there is no content for the chosen backend.
 *
 * @param {string} Prefix  'Md' for MarkItDown output; any other value
 *                         selects the Docling output
 */
function DownloadMd(Prefix) {
  const IsMd = Prefix === 'Md';
  const Content = IsMd ? MdContent : DlContent;
  if (!Content) return;

  const BaseName = CurrentFile ? CurrentFile.name.replace(/\.[^.]+$/, '') : 'output';
  const Suffix = IsMd ? '_markitdown' : '_docling';

  // The local must not be called `Blob`: a `const Blob` in this scope
  // shadows the global constructor and is still in its temporal dead zone
  // when `new Blob(...)` is evaluated, which throws a ReferenceError.
  const FileBlob = new Blob([Content], { type: 'text/markdown' });
  const ObjectUrl = URL.createObjectURL(FileBlob);

  const A = document.createElement('a');
  A.href = ObjectUrl;
  A.download = BaseName + Suffix + '.md';
  A.click();

  // Release the object URL on a later tick, after the click has been handled.
  setTimeout(() => URL.revokeObjectURL(ObjectUrl), 0);
}
|
|
|
|
/**
 * Switch the "default prompt" tab strip to the given backend.
 *
 * Marks `Link` as the only active tab and shows the matching
 * 'DefaultPrompt-Md' / 'DefaultPrompt-Dl' panel while hiding the other.
 *
 * @param {string} Prefix  'Md' or 'Dl'; any other value hides both panels
 * @param {Element} Link   the tab link that was activated
 */
function SwitchDefaultTab(Prefix, Link) {
  for (const Tab of document.querySelectorAll('#DefaultPromptTabs .nav-link')) {
    Tab.classList.remove('active');
  }
  Link.classList.add('active');

  for (const Key of ['Md', 'Dl']) {
    const Panel = document.getElementById('DefaultPrompt-' + Key);
    Panel.classList.toggle('d-none', Prefix !== Key);
  }
}
|
|
|
|
/**
 * Show the LLM status badge of one backend inside the element `ElId`.
 *
 * When `Data.llm_enabled` is truthy the badge is green and shows
 * `Data.ollama_model`; otherwise a grey "off" badge is shown. The badge
 * is built with `textContent`, so a model name coming from the server is
 * displayed literally and never parsed as HTML.
 *
 * @param {string} ElId   id of the container element
 * @param {Object} Data   settings payload of a backend
 */
function RenderLlmStatus(ElId, Data) {
  const El = document.getElementById(ElId);
  const Badge = document.createElement('span');

  if (Data && Data.llm_enabled) {
    Badge.className = 'badge bg-success-subtle text-success';
    Badge.textContent = `✅ ${Data.ollama_model}`;
  } else {
    Badge.className = 'badge bg-secondary-subtle text-secondary';
    Badge.textContent = '⚪ Tắt';
  }

  El.innerHTML = '';
  El.appendChild(Badge);
}
|
|
|
|
/**
 * Save the Ollama settings from the modal to both backends and refresh
 * their status badges.
 *
 * The same JSON body is POSTed to `/settings` on each service. An empty
 * URL or cleanup model is sent as null; an empty model selection falls
 * back to 'llava'. A non-2xx response is treated as a failure and shown
 * to the user instead of being rendered as a normal status. The save
 * button is disabled and its spinner shown while the request is running.
 *
 * @returns {Promise<void>}
 */
async function SaveSettings() {
  const Url = document.getElementById('SettingUrl').value.trim();
  const Model = document.getElementById('SettingModel').value || 'llava';
  const CleanupModel = document.getElementById('CleanupModel').value;
  const Btn = document.getElementById('SaveSettingsBtn');
  const Spin = document.getElementById('SaveSpinner');

  // POST the settings to one backend; reject on HTTP errors so the catch
  // below reports them.
  const PostSettings = async (BaseUrl, Body) => {
    const Res = await fetch(BaseUrl + '/settings', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: Body,
    });
    if (!Res.ok) {
      // The error body may not be JSON; fall back to the status line.
      const Err = await Res.json().catch(() => null);
      const Detail = Err && Err.detail;
      const Message = typeof Detail === 'string' ? Detail : (Detail ? JSON.stringify(Detail) : '');
      throw new Error(Message || Res.statusText || ('HTTP ' + Res.status));
    }
    return Res.json();
  };

  Btn.disabled = true;
  Spin.classList.remove('d-none');
  try {
    const Body = JSON.stringify({
      ollama_base_url: Url || null,
      ollama_model: Model,
      cleanup_model: CleanupModel || null,
    });
    const [Md, Dl] = await Promise.all([
      PostSettings(MarkItDownUrl, Body),
      PostSettings(DoclingUrl, Body),
    ]);
    RenderLlmStatus('MdLlmStatus', Md);
    RenderLlmStatus('DlLlmStatus', Dl);
  } catch (E) {
    alert('Lưu thất bại: ' + E.message);
  } finally {
    Btn.disabled = false;
    Spin.classList.add('d-none');
  }
}
|
|
|
|
// Page start-up: begin loading backend settings right away (not awaited).
LoadSettings();

// init — restore saved state
// A prompt stored under 'llm_prompt' is put back into the prompt field.
const _savedPrompt = localStorage.getItem('llm_prompt');
if (_savedPrompt) {
  document.getElementById('LlmPrompt').value = _savedPrompt;
}

// Only the stored value '0' unchecks the LLM toggle; anything else leaves it as rendered.
const _savedLlm = localStorage.getItem('llm_enabled');
if (_savedLlm === '0') document.getElementById('LlmToggle').checked = false;

// Run the toggle handler once, after the checkbox state has been restored.
OnLlmToggle();
LoadHistory();
|
|
</script>
|
|
</body>
|
|
|
|
</html> |