AI-markdown/frontend/index.html

480 lines
25 KiB
HTML

<!DOCTYPE html>
<html lang="vi">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>MarkItDown vs Docling vs Unlimited-OCR — LLM Input Processing</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" />
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css" rel="stylesheet" />
<link href="https://cdn.jsdelivr.net/npm/github-markdown-css@5/github-markdown-light.css" rel="stylesheet" />
<link href="/style.css" rel="stylesheet" />
<script>
/**
 * Alpine.js component factory for the converter comparison page; the markup
 * mounts it with x-data="App()".
 *
 * The returned object holds the UI state (input mode, selected file, prompt,
 * one pane object per converter under `md` / `dl` / `uo`) plus the actions the
 * template calls (convert, clearAll, runCleanup, download, ...).
 *
 * @returns {object} Component data object.
 */
function App() {
  // One entry per backend: pane key, default-prompt tab key, API prefix and the
  // settings field that reports whether the service's LLM/OCR is enabled.
  const services = [
    { key: 'md', promptKey: 'Md', api: '/api/markitdown', enabledField: 'llm_enabled' },
    { key: 'dl', promptKey: 'Dl', api: '/api/docling', enabledField: 'llm_enabled' },
    { key: 'uo', promptKey: 'Uo', api: '/api/unlimited-ocr', enabledField: 'ocr_enabled' },
  ];

  // Remembering the prompt/toggle is best-effort: storage access may throw
  // (e.g. when the browser blocks it), and that must not break the page.
  const readStored = (key) => {
    try {
      return localStorage.getItem(key);
    } catch (err) {
      console.warn(`Could not read "${key}" from localStorage`, err);
      return null;
    }
  };
  const writeStored = (key, value) => {
    try {
      localStorage.setItem(key, value);
    } catch (err) {
      console.warn(`Could not store "${key}" in localStorage`, err);
    }
  };

  // GET a JSON document, rejecting on non-2xx responses instead of parsing
  // an error payload as if it were data.
  async function fetchJson(url) {
    const res = await fetch(url);
    if (!res.ok) throw new Error(`GET ${url} failed with HTTP ${res.status}`);
    return res.json();
  }

  // Build an Error from a failed response: prefer the body's `detail`, then
  // the status text, then the numeric status (never an empty message).
  async function responseError(res) {
    const payload = await res.json().catch(() => null);
    const detail = payload && payload.detail;
    let text = '';
    if (typeof detail === 'string') text = detail;
    else if (detail != null) text = JSON.stringify(detail);
    return new Error(text || res.statusText || `HTTP ${res.status}`);
  }

  return {
    inputMode: 'file',
    currentFile: null,
    youtubeUrl: '',
    doclingFmt: 'markdown',
    useLlm: readStored('llm_enabled') !== '0',
    llmPrompt: readStored('llm_prompt') || '',
    promptTab: 'Md',
    converting: false,
    compareRows: [],
    cleanupRunning: false,
    initStarted: false,
    defaultPrompts: { Md: '', Dl: '', Uo: '' },
    llmStatus: { md: false, dl: false, uo: false },
    md: pane('md', 'MarkItDown', 'bg-primary', 'text-primary'),
    dl: pane('dl', 'Docling', 'bg-success', 'text-success'),
    uo: pane('uo', 'Unlimited-OCR', 'bg-danger', 'text-danger'),

    /** True when the active input mode has something to convert. */
    get canConvert() {
      return this.inputMode === 'file' ? !!this.currentFile : !!this.youtubeUrl.trim();
    },

    /**
     * Loads server status and history once.
     * Alpine calls a data object's init() automatically and the markup also
     * invokes it through x-init, so repeated calls are ignored.
     */
    async init() {
      if (this.initStarted) return;
      this.initStarted = true;
      await Promise.all([this.loadStatus(), this.loadHistory()]);
    },

    onFile(e) {
      this.currentFile = e.target.files[0] || null;
    },

    onDrop(e) {
      this.currentFile = e.dataTransfer.files[0] || null;
    },

    /** Formats a byte count as B / KB / MB with one decimal. */
    fmtBytes(b) {
      if (b < 1024) return `${b} B`;
      if (b < 1048576) return `${(b / 1024).toFixed(1)} KB`;
      return `${(b / 1048576).toFixed(1)} MB`;
    },

    /** Formats a timestamp as a vi-VN time; unparseable input is returned unchanged. */
    fmtTime(s) {
      const date = new Date(s);
      // An invalid Date does not throw, it formats as "Invalid Date" — check explicitly.
      return Number.isNaN(date.getTime()) ? s : date.toLocaleTimeString('vi-VN');
    },

    // ── LLM server status (read-only) ─────────────────────
    /** Fetches each service's settings; a failing service keeps its previous values. */
    async loadStatus() {
      const results = await Promise.allSettled(
        services.map((svc) => fetchJson(`${svc.api}/settings`)),
      );
      results.forEach((result, i) => {
        const { key, promptKey, enabledField } = services[i];
        const settings = result.status === 'fulfilled' ? result.value : null;
        if (!settings || typeof settings !== 'object') {
          console.warn(`Could not load settings for "${key}"`, result.reason);
          return;
        }
        this.llmStatus[key] = Boolean(settings[enabledField]);
        this.defaultPrompts[promptKey] = settings.default_prompt || '';
      });
    },

    // ── Conversion ────────────────────────────────────────
    /** Sends the current input to all three converters in parallel and fills the panes. */
    async convert() {
      if (!this.canConvert || this.converting) return;
      this.converting = true;
      this.compareRows = [];

      const prompt = this.llmPrompt.trim() || null;
      const isYt = this.inputMode === 'youtube';
      const file = this.currentFile;
      const url = this.youtubeUrl.trim();
      const useLlm = this.useLlm;
      const panes = [this.md, this.dl, this.uo];

      // Reset everything a previous run may have left behind, including the
      // LLM badge and timing, so a failed pane never shows stale values.
      for (const p of panes) {
        Object.assign(p, {
          loading: true, done: false, content: '', preview: '', error: null,
          llmEnabled: false, ms: 0, status: 'Đang xử lý...',
        });
      }

      const qs = (extra = {}) => new URLSearchParams({
        use_llm: String(useLlm),
        ...extra,
        ...(prompt ? { llm_prompt: prompt } : {}),
      });

      try {
        const results = await Promise.allSettled([
          isYt
            ? postJson('/api/markitdown/convert-url', { url, use_llm: useLlm, llm_prompt: prompt })
            : postForm(`/api/markitdown/convert?${qs()}`, file),
          isYt
            ? postJson('/api/docling/convert-url', { url, output_format: this.doclingFmt, use_llm: useLlm, llm_prompt: prompt })
            : postForm(`/api/docling/convert?${qs({ output_format: this.doclingFmt })}`, file),
          isYt
            ? Promise.reject(new Error('Unlimited-OCR không hỗ trợ YouTube URL'))
            : postForm(`/api/unlimited-ocr/convert?${qs()}`, file),
        ]);
        panes.forEach((p, i) => {
          try {
            applyResult(p, results[i]);
          } catch (err) {
            // A failure while rendering one pane must not leave it spinning
            // or prevent the remaining panes from being filled.
            const message = err instanceof Error ? err.message : String(err);
            Object.assign(p, {
              loading: false, done: true, content: '', preview: '',
              error: message, status: message,
            });
          }
        });
      } finally {
        this.converting = false;
      }

      if (this.md.content || this.dl.content) this.buildCompare();
      writeStored('llm_prompt', this.llmPrompt);
      writeStored('llm_enabled', this.useLlm ? '1' : '0');
      await this.loadHistory();
    },

    /** Builds the summary rows; panes that did not succeed show '—' and are never "best". */
    buildCompare() {
      const panes = { md: this.md, dl: this.dl, uo: this.uo };
      const succeeded = (p) => p.done && !p.error;
      const okPanes = Object.values(panes).filter(succeeded);
      // With no successful pane minMs is Infinity, so nothing is marked best.
      const minMs = Math.min(...okPanes.map((p) => p.ms));
      const maxLen = Math.max(0, ...okPanes.map((p) => p.content.length));

      const row = (label, note, format, isBest) => {
        const cells = { label, note };
        for (const [key, p] of Object.entries(panes)) {
          cells[`${key}Val`] = succeeded(p) ? format(p) : '—';
          cells[`${key}Best`] = succeeded(p) && isBest(p);
        }
        return cells;
      };

      this.compareRows = [
        row('Thời gian xử lý', 'Thấp hơn = nhanh hơn',
          (p) => `${p.ms}ms`,
          (p) => p.ms === minMs),
        row('Độ dài output', 'Nhiều hơn = giữ được nhiều hơn',
          (p) => `${p.content.length} ký tự`,
          (p) => maxLen > 0 && p.content.length === maxLen),
      ];
    },

    /** Clears the inputs, the comparison table and every pane. */
    clearAll() {
      this.currentFile = null;
      this.youtubeUrl = '';
      this.compareRows = [];
      // Empty the hidden <input type="file"> too; otherwise choosing the same
      // file again does not fire a change event.
      const fileInput = this.$refs && this.$refs.fileInput;
      if (fileInput) fileInput.value = '';
      for (const p of [this.md, this.dl, this.uo]) {
        Object.assign(p, {
          loading: false, done: false, content: '', preview: '', error: null,
          llmEnabled: false, ms: 0, status: '',
        });
      }
    },

    /** Sends the MarkItDown output to the cleanup endpoint and shows the result in the preview tab. */
    async runCleanup() {
      if (!this.md.content || this.cleanupRunning) return;
      this.cleanupRunning = true;
      try {
        const res = await fetch('/api/markitdown/cleanup', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ text: this.md.content, prompt: this.llmPrompt.trim() || null }),
        });
        if (!res.ok) throw await responseError(res);
        const d = await res.json();
        if (!d || typeof d.text !== 'string') throw new Error('response did not contain text');
        // Render first so a rendering failure leaves the current content untouched.
        const preview = SafeParse(d.text);
        this.md.content = d.text;
        this.md.preview = preview;
        this.md.tab = 'preview';
      } catch (e) {
        alert('Cleanup failed: ' + e.message);
      } finally {
        this.cleanupRunning = false;
      }
    },

    /** Refreshes each pane's history list; a failed or malformed response keeps the previous list. */
    async loadHistory() {
      const results = await Promise.allSettled(
        services.map((svc) => fetchJson(`${svc.api}/history?limit=8`)),
      );
      results.forEach((result, i) => {
        const { key } = services[i];
        if (result.status === 'fulfilled' && Array.isArray(result.value)) {
          this[key].history = result.value;
        } else {
          console.warn(`Could not load history for "${key}"`, result.reason);
        }
      });
    },

    /** Downloads the content of pane `key` as `<file name without extension>_<key>.md`. */
    download(key) {
      const p = this[key];
      const base = this.currentFile ? this.currentFile.name.replace(/\.[^.]+$/, '') : 'output';
      const href = URL.createObjectURL(new Blob([p.content], { type: 'text/markdown' }));
      const a = Object.assign(document.createElement('a'), {
        href,
        download: `${base}_${key}.md`,
      });
      a.click();
      // Revoke on the next tick rather than synchronously, as a precaution so
      // the browser has started the download before the object URL goes away.
      setTimeout(() => URL.revokeObjectURL(href), 0);
    },
  };
}
/**
 * Creates the state object for one converter pane.
 *
 * @param {string} id - Pane key.
 * @param {string} label - Display name of the converter.
 * @param {string} badgeClass - CSS class for the pane's badge.
 * @param {string} spinnerClass - CSS class for the pane's spinner.
 * @returns {object} Fresh pane state with the 'raw' tab selected and an empty history.
 */
function pane(id, label, badgeClass, spinnerClass) {
  const identity = { id, label, badgeClass, spinnerClass };
  const viewState = { tab: 'raw', loading: false, done: false, error: null };
  const resultState = { content: '', preview: '', llmEnabled: false, ms: 0, status: '', history: [] };
  return { ...identity, ...viewState, ...resultState };
}
/**
 * Turns the `detail` value of an error payload into display text.
 * Strings pass through; lists are joined from each item's `msg` when present
 * (presumably the backend's validation-error shape — confirm against the API);
 * anything else is JSON-encoded. Returns '' when there is nothing usable.
 */
function describeErrorDetail(detail) {
  if (typeof detail === 'string') return detail;
  if (detail == null) return '';
  if (Array.isArray(detail)) {
    return detail
      .map((item) => (item && typeof item.msg === 'string' ? item.msg : describeErrorDetail(item)))
      .filter(Boolean)
      .join('; ');
  }
  try {
    return JSON.stringify(detail);
  } catch (err) {
    return String(detail);
  }
}

/**
 * Sends a POST request and measures the time until the response arrives.
 *
 * @param {string} url - Request URL.
 * @param {object} init - Extra fetch options (body, headers).
 * @returns {Promise<{data: any, ms: number}>} Parsed JSON body and elapsed milliseconds.
 * @throws {Error} On a non-2xx response; the message is the payload's detail,
 *   else the status text, else "HTTP <status>" so it is never empty.
 */
async function timedPost(url, init) {
  const t0 = performance.now();
  const res = await fetch(url, { method: 'POST', ...init });
  const ms = Math.round(performance.now() - t0);
  if (!res.ok) {
    const payload = await res.json().catch(() => null);
    const message = describeErrorDetail(payload && payload.detail)
      || res.statusText
      || `HTTP ${res.status}`;
    throw new Error(message);
  }
  return { data: await res.json(), ms };
}

/**
 * Uploads `file` as multipart form field "file".
 *
 * @param {string} url - Request URL.
 * @param {Blob} file - File to upload.
 * @returns {Promise<{data: any, ms: number}>}
 */
async function postForm(url, file) {
  const fd = new FormData();
  fd.append('file', file);
  return timedPost(url, { body: fd });
}

/**
 * Posts `body` as JSON.
 *
 * @param {string} url - Request URL.
 * @param {object} body - Value to serialize as the request body.
 * @returns {Promise<{data: any, ms: number}>}
 */
async function postJson(url, body) {
  return timedPost(url, {
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  });
}
/**
 * Copies one settled conversion result into its pane.
 *
 * @param {object} pane - Pane state object to update in place.
 * @param {{status: string, value?: {data: object, ms: number}, reason?: any}} result
 *   One entry of a Promise.allSettled() result array.
 *
 * On success the pane receives the content (`data.markdown`, else
 * `data.content`), its rendered preview, the LLM flag, the timing and a status
 * line. On failure every result field is cleared so nothing from an earlier
 * run remains visible, and the error text is never empty.
 */
function applyResult(pane, result) {
  if (result.status === 'fulfilled') {
    const { data, ms } = result.value;
    const payload = data || {};
    const raw = payload.markdown || payload.content || '';
    // The template and the Markdown renderer both expect a string.
    pane.content = typeof raw === 'string' ? raw : JSON.stringify(raw, null, 2);
    try {
      pane.preview = SafeParse(pane.content);
    } catch (err) {
      // Keep the raw output usable when only the preview rendering fails.
      console.warn('Preview rendering failed', err);
      pane.preview = '';
    }
    pane.llmEnabled = !!payload.llm_enabled;
    pane.ms = ms;
    pane.status = `${ms} ms` + (pane.llmEnabled ? ' 🤖' : '');
    pane.error = null;
  } else {
    const reason = result.reason;
    let text = '';
    if (reason && typeof reason.message === 'string') text = reason.message;
    else if (reason != null) text = String(reason);
    // The template hides the error box for a falsy error, so never leave it empty.
    const message = text || 'Lỗi không xác định';
    pane.content = '';
    pane.preview = '';
    pane.llmEnabled = false;
    pane.ms = 0;
    pane.error = message;
    pane.status = `${message}`;
  }
  pane.loading = false;
  pane.done = true;
}
</script>
</head>
<body>
<div id="app-nav"></div>
<script src="/layout.js"></script>
<div x-data="App()" x-init="init()">
<div class="container-xl py-4">
<!-- ── Upload card ─────────────────────────────────────────── -->
<div class="card shadow-sm mb-4">
<div class="card-body">
<h6 class="card-title fw-semibold mb-3">Tải lên tài liệu để so sánh</h6>
<!-- Mode tabs -->
<ul class="nav nav-tabs mb-3">
<li class="nav-item">
<a class="nav-link py-1 px-3 small" :class="{ active: inputMode==='file' }"
href="#" @click.prevent="inputMode='file'">
<i class="bi bi-file-earmark-text me-1"></i>File
</a>
</li>
<li class="nav-item">
<a class="nav-link py-1 px-3 small" :class="{ active: inputMode==='youtube' }"
href="#" @click.prevent="inputMode='youtube'">
<i class="bi bi-youtube me-1 text-danger"></i>YouTube
</a>
</li>
</ul>
<!-- File zone -->
<div x-show="inputMode==='file'">
<div class="mb-2"
style="border:2px dashed #dee2e6;border-radius:.5rem;padding:3rem 1.5rem;text-align:center;cursor:pointer;transition:border-color .2s,background .2s"
@click="$refs.fileInput.click()"
@dragover.prevent
@drop.prevent="onDrop($event)">
<input type="file" x-ref="fileInput" style="display:none"
accept=".pdf,.docx,.xlsx,.pptx,.html,.htm,.csv,.txt,.jpg,.jpeg,.png,.tiff,.tif,.bmp,.md,.epub,.zip,.asciidoc,.adoc,.webp"
@change="onFile($event)" />
<i class="bi bi-file-earmark-text fs-1 text-secondary"></i>
<p class="text-muted mt-2 mb-1">Kéo thả hoặc click để chọn file</p>
<div class="fw-semibold text-primary small"
x-text="currentFile ? currentFile.name + ' (' + fmtBytes(currentFile.size) + ')' : ''"></div>
</div>
<div class="d-flex flex-wrap gap-1">
<template x-for="ext in ['PDF','DOCX','XLSX','PPTX','HTML','CSV','TXT','JPG/PNG','EPUB','TIFF','ASCIIDoc']" :key="ext">
<span class="badge bg-secondary-subtle text-secondary" x-text="ext"></span>
</template>
</div>
</div>
<!-- YouTube -->
<div x-show="inputMode==='youtube'">
<div class="input-group">
<span class="input-group-text bg-danger text-white"><i class="bi bi-youtube"></i></span>
<input type="url" class="form-control" x-model="youtubeUrl" placeholder="https://www.youtube.com/watch?v=..." />
<button class="btn btn-outline-secondary" @click="youtubeUrl=''"><i class="bi bi-x"></i></button>
</div>
<div class="form-text mt-1">
<i class="bi bi-info-circle me-1"></i>
<b>MarkItDown</b>: yt-dlp &nbsp;|&nbsp;
<b>Docling</b>: transcript → markdown &nbsp;|&nbsp;
<b>Unlimited-OCR</b>: <span class="text-warning-emphasis">không hỗ trợ URL</span>
</div>
</div>
<!-- Controls -->
<div class="d-flex flex-wrap align-items-center gap-3 mt-3">
<div class="d-flex align-items-center gap-2">
<label class="form-label mb-0 small fw-medium">Docling format</label>
<select class="form-select form-select-sm" x-model="doclingFmt" style="width:auto">
<option value="markdown">Markdown</option>
<option value="json">JSON</option>
<option value="html">HTML</option>
<option value="text">Plain Text</option>
</select>
</div>
<div class="form-check form-switch mb-0">
<input class="form-check-input" type="checkbox" x-model="useLlm" id="LlmToggle" />
<label class="form-check-label small fw-medium" for="LlmToggle"
x-text="useLlm ? 'LLM bật' : 'LLM tắt'"></label>
</div>
<!-- Server LLM status dots -->
<div class="d-flex align-items-center gap-1">
<template x-for="[key, cls] in [['md','bg-primary'],['dl','bg-success'],['uo','bg-danger']]" :key="key">
<span class="badge rounded-circle p-1" :class="llmStatus[key] ? cls : 'bg-secondary-subtle'"
:title="$data[key].label + (llmStatus[key] ? ': LLM sẵn sàng' : ': LLM chưa cấu hình')">&nbsp;</span>
</template>
<span class="text-muted" style="font-size:.72rem">server status</span>
</div>
<button class="btn btn-primary btn-sm" :disabled="!canConvert || converting" @click="convert()">
<span x-show="converting" class="spinner-border spinner-border-sm me-1"></span>
<i x-show="!converting" class="bi bi-play-fill me-1"></i>Chuyển đổi & So sánh
</button>
<button class="btn btn-outline-secondary btn-sm" @click="clearAll()">
<i class="bi bi-x-circle me-1"></i>Xoá
</button>
</div>
<!-- LLM Prompt panel -->
<div x-show="useLlm" class="mt-3">
<div class="row g-3">
<div class="col-md-6">
<div class="h-100 p-3 rounded border bg-primary-subtle">
<div class="d-flex justify-content-between align-items-center mb-2">
<span class="small fw-semibold text-primary"><i class="bi bi-pencil-square me-1"></i>Custom LLM Prompt</span>
<button class="btn btn-link btn-sm p-0 text-primary" @click="llmPrompt=''">Xoá</button>
</div>
<textarea class="form-control form-control-sm" x-model="llmPrompt" rows="6"
placeholder="Để trống = dùng Default prompt&#10;&#10;Nhập custom system prompt để override..."></textarea>
<div class="form-text mt-1 text-primary small">
<b>MarkItDown</b>: vision &nbsp;|&nbsp; <b>Docling</b>: enrich &nbsp;|&nbsp; <b>Unlimited-OCR</b>: OCR
</div>
</div>
</div>
<div class="col-md-6">
<div class="h-100 p-3 rounded border bg-light">
<ul class="nav nav-tabs mb-2">
<template x-for="[key, label, cls] in [['Md','MarkItDown','bg-primary'],['Dl','Docling','bg-success'],['Uo','Unlimited-OCR','bg-danger']]" :key="key">
<li class="nav-item">
<a class="nav-link py-1 px-2 small" :class="{ active: promptTab===key }"
href="#" @click.prevent="promptTab=key">
<span class="badge rounded-circle p-1 me-1" :class="cls">&nbsp;</span>
<span x-text="label"></span>
</a>
</li>
</template>
</ul>
<textarea class="form-control form-control-sm font-monospace" rows="5" readonly
style="font-size:.7rem;resize:none;background:#fff"
:value="defaultPrompts[promptTab]"></textarea>
<button class="btn btn-outline-primary btn-sm mt-2 w-100"
@click="llmPrompt = defaultPrompts[promptTab]">
<i class="bi bi-arrow-left me-1"></i>Dùng prompt này
</button>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- ── Progress row ───────────────────────────────────────── -->
<div class="row g-3 mb-4" x-show="converting || md.done || dl.done || uo.done">
<template x-for="key in ['md','dl','uo']" :key="key">
<div class="col-md-4">
<div class="card h-100">
<div class="card-body">
<h6 class="card-title d-flex align-items-center gap-2">
<span class="badge rounded-circle p-1" :class="$data[key].badgeClass">&nbsp;</span>
<span x-text="$data[key].label"></span>
</h6>
<div class="d-flex align-items-center gap-2">
<div x-show="$data[key].loading" class="spinner-border spinner-border-sm" :class="$data[key].spinnerClass"></div>
<small class="text-muted" x-text="$data[key].status || 'Đang chờ...'"></small>
</div>
<div x-show="$data[key].done && $data[key].content" class="row g-2 mt-2">
<div class="col-4"><div class="border rounded text-center py-2">
<div class="fw-bold" x-text="$data[key].ms.toLocaleString()"></div>
<div class="text-muted" style="font-size:.7rem">ms</div>
</div></div>
<div class="col-4"><div class="border rounded text-center py-2">
<div class="fw-bold" x-text="$data[key].content.length.toLocaleString()"></div>
<div class="text-muted" style="font-size:.7rem">ký tự</div>
</div></div>
<div class="col-4"><div class="border rounded text-center py-2">
<div class="fw-bold" x-text="$data[key].content.split('\n').length"></div>
<div class="text-muted" style="font-size:.7rem">dòng</div>
</div></div>
</div>
</div>
</div>
</div>
</template>
</div>
<!-- ── Compare table ──────────────────────────────────────── -->
<div class="card shadow-sm mb-4" x-show="compareRows.length">
<div class="card-body">
<h6 class="card-title fw-semibold mb-3"><i class="bi bi-bar-chart-line me-1"></i>Tóm tắt so sánh</h6>
<table class="table table-sm table-bordered mb-0">
<thead class="table-light">
<tr><th>Tiêu chí</th><th>MarkItDown</th><th>Docling</th><th>Unlimited-OCR</th><th class="text-muted">Ghi chú</th></tr>
</thead>
<tbody>
<template x-for="r in compareRows" :key="r.label">
<tr>
<td x-text="r.label"></td>
<td :class="r.mdBest ? 'text-success fw-semibold' : 'text-muted'" x-text="(r.mdBest?'🏆 ':'')+r.mdVal"></td>
<td :class="r.dlBest ? 'text-success fw-semibold' : 'text-muted'" x-text="(r.dlBest?'🏆 ':'')+r.dlVal"></td>
<td :class="r.uoBest ? 'text-success fw-semibold' : 'text-muted'" x-text="(r.uoBest?'🏆 ':'')+r.uoVal"></td>
<td class="text-muted small" x-text="r.note"></td>
</tr>
</template>
</tbody>
</table>
</div>
</div>
<!-- ── Result cards ───────────────────────────────────────── -->
<div class="row g-3 mb-4">
<template x-for="key in ['md','dl','uo']" :key="key">
<div class="col-md-4">
<div class="card shadow-sm h-100">
<div class="card-header d-flex justify-content-between align-items-center py-2">
<span class="fw-semibold small d-flex align-items-center gap-2">
<span class="badge rounded-circle p-1" :class="$data[key].badgeClass">&nbsp;</span>
<span x-text="$data[key].label"></span>
<span x-show="$data[key].llmEnabled" class="badge bg-primary-subtle text-primary" style="font-size:.65rem">🤖 LLM</span>
</span>
<div class="d-flex align-items-center gap-1">
<button x-show="key==='md' && md.content" class="btn btn-outline-warning btn-sm py-0 px-2"
:disabled="cleanupRunning" @click="runCleanup()">
<span x-show="cleanupRunning" class="spinner-border spinner-border-sm"></span>
<span x-show="!cleanupRunning">✨ Làm đẹp</span>
</button>
<button x-show="$data[key].content" class="btn btn-outline-secondary btn-sm py-0 px-2"
@click="download(key)"><i class="bi bi-download"></i> .md</button>
<ul class="nav nav-tabs card-header-tabs border-0">
<li class="nav-item">
<a class="nav-link py-1 px-2 small" :class="{ active: $data[key].tab==='raw' }"
href="#" @click.prevent="$data[key].tab='raw'">Raw</a>
</li>
<li class="nav-item">
<a class="nav-link py-1 px-2 small" :class="{ active: $data[key].tab==='preview' }"
href="#" @click.prevent="$data[key].tab='preview'">Preview</a>
</li>
</ul>
</div>
</div>
<div class="card-body p-0">
<div x-show="$data[key].loading" class="text-center text-muted py-5 small">
<div class="spinner-border spinner-border-sm mb-2" :class="$data[key].spinnerClass"></div>
<br>Đang xử lý...
</div>
<div x-show="!$data[key].loading && $data[key].error"
class="alert alert-danger m-3 small" x-text="$data[key].error"></div>
<pre x-show="!$data[key].loading && !$data[key].error && $data[key].tab==='raw'"
class="ResultPre p-3 m-0" x-text="$data[key].content"></pre>
<div x-show="!$data[key].loading && !$data[key].error && $data[key].tab==='preview'"
class="PreviewPane markdown-body" x-html="$data[key].preview"></div>
<div x-show="!$data[key].loading && !$data[key].error && !$data[key].content"
class="text-center text-muted py-5 small">
<i class="bi bi-upload fs-3 d-block mb-2"></i>Tải file lên để xem kết quả
</div>
</div>
</div>
</div>
</template>
</div>
<!-- ── History ────────────────────────────────────────────── -->
<h6 class="fw-semibold mb-3">Lịch sử chuyển đổi gần đây</h6>
<div class="row g-3">
<template x-for="key in ['md','dl','uo']" :key="key">
<div class="col-md-4">
<div class="card shadow-sm">
<div class="card-header py-2 small fw-semibold d-flex align-items-center gap-2">
<span class="badge rounded-circle p-1" :class="$data[key].badgeClass">&nbsp;</span>
<span x-text="$data[key].label"></span>
</div>
<div x-show="!$data[key].history.length" class="text-center text-muted py-3 small">Chưa có lịch sử</div>
<ul x-show="$data[key].history.length" class="list-group list-group-flush">
<template x-for="item in $data[key].history" :key="item.id">
<li class="list-group-item d-flex justify-content-between align-items-center py-2 px-3">
<span class="small fw-medium text-truncate me-2" style="max-width:60%" x-text="item.filename"></span>
<span class="d-flex gap-1 align-items-center flex-shrink-0">
<span x-show="item.file_type" class="badge bg-secondary-subtle text-secondary" x-text="item.file_type"></span>
<span x-show="item.llm_enabled" class="badge bg-primary-subtle text-primary">🤖 LLM</span>
<span class="text-muted" style="font-size:.7rem" x-text="fmtTime(item.created_at)"></span>
</span>
</li>
</template>
</ul>
</div>
</div>
</template>
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/marked@13/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
<script>
// Configure the global `marked` instance once for the whole page, with the
// `gfm` and `breaks` options switched on; SafeParse() renders through it.
marked.use({ gfm: true, breaks: true });
/**
 * Renders Markdown to HTML with `marked` and passes the result through
 * DOMPurify (HTML profile) before it is returned.
 *
 * @param {string} Md - Markdown source; a falsy value is treated as ''.
 * @returns {string} Whatever DOMPurify.sanitize returns for the rendered HTML.
 */
function SafeParse(Md) {
  const source = Md || '';
  const renderedHtml = marked.parse(source);
  const sanitizeOptions = { USE_PROFILES: { html: true } };
  return DOMPurify.sanitize(renderedHtml, sanitizeOptions);
}
</script>
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.14.9/dist/cdn.min.js"></script>
</body>
</html>