Migrate to Laravel app + unified services; add email conversion
- Move docling/markitdown services under services/ alongside new unlimited-ocr and vision services - Add Laravel app for email-to-markdown conversion and OCR frontend - Add email export tooling and example emails/output - Update docker-compose, Caddyfile, and frontend assets Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6ba704865f
commit
3f8765a581
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"git.ignoreLimitWarning": true
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
-- unlimited_ocr database tables
-- IF NOT EXISTS makes this script a no-op when the table is already present.
-- NOTE(review): presumably one row per converted document — confirm against the service code.
CREATE TABLE IF NOT EXISTS conversions (
    id SERIAL PRIMARY KEY,
    filename VARCHAR(255) NOT NULL,
    file_type VARCHAR(50),
    output_format VARCHAR(20) DEFAULT 'markdown',
    content TEXT,
    page_count INTEGER,
    llm_enabled BOOLEAN DEFAULT TRUE,
    -- In PostgreSQL a bare TIMESTAMP carries no time zone.
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
|
@ -1,14 +1,16 @@
|
|||
# ============================================================
|
||||
# AI Markdown Demo — MarkItDown vs Docling
|
||||
# AI Markdown Demo — MarkItDown vs Docling vs Unlimited-OCR
|
||||
# ============================================================
|
||||
# Naming convention: <layer>-<service>
|
||||
#
|
||||
# ui :8484 — Comparison UI (nginx)
|
||||
# api-markitdown:8282 — MarkItDown API (Microsoft)
|
||||
# api-docling :8383 — Docling API (IBM)
|
||||
# db-markitdown :5432 — PostgreSQL for markitdown
|
||||
# db-docling :— — PostgreSQL for docling (internal only)
|
||||
# db-admin :5050 — Adminer
|
||||
# ui :8484 — Comparison UI (Laravel)
|
||||
# api-markitdown :8282 — MarkItDown API (Microsoft)
|
||||
# api-docling :8383 — Docling API (IBM)
|
||||
# api-unlimited-ocr :8181 — Unlimited-OCR API (Baidu)
|
||||
# db-markitdown :5432 — PostgreSQL for markitdown
|
||||
# db-docling :— — PostgreSQL for docling (internal only)
|
||||
# db-unlimited-ocr :— — PostgreSQL for unlimited-ocr (internal only)
|
||||
# db-admin :5050 — Adminer
|
||||
# ============================================================
|
||||
|
||||
x-healthcheck-defaults: &healthcheck-defaults
|
||||
|
|
@ -28,44 +30,98 @@ services:
|
|||
|
||||
# --- UI layer ---------------------------------------------------
|
||||
ui:
|
||||
image: caddy:alpine
|
||||
image: shinsenter/laravel:php8.3
|
||||
<<: *service-defaults
|
||||
ports:
|
||||
- "8484:80"
|
||||
volumes:
|
||||
- ./frontend/Caddyfile:/etc/caddy/Caddyfile:ro
|
||||
- ./frontend:/srv:ro
|
||||
- ./laravel-app:/var/www/html
|
||||
- .:/workspace:ro
|
||||
environment:
|
||||
APP_ENV: production
|
||||
APP_DEBUG: "false"
|
||||
LOG_CHANNEL: stderr
|
||||
depends_on:
|
||||
db-markitdown:
|
||||
condition: service_healthy
|
||||
db-docling:
|
||||
condition: service_healthy
|
||||
db-unlimited-ocr:
|
||||
condition: service_healthy
|
||||
|
||||
# --- API layer --------------------------------------------------
|
||||
api-markitdown:
|
||||
build:
|
||||
context: ./markitdown-service
|
||||
context: ./services/markitdown-service
|
||||
<<: *service-defaults
|
||||
ports:
|
||||
- "8282:8000"
|
||||
env_file: .env
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-admin}:${POSTGRES_PASSWORD:-admin}@db-markitdown:5432/${POSTGRES_DB:-markitdown}
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
OLLAMA_MODEL: ${OLLAMA_MODEL:-llava}
|
||||
VISION_URL: ${VISION_URL:-http://api-vision:8000}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
- .:/workspace:rw
|
||||
depends_on:
|
||||
db-markitdown:
|
||||
condition: service_healthy
|
||||
api-vision:
|
||||
condition: service_started
|
||||
|
||||
api-docling:
|
||||
build:
|
||||
context: ./docling-service
|
||||
context: ./services/docling-service
|
||||
<<: *service-defaults
|
||||
ports:
|
||||
- "8383:8000"
|
||||
env_file: .env
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-admin}:${POSTGRES_PASSWORD:-admin}@db-docling:5432/docling
|
||||
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-}
|
||||
OLLAMA_MODEL: ${OLLAMA_MODEL:-llava}
|
||||
VISION_URL: ${VISION_URL:-http://api-vision:8000}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
volumes:
|
||||
- .:/workspace:ro
|
||||
depends_on:
|
||||
db-docling:
|
||||
condition: service_healthy
|
||||
api-vision:
|
||||
condition: service_started
|
||||
|
||||
api-unlimited-ocr:
|
||||
build:
|
||||
context: ./services/unlimited-ocr-service
|
||||
<<: *service-defaults
|
||||
ports:
|
||||
- "8181:8000"
|
||||
env_file: .env
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER:-admin}:${POSTGRES_PASSWORD:-admin}@db-unlimited-ocr:5432/unlimited_ocr
|
||||
OCR_BASE_URL: ${OCR_BASE_URL:-}
|
||||
OCR_API_KEY: ${OCR_API_KEY:-none}
|
||||
OCR_MODEL: ${OCR_MODEL:-Unlimited-OCR}
|
||||
OCR_DPI: ${OCR_DPI:-300}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
depends_on:
|
||||
db-unlimited-ocr:
|
||||
condition: service_healthy
|
||||
|
||||
# --- Vision layer (shared OCRFlux, used by both markitdown and docling) ---
|
||||
api-vision:
|
||||
build:
|
||||
context: ./services/vision-service
|
||||
<<: *service-defaults
|
||||
ports:
|
||||
- "8585:8000"
|
||||
env_file: .env
|
||||
environment:
|
||||
LLM_BASE_URL: ${LLM_BASE_URL:-http://host.docker.internal:11436/v1}
|
||||
LLM_MODEL: ${LLM_MODEL:-ggml-org/Qwen2.5-VL-3B-Instruct-GGUF}
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
||||
# --- Database layer ---------------------------------------------
|
||||
db-markitdown:
|
||||
|
|
@ -100,6 +156,21 @@ services:
|
|||
<<: *healthcheck-defaults
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-admin} -d docling"]
|
||||
|
||||
db-unlimited-ocr:
|
||||
image: postgres:16-alpine
|
||||
<<: *service-defaults
|
||||
env_file: .env
|
||||
environment:
|
||||
POSTGRES_DB: unlimited_ocr
|
||||
POSTGRES_USER: ${POSTGRES_USER:-admin}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-admin}
|
||||
volumes:
|
||||
- db_unlimited_ocr_data:/var/lib/postgresql/data
|
||||
- ./db/init_unlimited_ocr.sql:/docker-entrypoint-initdb.d/init.sql:ro
|
||||
healthcheck:
|
||||
<<: *healthcheck-defaults
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-admin} -d unlimited_ocr"]
|
||||
|
||||
# --- Admin layer ------------------------------------------------
|
||||
db-admin:
|
||||
image: adminer:4.8.1
|
||||
|
|
@ -122,3 +193,4 @@ networks:
|
|||
volumes:
|
||||
db_markitdown_data:
|
||||
db_docling_data:
|
||||
db_unlimited_ocr_data:
|
||||
|
|
|
|||
|
|
@ -1,343 +0,0 @@
|
|||
import os
|
||||
import re
|
||||
import json
|
||||
import tempfile
|
||||
import logging
|
||||
from fastapi import UploadFile, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption, ImageFormatOption
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from app.models.ConvertModel import Conversion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import openai as _openai
|
||||
|
||||
# Base URL of the OpenAI-compatible LLM endpoint; an unset or empty variable becomes None.
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL") or None
# Model name sent with chat-completion requests; defaults to "llava".
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llava")
# NOTE(review): not referenced in the visible part of this module — confirm it is still needed.
CLEANUP_MODEL = os.getenv("CLEANUP_MODEL", "")
|
||||
|
||||
def _build_converter() -> DocumentConverter:
    """Create the Docling converter, with Tesseract CLI OCR when it can be configured.

    If building the Tesseract-backed pipeline options raises, a warning is
    logged and the converter is built with OCR switched off instead.

    Returns:
        A DocumentConverter whose PDF and IMAGE formats share one set of
        pipeline options.
    """
    try:
        pipeline = PdfPipelineOptions(do_ocr=True, ocr_options=TesseractCliOcrOptions())
        logger.info("Docling: OCR enabled via Tesseract CLI")
    except Exception as exc:
        logger.warning("Docling: Tesseract unavailable (%s) — OCR disabled", exc)
        pipeline = PdfPipelineOptions(do_ocr=False)

    # ImageFormatOption also uses StandardPdfPipeline — hand it the same
    # options so docling does not fall back to RapidOCR / PP-OCRv6.
    per_format = {
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline),
        InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline),
    }
    return DocumentConverter(format_options=per_format)
|
||||
|
||||
# Converter instance built once at import time; convert_url() and convert_file() both use it.
converter = _build_converter()

# OpenAI-compatible client object; None until _init_llm() creates one.
_llm_client = None
# Set to True by _init_llm() once a client has been created.
LLM_ACTIVE = False
|
||||
|
||||
|
||||
def _init_llm(base_url: str | None, model: str) -> bool:
|
||||
global OLLAMA_BASE_URL, OLLAMA_MODEL, LLM_ACTIVE, _llm_client
|
||||
if not base_url:
|
||||
OLLAMA_BASE_URL, OLLAMA_MODEL, LLM_ACTIVE, _llm_client = None, model, False, None
|
||||
return False
|
||||
try:
|
||||
client = _openai.OpenAI(base_url=base_url, api_key="ollama")
|
||||
OLLAMA_BASE_URL = base_url
|
||||
OLLAMA_MODEL = model
|
||||
_llm_client = client
|
||||
LLM_ACTIVE = True
|
||||
logger.info("Docling: LLM enabled via %s (model=%s)", base_url, model)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.warning("Docling: LLM init failed (%s)", e)
|
||||
LLM_ACTIVE = False
|
||||
return False
|
||||
|
||||
|
||||
# Configure the LLM client at import time from the environment-derived settings.
_init_llm(OLLAMA_BASE_URL, OLLAMA_MODEL)


# System prompt _llm_enrich() uses when the caller does not pass one.
DEFAULT_ENRICH_PROMPT = (
    "You are a document cleaning assistant. "
    "Fix OCR errors, normalise whitespace, and improve the Markdown structure. "
    "Return ONLY the raw Markdown text — no code fences, no commentary, no explanation."
)
|
||||
|
||||
|
||||
def _llm_enrich(markdown: str, system_prompt: str | None = None) -> str:
    """Ask the configured LLM to clean up extracted Markdown.

    Args:
        markdown: Text produced by the converter.
        system_prompt: Optional replacement for ``DEFAULT_ENRICH_PROMPT``.

    Returns:
        The LLM's answer with a wrapping code fence removed. The input is
        returned unchanged when no client is configured, the input is blank,
        the answer is empty, or the request raises.
    """
    if not (_llm_client and markdown.strip()):
        return markdown

    try:
        conversation = [
            {"role": "system", "content": system_prompt or DEFAULT_ENRICH_PROMPT},
            {"role": "user", "content": markdown},
        ]
        completion = _llm_client.chat.completions.create(
            model=OLLAMA_MODEL,
            messages=conversation,
            temperature=0,
        )
        cleaned = (completion.choices[0].message.content or markdown).strip()
        # llava tends to wrap its output in code fences whatever the prompt says.
        cleaned = re.sub(r"^```(?:markdown)?\s*\n?", "", cleaned)
        cleaned = re.sub(r"\n?```\s*$", "", cleaned.strip()).strip()
        return cleaned or markdown
    except Exception as err:
        logger.warning("Docling: LLM enrichment failed (%s) — returning raw output", err)
        return markdown
|
||||
|
||||
|
||||
|
||||
async def convert_url(
    url: str,
    db: AsyncSession,
    output_format: str = "markdown",
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> "Conversion":
    """Summarise a video URL as Markdown via yt-dlp, run it through Docling and store it.

    Metadata is fetched with yt-dlp without downloading the media. The
    document body is the first subtitle track that can be downloaded
    (Vietnamese before English, manual subtitles before automatic captions),
    otherwise the description truncated to 3000 characters, otherwise a
    placeholder note. Chapters, when present, are appended as a list.

    Args:
        url: URL handed to yt-dlp.
        db: Async SQLAlchemy session used to persist the result.
        output_format: "markdown", "json" or "html" select the matching
            Docling export; any other value stores the generated Markdown as is.
        use_llm: Run LLM cleanup when a client is configured (only applied
            to "markdown" and "text" output).
        llm_prompt: Optional system prompt override for the cleanup step.

    Returns:
        The persisted Conversion row (``file_type="youtube"``, ``page_count=None``).

    Raises:
        HTTPException: 500 if yt-dlp is not installed, 422 if yt-dlp fails to
            extract the URL, 500 if conversion or the database write fails.
    """
    try:
        import yt_dlp  # noqa: PLC0415
    except ImportError as e:
        raise HTTPException(status_code=500, detail="yt-dlp not installed") from e
    import urllib.request  # noqa: PLC0415

    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitleslangs": ["vi", "en"],
        "outtmpl": "%(id)s.%(ext)s",
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        raise HTTPException(status_code=422, detail=f"yt-dlp error: {e}") from e

    title = info.get("title", "YouTube Video")
    description = info.get("description", "") or ""
    channel = info.get("channel", info.get("uploader", ""))
    duration = info.get("duration_string", "")
    upload_date = info.get("upload_date", "")
    view_count = info.get("view_count")
    chapters = info.get("chapters") or []

    # Build markdown from available metadata
    lines = [f"# {title}", ""]
    meta_rows = []
    if channel:
        meta_rows.append(f"**Kênh:** {channel}")
    if duration:
        meta_rows.append(f"**Thời lượng:** {duration}")
    if upload_date and len(upload_date) == 8:
        # upload_date arrives as YYYYMMDD; render it as YYYY-MM-DD.
        meta_rows.append(f"**Ngày đăng:** {upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:]}")
    if view_count is not None:
        meta_rows.append(f"**Lượt xem:** {view_count:,}")
    meta_rows.append(f"**URL:** {url}")
    lines.extend(meta_rows)
    lines.append("")

    # Subtitles/transcript
    subtitles = info.get("subtitles") or {}
    auto_subtitles = info.get("automatic_captions") or {}

    transcript_text = None
    for lang in ("vi", "en"):
        tracks = subtitles.get(lang) or auto_subtitles.get(lang)
        if tracks:
            # Use the first track in a text-based format that downloads successfully.
            for track in tracks:
                if track.get("ext") in ("json3", "srv3", "ttml", "vtt"):
                    try:
                        with urllib.request.urlopen(track["url"], timeout=15) as r:
                            raw = r.read().decode("utf-8", errors="ignore")
                        if track.get("ext") == "vtt":
                            # Strip cue timings, numeric cue ids and inline tags.
                            cleaned = re.sub(r"\d{2}:\d{2}:\d{2}\.\d+ --> .*", "", raw)
                            cleaned = re.sub(r"^\d+$", "", cleaned, flags=re.MULTILINE)
                            cleaned = re.sub(r"<[^>]+>", "", cleaned)
                            transcript_text = re.sub(r"\n{3,}", "\n\n", cleaned).strip()
                        else:
                            # Other formats are stored undecoded, exactly as downloaded.
                            transcript_text = raw
                        break
                    except Exception as e:
                        # Best effort: try the next track, but leave a trace of the failure.
                        logger.warning(
                            "Docling: subtitle download failed (lang=%s, ext=%s): %s",
                            lang, track.get("ext"), e,
                        )
        if transcript_text:
            break

    if transcript_text:
        lines += ["## Transcript / Phụ đề", "", transcript_text, ""]
    elif description:
        lines += ["## Mô tả", "", description[:3000], ""]
    else:
        lines += ["## Ghi chú", "", "_Không có transcript hoặc mô tả._", ""]

    if chapters:
        lines += ["## Chapters", ""]
        for ch in chapters:
            # A chapter may carry start_time=None; treat that as 0 instead of crashing.
            start = ch.get("start_time") or 0
            m, s = divmod(int(start), 60)
            lines.append(f"- **{m:02d}:{s:02d}** — {ch.get('title', '')}")
        lines.append("")

    markdown_text = "\n".join(lines)

    # Write temp file and run through Docling
    video_id = info.get("id", "youtube")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".md", mode="w", encoding="utf-8") as tmp:
        tmp.write(markdown_text)
        tmp_path = tmp.name

    try:
        result = converter.convert(tmp_path)
        doc = result.document
        page_count = None

        if output_format == "markdown":
            content = doc.export_to_markdown()
        elif output_format == "json":
            content = json.dumps(doc.export_to_dict(), ensure_ascii=False, indent=2)
        elif output_format == "html":
            content = doc.export_to_html()
        else:
            content = markdown_text

        llm_used = False
        if _llm_client and use_llm and output_format in ("markdown", "text"):
            content = _llm_enrich(content, system_prompt=llm_prompt or None)
            llm_used = True

        record = Conversion(
            filename=f"{video_id}.md",
            file_type="youtube",
            output_format=output_format,
            content=content,
            page_count=page_count,
            llm_enabled=llm_used,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as e:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The temp file was created with delete=False, so remove it explicitly.
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
# Lower-case file extensions (without the dot) that _allowed_file() accepts.
ALLOWED_EXTENSIONS = {
    "pdf", "docx", "xlsx", "pptx",
    "html", "htm", "jpg", "jpeg", "png",
    "tiff", "tif", "bmp", "md", "txt", "asciidoc", "adoc"
}
# Values convert_file() accepts for its output_format argument.
OUTPUT_FORMATS = {"markdown", "json", "html", "text"}
|
||||
|
||||
|
||||
def _allowed_file(filename: str) -> bool:
|
||||
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
async def convert_file(
    file: UploadFile,
    db: AsyncSession,
    output_format: str = "markdown",
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert an uploaded file with Docling and store the result.

    Args:
        file: The upload; its extension must be in ALLOWED_EXTENSIONS.
        db: Async SQLAlchemy session used to persist the result.
        output_format: One of OUTPUT_FORMATS. "text" is the Markdown export
            with heading markers and ``*``/``**`` emphasis removed.
        use_llm: Run LLM cleanup when a client is configured (only applied
            to "markdown" and "text" output).
        llm_prompt: Optional system prompt override for the cleanup step.

    Returns:
        The persisted Conversion row.

    Raises:
        HTTPException: 422 for a disallowed file type or unsupported output
            format, 500 if conversion or the database write fails.
    """
    # An upload may arrive without a filename; treat that as "type not allowed"
    # (422) instead of failing with a TypeError further down.
    filename = file.filename or ""
    if not _allowed_file(filename):
        raise HTTPException(
            status_code=422,
            detail=f"File type not allowed. Allowed: {', '.join(sorted(ALLOWED_EXTENSIONS))}"
        )
    if output_format not in OUTPUT_FORMATS:
        raise HTTPException(
            status_code=422,
            detail=f"Output format not supported. Supported: {', '.join(sorted(OUTPUT_FORMATS))}"
        )

    suffix = os.path.splitext(filename)[1]
    file_type = suffix.lstrip(".").lower()

    # The converter is given a path, so spool the upload to disk with its original suffix.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        result = converter.convert(tmp_path)
        doc = result.document

        page_count = len(doc.pages) if hasattr(doc, "pages") and doc.pages else None

        if output_format == "markdown":
            content = doc.export_to_markdown()
        elif output_format == "json":
            content = json.dumps(doc.export_to_dict(), ensure_ascii=False, indent=2)
        elif output_format == "html":
            content = doc.export_to_html()
        elif output_format == "text":
            content = doc.export_to_markdown()
            # Only strip heading markers at the start of a line; an unanchored
            # pattern would also eat '#' inside ordinary text such as "Issue #12".
            content = re.sub(r"^#{1,6}[ \t]+", "", content, flags=re.MULTILINE)
            content = re.sub(r"\*\*(.+?)\*\*", r"\1", content)
            content = re.sub(r"\*(.+?)\*", r"\1", content)

        # LLM enrichment — only for markdown / text output, and only if requested
        llm_used = False
        if _llm_client and use_llm and output_format in ("markdown", "text"):
            content = _llm_enrich(content, system_prompt=llm_prompt or None)
            llm_used = True

        record = Conversion(
            filename=filename,
            file_type=file_type,
            output_format=output_format,
            content=content,
            page_count=page_count,
            llm_enabled=llm_used,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record

    except Exception as e:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The temp file was created with delete=False, so remove it explicitly.
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
async def get_conversion(conversion_id: int, db: AsyncSession) -> Conversion:
    """Load one conversion by primary key.

    Raises:
        HTTPException: 404 when no row has the given id.
    """
    by_id = select(Conversion).where(Conversion.id == conversion_id)
    found = (await db.execute(by_id)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Conversion not found")
|
||||
|
||||
|
||||
async def get_history(db: AsyncSession, limit: int = 20) -> list[Conversion]:
    """Return at most ``limit`` conversions, ordered by ``created_at`` descending."""
    newest_first = select(Conversion).order_by(Conversion.created_at.desc()).limit(limit)
    rows = await db.execute(newest_first)
    return rows.scalars().all()
|
||||
|
||||
|
||||
async def delete_conversion(conversion_id: int, db: AsyncSession) -> dict:
    """Delete one conversion by primary key.

    Returns:
        A dict with a single "message" key confirming the deletion.

    Raises:
        HTTPException: 404 when no row has the given id.
    """
    # Reuse get_conversion() so the lookup and its 404 response live in one place.
    record = await get_conversion(conversion_id, db)
    await db.delete(record)
    await db.commit()
    return {"message": f"Conversion {conversion_id} deleted"}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Ignore all input files (raw emails); keep the directory itself
|
||||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Ignore all output files (converted Markdown); keep the directory itself
|
||||
*
|
||||
!.gitignore
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,97 @@
|
|||
# IP Supply — Shipment extraction from 1.md (4 prompts)
|
||||
|
||||
Compiled from a full read of `example/1.md` (all 10,546 lines). SNs quoted exactly.
|
||||
|
||||
## Summary counts
|
||||
- **Prompt 1 — Shipped:** ~145 shipment/return sections, ~700+ individual SNs (each has a `t#` tracking or is a confirmed outbound PO/return). **Kept/held:** ~12 explicit groups.
|
||||
- **Prompt 2 — Items with issue:** ~24 groups / 30+ faulty/troubleshooting SNs.
|
||||
- **Prompt 3 — Transferred to AUS:** ~10 "Import Australia" shipments, 150+ SNs.
|
||||
- **Prompt 4 — xxxx-xxxx-xxxx codes (shipped/outbound):** 27 codes.
|
||||
|
||||
> Note: prompt 1's full per-SN list is very large. This file captures the analytical lists (2/3/4) in full plus prompt-1 kept items and the shipment index. For every SN under each shipment, see the corresponding section in `1.md`.
|
||||
|
||||
---
|
||||
|
||||
## Prompt 1 — Kept / NOT shipped
|
||||
- QSFP-40G-SR-BD lot `10-2945-02` (TNT Import) — "total 8 units only, the rest keep in US", "*not here yet"
|
||||
- 4x JNP-SFP-25G-SR — "*not here yet"
|
||||
- C9200-STACK ×28 + STACK-T4-50CM ×10 — "keep aside, will ship AUS" (see AUS list)
|
||||
- C9120AXE-B FJC28081EXK — "already shipped to CMS last wk" (do not reship)
|
||||
- QFX top-cover kept aside (for C9120AXE-B FJC27151F4C return)
|
||||
- QSFP-40G-LR4 SPC23020002 — "can't find it here"
|
||||
- M.2 SSDs / EM7455/EM7430 355813079879315 — "→ next shipment" (held)
|
||||
- WS-C2960X-48TS-L FOC2111S08L — NOB, staged for AUS
|
||||
- MR86-HW Q3AE-DMMG-D9KV — "Do NOT ship"
|
||||
|
||||
## Prompt 2 — Items with an issue (PID | SN | reason)
|
||||
- C9300-24P-A/E | FCW2303DHNT | "van bi loi"; earlier "BOOT FAIL W..."
|
||||
- C9120AXE-B | FJC27041KKE | TS24, "recover them"
|
||||
- C9120AXE-E | FGL2812LK6C | TS12
|
||||
- C9120AXE-B | FJC27102CXZ | "no console / chuyen sang line khac"
|
||||
- SUP32/PFC2/MSFC2 | SAL1338Z2TC / SAL1339ZJD8 / SAL1339ZGPX | "Traceback ... Error75"
|
||||
- C9300-24P-E | FCW2238S0BA | "line 5 bi loi"
|
||||
- NIM-2GE-CU-SFP | FOC23503PHV, FOC24331W30 | bad → returned
|
||||
- MV86-HW | Q3AE-EGKE-B3X2, Q3AE-SZD8-3PPD, Q3AE-6S3U-Y6N5, Q3AE-VWFF-6MGG, Q3AE-8P53-P74T, Q3AE-SFXM-7CK2 | "failed test / LED orange"
|
||||
- C8300-1N1S-6T | FLM290910DT, FLM280510M6 | "RFID module broken, can't fix"
|
||||
- WS-SUP32-GE-3B | SAL09402986 | "damaged power connector" (returned)
|
||||
- MR86-HW (6 units, no SN) | — | "LED orange when tested" (OSI PO-25694)
|
||||
- N5K-C5596UP | (TS17/TS21) | "no console"
|
||||
- C1111-8PLTEEAWB (2) | — | "no console after restart"
|
||||
- WS-C3750X-48P/24P-S | batch | "most were bad, scratches"
|
||||
- DS-C9148T-K9 | JPG2603008A | "can't unbend chassis/ports"
|
||||
- CISCO2821 | FTX1134A22A, FTX0950C0GD, FTX1418AKP2 | missing FAN/PSU/faceplate
|
||||
- ISR4331/K9 | FLM1940W0QB | "PASS WITH WARNING"
|
||||
- WS-C3560G-24PS-S | FOC1124Z45D | "no console, no faceplate"
|
||||
- C9120AXE-B (return) | FGL2812LPJG, FJC27151F4C | return; one "without top cover from AUS"
|
||||
|
||||
## Prompt 3 — SN transferred to AUS / Australia
|
||||
(dedicated "Import Australia" shipments — key SNs)
|
||||
|
||||
**C9200-STACK batch (t#1ZJW78500439236591):**
|
||||
SFP-10G-LRM: FNS22050YND, FNS23051B30, FNS23051B32, FNS230514Q4, FNS23051B20, OPM23170H22
|
||||
C9200-STACK (28 stated; only 24 SNs listed here — verify against `1.md`): JAE234004PC, JAE234004T3, JAE25360Z1S, JAE25360ZXV, JAE25360ZXW, JAE25360WQ5, JAE25210K5G, JAE25360U84, JAE25360U7W, JAE26440B4B, JAE26402RM2, FDO24260JB2, JAE2610039Z, JAE25210GZQ, FDO19321P8C, JAE25360VU0, FDO20120G3J, JAE23051476, JAE230601NH, FVH27510R69, JAE25161CTG, FDO24250TUS, FDO24250T0B, FDO24260G32
|
||||
STACK-T4-50CM (10): LCC2428GA36, LCC2618GGEJ, MOP2732A3H9, LCC2246GX01, LCC2625GFEE, MOC261800C4, MOC261801GT, MOC261700NW, MOP2732A3EX, LCC2320GCQW
|
||||
AIR-AP1562E-B-K9 FJC263610EP; EX-SFP-10GE-SR SCJ19YP0M8, SCJ19YP0V4, SCJ19YP02N, SAZH1TSH
|
||||
|
||||
**Import Australia UPS (t#1ZJW78500415223596):** UCSX-TPM-002C FCH270476WX, FCH26177EG0, FCH270476HD, FCH251572R7; PAN-SFP-PLUS-SR MU90S5H, MU60V2X, MU20TFU, MU211ZP; NIM-2GE-CU-SFP FOC250624L6, FOC250624QT, FOC24400EXW, FOC21452FDH, FOC263359EC; JNP-SFP-25G-SR S1A1M31A751BAB, S1A1M31A7517WR, S1A1M31A751BVZ, S1A1M31A751BFQ; NVME 22293A26BAF6, 22293A447730; NIM-ES2-8 SFOC194350CH, SFOC24484UR9
|
||||
|
||||
**Import Australia UPS (t#1ZJW78500423326317):** PWR-80W-AC DAB192754P8, DAB194450WP; MS120-8LP Q2BX-YLYW-556A; STACK-T4-50CM LCC2920G58F; XEON-6342 2A2109BF5F0F8383RKXA, 2A2134BF51728CD8RKXA, 2A226ABF4F28BBEARKXA, 2A2076BC995F5353RKXA, 3A4819C0FC3F792ARKXA, 3A481A1EB9C113CFRKXA; C9500-40X FJC254223DA
|
||||
|
||||
**Import Australia UPS (t#1ZJW78500431774363):** GLC-BX-D 080322BD001; N9K-C93180YC-EX FDO2252023P, FDO21482ECD, FDO21111F2A; WS-F6K-PFC3B SAD092705FY
|
||||
|
||||
**Import Australia Fedex/TNT (t#873308530323):** C9300-24T-A FJC2333S06H, FJC2333S05T; C9200-STACK (20) FVH29100T7W, FVH29100X6E, FVH2910151B, FVH29100XBB, FVH29100VEM, FVH29100TH7, FVH29100VQ1, FVH29100U1P, FVH29100TNL, JAE23180HMY, JAE25360VTV, FDO23290KA5, FDO24260J5C, JAE241108B5, FDO23290LAT, JAE26360XUR, JAE26382LDW, JAE25360VD8, FDO22263HYZ, FDO22040VZ4; C9200L-48P-4X-E FOC2608054S; WS-SUP32-GE-3B SAL12330S7D
|
||||
|
||||
**TNT/Fedex (make New in AUS, T#873256774277):** C9300-24T-E-WS FOC2721YA6K, FOC2721Y72K, FOC2648YCCD, FOC2721YA6F; WS-C2960X-48TS-L FOC2111S08L; C9120AXI FJC25432C7X, FJC25432CB7, FJC25432CA2, FJC25432C7S
|
||||
|
||||
## Prompt 4 — xxxx-xxxx-xxxx hyphenated codes (shipped/outbound)
|
||||
| code | context |
|
||||
|---|---|
|
||||
| 17-14693-01192 | ship PWR-115W-AC (DAB264260LD) |
|
||||
| 10-2945-02 | TNT Import — QSFP-40G-SR-BD lot (8 shipped) |
|
||||
| 10-14642-61136 | return C9300-24P-A (FJC2340S02R) |
|
||||
| 14-14726-39035 | ship MR36-HW (Q3AJ-49HS-7EJT) |
|
||||
| 04-14738-90912 | ship IPO-500-EXP-MOD (09FC19500851) |
|
||||
| 18-14713-58108 | ship 2x CP-840 (TCL251100KM, TCL2511007X) |
|
||||
| 02-14762-32221 | ship MA-MNT-MR-17-MR36 (CCA45679958) |
|
||||
| 17-14713-99371 | ship C9136I-ROW (FGL2702LXZV) |
|
||||
| 05-14746-55384 | ship VIC3-4FXS/DID (FOC14317P5Z) |
|
||||
| 02-14656-21410 | return WS-SUP32-GE-3B set |
|
||||
| 23-14741-66101 | ship AIR-ACC1530-PMK1 (JNV23453733) |
|
||||
| 18-14577-09030 | return 2x C9120AXE-B (FJC27141WFH, FJC24441S1F) |
|
||||
| 05-14777-19977 | ship 2x PVDM2-64 (FOC123546Z2) |
|
||||
| 03-14766-07874 | ship GLC-ZX-SM-RGD (SPC1432030T) |
|
||||
| 08-14781-17608 | ship 2x PWR-C1-440WDC (ART2210FCJL, ART2210FCDR) |
|
||||
| 27-14612-29455-P01 | AUS — Micron MTFDDAV240TDS |
|
||||
| 22-14679-31676 | return NIM-2GE-CU-SFP (FOC24331W30) |
|
||||
| 26-14668-35871 | return NIM-2GE-CU-SFP (FOC23503PHV) |
|
||||
| 17-14781-37655 | ship EX-UM-4SFPP-MR (XS0221420145) |
|
||||
| 14-14782-66558 | ship 2x WS-C3560G-24PS-E |
|
||||
| 18-14787-11692 | ship VIC3-4FXS/DID (FOC18375W7G) |
|
||||
| 16-14788-52149 | ship VIC3-4FXS/DID (FOC16036FFK) |
|
||||
| 21-14778-64209 | ship PVDM2-64 (FOC13313DX8) |
|
||||
| 14-14784-46115 | ship VIC3-4FXS/DID (FOC17361MNE) |
|
||||
| 13-14800-40754 | ship ASR-9902-2P-KIT |
|
||||
| 15-14694-40587 | return 2x MR86-HW |
|
||||
| 03-14718-50867 | return 2x PWR-C1-350WAC-P |
|
||||
|
||||
Excluded (incoming, not outbound): 12-14747-30779 (eBay receiving), 24-14738-31605 (return never completed).
|
||||
|
|
@ -0,0 +1,357 @@
|
|||
# IP Supply — Joe Bui → Daniel Vu Shipment Analysis
|
||||
|
||||
Compiled from a full read of `example/1.md` (all 10,546 lines). SNs are quoted exactly as written (including inconsistent lowercase and leading-`S`/`X` prefixes). Items #2, #3, #4 are cross-referenced against the shipped context.
|
||||
|
||||
---
|
||||
|
||||
## 1. SN Shipped vs SN Kept
|
||||
|
||||
### A) SHIPPED (grouped by section — each has a `t#` tracking number or is a confirmed outbound customer/PO shipment)
|
||||
|
||||
**1x C9300-24P-E → Network Craze (BTV) PO-P30531 — `t#1ZJW78500211632137`** (PID C9300-24P-E)
|
||||
FOC2427U033, LIT24223GHD (PWR-C1-715WAC), NID2425W1DA4, NID2421W0MS6, NID2410W0EUZ
|
||||
|
||||
**1x C9200CX-12P-2X2G-E → Summit 360 PO-75588 — `t#1ZJW78500335076497`**
|
||||
FJC28281TSH
|
||||
|
||||
**6x MA-MNT-MR-16 → OSI Global PO-25740 — `t#1ZJW78500318099129`**
|
||||
BCAA0004946, BCAA0004888, BCAA0004880, BCAA0004892, BCAA0004968, BCAA0004878
|
||||
|
||||
**15x PWR-C1-715WAC-P → Curvature PO-1360562-I — `t#381683654045`**
|
||||
ART2217P09A, DCC2304C1DA, DCC2805C48Y, DCC2823C1YN, ART2302P9CR, ART2302P9Y1, ART2302P9T4, ART2250P7JZ, DCC2530C4XE, DCC2541CJUN, DCC2541CGZK, DCC2541CJYF, DCC2541CJY7, ART2350P4N2, ART2302P9PF
|
||||
|
||||
**2x N540-RCKMT-19 → Procurri PO-AT 109692 — `t#872413464140`** (no unit SNs listed)
|
||||
|
||||
**40x GLC-BX-D / GLC-BX-U → DHD PO-72125 — `t#1ZJW78500335530889`** (PID GLC-BX-D / GLC-BX-U)
|
||||
ACW2853072T, ACW285306VG, ACW28530746, ACW2853074J, ACW2853074F, ACW285306V2, ACW285306XH, ACW28530757, ACW28530753, ACW2853074B, ACW2853073W, ACW285306XE, ACW285306WU, ACW2853073N, ACW2853072B, ACW28530755, ACW2853072V, ACW285306WW, ACW2853074K, ACW28530747, ACW285306XQ, ACW28530750, ACW2853072M, ACW2853073Q, ACW285306X0, ACW285306WZ, ACW285306X9, ACW285306WN, ACW2853075D, ACW2853074S, ACW28530749, ACW285306VQ, ACW2853075F, ACW2853075E, ACW285306V1, ACW28530751, ACW285306XJ, ACW285306WH, ACW28530754, ACW28530743, NDG21190929, NDG21364652, NDG21190449
|
||||
|
||||
**15x C9300-NM-8X → Curvature PO-1360736-I — `t#391683375950`**
|
||||
FJZ27177878, FOC250718RD, FJZ23141KYB, FOC21351WND, FJZ243504TU, FJZ23090CJC, FJZ24131BYC, FOC2243931X, FJZ23141MB3, FJZ23300DKJ, FOC221047UU, FJZ264676Q6, FJZ2436096X, FOC22442R41, FOC2243539R
|
||||
|
||||
**5x JNP-FAN-1RU → Terabit PO-2631 — `t#872509139509`** (new SNs SBCFX…, old SNs listed too)
|
||||
New: SBCFX3650, SBCFX3630, SBCFX3659, SBCFX3654, SBCGB5835 | Old: WF0216430251, WF0216430522, WF0216430139, WF0216430127, JNV23455010
|
||||
|
||||
**TNT Import Economy — `t#872479468286`** (mixed AUS/import box; see also list 3)
|
||||
PWR-C49E-300AC-F: QCS18391UD9, PAC18385DBZ | 14x C9120AXI: FJC25432C6Y, FJC25432C5G, FJC25432C9T, FJC25432CA1, FJC25432C7Q, FJC26361XQC, FJC25432C7T, FJC25432C5D, FJC25432C3D, FJC25432CAK, FJC25432C4F, FJC25432CBA, FJC25432C73, FJC25432C74 | QSFP-40G-SR-BD lot `10-2945-02` (see list 4) | C9200L-48P-4X-E: JAE263024R0, JAE263023SP
|
||||
|
||||
**10x AIR-ACC1530-PMK1 → Curvature PO-1360802-I — `t#381737217720`**
|
||||
JNV23453768, JNV23453766, JNV23453765, JNV23453767, JNV23453769, JNV23453771, JNV23453773, JNV23453779, JNV23453775, JNV23453777
|
||||
|
||||
**7x GLC-BX-U → DHD PO-72125 — `t#1ZJW78500326506931`**
|
||||
SNDG19490387, SNDG19490597, SNDG19490604, SNDG19490723, SNDG19490316, SNDG19490724, SNDG19491086
|
||||
|
||||
**100x C9120AXE-B-REF → CMS PO-0272806 — `t#872507852844`** (28x, PID C9120AXE-B)
|
||||
FJC27081CA2, FJC24381DM7, FJC24381E8W, FJC24311C2L, FJC24381DV4, FJC24401SHB, FJC250618AX, FJC24331ATB, FJC242916V0, FJC24381DV2, FJC250618KG, FJC2428180H, FJC24381DV9, FJC27102DKZ, FJC24511LK7, FJC24381DUJ, FJC27081CAY, FJC27081CA0, FJC27081CAX, FJC27081CA1, FJC25331KQW, FJC24401SKB, FJC242916W2, FJC242612Y5, FJC242817ZG, FJC270918G9, FJC27043K60, FJC271823UE
|
||||
|
||||
**1x C892FSP-K9 → DHD PO-72258 — `t#1ZJW78500336414128`**
|
||||
FGL1741211R (chassis), DTH2022D4TT (PSU)
|
||||
|
||||
**2x FAN-T2 → NetworkTigers PO-260601-5** (no t# but confirmed outbound blind ship)
|
||||
ACI232100KY, ACI232104V7
|
||||
|
||||
**4x MA-MNT-MR-16 → OSI Global PO-25782 — `t#1ZJW78500312990541`**
|
||||
BCAA0004882, BCAA0004886, BCAA0004685, BCAA0004681
|
||||
|
||||
**1x PWR-115W-AC (NIB) → ship** `17-14693-01192` — DAB264260LD (see list 4)
|
||||
|
||||
**12x GLC-FE-100FX-RGD → Curvature PO-1360771-C — `t#381735687048`**
|
||||
ACW25411AVL, ACW25411AVT, ACW25411AVU, ACW25411AVW, ACW25411AVM, ACW29350R80, ACW26092B5L, ACW25401EX5, ACW25401EWT, ACW25401EWS, ACW29350R5B, ACW29350R61
|
||||
|
||||
**2x C9300-24P-A → PIVIT PO-256093/255979 — `t#872564644148`**
|
||||
FCW2426D13K, FOC2432L240 | PWR: LIT23172YE6, LIT241822YE | FAN: NID2425W1DUR, NID2425W1DA9, NID2325W096X, NID2425W1D7F, NID2425W1D7G, NID2410W0EUY
|
||||
|
||||
**2x C9200L-48P-4X-E → OSI Global PO-25828 — `t#1ZJW78500338590210`**
|
||||
JAE254300DF, JAE254300C2 | PWR-C5-1KWAC: DCI25251CV2, ART2526D03V
|
||||
|
||||
**3x C9200CX-12P-2X2G-E → OSI Global PO-25601 — `t#1ZJW78500323402987`**
|
||||
FJC28311W43, FJC29281QU8, FJC28281TVG
|
||||
|
||||
**2x C9500X-ACCKIT-19I → Curvature PO-1360186-I — `t#381765230154`**
|
||||
JNV23455406, JNV23455408
|
||||
|
||||
**2x PWR-500-DC-F → DHD PO-72280 — `t#1ZJW78500333086177`**
|
||||
L101QR002SANP, L375SL00J2AHP
|
||||
|
||||
**1x C9500X-ACCKIT-19I → Network Craze PO-P30393 — `t#1ZJW78500126302164`**
|
||||
JNV23455398
|
||||
|
||||
**48x C9120AXE-B-REF → CMS PO-0272591 — `t#872563690107`** (16x)
|
||||
FJC24381E2G, FJC2420116U, FJC25111HC6, FJC25101JED, FJC271823UB, FJC271823UC, FJC242916W1, FJC270918G7, FJC24381DU8, FJC242916UX, FJC27081CAU, FJC262921GQ, FJC24401SL1, FJC242916V3, FJC262922NZ, FJC26261DEM
|
||||
|
||||
**1x C9300-24P-A return** `10-14642-61136` (return context — see list 4): FJC2340S02R + C9300-NM-8X FJZ23370ZN8, PWR LIT23272YEP, PWR ART2327D11J, FAN NID2233W0GF0/NID2233W0GEZ/NID2233W0GCP
|
||||
|
||||
**5x AIR-ACC1530-PMK1 → Curvature PO-1360813-I — `t#381764510886`**
|
||||
JNV23453780, JNV23453782, JNV23453784, JNV23453786, JNV23453788
|
||||
|
||||
**3x MA-MNT-MR-17 → PIVIT PO-256336 — `t#1ZJW78501322986956`**
|
||||
Bracket: JNV23455338, JNV23455340, JNV23455342 | Screw-kit: JNV23455022, JNV23455014, JNV23455024
|
||||
|
||||
**10x SFP-10/25G-LR-S → Summit 360 PO-75910 — `t#1ZJW78500335396543`**
|
||||
SINL2926020T, SINL292605CM, SINL292602ZQ, SINL292605JM, SINL2926035S, SINL292604R8, SINL292602KR, SINL29260305, SINL292605K0, SINL292605J9
|
||||
|
||||
**4x NXA-PAC-1100W-PE2 → ChannelWorks/Relutech PO-14846 — `t#872562736621`**
|
||||
ART2432F5YX, ART2432F5XJ, ART2336F4BH, ART2432F5TX
|
||||
|
||||
**14x MEM-C8200L-8GB → PO-23836** (blind ship; arrived pack 1ZW5715F0492097333)
|
||||
80AD011836824D7C05, 80AD01202343D248EC, 80AD01174932A1F851, 80AD011836824D7BFA, 80AD011836824D7D98, 80AD011836824D7D9C, 80AD01202343D2493A, 80AD01202343D248C4, 80AD01202343D2492F, 80AD01202343D248BD, 80AD011836824D7D9E, 80AD011836824D7C27, 80AD011836824D7C03, 80AD011836824D7C0F
|
||||
|
||||
**1x CISCO2511-DC → Strategic Telecom PO-5602**: chassis 251100148 / line16 SN 16203226; DC PSU from CISCO2511-DC 251362180. *(tracking t# ambiguous in source — see caveats)*
|
||||
|
||||
**1x C9300-48P-E → OSI Global PO-25874 — `t#1ZJW78500334596641`**
|
||||
FCW2411G0D2, DCC2352C5BZ (PWR), NID2403W0WGW, NID2403W0WGX, NID2403W0WGR (FAN)
|
||||
|
||||
**1x C9K-PWR-930WDC-R + CAB → Alliance Networking PO-13500 — `T#872621854979`**
|
||||
LIT23142VGU, JNV23455373
|
||||
|
||||
**1x AIR-CT2504-50-K9 → InTown Suites PO-1511** (no t#): PSZ19311FL7, PSU dab192152nt
|
||||
**1x AIR-CT2504-50-K9 → InTown Suites PO-1512** (no t#): PSZ19311BTJ, PSU DAB184550PZ
|
||||
|
||||
**1x MEM-4400-8G → Curvature PO-1360805-C — `t#381792604872`**: MT18KSF1G72AZ-1G6E1
|
||||
|
||||
**1x WS-C3560CX-8XPD-S → Network Craze PO-P30632 — `t#1ZJW78500234219032`**: FOC1943W1GE
|
||||
|
||||
**9x MA-MNT-MR-17 → CXtec PO-D1238993 — `t#1ZJW78501207890357`**
|
||||
Bracket: JNV23455339, JNV23455341, JNV23455343, JNV23455345, JNV23455347, JNV23455301, JNV23455303, JNV23455302, JNV23455304 | Screw-kit: JNV23455230, JNV23455403, JNV23455401, JNV23455399, JNV23455397, JNV23455409, JNV23455411, JNV23455407, JNV23455405
|
||||
|
||||
**4x MA-MNT-MR-17 → Network Craze PO-P30602 — `t#1ZJW78500234078220`**
|
||||
Bracket: JNV23455344, JNV23455346, JNV23455348, JNV23455337 | Screw-kit: JNV23455243, JNV23455241, JNV23455245, JNV23455247
|
||||
|
||||
**1x C3KX-PWR-440WDC → Tri Tower PO46160 — `t#872619441290`**: ART1851F57V
|
||||
**1x NXA-FAN-160CFM-PE → ChannelWorks PO-14860 — `t#872619260338`**: ACI2302029Q
|
||||
**1x DS-C9132T-K9 → PIVIT PO-256275 — `t#872615037247`**: JPG2303002Z; PSU ART2311FQQ2/ART2311FQQ3; FAN DCH2309RBW6/DCH2309RBU1/DCH2309RBWC/DCH2309RBUK
|
||||
**2x ISR4431/K9 → Procurri PO-AT 110114 — `t#872670021815`**: FJC2216A0RJ, FJC2216A0NW; PSU LIT220636NW/LIT220636LD
|
||||
**1x FACE-C9200L-48P-4X → P3 Systems PO-16605 — `t#872669917879`**: jnv23455079
|
||||
**2x ONS-SI-GE-EX → OSI PO-25799 — `t#1ZJW78500319978178`**: OPC14490579, OPC14490266
|
||||
**10x CP-840-BUN-K9 → Curvature PO-1359761-C — `t#381809072478`**: TCL283100SH, TCL283101CD, TCL283100SB, TCL283100W4, TCL283101KW, TCL28310148, TCL2831014A, TCL283101D7, TCL2831015E, TCL283101EK; CP-840-BAT VK24061166691, VK24061166724, VK24061161095, VK24061166799, VK24061167605, VK24061167630, VK24061166466, VK24061166507, VK24061166679, VK24061160412
|
||||
**1x ISR1100-4G → Cuadrant PO-20029** (no t#): FCH2324L000, PSU DAB2630Y34N
|
||||
**5x MA-MNT-MR-16 → OSI PO-25913 — `t#1ZJW78500308687566`**: BCAA0004684, BCAA0004696, BCAA0004690, BCAA0004686, BCAA0004697
|
||||
**2x WS-C3850-24XU-E → Curvature PO-1360972-I — `t#381820077820`**: FCW2049D07B, FOC2022X1EY; PWR LIT1928181A/LIT193501E6
|
||||
**1x C9200CX-12P-2X2G-E → Summit 360 PO-75588 — `t#1ZJW78500306899233`**: FJC28311W9J
|
||||
**8x ASR1001-X-PWR-DC → Whaley PO-11876 — `t#1ZJW78500312491643`**: DTN1826W0B4, DTN2423W00N, DTN1826W0A8, DTN2423W02W, DTN2423W03U, DTN1826W0BK, DTN2423W00R, DTN1826W08S
|
||||
**1x C9200L-48P-4X-E → PIVIT PO-256499 — `t#872810044838`**: JAE254300TZ; PWR ART2518D02V
|
||||
**4x C9K-PWR-930WDC-R → Alliance PO-13501 — `t#872814251278`**: LIT23292H6V, LIT23142UVQ, LIT23142VEP, LIT23292GZ5; CAB JNV23455327, JNV23455376, JNV23455375, JNV23455374
|
||||
**2x PWR-C1-440WDC → OSI PO-26035 — `t#1ZJW78500310136665`**: ART2210FCD6, ART2045F6AN
|
||||
**1x MR36-HW → ship** `14-14726-39035`: Q3AJ-49HS-7EJT; MA-MNT-MR-17-MR36 bracket JNV23455306, screw JNV23455232
|
||||
**1x IPO-500-EXP-MOD (ship)** `04-14738-90912`: 09FC19500851
|
||||
**32x C9120AXE-B-REF → CMS PO-0272591 — `t#872795599363`** (14x): FJC2629226K, FJC26261MLJ, FJC271823UD, FGL2812LK6C, FJC253813PF, FJC253813N5, FJC242817ZV, FJC242916WK, FJC270918FR, FJC262921HB, FJC26291G8Q, FJC262921KW, FJC24511N52, FJC26261DF0
|
||||
**2x CP-840 (ship)** `18-14713-58108`: TCL251100KM, TCL2511007X; BAT VK20111200194, VK21010862870
|
||||
**1x NCS-5501 → Alturna PO-3029066 — `t#1ZJW78500421598260`**: FOC2240R0AT; PSU POG22138QA8/POG22138Q4A; FAN fox2240p4lk/fox2240p4ld
|
||||
**2x CP-840-BUN-K9 → Curvature PO-1359761-C — `t#381920793553`**: TCL2831014P, TCL2831013M; BAT VK24061167091, VK24061167651; CP-800-USBCH BCAA0003455, BCAA0003454
|
||||
**2x C9300-NM-2Y → PICS POUS021998 — `t#1ZJW78501219294694`**: FVH27311HXT, FVH271412E4
|
||||
**12x MA-MNT-MR-17 → Network Craze PO-P30763 — `t#1ZJW78500212235289`**: Bracket JNV23455305/07/09/11/13/15/17/19/21/23/08/10; Screw-kit JNV23455248/42/46/38/44/31/29/40/35/33/06/04
|
||||
**22x GLC-BX-U → DHD PO-72125 — `t#1ZJW78500303941272`**: ACW260317J1, ACW253814ZC, ACW22420VHX, ACW22360JX5, ACW22280L75, ACW25300BBS, ACW260317HU, ACW25300BC6, ACW260317HP, ACW25300BBL, ACW260317HR, ACW254116WP, ACW260317HL, ACW253814Z8, ACW260317HM, ACW253814ZK, ACW260317HK, SACW223704XE, SACW222004N6, SACW22320433, SNDG21330838, SNDG21316271
|
||||
**1x C9136I-ROW (ship)** `17-14713-99371`: FGL2702LXZV
|
||||
**1x C9K-T1-FANTRAY → CentricsIT PO-PPO161840 — `t#872868140028`**: ACI240502AL
|
||||
**1x VIC3-4FXS/DID (ship)** `05-14746-55384`: FOC14317P5Z
|
||||
**3x CW9166I-MR → OSI PO-26130 — `t#1ZJW78500339659109`**: WNH27251NKD, WNH27132J0E, WNH272727QY
|
||||
**5x FAN-T2 → P3 Systems PO-16613 — `t#873284420010`**: NID2322W0YG1, NID2322W0X6Z, NID2321W0J16, NID2322W0X6V, NID2543W12ZR
|
||||
**1x IE-3300-8T2S-E → LSI PO189062 — `T#872974553634`**: FCW2538YNGA
|
||||
**2x C9300-NM-8X → DHD PO-72397 — `t#1ZJW78500305025320`**: FOC21351W2T, FOC22311G6R
|
||||
**1x ISR4321-AX/K9 → OSI PO-26102 — `t#1ZJW78500329476692`**: FLM2235W0XR; PSU PST22398CLI
|
||||
**12x C9300-NM-8X → OSI PO-26122 — `t#1ZJW78500313613714`**: FOC241359GB, FJZ2519DM8M, FJZ25192EFE, FOC22363USG, FJZ262617SE, FOC25201VKC, FOC25201W04, FOC25201VE1, FOC23325Q9G, FOC25212688, FOC233148RN, FOC2521268E
|
||||
**2x C9200CX-12P-2X2G-E → CXtec PO-D1239684 — `t#1ZJW78501218435506`**: FJC28311VYA, FJC28281TSG
|
||||
**30x GLC-LH-SMD → Curvature PO-1361175-I — `t#381970115329`**: SAVJ205030ZH, SAVJ205030G6, SAVJ205030J5, SAVJ205030ZF, SAVJ205030JC, SAVJ205030JB, SAVJ205030S2, SAVJ205030ZZ, SAVJ205030S6, SAVJ19523TYX, SAVJ205030HN, SAVJ205030ZX, SAVJ205030Z5, SAVJ205237CE, SAVJ21093372, SAVJ19523187, SAVJ1952318U, SAVJ1952319G, SAVJ19523W6F, SAVJ2018345P, SAVJ1952325Z, SAVJ1952317W, SAVJ19523174, SAVJ212830YC, SAVJ205030G1, SAVJ205030H6, SAVJ205030LX, SFNS17290RWE, SFNS17290RVC, SFNS17290KG0
|
||||
**20x PWR-C1-715WAC-P → Curvature PO-1360734-I — `t#381939491149`**: LIT2732C7SJ, LIT2724CTA3, LIT2724CT2Q, LIT2724CTDQ, LIT2724CTMN, LIT2736A692, LIT2736A6LG, LIT2724CTSH, LIT2736A69F, LIT2736A6WK, DCC2503CGS5, DCC2322CA43, DCC2322CA2N, ART2250P7J0, DCC2322C9VT, ART2302P40J, DCC2709C15T, DCC2326C947, DCC2326C1G2, ART2252PEMA
|
||||
**2x N540-PWR400-A → Choice IT PO-9963 — `t#872971860863`**: LIT2833A9PW, LIT2633A3FW
|
||||
**1x CW9162I-MR + brackets → OSI PO-26157 — `T#1ZJW78500329611133`**: Q5AA-JNPC-SYDC
|
||||
**11x JNP-FAN-1RU → Terabit PO-2637 — `t#873024723380`**: SBCFX3636, SBCFX3638, SBCFX3629, SBCFX3637, SBCFX3631, SBCFX3633, SBCFX3626, SBCFX3630, SBCFX3628, SBCFX3627, SBCFX3639
|
||||
**2x C9300L-48P-4G → Optimum Data PO148044 — `T#873023423180`**: jnv23454010, jnv23454014
|
||||
**2x PAN-SFP-PLUS-SR → Curvature PO-1361282-C — `t#381995817390`**: AS21M9F, MUJ1XLG
|
||||
**1x DS-C9132T-K9 → PIVIT PO-256275 — `t#873022693197`**: JPG242700E5; PSU ART2343F4DR/ART2428F2FS; FAN DCH2450R9W5/DCH2450R9VX/DCH2425RWR8/DCH2425RWQ9
|
||||
**4x C892FSP-K9 → DHD PO-72376 — `t#1ZJW78500318706338`**: FGL17392360, FGL1740222S, FGL1740224X, FGL1740224E; PSU DTH2022D4L2/DTH2022D4QA/DTH2022D4SH/DTH2022D4M3
|
||||
**5x C8300-1N1S-6T → PICS POUK014158 — `t#6557395235`**: FLM27161325, FLM280411TT, FLM2508115B, FLM26031027, FLM284510NN; PWR-CC1-250WAC LIT2627AYQP, LIT2537AWA5, LIT2633A4Z9, LIT2633A5CK, LIT24303K7W, LIT2633A4J9, LIT2541CP77, LIT2541CQPG, LIT2633A9WZ, LIT24303KP9
|
||||
**4x C9300-NM-2Y → Network Craze PO-P30694 — `t#1ZJW78500210812382`**: SFVH30060WU5, SFVH30060X35, SFVH30060WS4, SFVH30060X1N
|
||||
**1x MA-MNT-MR-16 → Network Craze PO-P30630 — `t#1ZJW78500231876748`**: BCAA0004691
|
||||
**5x PWR-C5-125WAC → Curvature PO-1361370-I — `t#382045593645`**: LIT224646LA, LIT25132RKW, LIT252022AP, LIT25132Q3X, ART2515DD6L
|
||||
**2x MA-MNT-MR-16 → OSI PO-25945 — `t#1ZJW78500314925760`**: BCAA0004693, BCAA0004692
|
||||
**1x SFP-10/25G-LR-S → Network Craze PO-P30877 — `t#1ZJW78500114844379`**: INL292605CE
|
||||
**1x SFP-10/25G-LR-S → Network Craze PO-P30665 — `t#1ZJW78500208198186`**: INL2926049T
|
||||
**100x C9120AXE-B-REF → CMS PO-0272806 — `t#872714085922`** (21x): FJC27102CXY, FJC27102CSS, FJC27041KKE, FJC27081CAW, FJC27041K9U, FJC27041K89, FJC25081ALQ, FJC27081CA3, FJC25101JGZ, FJC25061LWW, FJC24381DUC, FJC26291GP3, FJC242916V4, FJC24381DWD, FJC250618B7, FJC25101JJW, FJC24381E5G, FJC25101JJV, FJC27102CXZ, FJC24441S1F, FJC27141WFH
|
||||
**1x ISR4431-VSEC/K9 → OSI PO-25916 — `t#1ZJW78500338315053`**: FJC1938D0HL; PVDM4-64 FOC20325TWL; PWR LIT192606KN/LIT192606UZ
|
||||
**1x GLC-ZX-SM-RGD (ship)** `03-14766-07874`: SPC1432030T
|
||||
**2x PVDM2-64 (ship)** `05-14777-19977`: FOC123546Z2 (then FOC12372KLW)
|
||||
**2x C9130AXI-K → ship (PP Invoice 151709)**: KWC25280F99, KWC25280F98 (then KWC25280FO6, KWC25280FIZ)
|
||||
**10x MA-ANT-27 → Curvature PO-1361163-C — `t#382071721362`** (only 9 SNs listed): JNV23455500, JNV23455498, JNV23455502, JNV23455504, JNV23455506, JNV23455508, JNV23455510, IPSNSN00406, IPSNSN00407
|
||||
**10x C9500-ACCKITH-19I → Curvature PO-1361430-I — `t#1ZJW78500317898428`**: BCAA0001977, BCAA0001825, BCAA0001834, BCAA0001832, BCAA0001978, BCAA0001830, BCAA0001836, BCAA0001987, BCAA0001983, BCAA0001833
|
||||
**1x PWR-C1-1100WAC + 1x C9300-48P-E → OSI PO-26259 — `t#1ZJW78500334123151`**: DTN2045V211; C9300-48P-E FOC2502L5ZA; FAN DCH2315RLG8/ACI29021K92/DCZ274108RR; PWR LIT2732C77P
|
||||
**8x WS-C2960L-16TS-LL → Network Craze PO-P30928 — `t#1ZJW78500208572815`**: FCW2117A3H3, FCW2222A3HU, FCW2204A60Q, FCW2212A3JN, FCW2212A3HN, FCW2121A5NT, FCW2204A60T, FCW2212A3MP
|
||||
**2x EX3400-24P → OSI PO-26193 — `t#1ZJW78500309505796`**: NW3619150623, NW0221080057; PSU 1EDX933091Y/1EDX93308VC/1EDX8450CFG/1EDXA390HGS; FAN VS0219111481/VS0219111477/VS0218393632/VS0218393630
|
||||
**1x C9300-24P-E → ACE IT PO-21047**: FOC2432L2T3; PWR LIT2724CTPM; FAN DCH2340R5US; NID2325W09CK, NID2421W0MS7
|
||||
**2x ASR1001-X-PWR-DC → DHD PO-72431 — `t#1ZJW78500111913844`**: DTN1826W0JX, DTN2617W05R
|
||||
**15x MV32-HW → Curvature PO-1361001-C — `t#1ZJW78500129707203` + `t#382142640681`**: Q2PV-2885-9WH5, Q2PV-599H-DNSD, Q2PV-C6EG-KB2J, Q2PV-A6NU-EQX8, Q2PV-DL34-7UEG, Q2PV-WP5A-5HGM, Q2PV-KQL5-GFS3, Q2PV-VPCA-HBCB, Q2PV-LQ7U-3RFS, Q2PV-GQMH-F5LJ, Q2PV-5ZQ9-5DD8, Q2PV-VW84-729Z, Q2PV-GFSK-KDPC, Q2PV-3X3G-2QU2, Q2PV-EJ4U-9EG4
|
||||
**2x PWR-C1-440WDC (ship)** `08-14781-17608`: ART2210FCJL, ART2210FCDR
|
||||
**20x GLC-LH-SM + 4x C9300L-STACK-KIT + 12x PWR-C1-1100WAC-P → Recurrent PO-P54352 — `t#873227845989`**: GLC-LH-SM OPA13145127, OPC11490602, OPC13452818, OPA12488354, OPA12461629, FNS1246118P, OPC12501727, OPA15380628, OPC12471538, OPC11440385, OPC10271313, FNS12240PMS, FNS1227077E, OPC11241282, AGC1131U7JP, AGC1346UBQH, OPA14370699, FNS11300241, OPA12486510, OPC11241469 | STACK-KIT FOC26247L3T, FOC25340E9D, FOC26484MCW, FOC254126JM, FOC25412EY3, FOC26213ULK, FOC25331HE5, FOC26247F8J + STACK-T3-50CM LCC2427PS6Q/LCC2427PR5P/LCC2427PJYI/LCC2427QZDW | PWR ART2231FKAQ, DCC2241DAJ1, QCS24053HRC, ART2231FKDK, DCC2511DY95, DCC2412D6E2, DCC2241DAE8, QCS24083FK4, ART2231FKDC, DCC2241DAGY, DCC2241DAG9, DCC2241DAH2
|
||||
**15x MR86-HW → OSI PO-25694 — `t#1ZJW78500303053437`**: Q3AE-AY5R-YV22, JNV23455414, Q3AE-2MKJ-V78D, Q3AE-3PRL-VL68, Q3AE-52NF-QY6E, Q3AE-RNQP-HXGT, Q3AE-9FKB-VBJ7, Q3AE-DDPF-UH9A; mounts cca45680063/065/073/151/165/163/149/157 (DO NOT ship Q3AE-DMMG-D9KV)
|
||||
**1x ISR4321-AX/K9 → OSI PO-26102 — `t#1ZJW78500329796793`**: FLM2043W0ZU
|
||||
**1x ACS-1900-RM-19 → Townsend PO-25552 — `t#873227456345`** (kit, no unit SN)
|
||||
**13x PWR-C1-1100WAC-P → Curvature PO-1361500-C — `t#382097988184`**: DCC2412D6F5, DCC2726D600, DCC2511DUJY, ART2445P0ZL, DCC2731D677, DCC2604DF8R, DCC2412D6GA, DCC2512D6P3, DCC2452DLB1, DCC2444D5UA, DCC2224D0D3, QCS24083FF7, DCC2509DLKS
|
||||
**4x STACK-T4-1M → NetworkTigers PO-260617-9**: LCC2250H0D6, LCC2249HJR3, LCC2250H0CT, LCC2249HJR4
|
||||
**1x C9300-NM-2Y → Network Craze PO-P30954 — `t#1ZJW78500230187182`**: SFVH30060X33
|
||||
**1x C9124AXE-B → Curvature PO-1361442-C — `t#382097919736`**: FJC2718199M
|
||||
**12x SFP-10/25G-LR-S → Knowledge Computers PO-131683 — `t#873226458385`**: MZH2823028E, MZH28230287, MZH282301J2, MZH282301J1, MZH282301J0, MZH282301J7, MZH282301JG, MZH282301HH, MZH282301H3, MZH282301H4, MZH282301H5, MZH282301H6
|
||||
**5x C8300-1N1S-6T → PICS POUS022509 — `t#873226023861`** (4x): FLM283610SG, FLM2837104P, FLM251010SH, FLM290910DT; PWR-CC1-250WAC LIT2537AVZ7/LIT2537AWF6/LIT2627CLY6/LIT2633A462/LIT2627CP7W/LIT2627AYNU/LIT2537AWAE/LIT2633A4MX
|
||||
**4x PVDM3-256 → Datacom PO-8202**: FOC15201M24, FOC19496SRC, FOC16064Z1Z, FOC1542480P
|
||||
**1x JNP-FAN-1RU → Terabit PO-2637 — `t#873282114550`**: SBCFX3604 (old WF0219412892)
|
||||
**3x N6K-C6001-FAN-B → Curvature PO-1361562-C — `t#382123623753`**: FOX2052PDHN, FOX2052PDE0, FOX2052PDGZ
|
||||
**4x C9300-24T-E-WS → (TNT) — `T#873256774277`**: FOC2721YA6K, FOC2721Y72K, FOC2648YCCD, FOC2721YA6F; pwr-c1-350wac-p DCC2647B46Z/DCC2721B0TT/DCC2721B0UH/DCC2721B0U3
|
||||
**2x DS-C9148T-48PETK9 + PSU/FAN/rail → OSI PO-26316 — `t#1ZJW78500308687477`**: JPG2603008A, JPG27110023; PSU ART2311FQQ1/ART2311FQZW/ART2350F0R3/ART2451F407; FAN DCH2450R9UE/DCH2425RWR5/DCH21320U0Z/DCH2450R9U6/DCH2309RBTW/DCH2309RBUD/DCH2309RBTY/DCH2309RBW9; rail JNV23455439/JNV23455441
|
||||
**1x C9120AXE-B → OSI PO-26027 — `T#1ZJW78500319054862`**: FJC26261DFS
|
||||
**3x C9300-24P-A → Trifecta PO-309537 — `t#873281900663` + `T#873324218515`**: FCW2426D13K, FJB2314B091, FJC2322S0UF; PWR LIT2724CTLN/LIT2724CTED/LIT2724CT36; FAN DCH2302RED8/DCH2302R76V/DCH2431REXC
|
||||
**2x NXK-ACC-KIT-1RU → Procurri PO-AT 111099 — `t#873281039577`**: JNV23454848, JNV23454854
|
||||
**1x PWR-500-DC-F → DHD PO-72280 — `t#1ZJW78500305395654`**: L101SL000QAPP
|
||||
**50x SFP-10G-LR → Curvature PO-1361607-I — `T#382142894893`**: acw260519b3, acw2605199k, acw2605199g, acw260519ak, acw2605199u, acw26091r0c, acw26091qyz, acw26091r01, acw26091qz5, acw26091qzu, acw252937ad, acw252935bd, acw252934vc, acw252933w4, acw252933w0, acw26110xwb, acw26110xwj, acw26110xxe, acw26110xx2, acw26110xwr, acw25291p6p, acw25291p75, acw25291p6s, acw25291p6a, acw25291p69, acw26110zdk, acw26110zet, acw26110zdl, acw26110zdb, acw26110zf1, acw2615189s, acw261518an, acw261518ak, acw2620042v, acw26200423, acw26192t8x, acw26192t8z, acw26192t8l, acw26192t8j, acw26192t92, acw26110yam, acw26110yap, acw26110ybw, acw26110yhe, acw26110yjb, acw26041w0q, acw26041w13, acw26041w1s, acw26041w1p, acw26041w0a
|
||||
**6x C9300-NM-8X + 10x SFP-10G-SR → Tredent PO-25754 — `t#1ZJW78500321246427`**: FJZ27411UR0, FJZ2626149P, FJZ2626146T, FOC230984VG, FJZ26261486, FJZ26251J9J; SFP SAVD1926A3ME, SAVD1909A2R2, SAVD1909A2P8, SAVD1926A5ZS, SAVD1926A0BV, SAVD1919AKHX, SAVD1926A3D7, SAVD1926A5ZG, SAVD1926A0NR, SAVD1926A0PJ
|
||||
**1x C8200-1N-4T → DHD PO-72509 — `t#1ZJW78500334942418`**: FGL2703L7EF
|
||||
**1x MA-ANT-27 → Curvature PO-1361163-C — `t#382139053391`**: JNV23455420
|
||||
**1x EX-UM-4SFPP-MR (ship)** `17-14781-37655`: XS0221420145
|
||||
**2x WS-C3560G-24PS-E (ship)** `14-14782-66558` (no unit SN)
|
||||
**2x C9300-48P-E + PWR → Data Networks PO-20158986 — `t#873411659986`**: FOC2530LZV6, FOC2720Y3K3; PWR SDCC2550C1TS, SLIT2842AJKX
|
||||
**20x C9300-NM-8X → Curvature PO-1361627-I — `T#1ZJW78500336683230`**: FOC22447FB0, FOC252520ZB, FJZ250407V3, FOC25062DMY, FJZ23141JWL, FJZ2444179V, FOC26452WP0, FOC222897W6, FOC221049XD, FOC24145T67, FOC24151HA4, FJZ2626139Z, FJZ26251ARU, FOC22442VA0, FOC22273U5U, FOC23246NZY, FJZ23321MWY, FOC230984BC, FJZ23150JUP, FOC23285F2V; faceplates JNV23454926/924/922/920/917/919
|
||||
**VIC3-4FXS/DID (ship)** `18-14787-11692`: FOC18375W7G; `16-14788-52149`: FOC16036FFK; `14-14784-46115`: FOC17361MNE
|
||||
**1x PVDM2-64 (ship)** `21-14778-64209`: FOC13313DX8
|
||||
**3x IE-4010-16S12P → LSI PO 189133 — `t#873471359207`**: FDO2311J4UU, FDO2326J0U6, FDO2233U0GV; PWR-RGD-AC-DC DTH2214024Q/DTH212901F7/DTH1941006H/DTH2230002D/DTH2230002G/MP5B10700CB
|
||||
**4x ISR4321-V/K9 + 2x ISR4331-V/K9 → CXtec PO-D1240982 — `t#1ZJW78501211711523`**: FLM2146V035, FLM2137V0YL, FLM2221W0C8, FLM2229V188; PSU JNV23452553/2555/2545/2547/2549/2551
|
||||
**2x GLC-T-RGD → Curvature PO-1361001-C — `t#873468937279`**: ACW28350BNZ, ACW28350C51
|
||||
**5x SFP-10G-T-X → ChannelWorks PO-14948 — `t#873468898750`**: ACW29080WSJ, ACW29080J4D, ACW29021EB6, ACW29081JY1, ACW29080FQL
|
||||
**10x GLC-LH-SM → GlobalNet Pro PO-16524**: SOPC15471170, SOPC15471254, SOPC15471186, SOPC15471175, SOPC15471394, SOPC15471495, SOPC15471241, SOPC15471196, SOPC15471373, SOPC15471239
|
||||
**6x MR46-HW → PO-23854**: Q3AC-EM2G-7YQB, Q3AC-HR8D-SKQQ, Q3AC-ZGGB-7KH8, Q3AC-FHVK-PUFD, Q3AC-EC8J-J5L4, Q3AC-MUBP-Z6H6
|
||||
**1x C8200-RM-19 → DHD PO-72509 — `t#1ZJW78500300871915`** (rack ear, no unit SN)
|
||||
**2x MEM-C8300-32GB → RetroHardware PO-12770 — `t#1ZJW78500304585709`**: S802C0F21122DBA344A, S802C0F21122DBA3464
|
||||
**11x CW9166I-MR → INTELECA PO-20105 — `t#873509498579`**: KWC27180B7W, KWC28490GA9, KWC270908BV, KWC2712060N, KWC27170EHK, KWC28490G6D, KWC27110UCV, KWC270908AP, KWC27110U5B, KWC27110UB0, KWC27110UCX
|
||||
**8x CS-DS-C32S-FAN-E + 4x CS-DS-CAC-650W-E → Procurri PO-DS 111503 — `t#873526593860`**: FAN DCH2309RBUK/DCH2309RBW6/DCH2309RBWC/DCH2309RBU1; PSU ART2311FQQ3/ART2504F89E/ART2311FQQ2/ART2504F8H7
|
||||
**2x AIR-AP1562I-S-K9 (ship, Invoice 151717)**: FGL2346L5CG, FGL2431N019
|
||||
**3x PWR-C49-300AC → Curvature PO-1361761-C — `t#873528345784`**: PAC14370LDZ, QCS1252B0A0, QCS1214B0CA
|
||||
**1x C9300X-24Y-A → Network Craze PO-P30894 — `t#1ZJW78500214710752`**: FOC2631YKE6; PWR DCC2717C5H0; FAN DCZ262502FD/DCZ262502H7/DCZ262502G4
|
||||
**1x C9300-48U-A → PIVIT PO-256970 — `t#1ZJW78500218523966`**: FJC25171FJT; PWR DTN2131V1MW; FAN DCH2245RP2S; NID2550W0SD2; DCZ28420RJH
|
||||
**32x J9150D → Curvature PO-1361323-C — `t#873507576964`**: CN82KBV1M1, CN82KBV0S8, CN82KBV51F, CN82KBV0MN, CN82KBV35B, CN82KBV1G0, CN82KBV5K9, CN82KBV19X, CN82KBV32Y, CN82KBV5TJ, CN83KBV52D, CN82KBV0DQ, CN82KBV7C0, CN82KBV19D, CN82KBV154, CN82KBV5N0, CN82KBV0JP, CN81KBV0KN, CN82KBV4YD, CN82KBV21P, CN82KBV57K, CN82KBV5TH, CN82KBV0MM, MY95KC90HW, MY81KC9070, CN13KJV57L, MY81KC910C, MY81KC907P, MY81KC908C, MY81KC90JG, MY81KC907N, MY81KC90CM
|
||||
**2x QSFP-100G-SR1.2 → Recurrent PO-P54386 — `t#873526921770`**: INL27130FVW, INL270405X3
|
||||
**13x MA-MNT-MR-16 → CXtec PO-D1239490 — `t#1ZJW78501215940537`**: CCA45680155, CCA45680167, CCA45680161, CCA45680061, CCA45680153, CCA45680075, CCA45680168, CCA45680159, CCA45680059, CCA45680067, CCA45680069, CCA45680071, BCAA0004947
|
||||
**1x C9120AXE-B → OSI PO-26415 — `t#1ZJW78500303834941`**: FJC27141WLU
|
||||
**3x CISCO891-K9 → OSI PO-26501 — `T#1zjw78500320830467`**: FTX183383M8, FTX18518357, FGL211323HR; PSU dth1918d2l7/dth2022d4q8/dth2022d4lz
|
||||
**3x C9200CX-12P-2X2G-E → OSI PO-26496 — `t#1ZJW78500334411250`**: FJC28281TXR, FJC28311W14, FJC28311W9J
|
||||
**6x C9300-NM-2Y → Trifecta PO-309601 — `T#873575620567`** (only 5 SNs listed): FJZ251911DH, FOC24165Z60, FJZ25030VJC, FOC24125U5Z, FJZ25030TZG
|
||||
**1x C9200CX-12P-2X2G-A → Network Craze PO-P31125 — `t#1ZJW78500231236840`**: FJC272310VC
|
||||
**42x PWR-C1-715WAC-P → Curvature PO-1361774-I — `T#873530090274`** (36x): LIT2736A6LU, LIT2732C75W, LIT2736A6ES, LIT2724CT3F, LIT2726A3G4, LIT2736A6GK, LIT2724CT6P, LIT2732C7FH, LIT2726A3Y9, LIT2732C7DV, LIT2732C7GC, LIT2736A6PC, LIT2726A35H, LIT2726A3TY, LIT2724CT9G, LIT2724CT29, LIT2724CT95, LIT2732C77V, LIT2736A6TU, LIT2726A3JE, LIT2732C7J6, LIT2736A6LH, LIT2732C7AY, LIT2732C765, LIT2732C77S, LIT2736A65F, LIT2736A6JD, LIT2732C7SR, LIT2724CT9T, LIT2736A6T2, LIT2724CTEP, LIT2732C7JQ, LIT2724CTPX, LIT2732C7GN, LIT2726A3VJ, DCC2913C1MB
|
||||
**1 button for IE-4010-16S12P → LSI PO 189133 — `t#1ZJW78504436235307`**: FDO244114ZK (module button, partial)
|
||||
|
||||
**Return shipments (outbound returns):** WS-SUP32-GE-3B set SAL09402986/SAL09402B2Q/SAL094027D8 (`02-14656-21410`); C9120AXE-B FJC27141WFH + FJC24441S1F (`18-14577-09030`); NIM-2GE-CU-SFP FOC24331W30 (`22-14679-31676`); NIM-2GE-CU-SFP FOC23503PHV (`26-14668-35871`); 2x MR86-HW Q3AE-VWFF-6MGG, Q3AE-EGKE-B3X2 (`15-14694-40587`); 2x PWR-C1-350WAC-P DCC2304B04R, DCC2503B41T (`03-14718-50867`)
|
||||
|
||||
*SHIPPED total: ~145 shipment/return sections, ~700+ individual serial numbers.*
|
||||
|
||||
### B) KEPT / NOT SHIPPED
|
||||
- QSFP-40G-SR-BD lot `10-2945-02` (TNT Import) — "total 8 units only, the rest keep in US", "*not here yet"
|
||||
- 4x JNP-SFP-25G-SR (TNT Import) — "*not here yet"
|
||||
- C9200-STACK ×28 + STACK-T4-50CM ×10 — "keep aside, will ship AUS" (see list 3)
|
||||
- C9120AXE-B FJC28081EXK — repeatedly "already shipped to CMS last wk" (historical, do not reship)
|
||||
- Top-cover kept aside (for C9120AXE-B FJC27151F4C return)
|
||||
- QSFP-40G-LR4 SPC23020002 — "can't find it here"
|
||||
- C9120AXE staged/keep aside (ship tomorrow): FJC27141WLU, FJC27081CEG, FJC262921J0, FJC2629225Y
|
||||
- M.2 SSDs / EM7455/EM7430 355813079879315 — held for next shipment
|
||||
- WS-C2960X-48TS-L FOC2111S08L — NOB, staged for AUS
|
||||
- MR86-HW Q3AE-DMMG-D9KV — "Do NOT ship"
|
||||
|
||||
*KEPT total: ~12 explicit kept/held item groups.*
|
||||
|
||||
---
|
||||
|
||||
## 2. Items With an Issue (faulty / troubleshooting / damaged / return)
|
||||
|
||||
| PID | SN | Reason |
|
||||
|---|---|---|
|
||||
| C9300-24P-A / C9300-24P-E | FCW2303DHNT | "line 2 van bi loi" (still faulty); earlier "khong boot duoc: BOOT FAIL W..." |
|
||||
| C9120AXE-B | FJC27041KKE | "TS24 ... reconnect again so we can recover them" |
|
||||
| C9120AXE-E | FGL2812LK6C | "TS12" troubleshooting (1/0 USEB) |
|
||||
| C9120AXE-B | FJC27102CXZ | "line 21 ... chuyen sang line khac giup e" (no console) |
|
||||
| SUP32 / PFC2 / MSFC2 | SAL1338Z2TC / SAL1339ZJD8 / SAL1339ZGPX | "bi loi -Traceback= ... Error75" |
|
||||
| C9300-24P-E | FCW2238S0BA | "line 5 bi loi" |
|
||||
| NIM-2GE-CU-SFP | FOC23503PHV, FOC24331W30 | bad → returned |
|
||||
| MR86-HW | Q3AE-EGKE-B3X2, Q3AE-SZD8-3PPD, Q3AE-6S3U-Y6N5, Q3AE-VWFF-6MGG, Q3AE-8P53-P74T, Q3AE-SFXM-7CK2 | "failed test" / LED orange |
|
||||
| C8300-1N1S-6T | FLM290910DT | "RFID module broken, can't fix" |
|
||||
| C8300-1N1S-6T | FLM280510M6 | "can't fix ... RFID module broken" |
|
||||
| WS-SUP32-GE-3B | SAL09402986 | "damaged power connector" (returned) |
|
||||
| MR86-HW (6 units, no SN) | — | "LED orange when tested" (OSI PO-25694) |
|
||||
| N5K-C5596UP | (TS17/TS21) | "PSU/FAN/LED ok but no console" |
|
||||
| C1111-8PLTEEAWB (2) | — | "no console after restart" |
|
||||
| WS-C3750X-48P/24P-S | batch | "most were bad, scratches" |
|
||||
| DS-C9148T-K9 | JPG2603008A | "can't unbend chassis/ports" |
|
||||
| CISCO2821 | FTX1134A22A, FTX0950C0GD, FTX1418AKP2 | missing FAN/PSU/faceplate |
|
||||
| ISR4331/K9 | FLM1940W0QB | "PASS WITH WARNING" |
|
||||
| WS-C3560G-24PS-S | FOC1124Z45D | "no console, no faceplate" |
|
||||
| ISR4331 (x2) | — | "PSU missing power cab" |
|
||||
| MR86-HW (return) | Q3AE-VWFF-6MGG, Q3AE-EGKE-B3X2 | returned faulty (`15-14694-40587`) |
|
||||
| C9120AXE-B (return) | FGL2812LPJG, FJC27151F4C | return (`18-14577-09030`); one "without top cover from AUS" |
|
||||
|
||||
*ISSUE total: ~24 item groups / 30+ individual faulty/troubleshooting SNs.*
|
||||
|
||||
---
|
||||
|
||||
## 3. SN Transferred to AUS / Australia
|
||||
|
||||
**C9200-STACK batch ("keep aside, will ship AUS" → Import Australia UPS `t#1ZJW78500439236591`):**
|
||||
SFP-10G-LRM: FNS22050YND, FNS23051B30, FNS23051B32, FNS230514Q4, FNS23051B20, OPM23170H22
|
||||
C9200-STACK (28 stated; only 24 SNs listed): JAE234004PC, JAE234004T3, JAE25360Z1S, JAE25360ZXV, JAE25360ZXW, JAE25360WQ5, JAE25210K5G, JAE25360U84, JAE25360U7W, JAE26440B4B, JAE26402RM2, FDO24260JB2, JAE2610039Z, JAE25210GZQ, FDO19321P8C, JAE25360VU0, FDO20120G3J, JAE23051476, JAE230601NH, FVH27510R69, JAE25161CTG, FDO24250TUS, FDO24250T0B, FDO24260G32
|
||||
STACK-T4-50CM (10): LCC2428GA36, LCC2618GGEJ, MOP2732A3H9, LCC2246GX01, LCC2625GFEE, MOC261800C4, MOC261801GT, MOC261700NW, MOP2732A3EX, LCC2320GCQW
|
||||
AIR-AP1562E-B-K9 FJC263610EP; EX-SFP-10GE-SR SCJ19YP0M8, SCJ19YP0V4, SCJ19YP02N, SAZH1TSH
|
||||
|
||||
**Import Australia UPS `t#1ZJW78500415223596`:** UCSX-TPM-002C FCH270476WX, FCH26177EG0, FCH270476HD, FCH251572R7; PAN-SFP-PLUS-SR MU90S5H, MU60V2X, MU20TFU, MU211ZP; NIM-2GE-CU-SFP FOC250624L6, FOC250624QT, FOC24400EXW, FOC21452FDH, FOC263359EC; JNP-SFP-25G-SR S1A1M31A751BAB, S1A1M31A7517WR, S1A1M31A751BVZ, S1A1M31A751BFQ; UCSX-210C-M7 (3x); NVME-SSD 22293A26BAF6, 22293A447730; SHOKZ JNV23455443; NIM-ES2-8 SFOC194350CH, SFOC24484UR9
|
||||
|
||||
**Import Australia UPS (no t#):** UCSX-ML-V5Q50G FCH272977RT, FCH272270UZ, FCH272978JZ; MTFDDAV240TDS 213931914B8A, 213931914A5B, 214131CE6FB9; EM7455/EM7430 355813079879315
|
||||
|
||||
**Import Australia UPS `t#1ZJW78500423326317`:** PWR-80W-AC DAB192754P8, DAB194450WP; CMPCT-DIN-MNT JNV23455437; MS120-8LP Q2BX-YLYW-556A, PSU HSQD76M73B1; STACK-T4-50CM LCC2920G58F; SATA-SSD-240GB 11S00VN646Y0M127A83181; XEON-6342 2A2109BF5F0F8383RKXA, 2A2134BF51728CD8RKXA, 2A226ABF4F28BBEARKXA, 2A2076BC995F5353RKXA, 3A4819C0FC3F792ARKXA, 3A481A1EB9C113CFRKXA; C9500-40X FJC254223DA + PWR DCI2504M35A/DCI2536M3VC + fans DCH2526Y0LH/0LM/0LN/0LK/0LR
|
||||
|
||||
**Import Australia UPS `t#1ZJW78500431774363`:** GLC-BX-D S080322BD001 / 080322BD001; NEX-PLAYGROUND NPG02C100668500; N9K-C93180YC-EX FDO2252023P, FDO21482ECD, FDO21111F2A + NXA-FAN-30CFM-F NID2146H5I7/I8/I9/I4, NID2250H1B1/B7/197/19B + NCV2520R0QG/R0GW/R073/R0DN; WS-F6K-PFC3B SAD092705FY
|
||||
|
||||
**Import Australia Fedex/TNT `t#873308530323`:** NXA-FAN-30CFM-B NID1922I1SG/1SC/1S0, NID2135I0RK, NID1922I1SE, NID2106I25F, NID2108I38W, DCH2237R3N6, NID2103I35Z, DCH21260BFD, DCH21281B4X, DCH21370ELG; C9300-24T-A FJC2333S06H, FJC2333S05T; C9200-STACK (20) FVH29100T7W, FVH29100X6E, FVH2910151B, FVH29100XBB, FVH29100VEM, FVH29100TH7, FVH29100VQ1, FVH29100U1P, FVH29100TNL, JAE23180HMY, JAE25360VTV, FDO23290KA5, FDO24260J5C, JAE241108B5, FDO23290LAT, JAE26360XUR, JAE26382LDW, JAE25360VD8, FDO22263HYZ, FDO22040VZ4; C9200L-48P-4X-E FOC2608054S; WS-SUP32-GE-3B SAL12330S7D + MSFC2A SAL12330JA2 + PFC3B SAL12330MGV
|
||||
|
||||
**Import Australia UPS `t#1ZJW78500401856894`:** C3850-NM-2-40G FOC192205F3; C9300-24T-A FOC2217Q02J
|
||||
|
||||
**TNT / Fedex (make New in AUS) `T#873256774277`:** 4x C9300-24T-E-WS FOC2721YA6K, FOC2721Y72K, FOC2648YCCD, FOC2721YA6F; WS-C2960X-48TS-L FOC2111S08L; XL710-QDA2 7981133843, 951013; UCSC-PSUV2-1050DC LIT23223D4H, LIT23223AH7; C9120AXI FJC25432C7X, FJC25432CB7, FJC25432CA2, FJC25432C7S; C3850-NM-4-10G foc23262kmy, fjz233020cu
|
||||
|
||||
**Other AUS notes:** SSD-SATA-960G "already on the shipment to AUS"; N6K-C6001-FAN-B "already shipped to AUS"; MR86-HW / AIR-AP-BRACKET "recently arrived from AUS".
|
||||
|
||||
*AUS-transfer total: ~10 Import-Australia shipments, 150+ serial numbers.*
|
||||
|
||||
---
|
||||
|
||||
## 4. SN in xxxx-xxxx-xxxx Format — Shipped/Outbound Context
|
||||
|
||||
| Hyphenated code | Section / context | Outbound? |
|
||||
|---|---|---|
|
||||
| `17-14693-01192` | ship PWR-115W-AC (DAB264260LD) | Yes |
|
||||
| `10-2945-02` | TNT Import — QSFP-40G-SR-BD lot (8 shipped) | Yes |
|
||||
| `10-14642-61136` | return C9300-24P-A (FJC2340S02R) | Yes |
|
||||
| `14-14726-39035` | ship MR36-HW (Q3AJ-49HS-7EJT) | Yes |
|
||||
| `04-14738-90912` | ship IPO-500-EXP-MOD (09FC19500851) | Yes |
|
||||
| `18-14713-58108` | ship 2x CP-840 (TCL251100KM, TCL2511007X) | Yes |
|
||||
| `02-14762-32221` | ship MA-MNT-MR-17-MR36 (CCA45679958) | Yes |
|
||||
| `17-14713-99371` | ship C9136I-ROW (FGL2702LXZV) | Yes |
|
||||
| `05-14746-55384` | ship VIC3-4FXS/DID (FOC14317P5Z) | Yes |
|
||||
| `02-14656-21410` | return WS-SUP32-GE-3B set | Yes |
|
||||
| `23-14741-66101` | ship AIR-ACC1530-PMK1 (JNV23453733) | Yes |
|
||||
| `18-14577-09030` | return 2x C9120AXE-B (FJC27141WFH, FJC24441S1F) | Yes |
|
||||
| `05-14777-19977` | ship 2x PVDM2-64 (FOC123546Z2) | Yes |
|
||||
| `03-14766-07874` | ship GLC-ZX-SM-RGD (SPC1432030T) | Yes |
|
||||
| `08-14781-17608` | ship 2x PWR-C1-440WDC (ART2210FCJL, ART2210FCDR) | Yes |
|
||||
| `27-14612-29455-P01` | AUS — Micron MTFDDAV240TDS | Yes |
|
||||
| `22-14679-31676` | return NIM-2GE-CU-SFP (FOC24331W30) | Yes |
|
||||
| `26-14668-35871` | return NIM-2GE-CU-SFP (FOC23503PHV) | Yes |
|
||||
| `17-14781-37655` | ship EX-UM-4SFPP-MR (XS0221420145) | Yes |
|
||||
| `14-14782-66558` | ship 2x WS-C3560G-24PS-E | Yes |
|
||||
| `18-14787-11692` | ship VIC3-4FXS/DID (FOC18375W7G) | Yes |
|
||||
| `16-14788-52149` | ship VIC3-4FXS/DID (FOC16036FFK) | Yes |
|
||||
| `21-14778-64209` | ship PVDM2-64 (FOC13313DX8) | Yes |
|
||||
| `14-14784-46115` | ship VIC3-4FXS/DID (FOC17361MNE) | Yes |
|
||||
| `13-14800-40754` | ship ASR-9902-2P-KIT | Yes |
|
||||
| `15-14694-40587` | return 2x MR86-HW | Yes |
|
||||
| `03-14718-50867` | return 2x PWR-C1-350WAC-P | Yes |
|
||||
|
||||
Excluded (incoming, not outbound): `12-14747-30779` (eBay receiving), `24-14738-31605` (return never completed).
|
||||
|
||||
*Hyphenated-shipped total: 27 codes.*
|
||||
|
||||
---
|
||||
|
||||
## Caveats
|
||||
- CISCO2511-DC PO-5602 vs DS-C9132T-K9 PO-256275: the two sections reference tracking numbers near `t#872615359982` / `t#872615037247`; the SN data for each section is unambiguous, but the exact t# pairing is uncertain and has been flagged for review.
|
||||
- Many "Please help to ship/return" single-item sections have no UPS/FedEx `t#` in Joe's message (label pre-supplied); treated as outbound per domain rules.
|
||||
- Lowercase SNs and leading `S`/`X` prefixes are quoted exactly as in the source.
|
||||
|
|
@ -0,0 +1,285 @@
|
|||
## You said
|
||||
List all SNs shipped, and the one kept
|
||||
|
||||
## Gemini said
|
||||
Based on the communications regarding
|
||||
PO-0272806
|
||||
(100x C9120AXE-B-REF to CMS Communications, Inc.), here are the serial numbers that were successfully shipped, as well as the unit that appears to have been kept
|
||||
.
|
||||
SNs Shipped (Total: 69 Units)
|
||||
A total of 69 units were shipped across three separate batches
|
||||
:
|
||||
Batch 1 (Shipped May 29, 2026 - 20 Units):
|
||||
The following 20 serial numbers were shipped: FJC270918FN, FJC27041K7D, FJC270918HB, FJC2428184B, FJC242612Y8, FJC25331KK9, FJC242612YA, FJC242612Y9, FJC27111BBT, FJC234916D2, FJC242916WL, FJC24401SKH, FJC250618K5, FJC24381E9Q, FJC28081EXK, FJC250618KB, FJC24401SL0, FJC25331M5V, FJC242916VZ, and FJC24381DWC
|
||||
.
|
||||
Batch 2 (Shipped June 1, 2026 - 28 Units):
|
||||
The following 28 serial numbers were shipped: FJC27081CA2, FJC24381DM7, FJC24381E8W, FJC24311C2L, FJC24381DV4, FJC24401SHB, FJC250618AX, FJC24331ATB, FJC242916V0, FJC24381DV2, FJC250618KG, FJC2428180H, FJC24381DV9, FJC27102DKZ, FJC24511LK7, FJC24381DUJ, FJC27081CAY, FJC27081CA0, FJC27081CAX, FJC27081CA1, FJC25331KQW, FJC24401SKB, FJC242916W2, FJC242612Y5, FJC242817ZG, FJC270918G9, FJC27043K60, and FJC271823UE
|
||||
.
|
||||
Batch 3 (Shipped June 5, 2026 - 21 Units):
|
||||
The following 21 serial numbers were shipped: FJC27102CXY, FJC27102CSS, FJC27041KKE, FJC27081CAW, FJC27041K9U, FJC27041K89, FJC25081ALQ, FJC27081CA3, FJC25101JGZ, FJC25061LWW, FJC24381DUC, FJC26291GP3, FJC242916V4, FJC24381DWD, FJC250618B7, FJC25101JJW, FJC24381E5G, FJC25101JJV, FJC27102CXZ, FJC24441S1F, and FJC27141WFH
|
||||
.
|
||||
The SN Kept
|
||||
FJC28081EXK
|
||||
This serial number was initially documented as shipped in the first batch on May 29, 2026
|
||||
. It was subsequently excluded from later shipments (such as PO-0272591 on June 2) with the explicit note that it had "already shipped to CMS last wk"
|
||||
. However, on June 8, an internal message noted to "connect to TS if you find this" regarding FJC28081EXK, indicating that despite being marked as shipped, it was likely kept or accidentally left behind in the facility
|
||||
.
|
||||
(Note: SN
|
||||
FJC27041KKE
|
||||
was also temporarily kept aside on June 2 for recovery, but it was ultimately shipped out in the third batch on June 5
|
||||
.)
|
||||
|
||||
## You said
|
||||
List all items with issue (SN + PID if possible)
|
||||
|
||||
## Gemini said
|
||||
Here are the items reported with issues, categorized by the type of problem:
|
||||
Boot, System, and Connectivity Failures
|
||||
PID:
|
||||
C9300-24P-A |
|
||||
SN:
|
||||
FCW2303DHNT - This unit experienced a boot failure and displayed a "BOOT FAIL" error.
|
||||
PID:
|
||||
SUP32 |
|
||||
SN:
|
||||
SAL1338Z2TC - This unit generated a traceback error (Error75).
|
||||
PID:
|
||||
pfc2 |
|
||||
SN:
|
||||
SAL1339ZJD8 - This component generated a traceback error alongside the SUP32.
|
||||
PID:
|
||||
msfc2 |
|
||||
SN:
|
||||
SAL1339ZGPX - This component generated a traceback error alongside the SUP32.
|
||||
PID:
|
||||
N5K-C5596UP |
|
||||
SN:
|
||||
Not specified - The PSU, fans, and LEDs powered on successfully, but the unit failed to provide any console output.
|
||||
Faulty or Failed Testing
|
||||
PID:
|
||||
NIM-2GE-CU-SFP |
|
||||
SN:
|
||||
FOC23503PHV - This module was explicitly marked as bad and faulty.
|
||||
PID:
|
||||
NIM-2GE-CU-SFP |
|
||||
SN:
|
||||
FOC24331W30 - This module was explicitly marked as bad and faulty.
|
||||
PID:
|
||||
NIM-2GE-CU |
|
||||
SN:
|
||||
Not specified (2 units) - These modules were tested in an ISR4331 and were suspected to be bad.
|
||||
PID:
|
||||
MV86-HW / MR86-HW |
|
||||
SNs:
|
||||
Q3AE-EGKE-B3X2, Q3AE-SZD8-3PPD, Q3AE-6S3U-Y6N5, Q3AE-VWFF-6MGG, Q3AE-8P53-P74T, Q3AE-SFXM-7CK2 - These units failed testing because they booted but the LED remained orange when an Ethernet cable was connected.
|
||||
PID:
|
||||
MR86-HW |
|
||||
SN:
|
||||
Q3AE-DMMG-D9KV - There was an explicit instruction not to ship this specific unit, indicating a probable fault.
|
||||
Physical Damage (Broken Parts, Bent Chassis, Scratches)
|
||||
PID:
|
||||
C8300-1N1S-6T |
|
||||
SN:
|
||||
FLM290910DT - This chassis had a broken RFID module that could not be fixed.
|
||||
PID:
|
||||
C8300-1N1S-6T |
|
||||
SN:
|
||||
FLM280510M6 - This unit had a broken RFID module that could not be fixed, and the aesthetics were deemed unfixable.
|
||||
PID:
|
||||
DS-C9148T-K9 |
|
||||
SN:
|
||||
JPG2603008A - This switch suffered from a bent chassis and bent ports that could not be repaired.
|
||||
PID:
|
||||
DS-CAC-650W-E |
|
||||
SNs:
|
||||
ART2311FQQ3, ART2504F89E, ART2311FQQ2, ART2504F8H7 - These units were noted as having broken handles that required physical replacement.
|
||||
PID:
|
||||
WS-SUP32-GE-3B (Component) |
|
||||
SN:
|
||||
SAL09402986 - This component was returned with a damaged power connector.
|
||||
PID:
|
||||
IE-4010-16S12P |
|
||||
SN:
|
||||
FDO244114ZK - This unit was missing a black plastic mode button, which required a full module replacement to fix.
|
||||
PID:
|
||||
C9120AXE |
|
||||
SN:
|
||||
FJC27141WMG - This chassis was set aside due to physical condition and described as "most ugly".
|
||||
PID:
|
||||
C3850-NM-4-10G |
|
||||
SNs:
|
||||
FOC23262KMY, FJZ233020CU - These modules were described as "most ugly" due to physical condition.
|
||||
PID:
|
||||
WS-C3750X-48P-S |
|
||||
SNs:
|
||||
Not specified - Most units in a large batch were reported as bad with many scratches on the top of the chassis.
|
||||
Missing Internal Components
|
||||
PID:
|
||||
CISCO2821 |
|
||||
SN:
|
||||
FTX1134A22A - This unit was missing its FAN, PSU, and FACEPLATE.
|
||||
PID:
|
||||
CISCO2821 |
|
||||
SN:
|
||||
FTX0950C0GD - This unit was missing its PSU and FACEPLATE.
|
||||
PID:
|
||||
CISCO2821 |
|
||||
SN:
|
||||
FTX1418AKP2 - This unit was missing its PSU and FAN.
|
||||
PID:
|
||||
ISR4331 |
|
||||
SNs:
|
||||
Not specified (2 units) - These units were missing the internal power cables that connect the PSU to the motherboard.
|
||||
|
||||
## You said
|
||||
List all SN transferred to AUS/Australia
|
||||
|
||||
## Gemini said
|
||||
Based on the records in
|
||||
aJoe.md
|
||||
, here are the serial numbers explicitly noted as being kept aside to ship or transferred to Australia (AUS):
|
||||
C9200-STACK Modules (June 1–2 Records)
|
||||
These units were instructed to be kept aside to ship to Australia
|
||||
:
|
||||
JAE234004PC
|
||||
JAE234004T3
|
||||
JAE25360Z1S
|
||||
JAE25360ZXV
|
||||
JAE25360ZXW
|
||||
JAE25360WQ5
|
||||
JAE25210K5G
|
||||
JAE25360U84
|
||||
JAE25360U7W
|
||||
JAE26440B4B
|
||||
JAE26402RM2
|
||||
FDO24260JB2
|
||||
JAE2610039Z
|
||||
JAE25210GZQ
|
||||
FDO19321P8C
|
||||
JAE25360VU0
|
||||
FDO20120G3J
|
||||
JAE23051476
|
||||
JAE230601NH
|
||||
FVH27510R69
|
||||
JAE25161CTG
|
||||
FDO24250TUS
|
||||
FDO24250T0B
|
||||
FDO24260G32
|
||||
STACK-T4-50CM Modules (June 1–2 Records)
|
||||
These stacking cables were also set aside for the Australia shipment
|
||||
:
|
||||
LCC2428GA36
|
||||
LCC2618GGEJ
|
||||
MOP2732A3H9
|
||||
LCC2246GX01
|
||||
LCC2625GFEE
|
||||
MOC261800C4
|
||||
MOC261801GT
|
||||
MOC261700NW
|
||||
MOP2732A3EX
|
||||
LCC2320GCQW
|
||||
Additional C9200-STACK Modules (June 19 Records)
|
||||
These serial numbers were documented under the "Import Australia Fedex / TNT" shipment log to increase the overall stack module transfer total
|
||||
:
|
||||
FVH29100T7W
|
||||
FVH29100X6E
|
||||
FVH2910151B
|
||||
FVH29100XBB
|
||||
FVH29100VEM
|
||||
FVH29100TH7
|
||||
FVH29100VQ1
|
||||
FVH29100U1P
|
||||
FVH29100TNL
|
||||
JAE23180HMY
|
||||
JAE25360VTV
|
||||
FDO23290KA5
|
||||
FDO24260J5C
|
||||
JAE241108B5
|
||||
FDO23290LAT
|
||||
JAE26360XUR
|
||||
JAE26382LDW
|
||||
JAE25360VD8
|
||||
FDO22263HYZ
|
||||
FDO22040VZ4
|
||||
(Note: It is also mentioned that a batch of
|
||||
SSD-SATA-960G
|
||||
units was successfully sent out on a shipment to Australia, though their specific serial numbers were not recorded in the text
|
||||
.)
|
||||
|
||||
## You said
|
||||
List all SN in this format that has been shipped:
|
||||
xxxx-xxxx-xxxx
|
||||
|
||||
## Gemini said
|
||||
Based on the provided document, here are all the serial numbers matching the
|
||||
xxxx-xxxx-xxxx
|
||||
format that have been explicitly shipped
|
||||
:
|
||||
MR36-HW
|
||||
Q3AJ-49HS-7EJT
|
||||
CW9162I-MR
|
||||
Q5AA-JNPC-SYDC
|
||||
MV32-HW
|
||||
Q2PV-2885-9WH5
|
||||
Q2PV-599H-DNSD
|
||||
Q2PV-C6EG-KB2J
|
||||
Q2PV-A6NU-EQX8
|
||||
Q2PV-DL34-7UEG
|
||||
Q2PV-WP5A-5HGM
|
||||
Q2PV-KQL5-GFS3
|
||||
Q2PV-VPCA-HBCB
|
||||
Q2PV-LQ7U-3RFS
|
||||
Q2PV-GQMH-F5LJ
|
||||
Q2PV-5ZQ9-5DD8
|
||||
Q2PV-VW84-729Z
|
||||
Q2PV-GFSK-KDPC
|
||||
Q2PV-3X3G-2QU2
|
||||
Q2PV-EJ4U-9EG4
|
||||
MR86-HW
|
||||
Q3AE-AY5R-YV22
|
||||
Q3AE-2MKJ-V78D
|
||||
Q3AE-3PRL-VL68
|
||||
Q3AE-52NF-QY6E
|
||||
Q3AE-RNQP-HXGT
|
||||
Q3AE-9FKB-VBJ7
|
||||
Q3AE-DDPF-UH9A
|
||||
MR46-HW
|
||||
Q3AC-EM2G-7YQB
|
||||
Q3AC-HR8D-SKQQ
|
||||
Q3AC-ZGGB-7KH8
|
||||
Q3AC-FHVK-PUFD
|
||||
Q3AC-EC8J-J5L4
|
||||
Q3AC-MUBP-Z6H6
|
||||
|
||||
## You said
|
||||
Can a 7B model running on an RTX 4000 8 GB GPU answer the questions above?
|
||||
|
||||
## Gemini said
|
||||
Yes, it is technically possible, but it would likely struggle to provide a perfectly accurate and complete answer. Running a 7B model on an 8GB GPU to perform complex data extraction requires significant compromises.
|
||||
Here is a breakdown of the technical and cognitive hurdles a 7B model would face with this specific task:
|
||||
1. Hardware Constraints (VRAM Limitations)
|
||||
An 8GB GPU cannot load a standard 16-bit 7B model, which requires roughly 14GB of VRAM just for the weights. To run it on an RTX 4000 (8GB), you would have to
|
||||
quantize
|
||||
the model (e.g., using 4-bit quantization formats like GGUF, AWQ, or EXL2).
|
||||
The Weight:
|
||||
A 4-bit 7B model takes up about 4 to 4.5 GB of VRAM.
|
||||
The KV Cache (Context Window):
|
||||
The document you provided is quite long and contains thousands of tokens. The remaining 3.5 GB of VRAM limits how much text you can feed the model at once. If the document exceeds the context limit that fits in your remaining VRAM, the model will throw an Out of Memory (OOM) error.
|
||||
2. Cognitive Limitations (Accuracy and Recall)
|
||||
Even if you fit the document into the model's context window, smaller 7B and 8B models (like Llama 3 8B or Mistral 7B) have known limitations with this specific type of heavy data-extraction task:
|
||||
The "Needle in a Haystack" Problem:
|
||||
7B models struggle to maintain attention across long texts. When asked to find scattered details (like "items with issues" mixed into dozens of emails), they frequently miss items or hallucinate incorrect associations.
|
||||
List Truncation:
|
||||
When asked to list 69 random, non-sequential alphanumeric strings (like the shipped serial numbers), 7B models almost always get "lazy." They will output the first 10 to 15 items and then stop, hallucinate patterns, or summarize the rest instead of completing the exact task.
|
||||
Pattern Matching:
|
||||
While a 7B model understands the request to find the
|
||||
xxxx-xxxx-xxxx
|
||||
format, an LLM is essentially guessing the next token based on probability. It is far less reliable at scanning for strict formatting rules over a massive text block than a simple Python Regular Expression (Regex) script.
|
||||
The Verdict
|
||||
You can run a quantized 7B model on your hardware, and it will give you a
|
||||
plausible-sounding
|
||||
answer. However, for a task requiring 100% accuracy in extracting dozens of random serial numbers from a long, noisy email thread, a 7B model will likely drop data, hallucinate serial numbers, or truncate the lists.
|
||||
Are you currently trying to set up a local AI environment for processing these types of inventory files?
|
||||
This conversation was created with Gemini Advanced. Gemini Advanced isn't available for you, but you can start a new chat about this topic.
|
||||
Learn more
|
||||
Gemini is AI and can make mistakes.
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 369 KiB |
|
|
@ -0,0 +1,411 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
Export emails from <WORKING_DIR>/<WORKING_INPUT> to Markdown.

WORKING_DIR, WORKING_INPUT and WORKING_OUTPUT come from the environment or an
adjacent .env file (defaults: emails/input and emails/output), for example
emailfiles/cur -> emailfiles/output.

Output: <WORKING_DIR>/<WORKING_OUTPUT>/<email_id>/
    markitdown/body.md   markitdown/<stem>.md
    docling/body.md      docling/<stem>.md

Image OCR: vision LLM (llama-server via SSH tunnel) → PaddleOCR → Tesseract fallback
Tunnel: ssh -fNL 11436:127.0.0.1:11436 -J kai@192.168.171.2 root@172.25.57.233
"""
|
||||
|
||||
import email, email.policy, http.client, json, logging, os, re, shutil
|
||||
import subprocess, sys, tempfile, uuid
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
|
||||
# .env loader: KEY=VALUE lines from a .env file next to this script.
# Values already present in the process environment win (setdefault).
_env = Path(__file__).parent / ".env"
if _env.exists():
    for _l in _env.read_text().splitlines():
        _l = _l.strip()
        if not _l or _l.startswith("#"):
            continue  # blank line or comment
        _k, _sep, _v = _l.partition("=")
        if _sep:  # only lines that actually contain "="
            os.environ.setdefault(_k.strip(), _v.strip())

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
log = logging.getLogger(__name__)

# Working tree: <script dir>/<WORKING_DIR>/<WORKING_INPUT | WORKING_OUTPUT>
_WORKING = os.environ.get("WORKING_DIR", "emails")
_root = Path(__file__).parent / _WORKING
MAILDIR = _root / os.environ.get("WORKING_INPUT", "input")
OUTPUT = _root / os.environ.get("WORKING_OUTPUT", "output")

# Conversion services reached over HTTP.
MARKITDOWN_HOST = "localhost"
MARKITDOWN_PORT = 8282
DOCLING_HOST = "localhost"
DOCLING_PORT = 8383
# Container used for `docker cp` / `docker exec` by the Tesseract fallback.
DOCLING_CONTAINER = "ai-markdown-api-docling-1"
API_TIMEOUT = 300  # seconds, passed to http.client as the socket timeout

# Vision LLM endpoint (overridable through the environment / .env).
VISION_HOST = os.environ.get("VISION_HOST", "localhost")
VISION_PORT = int(os.environ.get("VISION_PORT", "11436"))

# PaddleOCR endpoint (overridable through the environment / .env).
PADDLE_HOST = os.environ.get("PADDLE_HOST", "localhost")
PADDLE_PORT = int(os.environ.get("PADDLE_PORT", "9999"))

# File extensions treated as images, and everything convert_docling accepts.
IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".tif", ".webp"}
DOCLING_SUPPORTED = IMAGE_EXTS | {".pdf", ".docx", ".xlsx", ".pptx", ".html", ".htm"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multipart POST helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _post(host, port, path, filepath, params="") -> str:
    """Upload *filepath* as multipart/form-data and return the converted text.

    The file is sent in a single form field named ``file``. ``params`` is an
    already-encoded query string appended to ``path`` when non-empty.

    Returns the ``markdown`` field of the JSON response, else its ``content``
    field, else the string form of the whole response. A non-200 status yields
    the marker ``<!-- API <status> -->`` instead of raising.
    """
    boundary = uuid.uuid4().hex
    file_bytes = filepath.read_bytes()
    preamble = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="file"; filename="{filepath.name}"\r\n'
        f"Content-Type: application/octet-stream\r\n\r\n"
    )
    closing = f"\r\n--{boundary}--\r\n"
    body = b"".join((preamble.encode(), file_bytes, closing.encode()))

    url = f"{path}?{params}" if params else path
    request_headers = {
        "Content-Type": f"multipart/form-data; boundary={boundary}",
        "Content-Length": str(len(body)),
    }

    conn = http.client.HTTPConnection(host, port, timeout=API_TIMEOUT)
    try:
        conn.request("POST", url, body=body, headers=request_headers)
        resp = conn.getresponse()
        raw = resp.read()
        if resp.status != 200:
            return f"<!-- API {resp.status} -->"
        parsed = json.loads(raw)
        # First non-empty of the two known result fields wins.
        for field in ("markdown", "content"):
            text = parsed.get(field)
            if text:
                return text
        return str(parsed)
    finally:
        conn.close()
|
||||
|
||||
|
||||
def convert_markitdown(src: Path) -> str:
    """Convert *src* through the MarkItDown API with ``use_llm=true``.

    Never raises: any failure is returned as a ``<!-- MarkItDown: ... -->``
    marker string so the caller can still write an output file.
    """
    try:
        markdown = _post(MARKITDOWN_HOST, MARKITDOWN_PORT, "/convert", src, "use_llm=true")
    except Exception as exc:
        markdown = f"<!-- MarkItDown: {exc} -->"
    return markdown
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Vision LLM — llama-server via SSH tunnel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _vision_available() -> bool:
    """Return True when the vision server answers ``GET /health``.

    Any HTTP response counts as available (the status code is not inspected);
    connection errors and timeouts (3 s) count as unavailable.
    """
    conn = None
    try:
        conn = http.client.HTTPConnection(VISION_HOST, VISION_PORT, timeout=3)
        conn.request("GET", "/health")
        conn.getresponse().read()
        return True
    except Exception:
        return False
    finally:
        # Close on every path, so a failed probe does not leave a socket open.
        if conn is not None:
            conn.close()
|
||||
|
||||
|
||||
def _vision_ocr(src: Path) -> str:
    """Send image to llama-server, return markdown. Returns '' on failure.

    The image is converted to RGB, upscaled by an integer factor when it is
    narrower than 1200 px, re-encoded as JPEG and posted to
    ``/v1/chat/completions`` with a table-transcription prompt. The reply is
    passed through ``_dedup_rows``.

    Failures while reading or encoding the image also return '' (and are
    logged), so the caller can move on to its next OCR fallback.
    """
    import base64
    import io

    from PIL import Image

    try:
        # convert() returns a new image, so the source file can be closed at once.
        with Image.open(src) as opened:
            img = opened.convert("RGB")
        if img.width < 1200:
            scale = max(2, 1200 // img.width)
            img = img.resize((img.width * scale, img.height * scale), Image.LANCZOS)
        buf = io.BytesIO()
        img.save(buf, "JPEG", quality=92)
    except Exception as e:
        log.warning("vision image prep failed for %s: %s", src.name, e)
        return ""
    b64 = base64.standard_b64encode(buf.getvalue()).decode()

    payload = json.dumps({
        "messages": [{"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            {"type": "text", "text": (
                "Read every cell in this table carefully. "
                "Output a Markdown table with ALL rows. "
                "Do not skip or duplicate any row. "
                "Return only the Markdown table."
            )},
        ]}],
        "max_tokens": 700,
        "temperature": 0,
    }).encode()

    conn = http.client.HTTPConnection(VISION_HOST, VISION_PORT, timeout=120)
    try:
        conn.request("POST", "/v1/chat/completions", body=payload,
                     headers={"Content-Type": "application/json",
                              "Content-Length": str(len(payload))})
        resp = conn.getresponse()
        d = json.loads(resp.read())
        if resp.status != 200:
            log.warning("vision %s: %s", resp.status, str(d)[:80])
            return ""
        content = d.get("choices", [{}])[0].get("message", {}).get("content", "").strip()
        return _dedup_rows(content)
    except Exception as e:
        # Covers network errors, non-JSON bodies and unexpected response shapes.
        log.warning("vision error: %s", e)
        return ""
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _dedup_rows(md: str) -> str:
    """Drop repeated Markdown table rows, keeping the first occurrence.

    Lines are compared after stripping surrounding whitespace. Only lines that
    start with ``|`` are ever dropped; other repeated lines are kept. The kept
    lines are re-joined with ``\\n`` in their original form.
    """
    already_seen = set()
    kept = []
    for raw_line in md.splitlines():
        normalized = raw_line.strip()
        is_repeated_row = normalized in already_seen and normalized.startswith("|")
        if not is_repeated_row:
            already_seen.add(normalized)
            kept.append(raw_line)
    return "\n".join(kept)
|
||||
|
||||
|
||||
def _is_data_image(src: Path) -> bool:
    """True for wide landscape images likely to contain table/text data.

    An image qualifies when it is at least 300 px wide, at least 60 px high
    and has a width/height ratio of 1.5 or more. Files that cannot be opened
    as images return False.
    """
    from PIL import Image

    try:
        # Context manager releases the file handle as soon as the size is read.
        with Image.open(src) as img:
            w, h = img.size
    except Exception:
        return False
    return w >= 300 and h >= 60 and w / h >= 1.5
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PaddleOCR fallback — lightweight HTTP call to paddleocr_server on AI server
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _paddle_available() -> bool:
    """Return True when the PaddleOCR server answers ``GET /``.

    Any HTTP response counts as available (the status code is not inspected);
    connection errors and timeouts (2 s) count as unavailable.
    """
    conn = None
    try:
        conn = http.client.HTTPConnection(PADDLE_HOST, PADDLE_PORT, timeout=2)
        conn.request("GET", "/")
        conn.getresponse().read()
        return True
    except Exception:
        return False
    finally:
        # Close on every path, so a failed probe does not leave a socket open.
        if conn is not None:
            conn.close()
|
||||
|
||||
|
||||
def _paddle_ocr(src: Path) -> str:
    """OCR *src* through the PaddleOCR HTTP server and return its markdown.

    The raw file bytes are base64-encoded and posted as JSON
    (``{"image_base64": ...}``) to ``/ocr``. Returns the ``markdown`` field of
    the JSON reply, or '' on any failure: unreadable file, network error,
    non-200 status, or a non-JSON body. Failures are logged as warnings.
    """
    import base64

    conn = http.client.HTTPConnection(PADDLE_HOST, PADDLE_PORT, timeout=60)
    try:
        data = base64.standard_b64encode(src.read_bytes()).decode()
        payload = json.dumps({"image_base64": data}).encode()
        conn.request("POST", "/ocr", body=payload,
                     headers={"Content-Type": "application/json",
                              "Content-Length": str(len(payload))})
        resp = conn.getresponse()
        raw = resp.read()
        if resp.status != 200:
            # Report server-side errors instead of silently returning ''.
            log.warning("paddle %s: %s", resp.status, raw[:80])
            return ""
        return json.loads(raw).get("markdown") or ""
    except Exception as e:
        log.warning("paddle error: %s", e)
        return ""
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tesseract OCR fallback via Docling docker container
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _tesseract_ocr(src: Path) -> str:
    """OCR *src* with Docling's Tesseract CLI pipeline inside the Docling container.

    Steps: write a 3x upscaled PNG next to *src*, ``docker cp`` it into
    ``DOCLING_CONTAINER``, run a short Docling script there with
    ``docker exec``, and return the script's stdout with "Loading weights"
    lines removed.

    Returns '' when the copy into the container fails. Raises
    ``subprocess.TimeoutExpired`` if a docker command exceeds its timeout and
    propagates image-decoding errors; the caller is expected to handle both.
    """
    from PIL import Image

    scaled = src.with_suffix(".ocr.png")
    with Image.open(src) as img:
        img.resize((img.width * 3, img.height * 3), Image.LANCZOS).save(scaled)

    container_path = f"/tmp/{scaled.name}"
    try:
        cp = subprocess.run(
            ["docker", "cp", str(scaled), f"{DOCLING_CONTAINER}:{container_path}"],
            capture_output=True, timeout=120,
        )
    finally:
        # The upscaled copy is only needed for the transfer.
        scaled.unlink(missing_ok=True)
    if cp.returncode != 0:
        log.warning("tesseract: docker cp failed for %s", scaled.name)
        return ""

    # The image path is passed as argv[1] rather than spliced into the source,
    # so unusual file names cannot alter the script.
    script = """
import sys
from docling.document_converter import DocumentConverter, ImageFormatOption, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
from docling.datamodel.base_models import InputFormat
opts = PdfPipelineOptions(do_ocr=True, ocr_options=TesseractCliOcrOptions())
conv = DocumentConverter(format_options={
    InputFormat.IMAGE: ImageFormatOption(pipeline_options=opts),
    InputFormat.PDF: PdfFormatOption(pipeline_options=opts),
})
print(conv.convert(sys.argv[1]).document.export_to_markdown())
"""
    try:
        run = subprocess.run(
            ["docker", "exec", DOCLING_CONTAINER, "python3", "-c", script, container_path],
            capture_output=True, text=True, timeout=120,
        )
    finally:
        # Best-effort cleanup so copied images do not pile up in the container.
        try:
            subprocess.run(["docker", "exec", DOCLING_CONTAINER, "rm", "-f", container_path],
                           capture_output=True, timeout=30)
        except Exception as e:
            log.warning("tesseract: cleanup of %s failed: %s", container_path, e)

    if run.returncode != 0:
        log.warning("tesseract: converter exited %s: %s", run.returncode, run.stderr.strip()[-200:])
    lines = [l for l in run.stdout.splitlines() if not l.startswith("Loading weights")]
    return "\n".join(lines).strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Docling convert (for non-image files)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def convert_docling(src: Path) -> str:
    """Convert *src* to Markdown on the Docling side of the comparison.

    Unsupported extensions return ``<!-- unsupported: <suffix> -->``.
    Images go through an OCR chain and stop at the first non-empty result:
    vision LLM (only for images accepted by ``_is_data_image`` while the
    vision server is reachable), then PaddleOCR (when reachable), then
    Tesseract. Every other supported file is posted to the Docling API with
    ``use_llm=false``. Any exception is returned as a
    ``<!-- Docling: ... -->`` marker instead of being raised.
    """
    ext = src.suffix.lower()
    if ext not in DOCLING_SUPPORTED:
        return f"<!-- unsupported: {src.suffix} -->"
    try:
        if ext not in IMAGE_EXTS:
            return _post(DOCLING_HOST, DOCLING_PORT, "/convert", src, "use_llm=false")

        # 1. Vision LLM for data images (tables/screenshots)
        text = _vision_ocr(src) if _is_data_image(src) and _vision_available() else ""
        # 2. PaddleOCR fallback (better than Tesseract for dense text)
        if not text and _paddle_available():
            text = _paddle_ocr(src)
        # 3. Tesseract last resort
        return text or _tesseract_ocr(src)
    except Exception as exc:
        return f"<!-- Docling: {exc} -->"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Email helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _email_id(name: str) -> str:
    """Return the part of a mail file name that precedes its first comma."""
    head, _sep, _rest = name.partition(",")
    return head
|
||||
|
||||
|
||||
def _html_body(msg) -> str | None:
|
||||
for part in msg.walk():
|
||||
if part.get_content_type() == "text/html":
|
||||
p = part.get_payload(decode=True)
|
||||
if p:
|
||||
return p.decode(part.get_content_charset() or "utf-8", errors="replace")
|
||||
for part in msg.walk():
|
||||
if part.get_content_type() == "text/plain":
|
||||
p = part.get_payload(decode=True)
|
||||
if p:
|
||||
return p.decode(part.get_content_charset() or "utf-8", errors="replace")
|
||||
return None
|
||||
|
||||
|
||||
def _write_md(path: Path, content: str):
    """Write *content* to *path* as UTF-8, creating parent directories.

    Logs the written path relative to the parent of ``OUTPUT``.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
    shown = path.relative_to(OUTPUT.parent)
    log.info(" wrote %s", shown)
|
||||
|
||||
|
||||
def _write_jpg(dest_dir: Path, src: Path):
    """Save *src* into *dest_dir* as ``<stem>.jpg``.

    JPEG sources are copied unchanged. Other images are converted to RGB and
    re-encoded as JPEG. When that conversion fails, the original file is
    copied under its own name instead and the failure is logged. The final
    path is logged relative to the parent of ``OUTPUT``.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    dest = dest_dir / (src.stem + ".jpg")
    if src.suffix.lower() in {".jpg", ".jpeg"}:
        shutil.copy2(src, dest)
    else:
        from PIL import Image

        # Remember whether the target pre-existed, so a failed conversion
        # never deletes a file this call did not create.
        existed = dest.exists()
        try:
            with Image.open(src) as img:
                img.convert("RGB").save(dest, "JPEG")
        except Exception as e:
            log.warning(" JPEG conversion failed for %s (%s); keeping original", src.name, e)
            if not existed:
                # Drop any partially written .jpg.
                dest.unlink(missing_ok=True)
            dest = dest_dir / src.name
            shutil.copy2(src, dest)
    log.info(" saved %s", dest.relative_to(OUTPUT.parent))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process one email
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _named_parts(msg):
    """Yield ``(safe_name, payload, part)`` for each named, non-empty MIME part.

    The name comes from the part's filename, else its ``name`` parameter.
    Every character other than alphanumerics, ``.``, ``_`` and ``-`` is
    replaced with ``_``.
    """
    from email.utils import collapse_rfc2231_value

    for part in msg.walk():
        fn = part.get_filename() or part.get_param("name")
        if isinstance(fn, tuple):
            # get_param() returns a 3-tuple for RFC 2231 encoded values.
            fn = collapse_rfc2231_value(fn)
        if not fn:
            continue
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        safe = "".join(c if c.isalnum() or c in "._-" else "_" for c in fn)
        yield safe, payload, part


def process_email(filepath: Path, msg=None):
    """Export one email: body and attachments, via MarkItDown and Docling.

    Args:
        filepath: Mail file; its name (up to the first comma) becomes the
            output folder name under ``OUTPUT``.
        msg: Already-parsed message for *filepath*. Parsed from the file with
            the compat32 policy when omitted.

    Output layout under ``OUTPUT/<email_id>/``:
        markitdown/body.md, docling/body.md          converted body
        markitdown/<stem>.jpg, docling/<stem>.jpg    image attachments
        docling/<stem>.md                            OCR text of an image (if any)
        markitdown/<stem>.md, docling/<stem>.md      other attachments
    """
    if msg is None:
        with filepath.open("rb") as f:
            msg = email.message_from_binary_file(f, policy=email.policy.compat32)

    eid = _email_id(filepath.name)
    # str(): under compat32 a header holding raw 8-bit bytes is returned as a
    # Header object, which cannot be sliced.
    log.info("Processing [%s] %s", eid[:30], str(msg.get("subject", ""))[:60])

    md_dir = OUTPUT / eid / "markitdown"
    doc_dir = OUTPUT / eid / "docling"

    with tempfile.TemporaryDirectory() as _tmp:
        tmp = Path(_tmp)

        # Build CID map and extract all parts to tmp
        cid_map: dict[str, str] = {}
        for safe, payload, part in _named_parts(msg):
            (tmp / safe).write_bytes(payload)
            ext = Path(safe).suffix.lower()
            # Images are exported as <stem>.jpg, so links must use that name.
            out_name = Path(safe).stem + ".jpg" if ext in IMAGE_EXTS else safe
            cid = part.get("Content-ID", "").strip("<>")
            if cid:
                cid_map[cid] = out_name

        def fix_cid(html: str) -> str:
            """Rewrite ``cid:<id>`` references to exported file names; unknown ids stay."""
            return re.sub(r'cid:([^\s"\'>\)]+)',
                          lambda m: cid_map.get(m.group(1), f"cid:{m.group(1)}"), html)

        # Body
        body_html = _html_body(msg)
        if body_html:
            html_fixed = fix_cid(body_html)
            html_file = tmp / "body.html"
            html_file.write_text(html_fixed, encoding="utf-8")
            _write_md(md_dir / "body.md", convert_markitdown(html_file))
            docling_body = convert_docling(html_file)

            # Replace each "<!-- image -->" placeholder in the Docling output
            # with the next <img src> from the HTML, in document order.
            srcs = iter(re.findall(r'<img[^>]+src=["\']?([^\s"\'>\)]+)', html_fixed, re.IGNORECASE))

            def _next_image(match):
                src = next(srcs, None)
                if not src:
                    # More placeholders than <img> tags: keep the placeholder
                    # rather than emit a link with an empty target.
                    return match.group(0)
                # NOTE(review): link text reconstructed as a Markdown image
                # with alt text "image" — confirm against the original script.
                return f"![image]({src})"

            docling_body = re.sub(r'<!-- image -->', _next_image, docling_body)
            _write_md(doc_dir / "body.md", docling_body)
        else:
            log.warning(" no body")

        # Attachments
        for safe, payload, _part in _named_parts(msg):
            att = tmp / safe
            # Re-write just before use: same-named parts (or body.html) may
            # have overwritten the file since the first pass.
            att.write_bytes(payload)
            ext = Path(safe).suffix.lower()
            stem = Path(safe).stem
            if ext in IMAGE_EXTS:
                _write_jpg(md_dir, att)
                _write_jpg(doc_dir, att)
                ocr = convert_docling(att)
                # Skip empty results and "<!-- ... -->" error markers.
                if ocr and not ocr.startswith("<!--"):
                    _write_md(doc_dir / f"{stem}.md", ocr)
            else:
                _write_md(md_dir / f"{stem}.md", convert_markitdown(att))
                _write_md(doc_dir / f"{stem}.md", convert_docling(att))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main() -> None:
    """Convert every email file found directly under ``MAILDIR``.

    Exits through ``sys.exit`` with a message when ``MAILDIR`` does not exist.
    Each regular file is parsed with the ``compat32`` policy and handed to
    ``process_email``. A failure on one email is logged and the run continues
    with the next file.
    """
    if not MAILDIR.exists():
        sys.exit(f"Maildir not found: {MAILDIR}")
    files = [f for f in sorted(MAILDIR.iterdir()) if f.is_file()]
    log.info("Found %d emails — vision: %s:%s", len(files), VISION_HOST, VISION_PORT)
    with tqdm(files, unit="email", desc="Exporting", ncols=80) as bar:
        for fp in bar:
            try:
                with fp.open("rb") as f:
                    msg = email.message_from_binary_file(f, policy=email.policy.compat32)
                # Under compat32 a header holding raw non-ASCII bytes comes back
                # as an email.header.Header, which cannot be sliced. Coerce to
                # str first so a progress label never causes an email to be
                # skipped, and collapse folded-header whitespace so the bar
                # stays on one line.
                subject = " ".join(str(msg.get("subject", "")).split())
                bar.set_postfix_str(subject[:40], refresh=False)
                process_email(fp, msg)
            except Exception as e:
                # Deliberate best-effort boundary: one bad email must not abort
                # the whole export.
                log.error("Failed %s: %s", fp.name[:40], e)
    log.info("Done → %s", OUTPUT)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -10,6 +10,10 @@
|
|||
reverse_proxy api-docling:8000
|
||||
}
|
||||
|
||||
handle_path /api/unlimited-ocr/* {
|
||||
reverse_proxy api-unlimited-ocr:8000
|
||||
}
|
||||
|
||||
# Static files with SPA fallback
|
||||
handle {
|
||||
root * /srv
|
||||
|
|
|
|||
|
|
@ -0,0 +1,279 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="vi">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Email Convert — MarkItDown vs Docling</title>
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" />
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css" rel="stylesheet" />
|
||||
<link href="/style.css" rel="stylesheet" />
|
||||
<script>
|
||||
/**
 * Alpine.js component factory for the email-convert page.
 *
 * Holds the server-side file browser state (root listing plus the listings of
 * expanded directories), the current file selection, the conversion options,
 * and one result pane per converter (`md` = MarkItDown, `dl` = Docling).
 */
function App() {
  return {
    basePath: '/workspace/emailfiles',
    rootEntries: [],
    expanded: {},          // dir path -> entries of that (expanded) directory
    browserLoading: false,
    selectedPath: null,
    selectedName: '',
    useLlm: true,
    dlFormat: 'markdown',
    converting: false,
    md: pane('md', 'MarkItDown', 'bg-primary', 'text-primary'),
    dl: pane('dl', 'Docling', 'bg-success', 'text-success'),

    /** Flattened, depth-annotated list of every entry currently visible in the tree. */
    get visibleEntries() {
      const out = []
      const walk = (entries, depth) => {
        for (const e of entries) {
          out.push({ ...e, depth })
          if (e.type === 'dir' && this.expanded[e.path])
            walk(this.expanded[e.path], depth + 1)
        }
      }
      walk(this.rootEntries, 0)
      return out
    },

    /** Load the root listing when the component starts. */
    async init() { await this.loadDir(this.basePath, true) },

    /**
     * Load a directory listing from the browse endpoint.
     *
     * For a non-root path that is already expanded this collapses it instead.
     * Failures are logged to the console and leave the current tree untouched.
     */
    async loadDir(path, isRoot = false) {
      if (!isRoot && this.expanded[path]) {
        const copy = { ...this.expanded }; delete copy[path]; this.expanded = copy; return
      }
      if (isRoot) this.browserLoading = true
      try {
        const res = await fetch(`/api/markitdown/browse?path=${encodeURIComponent(path)}`)
        if (!res.ok) throw new Error(`HTTP ${res.status}`)
        const d = await res.json()
        if (d.type === 'dir') {
          const entries = d.entries || []
          if (isRoot) this.rootEntries = entries
          else this.expanded = { ...this.expanded, [path]: entries }
        }
      } catch (err) {
        // Keep the browser usable, but do not hide the failure completely.
        console.error('Failed to load directory', path, err)
      } finally {
        this.browserLoading = false
      }
    },

    /** Reset the tree and reload it from the currently selected base path. */
    async changeBase() { this.expanded = {}; this.rootEntries = []; await this.loadDir(this.basePath, true) },

    /** Click handler: toggle a directory, or select a file. */
    onEntry(e) {
      if (e.type === 'dir') this.loadDir(e.path)
      else { this.selectedPath = e.path; this.selectedName = e.name }
    },

    /** Bootstrap-icon classes for an entry, chosen by type and file extension. */
    fileIcon(e) {
      if (e.type === 'dir') return this.expanded[e.path] ? 'bi-folder2-open text-warning' : 'bi-folder2 text-warning'
      const m = { '.pdf':'bi-file-earmark-pdf text-danger','.docx':'bi-file-earmark-word text-primary',
                  '.xlsx':'bi-file-earmark-excel text-success','.html':'bi-file-earmark-code text-info',
                  '.htm':'bi-file-earmark-code text-info','.md':'bi-markdown text-secondary',
                  '.txt':'bi-file-earmark-text text-secondary','.png':'bi-file-earmark-image text-info',
                  '.jpg':'bi-file-earmark-image','.jpeg':'bi-file-earmark-image',
                  '.csv':'bi-file-earmark-spreadsheet text-success' }
      return m[e.ext] || 'bi-file-earmark text-secondary'
    },

    /** Run both converters on the selected file in parallel and fill both panes. */
    async convert() {
      if (!this.selectedPath || this.converting) return
      this.converting = true
      for (const p of [this.md, this.dl])
        Object.assign(p, { loading: true, done: false, content: '', error: null, status: 'Đang xử lý...' })

      // Snapshot the format now: the user may change the dropdown while the
      // request is in flight, and download() must match what was requested.
      const dlFormat = this.dlFormat
      const body = { path: this.selectedPath, use_llm: this.useLlm }
      try {
        const [r1, r2] = await Promise.allSettled([
          api('/api/markitdown/convert-path', body),
          api('/api/docling/convert-path', { ...body, output_format: dlFormat }),
        ])
        applyResult(this.md, r1); applyResult(this.dl, r2)
        this.dl.format = dlFormat
      } finally {
        // Always release the button, even if rendering a result throws.
        this.converting = false
      }
    },

    /** Download the content of pane `key` ('md' or 'dl') as a file. */
    download(key) {
      const p = this[key]
      if (!p || !p.content) return
      const base = this.selectedName.replace(/\.[^.]+$/, '')
      // Extension and MIME type follow the format the pane was converted with;
      // panes without a recorded format are Markdown.
      const kinds = {
        markdown: ['md', 'text/markdown'],
        json: ['json', 'application/json'],
        html: ['html', 'text/html'],
        text: ['txt', 'text/plain'],
      }
      const [ext, mime] = kinds[p.format] || kinds.markdown
      const a = Object.assign(document.createElement('a'), {
        href: URL.createObjectURL(new Blob([p.content], { type: mime })),
        download: `${base}_${key}.${ext}`
      })
      a.click()
      // Revoke on a later tick so the browser has started the download first.
      setTimeout(() => URL.revokeObjectURL(a.href), 0)
    },
  }
}
|
||||
|
||||
/**
 * Build the initial state object for one converter result pane.
 *
 * @param {string} id           Pane key.
 * @param {string} label        Display name of the converter.
 * @param {string} badgeClass   CSS class for the status dot.
 * @param {string} spinnerClass CSS class for the spinner.
 * @returns {object} A fresh pane in its idle state, showing the "raw" tab.
 */
function pane(id, label, badgeClass, spinnerClass) {
  const identity = { id: id, label: label, badgeClass: badgeClass, spinnerClass: spinnerClass }
  const idleState = {
    tab: 'raw',
    loading: false,
    done: false,
    error: null,
    content: '',
    preview: '',
    llmEnabled: false,
    ms: 0,
    status: '',
  }
  return Object.assign({}, identity, idleState)
}
|
||||
|
||||
/**
 * POST `body` as JSON to `url` and time the round trip.
 *
 * @param {string} url
 * @param {object} body JSON-serializable request payload.
 * @returns {Promise<{data: any, ms: number}>} Parsed JSON response and the
 *   elapsed milliseconds until the response headers arrived.
 * @throws {Error} On a non-2xx response, with a human-readable message.
 */
async function api(url, body) {
  const t0 = performance.now()
  const res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(body) })
  const ms = Math.round(performance.now() - t0)
  if (!res.ok) {
    // The error body may be non-JSON, may carry `detail` as a string or as a
    // structured value, or may use `message` / `error` instead. Always end up
    // with a non-empty string.
    const payload = await res.json().catch(() => null)
    const raw = payload && (payload.detail ?? payload.message ?? payload.error)
    let message
    if (typeof raw === 'string' && raw) message = raw
    else if (raw != null) message = JSON.stringify(raw)
    else message = res.statusText || `HTTP ${res.status}`
    throw new Error(message)
  }
  return { data: await res.json(), ms }
}
|
||||
|
||||
/**
 * Copy one `Promise.allSettled` outcome into a result pane.
 *
 * @param {object} pane   Pane state object to mutate.
 * @param {object} result Settled outcome: `{status:'fulfilled', value:{data, ms}}`
 *   or `{status:'rejected', reason}`.
 */
function applyResult(pane, result) {
  if (result.status === 'fulfilled') {
    const { data, ms } = result.value
    const raw = data.markdown || data.content || ''
    // marked.parse() only accepts strings; show structured content as JSON.
    pane.content = typeof raw === 'string' ? raw : JSON.stringify(raw, null, 2)
    // NOTE(security): the preview is injected into the page via x-html and
    // marked does not sanitize its output, so HTML inside a converted email
    // is rendered as-is. Sanitize here if untrusted mail is processed.
    pane.preview = marked.parse(pane.content)
    pane.llmEnabled = !!data.llm_enabled
    pane.ms = ms; pane.status = `✅ ${ms} ms`; pane.error = null
  } else {
    const reason = result.reason
    const message = (reason && reason.message) || String(reason)
    // Clear everything left over from a previous successful run so that no
    // stale preview, timing or LLM badge remains next to the error.
    pane.content = ''; pane.preview = ''; pane.llmEnabled = false; pane.ms = 0
    pane.error = message; pane.status = `❌ ${message}`
  }
  pane.loading = false; pane.done = true
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app-nav"></div>
|
||||
<script src="/layout.js"></script>
|
||||
|
||||
<div x-data="App()" x-init="init()">
|
||||
<div class="container-xl py-4">
|
||||
<div class="row g-3">
|
||||
|
||||
<!-- ── File browser ──────────────────────────────────────── -->
|
||||
<div class="col-lg-4">
|
||||
<div class="card shadow-sm h-100">
|
||||
<div class="card-header py-2 d-flex align-items-center gap-2">
|
||||
<i class="bi bi-hdd-fill text-secondary"></i>
|
||||
<span class="fw-semibold small flex-grow-1">Duyệt file trên server</span>
|
||||
<button class="btn btn-sm btn-outline-secondary py-0 px-2" @click="changeBase()">
|
||||
<i class="bi bi-arrow-clockwise"></i>
|
||||
</button>
|
||||
</div>
|
||||
<div class="px-3 pt-2 pb-1 border-bottom">
|
||||
<select class="form-select form-select-sm" x-model="basePath" @change="changeBase()">
|
||||
<option value="/workspace/emailfiles">emailfiles/</option>
|
||||
<option value="/workspace/emailfiles/output">emailfiles/output/</option>
|
||||
<option value="/workspace">/ (root project)</option>
|
||||
</select>
|
||||
<div class="path-badge text-muted mt-1" x-text="basePath"></div>
|
||||
</div>
|
||||
<div class="card-body p-0">
|
||||
<div class="FileBrowser px-1 py-1">
|
||||
<div x-show="browserLoading" class="text-center text-muted py-4 small">
|
||||
<div class="spinner-border spinner-border-sm mb-2"></div><br>Đang tải...
|
||||
</div>
|
||||
<template x-if="!browserLoading">
|
||||
<div>
|
||||
<template x-for="e in visibleEntries" :key="e.path">
|
||||
<div class="FileEntry"
|
||||
:class="[e.type, selectedPath === e.path ? 'selected' : '', 'indent-' + Math.min(e.depth, 4)]"
|
||||
@click="onEntry(e)" :title="e.path">
|
||||
<i class="bi flex-shrink-0" :class="fileIcon(e)"></i>
|
||||
<span class="text-truncate" x-text="e.name"></span>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- ── Controls + Results ───────────────────────────────── -->
|
||||
<div class="col-lg-8">
|
||||
|
||||
<!-- Controls bar -->
|
||||
<div class="card shadow-sm mb-3">
|
||||
<div class="card-body py-2">
|
||||
<div class="d-flex flex-wrap align-items-center gap-2">
|
||||
<i class="bi bi-file-earmark-text fs-5 text-secondary"></i>
|
||||
<div class="flex-grow-1 overflow-hidden">
|
||||
<div class="fw-semibold small text-truncate" :class="selectedPath ? '' : 'text-muted'"
|
||||
x-text="selectedName || 'Chưa chọn file'"></div>
|
||||
<div class="path-badge text-muted" x-text="selectedPath"></div>
|
||||
</div>
|
||||
<div class="d-flex align-items-center gap-2 ms-auto flex-shrink-0">
|
||||
<label class="form-label mb-0 small fw-medium">Docling</label>
|
||||
<select class="form-select form-select-sm" x-model="dlFormat" style="width:auto">
|
||||
<option value="markdown">Markdown</option>
|
||||
<option value="json">JSON</option>
|
||||
<option value="html">HTML</option>
|
||||
<option value="text">Plain Text</option>
|
||||
</select>
|
||||
<div class="form-check form-switch mb-0">
|
||||
<input class="form-check-input" type="checkbox" x-model="useLlm" id="LlmToggle" />
|
||||
<label class="form-check-label small fw-medium" for="LlmToggle">LLM</label>
|
||||
</div>
|
||||
<button class="btn btn-primary btn-sm" :disabled="!selectedPath || converting" @click="convert()">
|
||||
<span x-show="converting" class="spinner-border spinner-border-sm me-1"></span>
|
||||
<i x-show="!converting" class="bi bi-play-fill me-1"></i>Chuyển đổi
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Status -->
|
||||
<div class="row g-2 mb-3" x-show="md.done || dl.done || converting">
|
||||
<template x-for="key in ['md','dl']" :key="key">
|
||||
<div class="col-6">
|
||||
<div class="card" :class="'border-' + $data[key].spinnerClass.replace('text-','') + '-subtle'">
|
||||
<div class="card-body py-2 px-3 d-flex align-items-center gap-2">
|
||||
<span class="badge rounded-circle p-1" :class="$data[key].badgeClass"> </span>
|
||||
<span class="small fw-semibold flex-grow-1" x-text="$data[key].label"></span>
|
||||
<span x-show="$data[key].loading" class="spinner-border spinner-border-sm" :class="$data[key].spinnerClass"></span>
|
||||
<small class="text-muted" x-text="$data[key].status"></small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<!-- Result cards -->
|
||||
<div class="row g-3">
|
||||
<template x-for="key in ['md','dl']" :key="key">
|
||||
<div class="col-md-6">
|
||||
<div class="card shadow-sm h-100">
|
||||
<div class="card-header d-flex justify-content-between align-items-center py-2">
|
||||
<span class="fw-semibold small d-flex align-items-center gap-2">
|
||||
<span class="badge rounded-circle p-1" :class="$data[key].badgeClass"> </span>
|
||||
<span x-text="$data[key].label"></span>
|
||||
<span x-show="$data[key].llmEnabled" class="badge bg-primary-subtle text-primary" style="font-size:.65rem">🤖 LLM</span>
|
||||
</span>
|
||||
<div class="d-flex align-items-center gap-1">
|
||||
<button x-show="$data[key].content" class="btn btn-outline-secondary btn-sm py-0 px-2"
|
||||
@click="download(key)"><i class="bi bi-download"></i></button>
|
||||
<ul class="nav nav-tabs card-header-tabs border-0">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-2 small" :class="{ active: $data[key].tab==='raw' }"
|
||||
href="#" @click.prevent="$data[key].tab='raw'">Raw</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-2 small" :class="{ active: $data[key].tab==='preview' }"
|
||||
href="#" @click.prevent="$data[key].tab='preview'">Preview</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body p-0">
|
||||
<div x-show="$data[key].loading" class="text-center text-muted py-5 small">
|
||||
<div class="spinner-border spinner-border-sm mb-2" :class="$data[key].spinnerClass"></div>
|
||||
<br>Đang xử lý...
|
||||
</div>
|
||||
<div x-show="!$data[key].loading && $data[key].error"
|
||||
class="alert alert-danger m-3 small" x-text="$data[key].error"></div>
|
||||
<pre x-show="!$data[key].loading && !$data[key].error && $data[key].tab==='raw'"
|
||||
class="ResultPre p-3 m-0" x-text="$data[key].content"></pre>
|
||||
<div x-show="!$data[key].loading && !$data[key].error && $data[key].tab==='preview'"
|
||||
class="PreviewPane" x-html="$data[key].preview"></div>
|
||||
<div x-show="!$data[key].loading && !$data[key].error && !$data[key].content"
|
||||
class="text-center text-muted py-5 small">
|
||||
<i class="bi bi-hdd fs-3 d-block mb-2"></i>Chọn file bên trái
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked@13/marked.min.js"></script>
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.14.9/dist/cdn.min.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
1257
frontend/index.html
1257
frontend/index.html
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,33 @@
|
|||
/* Shared layout — inject navbar, mark active tab by current pathname */
|
||||
(() => {
  // Navbar tabs, in display order.
  const tabs = [
    { href: '/', icon: 'bi-upload', label: 'Upload File' },
    { href: '/email-convert.html', icon: 'bi-envelope', label: 'Email Convert' },
  ];

  // Current pathname without a trailing slash; the site root stays "/".
  const currentPath = location.pathname.replace(/\/$/, '') || '/';

  // The root tab matches only the root path; every other tab matches by
  // prefix, with its ".html" removed.
  const isActive = (href) => {
    if (href === '/') return currentPath === '/';
    return currentPath.startsWith(href.replace('.html', ''));
  };

  let links = '';
  for (const tab of tabs) {
    const activeClass = isActive(tab.href) ? ' active' : '';
    links += `<li class="nav-item">
<a class="nav-link py-1 px-3 small${activeClass}" href="${tab.href}">
<i class="bi ${tab.icon} me-1"></i>${tab.label}
</a>
</li>`;
  }

  const navbar = `
<nav class="navbar navbar-light bg-white border-bottom px-4 py-2 d-flex justify-content-between">
<div class="d-flex align-items-center gap-3">
<span class="navbar-brand fw-bold mb-0">
MarkItDown <span class="text-muted fw-normal">vs</span> Docling
<span class="text-muted fw-normal">vs</span> Unlimited-OCR
</span>
<ul class="nav nav-pills">${links}</ul>
</div>
<span class="badge bg-primary-subtle text-primary">Demo — LLM Input Processing</span>
</nav>`;

  // Replace the placeholder element with the navbar, if the page has one.
  const placeholder = document.getElementById('app-nav');
  if (!placeholder) return;
  placeholder.outerHTML = navbar;
})();
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/* ── Shared styles ─────────────────────────────────────────── */
|
||||
body { background: #f8f9fa; }
|
||||
|
||||
/* Upload zone */
|
||||
#UploadZone {
|
||||
border: 2px dashed #dee2e6;
|
||||
border-radius: .5rem;
|
||||
padding: 3rem 1.5rem;
|
||||
text-align: center;
|
||||
cursor: pointer;
|
||||
transition: border-color .2s, background .2s;
|
||||
}
|
||||
#UploadZone:hover, #UploadZone.dragover {
|
||||
border-color: #0d6efd;
|
||||
background: #f0f6ff;
|
||||
}
|
||||
#UploadZone input[type="file"] { display: none; }
|
||||
|
||||
/* Result panes */
|
||||
.ResultPre {
|
||||
max-height: 460px;
|
||||
overflow: auto;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
font-size: .78rem;
|
||||
background: #f8f9fa;
|
||||
}
|
||||
|
||||
/* github-markdown-css handles table/code/blockquote — only constrain height */
|
||||
.PreviewPane {
|
||||
max-height: 460px;
|
||||
overflow: auto;
|
||||
font-size: .85rem;
|
||||
}
|
||||
|
||||
/* Server file browser */
|
||||
.FileBrowser {
|
||||
height: 560px;
|
||||
overflow-y: auto;
|
||||
font-size: .82rem;
|
||||
}
|
||||
.FileEntry {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: .4rem;
|
||||
padding: .25rem .5rem;
|
||||
border-radius: .3rem;
|
||||
cursor: pointer;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
user-select: none;
|
||||
}
|
||||
.FileEntry:hover { background: #e9ecef; }
|
||||
.FileEntry.selected { background: #cfe2ff; color: #084298; font-weight: 500; }
|
||||
.FileEntry.dir { color: #495057; }
|
||||
.FileEntry.file { color: #212529; }
|
||||
.indent-1 { padding-left: 1.4rem; }
|
||||
.indent-2 { padding-left: 2.6rem; }
|
||||
.indent-3 { padding-left: 3.8rem; }
|
||||
.indent-4 { padding-left: 5rem; }
|
||||
|
||||
.path-badge { font-size: .72rem; font-family: monospace; word-break: break-all; }
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
indent_size = 4
|
||||
indent_style = space
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[*.{yml,yaml}]
|
||||
indent_size = 2
|
||||
|
||||
[{compose,docker-compose}.{yml,yaml}]
|
||||
indent_size = 4
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
APP_NAME=Laravel
|
||||
APP_ENV=local
|
||||
APP_KEY=
|
||||
APP_DEBUG=true
|
||||
APP_URL=http://localhost
|
||||
|
||||
APP_LOCALE=en
|
||||
APP_FALLBACK_LOCALE=en
|
||||
APP_FAKER_LOCALE=en_US
|
||||
|
||||
APP_MAINTENANCE_DRIVER=file
|
||||
# APP_MAINTENANCE_STORE=database
|
||||
|
||||
# PHP_CLI_SERVER_WORKERS=4
|
||||
|
||||
BCRYPT_ROUNDS=12
|
||||
|
||||
LOG_CHANNEL=stack
|
||||
LOG_STACK=single
|
||||
LOG_DEPRECATIONS_CHANNEL=null
|
||||
LOG_LEVEL=debug
|
||||
|
||||
DB_CONNECTION=sqlite
|
||||
# DB_HOST=127.0.0.1
|
||||
# DB_PORT=3306
|
||||
# DB_DATABASE=laravel
|
||||
# DB_USERNAME=root
|
||||
# DB_PASSWORD=
|
||||
|
||||
SESSION_DRIVER=database
|
||||
SESSION_LIFETIME=120
|
||||
SESSION_ENCRYPT=false
|
||||
SESSION_PATH=/
|
||||
SESSION_DOMAIN=null
|
||||
|
||||
BROADCAST_CONNECTION=log
|
||||
FILESYSTEM_DISK=local
|
||||
QUEUE_CONNECTION=database
|
||||
|
||||
CACHE_STORE=database
|
||||
# CACHE_PREFIX=
|
||||
|
||||
MEMCACHED_HOST=127.0.0.1
|
||||
|
||||
REDIS_CLIENT=phpredis
|
||||
REDIS_HOST=127.0.0.1
|
||||
REDIS_PASSWORD=null
|
||||
REDIS_PORT=6379
|
||||
|
||||
MAIL_MAILER=log
|
||||
MAIL_SCHEME=null
|
||||
MAIL_HOST=127.0.0.1
|
||||
MAIL_PORT=2525
|
||||
MAIL_USERNAME=null
|
||||
MAIL_PASSWORD=null
|
||||
MAIL_FROM_ADDRESS="hello@example.com"
|
||||
MAIL_FROM_NAME="${APP_NAME}"
|
||||
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_DEFAULT_REGION=us-east-1
|
||||
AWS_BUCKET=
|
||||
AWS_USE_PATH_STYLE_ENDPOINT=false
|
||||
|
||||
VITE_APP_NAME="${APP_NAME}"
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
* text=auto eol=lf
|
||||
|
||||
*.blade.php diff=html
|
||||
*.css diff=css
|
||||
*.html diff=html
|
||||
*.md diff=markdown
|
||||
*.php diff=php
|
||||
|
||||
/.github export-ignore
|
||||
CHANGELOG.md export-ignore
|
||||
.styleci.yml export-ignore
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
*.log
|
||||
.DS_Store
|
||||
.env
|
||||
.env.backup
|
||||
.env.production
|
||||
.phpactor.json
|
||||
.phpunit.result.cache
|
||||
/.codex
|
||||
/.cursor/
|
||||
/.idea
|
||||
/.nova
|
||||
/.phpunit.cache
|
||||
/.vscode
|
||||
/.zed
|
||||
/auth.json
|
||||
/node_modules
|
||||
/public/build
|
||||
/public/fonts-manifest.dev.json
|
||||
/public/hot
|
||||
/public/storage
|
||||
/storage/*.key
|
||||
/storage/pail
|
||||
/vendor
|
||||
_ide_helper.php
|
||||
Homestead.json
|
||||
Homestead.yaml
|
||||
Thumbs.db
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
ignore-scripts=true
|
||||
audit=true
|
||||
|
|
@ -0,0 +1,58 @@
|
|||
<p align="center"><a href="https://laravel.com" target="_blank"><img src="https://raw.githubusercontent.com/laravel/art/master/logo-lockup/5%20SVG/2%20CMYK/1%20Full%20Color/laravel-logolockup-cmyk-red.svg" width="400" alt="Laravel Logo"></a></p>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://github.com/laravel/framework/actions"><img src="https://github.com/laravel/framework/workflows/tests/badge.svg" alt="Build Status"></a>
|
||||
<a href="https://packagist.org/packages/laravel/framework"><img src="https://img.shields.io/packagist/dt/laravel/framework" alt="Total Downloads"></a>
|
||||
<a href="https://packagist.org/packages/laravel/framework"><img src="https://img.shields.io/packagist/v/laravel/framework" alt="Latest Stable Version"></a>
|
||||
<a href="https://packagist.org/packages/laravel/framework"><img src="https://img.shields.io/packagist/l/laravel/framework" alt="License"></a>
|
||||
</p>
|
||||
|
||||
## About Laravel
|
||||
|
||||
Laravel is a web application framework with expressive, elegant syntax. We believe development must be an enjoyable and creative experience to be truly fulfilling. Laravel takes the pain out of development by easing common tasks used in many web projects, such as:
|
||||
|
||||
- [Simple, fast routing engine](https://laravel.com/docs/routing).
|
||||
- [Powerful dependency injection container](https://laravel.com/docs/container).
|
||||
- Multiple back-ends for [session](https://laravel.com/docs/session) and [cache](https://laravel.com/docs/cache) storage.
|
||||
- Expressive, intuitive [database ORM](https://laravel.com/docs/eloquent).
|
||||
- Database agnostic [schema migrations](https://laravel.com/docs/migrations).
|
||||
- [Robust background job processing](https://laravel.com/docs/queues).
|
||||
- [Real-time event broadcasting](https://laravel.com/docs/broadcasting).
|
||||
|
||||
Laravel is accessible, powerful, and provides tools required for large, robust applications.
|
||||
|
||||
## Learning Laravel
|
||||
|
||||
Laravel has the most extensive and thorough [documentation](https://laravel.com/docs) and video tutorial library of all modern web application frameworks, making it a breeze to get started with the framework.
|
||||
|
||||
In addition, [Laracasts](https://laracasts.com) contains thousands of video tutorials on a range of topics including Laravel, modern PHP, unit testing, and JavaScript. Boost your skills by digging into our comprehensive video library.
|
||||
|
||||
You can also watch bite-sized lessons with real-world projects on [Laravel Learn](https://laravel.com/learn), where you will be guided through building a Laravel application from scratch while learning PHP fundamentals.
|
||||
|
||||
## Agentic Development
|
||||
|
||||
Laravel's predictable structure and conventions make it ideal for AI coding agents like Claude Code, Cursor, and GitHub Copilot. Install [Laravel Boost](https://laravel.com/docs/ai) to supercharge your AI workflow:
|
||||
|
||||
```bash
|
||||
composer require laravel/boost --dev
|
||||
|
||||
php artisan boost:install
|
||||
```
|
||||
|
||||
Boost provides your agent 15+ tools and skills that help agents build Laravel applications while following best practices.
|
||||
|
||||
## Contributing
|
||||
|
||||
Thank you for considering contributing to the Laravel framework! The contribution guide can be found in the [Laravel documentation](https://laravel.com/docs/contributions).
|
||||
|
||||
## Code of Conduct
|
||||
|
||||
In order to ensure that the Laravel community is welcoming to all, please review and abide by the [Code of Conduct](https://laravel.com/docs/contributions#code-of-conduct).
|
||||
|
||||
## Security Vulnerabilities
|
||||
|
||||
If you discover a security vulnerability within Laravel, please send an e-mail to Taylor Otwell via [taylor@laravel.com](mailto:taylor@laravel.com). All security vulnerabilities will be promptly addressed.
|
||||
|
||||
## License
|
||||
|
||||
The Laravel framework is open-sourced software licensed under the [MIT license](https://opensource.org/licenses/MIT).
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
<?php
|
||||
|
||||
namespace App\Http\Controllers;
|
||||
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Spatie\RouteAttributes\Attributes\Any;
|
||||
use Spatie\RouteAttributes\Attributes\Middleware;
|
||||
use Spatie\RouteAttributes\Attributes\Where;
|
||||
|
||||
/**
 * Forwards `/api/{service}/...` requests to the matching internal
 * conversion service and relays the upstream response.
 */
#[Middleware('web')]
class ApiProxyController extends Controller
{
    /** Public service slug => internal base URL. */
    private const SERVICES = [
        'markitdown' => 'http://api-markitdown:8000',
        'docling' => 'http://api-docling:8000',
        'unlimited-ocr' => 'http://api-unlimited-ocr:8000',
    ];

    /**
     * Proxy one request to an upstream service.
     *
     * A request carrying an uploaded `file` is re-sent as multipart/form-data
     * together with its scalar form fields; any other request is forwarded
     * with its raw body and Content-Type. Only the upstream body, status and
     * Content-Type are relayed back.
     *
     * @param  Request $request Incoming request.
     * @param  string  $service Key into SERVICES; unknown keys abort with 404.
     * @param  string  $path    Remainder of the URL, forwarded verbatim.
     */
    #[Any('/api/{service}/{path?}')]
    #[Where('path', '.*')]
    public function proxy(Request $request, string $service, string $path = '')
    {
        $base = self::SERVICES[$service] ?? abort(404, "Unknown service: $service");

        $url = rtrim($base, '/') . '/' . ltrim($path, '/');
        if ($qs = $request->getQueryString()) {
            $url .= '?' . $qs;
        }

        $http = Http::timeout(300)->withoutVerifying();

        try {
            if ($request->hasFile('file')) {
                $file = $request->file('file');

                // Stream the upload instead of loading it fully into memory.
                $stream = fopen($file->getRealPath(), 'r');
                if ($stream === false) {
                    return response()->json(['detail' => 'Uploaded file is not readable'], 400);
                }

                // Forward the accompanying form fields too; previously only
                // the file itself reached the upstream service. Restricted to
                // scalars because array values are not plain multipart fields.
                $fields = array_filter($request->post(), 'is_scalar');

                $response = $http
                    ->attach('file', $stream, $file->getClientOriginalName())
                    ->post($url, $fields);
            } else {
                $contentType = $request->header('Content-Type', 'application/json');
                $response = $http
                    ->withHeaders(['Content-Type' => $contentType])
                    ->withBody($request->getContent(), $contentType)
                    ->send($request->method(), $url);
            }
        } catch (\Illuminate\Http\Client\ConnectionException $e) {
            // Upstream down or timed out: answer with a gateway error in the
            // same `detail` shape the frontend reads, not an unhandled 500.
            return response()->json(['detail' => "Service unavailable: $service"], 502);
        }

        return response($response->body(), $response->status())
            ->header('Content-Type', $response->header('Content-Type', 'application/json'));
    }
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
<?php
|
||||
|
||||
namespace App\Http\Controllers;
|
||||
|
||||
use Illuminate\Http\Request;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Spatie\RouteAttributes\Attributes\Delete;
|
||||
use Spatie\RouteAttributes\Attributes\Get;
|
||||
use Spatie\RouteAttributes\Attributes\Middleware;
|
||||
use Spatie\RouteAttributes\Attributes\Patch;
|
||||
use Spatie\RouteAttributes\Attributes\Post;
|
||||
|
||||
/**
 * JSON CRUD endpoints for batch conversion jobs stored in the `batch_jobs`
 * table of the `db_markitdown` connection.
 */
#[Middleware('web')]
class BatchJobController extends Controller
{
    /** Database connection that holds the `batch_jobs` table. */
    private function db()
    {
        return DB::connection('db_markitdown');
    }

    /** List the 20 most recently created jobs, without per-file data. */
    #[Get('/batch-jobs')]
    public function index()
    {
        $jobs = $this->db()->table('batch_jobs')
            ->orderByDesc('created_at')
            ->limit(20)
            ->get()
            ->map(fn($j) => [
                'id' => $j->id,
                'dir_name' => $j->dir_name,
                'dir_path' => $j->dir_path,
                'output_base' => $j->output_base,
                'total' => $j->total,
                'done_count' => $j->done_count,
                'finished' => (bool)$j->finished,
                'settings' => json_decode($j->settings, true),
                'created_at' => $j->created_at,
            ]);

        return response()->json($jobs);
    }

    /** Create a job for the given directory and file list; returns its id. */
    #[Post('/batch-jobs')]
    public function store(Request $request)
    {
        $data = $request->validate([
            'dir_path' => 'required|string',
            'dir_name' => 'required|string',
            'output_base' => 'nullable|string',
            'settings' => 'nullable|array',
            'files' => 'required|array',
        ]);

        $id = $this->db()->table('batch_jobs')->insertGetId([
            'dir_path' => $data['dir_path'],
            'dir_name' => $data['dir_name'],
            'output_base' => $data['output_base'] ?? null,
            'settings' => json_encode($data['settings'] ?? []),
            'files' => json_encode($data['files']),
            // Cast to object so an empty result set is stored as "{}".
            'results' => json_encode((object)[]),
            'total' => \count($data['files']),
            'done_count' => 0,
            'finished' => false,
            'created_at' => now(),
            'updated_at' => now(),
        ]);

        return response()->json(['id' => $id]);
    }

    /** Return one job including its file list and per-file results. */
    #[Get('/batch-jobs/{id}')]
    public function show(int $id)
    {
        $job = $this->db()->table('batch_jobs')->where('id', $id)->first();
        if (!$job) return response()->json(['error' => 'Not found'], 404);

        return response()->json([
            'id' => $job->id,
            'dir_path' => $job->dir_path,
            'dir_name' => $job->dir_name,
            'output_base' => $job->output_base,
            'settings' => json_decode($job->settings, true),
            'files' => json_decode($job->files, true),
            'results' => json_decode($job->results, true),
            'total' => $job->total,
            'done_count' => $job->done_count,
            'finished' => (bool)$job->finished,
            'created_at' => $job->created_at,
        ]);
    }

    /**
     * Record the result for one file and/or set the job's `finished` flag.
     *
     * Input: `path` (string key into the results map), `entry` (array stored
     * under that key), `finished` (boolean, defaults to false). `done_count`
     * is recomputed as the number of results holding an `md` or `dl` key.
     */
    #[Patch('/batch-jobs/{id}')]
    public function update(Request $request, int $id)
    {
        $path = $request->input('path');
        $entry = $request->input('entry', []);
        $done = $request->boolean('finished', false);

        // The results map is read, modified and written back. Do it inside a
        // transaction with a row lock so concurrent PATCHes for different
        // files of the same job cannot overwrite each other's entries.
        $doneCount = $this->db()->transaction(function () use ($id, $path, $entry, $done) {
            $job = $this->db()->table('batch_jobs')
                ->where('id', $id)
                ->lockForUpdate()
                ->first();
            if (!$job) return null;

            $results = json_decode($job->results, true) ?? [];

            // Only a string key with an array payload is a valid result; this
            // also prevents an illegal-offset error on an array-valued path.
            if (\is_string($path) && $path && \is_array($entry) && $entry) {
                $results[$path] = $entry;
            }

            $count = \count(array_filter($results, fn($r) => isset($r['md']) || isset($r['dl'])));

            $this->db()->table('batch_jobs')->where('id', $id)->update([
                'results' => json_encode($results),
                'done_count' => $count,
                'finished' => $done,
                'updated_at' => now(),
            ]);

            return $count;
        });

        if ($doneCount === null) return response()->json(['error' => 'Not found'], 404);

        return response()->json(['ok' => true, 'done_count' => $doneCount]);
    }

    /** Delete a job; answers ok whether or not the row existed. */
    #[Delete('/batch-jobs/{id}')]
    public function destroy(int $id)
    {
        $this->db()->table('batch_jobs')->where('id', $id)->delete();
        return response()->json(['ok' => true]);
    }
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
<?php
|
||||
|
||||
namespace App\Http\Controllers;
|
||||
|
||||
/**
 * Base class for the application's HTTP controllers.
 *
 * Declares no members of its own.
 */
abstract class Controller
{
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
<?php
|
||||
|
||||
namespace App\Http\Controllers;
|
||||
|
||||
use Illuminate\Http\Request;
|
||||
use Spatie\RouteAttributes\Attributes\Get;
|
||||
use Spatie\RouteAttributes\Attributes\Middleware;
|
||||
|
||||
/**
 * Serves the UI pages and a read-only endpoint for files under the
 * workspace directory.
 */
#[Middleware('web')]
class PageController extends Controller
{
    /** Only files inside this directory may be read through readFile(). */
    private const WORKSPACE_ROOT = '/workspace';

    /** Upload/compare page. */
    #[Get('/')]
    public function index()
    {
        return view('index');
    }

    /** Email conversion page; passes the working directory to the view. */
    #[Get('/email-convert')]
    public function emailConvert()
    {
        // NOTE(review): Laravel documents that env() returns null outside
        // config files once the configuration is cached — consider moving
        // WORKING_DIR into a config file.
        $workingDir = env('WORKING_DIR', 'working');
        return view('email_convert', compact('workingDir'));
    }

    /**
     * Return the content of a workspace file as UTF-8 JSON.
     *
     * Query: `path` — path of the file to read. Responds 403 when the
     * resolved path is outside the workspace, 404 when it is not a regular
     * file, otherwise `{path, content}`.
     */
    #[Get('/workspace-file')]
    public function readFile(Request $request)
    {
        $path = $request->query('path', '');
        // `?path[]=...` yields an array, which realpath() cannot accept.
        if (!\is_string($path)) {
            return response()->json(['error' => 'Access denied'], 403);
        }

        $abs = realpath($path);

        // Compare against the root followed by a separator: a bare prefix test
        // would also accept sibling directories such as "/workspace-backup".
        $inside = $abs && ($abs === self::WORKSPACE_ROOT
            || str_starts_with($abs, self::WORKSPACE_ROOT . '/'));
        if (!$inside) {
            return response()->json(['error' => 'Access denied'], 403);
        }
        if (!is_file($abs)) {
            return response()->json(['error' => 'File not found'], 404);
        }

        $raw = file_get_contents($abs);
        if ($raw === false) {
            return response()->json(['error' => 'File not readable'], 403);
        }

        // Re-encode to UTF-8 so json_encode() does not fail on legacy-encoded
        // files; fall back to treating the bytes as UTF-8 when detection fails.
        $source = mb_detect_encoding($raw, 'UTF-8,ISO-8859-1,Windows-1252', true) ?: 'UTF-8';
        $content = mb_convert_encoding($raw, 'UTF-8', $source);

        return response()->json(['path' => $abs, 'content' => $content]);
    }
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
<?php
|
||||
|
||||
namespace App\Models;
|
||||
|
||||
// use Illuminate\Contracts\Auth\MustVerifyEmail;
|
||||
use Database\Factories\UserFactory;
|
||||
use Illuminate\Database\Eloquent\Attributes\Fillable;
|
||||
use Illuminate\Database\Eloquent\Attributes\Hidden;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Foundation\Auth\User as Authenticatable;
|
||||
use Illuminate\Notifications\Notifiable;
|
||||
|
||||
/**
 * Application user account.
 *
 * `name`, `email` and `password` are mass assignable; `password` and
 * `remember_token` are hidden when the model is serialized.
 */
#[Fillable(['name', 'email', 'password'])]
#[Hidden(['password', 'remember_token'])]
class User extends Authenticatable
{
    /** @use HasFactory<UserFactory> */
    use HasFactory;
    use Notifiable;

    /**
     * Attribute name => cast type.
     *
     * @return array<string, string>
     */
    protected function casts(): array
    {
        $casts = [];
        $casts['email_verified_at'] = 'datetime';
        $casts['password'] = 'hashed';

        return $casts;
    }
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
<?php
|
||||
|
||||
namespace App\Providers;
|
||||
|
||||
use Illuminate\Support\ServiceProvider;
|
||||
|
||||
/**
 * Application-level service provider.
 *
 * Both hooks are currently empty placeholders.
 */
class AppServiceProvider extends ServiceProvider
{
    /**
     * Register application services (none yet).
     */
    public function register(): void
    {
        // Intentionally empty.
    }

    /**
     * Bootstrap application services (none yet).
     */
    public function boot(): void
    {
        // Intentionally empty.
    }
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env php
|
||||
<?php
|
||||
|
||||
use Illuminate\Foundation\Application;
|
||||
use Symfony\Component\Console\Input\ArgvInput;
|
||||
|
||||
// Record the start timestamp before anything else is loaded.
define('LARAVEL_START', microtime(true));

// Pull in the Composer autoloader.
require __DIR__.'/vendor/autoload.php';

// Load the application instance, run the command given on the command line
// and exit with the status it returns.
/** @var Application $app */
$app = require_once __DIR__.'/bootstrap/app.php';

exit($app->handleCommand(new ArgvInput));
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Foundation\Application;
|
||||
use Illuminate\Foundation\Configuration\Exceptions;
|
||||
use Illuminate\Foundation\Configuration\Middleware;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
// Configure and create the application: route files and health endpoint,
// an (empty) middleware hook, and JSON exception rendering for api/* requests.
return Application::configure(basePath: dirname(__DIR__))
    ->withRouting(
        web: __DIR__.'/../routes/web.php',
        commands: __DIR__.'/../routes/console.php',
        health: '/up',
    )
    ->withMiddleware(function (Middleware $middleware): void {
        // No middleware customisation.
    })
    ->withExceptions(function (Exceptions $exceptions): void {
        // Render exceptions as JSON whenever the request path matches api/*.
        $exceptions->shouldRenderJsonWhen(
            function (Request $request) {
                return $request->is('api/*');
            },
        );
    })
    ->create();
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<?php
|
||||
|
||||
use App\Providers\AppServiceProvider;
|
||||
|
||||
return [
|
||||
AppServiceProvider::class,
|
||||
];
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
{
|
||||
"$schema": "https://getcomposer.org/schema.json",
|
||||
"name": "laravel/laravel",
|
||||
"type": "project",
|
||||
"description": "The skeleton application for the Laravel framework.",
|
||||
"keywords": ["laravel", "framework"],
|
||||
"license": "MIT",
|
||||
"require": {
|
||||
"php": "^8.3",
|
||||
"laravel/framework": "^13.17",
|
||||
"laravel/tinker": "^3.0.2",
|
||||
"spatie/laravel-route-attributes": "^1.28"
|
||||
},
|
||||
"require-dev": {
|
||||
"fakerphp/faker": "^1.24.1",
|
||||
"laravel/pail": "^1.2.7",
|
||||
"laravel/pao": "^1.1.2",
|
||||
"laravel/pint": "^1.29.3",
|
||||
"mockery/mockery": "^1.6.12",
|
||||
"nunomaduro/collision": "^8.9.4",
|
||||
"phpunit/phpunit": "^12.5.30"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"App\\": "app/",
|
||||
"Database\\Factories\\": "database/factories/",
|
||||
"Database\\Seeders\\": "database/seeders/"
|
||||
}
|
||||
},
|
||||
"autoload-dev": {
|
||||
"psr-4": {
|
||||
"Tests\\": "tests/"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"setup": [
|
||||
"composer install",
|
||||
"@php -r \"file_exists('.env') || copy('.env.example', '.env');\"",
|
||||
"@php artisan key:generate",
|
||||
"@php artisan migrate --force",
|
||||
"npm install --ignore-scripts",
|
||||
"npm run build"
|
||||
],
|
||||
"dev": [
|
||||
"Composer\\Config::disableProcessTimeout",
|
||||
"npx concurrently -c \"#93c5fd,#c4b5fd,#fb7185,#fdba74\" \"php artisan serve\" \"php artisan queue:listen --tries=1 --timeout=0\" \"php artisan pail --timeout=0\" \"npm run dev\" --names=server,queue,logs,vite --kill-others"
|
||||
],
|
||||
"test": [
|
||||
"@php artisan config:clear --ansi @no_additional_args",
|
||||
"@php artisan test"
|
||||
],
|
||||
"post-autoload-dump": [
|
||||
"Illuminate\\Foundation\\ComposerScripts::postAutoloadDump",
|
||||
"@php artisan package:discover --ansi"
|
||||
],
|
||||
"post-update-cmd": [
|
||||
"@php artisan vendor:publish --tag=laravel-assets --ansi --force"
|
||||
],
|
||||
"post-root-package-install": [
|
||||
"@php -r \"file_exists('.env') || copy('.env.example', '.env');\""
|
||||
],
|
||||
"post-create-project-cmd": [
|
||||
"@php artisan key:generate --ansi",
|
||||
"@php -r \"file_exists('database/database.sqlite') || touch('database/database.sqlite');\"",
|
||||
"@php artisan migrate --graceful --ansi"
|
||||
],
|
||||
"pre-package-uninstall": [
|
||||
"Illuminate\\Foundation\\ComposerScripts::prePackageUninstall"
|
||||
]
|
||||
},
|
||||
"extra": {
|
||||
"laravel": {
|
||||
"dont-discover": []
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"optimize-autoloader": true,
|
||||
"preferred-install": "dist",
|
||||
"sort-packages": true,
|
||||
"allow-plugins": {
|
||||
"pestphp/pest-plugin": true,
|
||||
"php-http/discovery": true
|
||||
}
|
||||
},
|
||||
"minimum-stability": "stable",
|
||||
"prefer-stable": true
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,126 @@
|
|||
<?php
|
||||
|
||||
return [
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Application Name
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This value is the name of your application, which will be used when the
|
||||
| framework needs to place the application's name in a notification or
|
||||
| other UI elements where an application name needs to be displayed.
|
||||
|
|
||||
*/
|
||||
|
||||
'name' => env('APP_NAME', 'Laravel'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Application Environment
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This value determines the "environment" your application is currently
|
||||
| running in. This may determine how you prefer to configure various
|
||||
| services the application utilizes. Set this in your ".env" file.
|
||||
|
|
||||
*/
|
||||
|
||||
'env' => env('APP_ENV', 'production'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Application Debug Mode
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| When your application is in debug mode, detailed error messages with
|
||||
| stack traces will be shown on every error that occurs within your
|
||||
| application. If disabled, a simple generic error page is shown.
|
||||
|
|
||||
*/
|
||||
|
||||
'debug' => (bool) env('APP_DEBUG', false),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Application URL
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This URL is used by the console to properly generate URLs when using
|
||||
| the Artisan command line tool. You should set this to the root of
|
||||
| the application so that it's available within Artisan commands.
|
||||
|
|
||||
*/
|
||||
|
||||
'url' => env('APP_URL', 'http://localhost'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Application Timezone
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may specify the default timezone for your application, which
|
||||
| will be used by the PHP date and date-time functions. The timezone
|
||||
| is set to "UTC" by default as it is suitable for most use cases.
|
||||
|
|
||||
*/
|
||||
|
||||
'timezone' => 'UTC',
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Application Locale Configuration
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| The application locale determines the default locale that will be used
|
||||
| by Laravel's translation / localization methods. This option can be
|
||||
| set to any locale for which you plan to have translation strings.
|
||||
|
|
||||
*/
|
||||
|
||||
'locale' => env('APP_LOCALE', 'en'),
|
||||
|
||||
'fallback_locale' => env('APP_FALLBACK_LOCALE', 'en'),
|
||||
|
||||
'faker_locale' => env('APP_FAKER_LOCALE', 'en_US'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Encryption Key
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This key is utilized by Laravel's encryption services and should be set
|
||||
| to a random, 32 character string to ensure that all encrypted values
|
||||
| are secure. You should do this prior to deploying the application.
|
||||
|
|
||||
*/
|
||||
|
||||
'cipher' => 'AES-256-CBC',
|
||||
|
||||
'key' => env('APP_KEY'),
|
||||
|
||||
'previous_keys' => [
|
||||
...array_filter(
|
||||
explode(',', (string) env('APP_PREVIOUS_KEYS', ''))
|
||||
),
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Maintenance Mode Driver
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| These configuration options determine the driver used to determine and
|
||||
| manage Laravel's "maintenance mode" status. The "cache" driver will
|
||||
| allow maintenance mode to be controlled across multiple machines.
|
||||
|
|
||||
| Supported drivers: "file", "cache"
|
||||
|
|
||||
*/
|
||||
|
||||
'maintenance' => [
|
||||
'driver' => env('APP_MAINTENANCE_DRIVER', 'file'),
|
||||
'store' => env('APP_MAINTENANCE_STORE', 'database'),
|
||||
],
|
||||
|
||||
];
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
<?php
|
||||
|
||||
use App\Models\User;
|
||||
|
||||
return [
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Authentication Defaults
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This option defines the default authentication "guard" and password
|
||||
| reset "broker" for your application. You may change these values
|
||||
| as required, but they're a perfect start for most applications.
|
||||
|
|
||||
*/
|
||||
|
||||
'defaults' => [
|
||||
'guard' => env('AUTH_GUARD', 'web'),
|
||||
'passwords' => env('AUTH_PASSWORD_BROKER', 'users'),
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Authentication Guards
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Next, you may define every authentication guard for your application.
|
||||
| Of course, a great default configuration has been defined for you
|
||||
| which utilizes session storage plus the Eloquent user provider.
|
||||
|
|
||||
| All authentication guards have a user provider, which defines how the
|
||||
| users are actually retrieved out of your database or other storage
|
||||
| system used by the application. Typically, Eloquent is utilized.
|
||||
|
|
||||
| Supported: "session"
|
||||
|
|
||||
*/
|
||||
|
||||
'guards' => [
|
||||
'web' => [
|
||||
'driver' => 'session',
|
||||
'provider' => 'users',
|
||||
],
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| User Providers
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| All authentication guards have a user provider, which defines how the
|
||||
| users are actually retrieved out of your database or other storage
|
||||
| system used by the application. Typically, Eloquent is utilized.
|
||||
|
|
||||
| If you have multiple user tables or models you may configure multiple
|
||||
| providers to represent the model / table. These providers may then
|
||||
| be assigned to any extra authentication guards you have defined.
|
||||
|
|
||||
| Supported: "database", "eloquent"
|
||||
|
|
||||
*/
|
||||
|
||||
'providers' => [
|
||||
'users' => [
|
||||
'driver' => 'eloquent',
|
||||
'model' => env('AUTH_MODEL', User::class),
|
||||
],
|
||||
|
||||
// 'users' => [
|
||||
// 'driver' => 'database',
|
||||
// 'table' => 'users',
|
||||
// ],
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Resetting Passwords
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| These configuration options specify the behavior of Laravel's password
|
||||
| reset functionality, including the table utilized for token storage
|
||||
| and the user provider that is invoked to actually retrieve users.
|
||||
|
|
||||
| The expiry time is the number of minutes that each reset token will be
|
||||
| considered valid. This security feature keeps tokens short-lived so
|
||||
| they have less time to be guessed. You may change this as needed.
|
||||
|
|
||||
| The throttle setting is the number of seconds a user must wait before
|
||||
| generating more password reset tokens. This prevents the user from
|
||||
| quickly generating a very large amount of password reset tokens.
|
||||
|
|
||||
*/
|
||||
|
||||
'passwords' => [
|
||||
'users' => [
|
||||
'provider' => 'users',
|
||||
'table' => env('AUTH_PASSWORD_RESET_TOKEN_TABLE', 'password_reset_tokens'),
|
||||
'expire' => 60,
|
||||
'throttle' => 60,
|
||||
],
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Password Confirmation Timeout
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may define the number of seconds before a password confirmation
|
||||
| window expires and users are asked to re-enter their password via the
|
||||
| confirmation screen. By default, the timeout lasts for three hours.
|
||||
|
|
||||
*/
|
||||
|
||||
'password_timeout' => env('AUTH_PASSWORD_TIMEOUT', 10800),
|
||||
|
||||
];
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
return [
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Default Cache Store
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This option controls the default cache store that will be used by the
|
||||
| framework. This connection is utilized if another isn't explicitly
|
||||
| specified when running a cache operation inside the application.
|
||||
|
|
||||
*/
|
||||
|
||||
'default' => env('CACHE_STORE', 'database'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Cache Stores
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may define all of the cache "stores" for your application as
|
||||
| well as their drivers. You may even define multiple stores for the
|
||||
| same cache driver to group types of items stored in your caches.
|
||||
|
|
||||
| Supported drivers: "array", "database", "file", "memcached",
|
||||
| "redis", "dynamodb", "storage", "octane",
|
||||
| "session", "failover", "null"
|
||||
|
|
||||
*/
|
||||
|
||||
'stores' => [
|
||||
|
||||
'array' => [
|
||||
'driver' => 'array',
|
||||
'serialize' => false,
|
||||
],
|
||||
|
||||
'database' => [
|
||||
'driver' => 'database',
|
||||
'connection' => env('DB_CACHE_CONNECTION'),
|
||||
'table' => env('DB_CACHE_TABLE', 'cache'),
|
||||
'lock_connection' => env('DB_CACHE_LOCK_CONNECTION'),
|
||||
'lock_table' => env('DB_CACHE_LOCK_TABLE'),
|
||||
],
|
||||
|
||||
'file' => [
|
||||
'driver' => 'file',
|
||||
'path' => storage_path('framework/cache/data'),
|
||||
'lock_path' => storage_path('framework/cache/data'),
|
||||
],
|
||||
|
||||
'storage' => [
|
||||
'driver' => 'storage',
|
||||
'disk' => env('CACHE_STORAGE_DISK'),
|
||||
'path' => env('CACHE_STORAGE_PATH', 'framework/cache/data'),
|
||||
],
|
||||
|
||||
'memcached' => [
|
||||
'driver' => 'memcached',
|
||||
'persistent_id' => env('MEMCACHED_PERSISTENT_ID'),
|
||||
'sasl' => [
|
||||
env('MEMCACHED_USERNAME'),
|
||||
env('MEMCACHED_PASSWORD'),
|
||||
],
|
||||
'options' => [
|
||||
// Memcached::OPT_CONNECT_TIMEOUT => 2000,
|
||||
],
|
||||
'servers' => [
|
||||
[
|
||||
'host' => env('MEMCACHED_HOST', '127.0.0.1'),
|
||||
'port' => env('MEMCACHED_PORT', 11211),
|
||||
'weight' => 100,
|
||||
],
|
||||
],
|
||||
],
|
||||
|
||||
'redis' => [
|
||||
'driver' => 'redis',
|
||||
'connection' => env('REDIS_CACHE_CONNECTION', 'cache'),
|
||||
'lock_connection' => env('REDIS_CACHE_LOCK_CONNECTION', 'default'),
|
||||
],
|
||||
|
||||
'dynamodb' => [
|
||||
'driver' => 'dynamodb',
|
||||
'key' => env('AWS_ACCESS_KEY_ID'),
|
||||
'secret' => env('AWS_SECRET_ACCESS_KEY'),
|
||||
'region' => env('AWS_DEFAULT_REGION', 'us-east-1'),
|
||||
'table' => env('DYNAMODB_CACHE_TABLE', 'cache'),
|
||||
'endpoint' => env('DYNAMODB_ENDPOINT'),
|
||||
],
|
||||
|
||||
'octane' => [
|
||||
'driver' => 'octane',
|
||||
],
|
||||
|
||||
'failover' => [
|
||||
'driver' => 'failover',
|
||||
'stores' => [
|
||||
'database',
|
||||
'array',
|
||||
],
|
||||
],
|
||||
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Cache Key Prefix
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| When utilizing the APC, database, memcached, Redis, and DynamoDB cache
|
||||
| stores, there might be other applications using the same cache. For
|
||||
| that reason, you may prefix every cache key to avoid collisions.
|
||||
|
|
||||
*/
|
||||
|
||||
'prefix' => env('CACHE_PREFIX', Str::slug((string) env('APP_NAME', 'laravel')).'-cache-'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Serializable Classes
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This value determines the classes that can be unserialized from cache
|
||||
| storage. By default, no PHP classes will be unserialized from your
|
||||
| cache to prevent gadget chain attacks if your APP_KEY is leaked.
|
||||
|
|
||||
*/
|
||||
|
||||
'serializable_classes' => false,
|
||||
|
||||
];
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Support\Str;
|
||||
use Pdo\Mysql;
|
||||
|
||||
return [
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Default Database Connection Name
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may specify which of the database connections below you wish
|
||||
| to use as your default connection for database operations. This is
|
||||
| the connection which will be utilized unless another connection
|
||||
| is explicitly specified when you execute a query / statement.
|
||||
|
|
||||
*/
|
||||
|
||||
'default' => env('DB_CONNECTION', 'sqlite'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Database Connections
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Below are all of the database connections defined for your application.
|
||||
| An example configuration is provided for each database system which
|
||||
| is supported by Laravel. You're free to add / remove connections.
|
||||
|
|
||||
*/
|
||||
|
||||
'connections' => [
|
||||
|
||||
'sqlite' => [
|
||||
'driver' => 'sqlite',
|
||||
'url' => env('DB_URL'),
|
||||
'database' => env('DB_DATABASE', database_path('database.sqlite')),
|
||||
'prefix' => '',
|
||||
'foreign_key_constraints' => env('DB_FOREIGN_KEYS', true),
|
||||
'busy_timeout' => null,
|
||||
'journal_mode' => null,
|
||||
'synchronous' => null,
|
||||
'transaction_mode' => 'DEFERRED',
|
||||
],
|
||||
|
||||
'mysql' => [
|
||||
'driver' => 'mysql',
|
||||
'url' => env('DB_URL'),
|
||||
'host' => env('DB_HOST', '127.0.0.1'),
|
||||
'port' => env('DB_PORT', '3306'),
|
||||
'database' => env('DB_DATABASE', 'laravel'),
|
||||
'username' => env('DB_USERNAME', 'root'),
|
||||
'password' => env('DB_PASSWORD', ''),
|
||||
'unix_socket' => env('DB_SOCKET', ''),
|
||||
'charset' => env('DB_CHARSET', 'utf8mb4'),
|
||||
'collation' => env('DB_COLLATION', 'utf8mb4_unicode_ci'),
|
||||
'prefix' => '',
|
||||
'prefix_indexes' => true,
|
||||
'strict' => true,
|
||||
'engine' => null,
|
||||
'options' => extension_loaded('pdo_mysql') ? array_filter([
|
||||
Mysql::ATTR_SSL_CA => env('MYSQL_ATTR_SSL_CA'),
|
||||
]) : [],
|
||||
],
|
||||
|
||||
'mariadb' => [
|
||||
'driver' => 'mariadb',
|
||||
'url' => env('DB_URL'),
|
||||
'host' => env('DB_HOST', '127.0.0.1'),
|
||||
'port' => env('DB_PORT', '3306'),
|
||||
'database' => env('DB_DATABASE', 'laravel'),
|
||||
'username' => env('DB_USERNAME', 'root'),
|
||||
'password' => env('DB_PASSWORD', ''),
|
||||
'unix_socket' => env('DB_SOCKET', ''),
|
||||
'charset' => env('DB_CHARSET', 'utf8mb4'),
|
||||
'collation' => env('DB_COLLATION', 'utf8mb4_unicode_ci'),
|
||||
'prefix' => '',
|
||||
'prefix_indexes' => true,
|
||||
'strict' => true,
|
||||
'engine' => null,
|
||||
'options' => extension_loaded('pdo_mysql') ? array_filter([
|
||||
Mysql::ATTR_SSL_CA => env('MYSQL_ATTR_SSL_CA'),
|
||||
]) : [],
|
||||
],
|
||||
|
||||
'pgsql' => [
|
||||
'driver' => 'pgsql',
|
||||
'url' => env('DB_URL'),
|
||||
'host' => env('DB_HOST', '127.0.0.1'),
|
||||
'port' => env('DB_PORT', '5432'),
|
||||
'database' => env('DB_DATABASE', 'laravel'),
|
||||
'username' => env('DB_USERNAME', 'root'),
|
||||
'password' => env('DB_PASSWORD', ''),
|
||||
'charset' => env('DB_CHARSET', 'utf8'),
|
||||
'prefix' => '',
|
||||
'prefix_indexes' => true,
|
||||
'search_path' => 'public',
|
||||
'sslmode' => env('DB_SSLMODE', 'prefer'),
|
||||
],
|
||||
|
||||
// ── AI Markdown services ──────────────────────────────────
|
||||
'db_markitdown' => [
|
||||
'driver' => 'pgsql',
|
||||
'host' => env('DB_MARKITDOWN_HOST', 'db-markitdown'),
|
||||
'port' => env('DB_MARKITDOWN_PORT', '5432'),
|
||||
'database' => env('DB_MARKITDOWN_NAME', 'markitdown'),
|
||||
'username' => env('DB_MARKITDOWN_USER', 'admin'),
|
||||
'password' => env('DB_MARKITDOWN_PASS', 'admin'),
|
||||
'charset' => 'utf8',
|
||||
'prefix' => '',
|
||||
'search_path' => 'public',
|
||||
'sslmode' => 'prefer',
|
||||
],
|
||||
|
||||
'db_docling' => [
|
||||
'driver' => 'pgsql',
|
||||
'host' => env('DB_DOCLING_HOST', 'db-docling'),
|
||||
'port' => env('DB_DOCLING_PORT', '5432'),
|
||||
'database' => env('DB_DOCLING_NAME', 'docling'),
|
||||
'username' => env('DB_DOCLING_USER', 'admin'),
|
||||
'password' => env('DB_DOCLING_PASS', 'admin'),
|
||||
'charset' => 'utf8',
|
||||
'prefix' => '',
|
||||
'search_path' => 'public',
|
||||
'sslmode' => 'prefer',
|
||||
],
|
||||
|
||||
'db_unlimited_ocr' => [
|
||||
'driver' => 'pgsql',
|
||||
'host' => env('DB_UNLIMITED_OCR_HOST', 'db-unlimited-ocr'),
|
||||
'port' => env('DB_UNLIMITED_OCR_PORT', '5432'),
|
||||
'database' => env('DB_UNLIMITED_OCR_NAME', 'unlimited_ocr'),
|
||||
'username' => env('DB_UNLIMITED_OCR_USER', 'admin'),
|
||||
'password' => env('DB_UNLIMITED_OCR_PASS', 'admin'),
|
||||
'charset' => 'utf8',
|
||||
'prefix' => '',
|
||||
'search_path' => 'public',
|
||||
'sslmode' => 'prefer',
|
||||
],
|
||||
|
||||
'sqlsrv' => [
|
||||
'driver' => 'sqlsrv',
|
||||
'url' => env('DB_URL'),
|
||||
'host' => env('DB_HOST', 'localhost'),
|
||||
'port' => env('DB_PORT', '1433'),
|
||||
'database' => env('DB_DATABASE', 'laravel'),
|
||||
'username' => env('DB_USERNAME', 'root'),
|
||||
'password' => env('DB_PASSWORD', ''),
|
||||
'charset' => env('DB_CHARSET', 'utf8'),
|
||||
'prefix' => '',
|
||||
'prefix_indexes' => true,
|
||||
// 'encrypt' => env('DB_ENCRYPT', 'yes'),
|
||||
// 'trust_server_certificate' => env('DB_TRUST_SERVER_CERTIFICATE', 'false'),
|
||||
],
|
||||
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Migration Repository Table
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| This table keeps track of all the migrations that have already run for
|
||||
| your application. Using this information, we can determine which of
|
||||
| the migrations on disk haven't actually been run on the database.
|
||||
|
|
||||
*/
|
||||
|
||||
'migrations' => [
|
||||
'table' => 'migrations',
|
||||
'update_date_on_publish' => true,
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Redis Databases
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Redis is an open source, fast, and advanced key-value store that also
|
||||
| provides a richer body of commands than a typical key-value system
|
||||
| such as Memcached. You may define your connection settings here.
|
||||
|
|
||||
*/
|
||||
|
||||
'redis' => [
|
||||
|
||||
'client' => env('REDIS_CLIENT', 'phpredis'),
|
||||
|
||||
'options' => [
|
||||
'cluster' => env('REDIS_CLUSTER', 'redis'),
|
||||
'prefix' => env('REDIS_PREFIX', Str::slug((string) env('APP_NAME', 'laravel')).'-database-'),
|
||||
'persistent' => env('REDIS_PERSISTENT', false),
|
||||
],
|
||||
|
||||
'default' => [
|
||||
'url' => env('REDIS_URL'),
|
||||
'host' => env('REDIS_HOST', '127.0.0.1'),
|
||||
'username' => env('REDIS_USERNAME'),
|
||||
'password' => env('REDIS_PASSWORD'),
|
||||
'port' => env('REDIS_PORT', '6379'),
|
||||
'database' => env('REDIS_DB', '0'),
|
||||
'max_retries' => env('REDIS_MAX_RETRIES', 3),
|
||||
'backoff_algorithm' => env('REDIS_BACKOFF_ALGORITHM', 'decorrelated_jitter'),
|
||||
'backoff_base' => env('REDIS_BACKOFF_BASE', 100),
|
||||
'backoff_cap' => env('REDIS_BACKOFF_CAP', 1000),
|
||||
],
|
||||
|
||||
'cache' => [
|
||||
'url' => env('REDIS_URL'),
|
||||
'host' => env('REDIS_HOST', '127.0.0.1'),
|
||||
'username' => env('REDIS_USERNAME'),
|
||||
'password' => env('REDIS_PASSWORD'),
|
||||
'port' => env('REDIS_PORT', '6379'),
|
||||
'database' => env('REDIS_CACHE_DB', '1'),
|
||||
'max_retries' => env('REDIS_MAX_RETRIES', 3),
|
||||
'backoff_algorithm' => env('REDIS_BACKOFF_ALGORITHM', 'decorrelated_jitter'),
|
||||
'backoff_base' => env('REDIS_BACKOFF_BASE', 100),
|
||||
'backoff_cap' => env('REDIS_BACKOFF_CAP', 1000),
|
||||
],
|
||||
|
||||
],
|
||||
|
||||
];
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
<?php
|
||||
|
||||
return [
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Default Filesystem Disk
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may specify the default filesystem disk that should be used
|
||||
| by the framework. The "local" disk, as well as a variety of cloud
|
||||
| based disks are available to your application for file storage.
|
||||
|
|
||||
*/
|
||||
|
||||
'default' => env('FILESYSTEM_DISK', 'local'),
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Filesystem Disks
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Below you may configure as many filesystem disks as necessary, and you
|
||||
| may even configure multiple disks for the same driver. Examples for
|
||||
| most supported storage drivers are configured here for reference.
|
||||
|
|
||||
| Supported drivers: "local", "ftp", "sftp", "s3"
|
||||
|
|
||||
*/
|
||||
|
||||
'disks' => [
|
||||
|
||||
'local' => [
|
||||
'driver' => 'local',
|
||||
'root' => storage_path('app/private'),
|
||||
'serve' => true,
|
||||
'throw' => false,
|
||||
'report' => false,
|
||||
],
|
||||
|
||||
'public' => [
|
||||
'driver' => 'local',
|
||||
'root' => storage_path('app/public'),
|
||||
'url' => rtrim(env('APP_URL', 'http://localhost'), '/').'/storage',
|
||||
'visibility' => 'public',
|
||||
'throw' => false,
|
||||
'report' => false,
|
||||
],
|
||||
|
||||
's3' => [
|
||||
'driver' => 's3',
|
||||
'key' => env('AWS_ACCESS_KEY_ID'),
|
||||
'secret' => env('AWS_SECRET_ACCESS_KEY'),
|
||||
'region' => env('AWS_DEFAULT_REGION'),
|
||||
'bucket' => env('AWS_BUCKET'),
|
||||
'url' => env('AWS_URL'),
|
||||
'endpoint' => env('AWS_ENDPOINT'),
|
||||
'use_path_style_endpoint' => env('AWS_USE_PATH_STYLE_ENDPOINT', false),
|
||||
'throw' => false,
|
||||
'report' => false,
|
||||
],
|
||||
|
||||
],
|
||||
|
||||
/*
|
||||
|--------------------------------------------------------------------------
|
||||
| Symbolic Links
|
||||
|--------------------------------------------------------------------------
|
||||
|
|
||||
| Here you may configure the symbolic links that will be created when the
|
||||
| `storage:link` Artisan command is executed. The array keys should be
|
||||
| the locations of the links and the values should be their targets.
|
||||
|
|
||||
*/
|
||||
|
||||
'links' => [
|
||||
public_path('storage') => storage_path('app/public'),
|
||||
],
|
||||
|
||||
];
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
<?php
|
||||
|
||||
use Monolog\Handler\NullHandler;
|
||||
use Monolog\Handler\StreamHandler;
|
||||
use Monolog\Handler\SyslogUdpHandler;
|
||||
use Monolog\Processor\PsrLogMessageProcessor;
|
||||
|
||||
// Values that several channels share are resolved once up front.
$logLevel = env('LOG_LEVEL', 'debug');
$logFile = storage_path('logs/laravel.log');
$psrProcessors = [PsrLogMessageProcessor::class];
$papertrailHost = env('PAPERTRAIL_URL');
$papertrailPort = env('PAPERTRAIL_PORT');

return [

    // Channel used when none is named explicitly; must match a key of
    // the "channels" array below.
    'default' => env('LOG_CHANNEL', 'stack'),

    // Where warnings about deprecated PHP and library features are logged.
    'deprecations' => [
        'channel' => env('LOG_DEPRECATIONS_CHANNEL', 'null'),
        'trace' => env('LOG_DEPRECATIONS_TRACE', false),
    ],

    // Available drivers: "single", "daily", "slack", "syslog",
    // "errorlog", "monolog", "custom", "stack".
    'channels' => [

        // Fans out to the comma-separated channel names in LOG_STACK.
        'stack' => [
            'driver' => 'stack',
            'channels' => explode(',', (string) env('LOG_STACK', 'single')),
            'ignore_exceptions' => false,
        ],

        'single' => [
            'driver' => 'single',
            'path' => $logFile,
            'level' => $logLevel,
            'replace_placeholders' => true,
        ],

        'daily' => [
            'driver' => 'daily',
            'path' => $logFile,
            'level' => $logLevel,
            'days' => env('LOG_DAILY_DAYS', 14),
            'replace_placeholders' => true,
        ],

        // Note: this channel falls back to "critical", not "debug".
        'slack' => [
            'driver' => 'slack',
            'url' => env('LOG_SLACK_WEBHOOK_URL'),
            'username' => env('LOG_SLACK_USERNAME', env('APP_NAME', 'Laravel')),
            'emoji' => env('LOG_SLACK_EMOJI', ':boom:'),
            'level' => env('LOG_LEVEL', 'critical'),
            'replace_placeholders' => true,
        ],

        'papertrail' => [
            'driver' => 'monolog',
            'level' => $logLevel,
            'handler' => env('LOG_PAPERTRAIL_HANDLER', SyslogUdpHandler::class),
            'handler_with' => [
                'host' => $papertrailHost,
                'port' => $papertrailPort,
                'connectionString' => 'tls://'.$papertrailHost.':'.$papertrailPort,
            ],
            'processors' => $psrProcessors,
        ],

        'stderr' => [
            'driver' => 'monolog',
            'level' => $logLevel,
            'handler' => StreamHandler::class,
            'handler_with' => [
                'stream' => 'php://stderr',
            ],
            'formatter' => env('LOG_STDERR_FORMATTER'),
            'processors' => $psrProcessors,
        ],

        'syslog' => [
            'driver' => 'syslog',
            'level' => $logLevel,
            'facility' => env('LOG_SYSLOG_FACILITY', LOG_USER),
            'replace_placeholders' => true,
        ],

        'errorlog' => [
            'driver' => 'errorlog',
            'level' => $logLevel,
            'replace_placeholders' => true,
        ],

        'null' => [
            'driver' => 'monolog',
            'handler' => NullHandler::class,
        ],

        'emergency' => [
            'path' => $logFile,
        ],

    ],

];
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
<?php
|
||||
|
||||
// Host part of APP_URL, used as the EHLO domain fallback for SMTP.
$appHost = parse_url((string) env('APP_URL', 'http://localhost'), PHP_URL_HOST);

return [

    // Mailer used for every message unless another one is named explicitly.
    'default' => env('MAIL_MAILER', 'log'),

    // Supported transports: "smtp", "sendmail", "mailgun", "ses", "ses-v2",
    // "postmark", "resend", "log", "array", "failover", "roundrobin".
    'mailers' => [

        'smtp' => [
            'transport' => 'smtp',
            'scheme' => env('MAIL_SCHEME'),
            'url' => env('MAIL_URL'),
            'host' => env('MAIL_HOST', '127.0.0.1'),
            'port' => env('MAIL_PORT', 2525),
            'username' => env('MAIL_USERNAME'),
            'password' => env('MAIL_PASSWORD'),
            'timeout' => null,
            'local_domain' => env('MAIL_EHLO_DOMAIN', $appHost),
        ],

        'ses' => [
            'transport' => 'ses',
        ],

        'postmark' => [
            'transport' => 'postmark',
            // Optional settings, disabled by default:
            // 'message_stream_id' => env('POSTMARK_MESSAGE_STREAM_ID'),
            // 'client' => [
            //     'timeout' => 5,
            // ],
        ],

        'resend' => [
            'transport' => 'resend',
        ],

        'sendmail' => [
            'transport' => 'sendmail',
            'path' => env('MAIL_SENDMAIL_PATH', '/usr/sbin/sendmail -bs -i'),
        ],

        'log' => [
            'transport' => 'log',
            'channel' => env('MAIL_LOG_CHANNEL'),
        ],

        'array' => [
            'transport' => 'array',
        ],

        'failover' => [
            'transport' => 'failover',
            'mailers' => ['smtp', 'log'],
            'retry_after' => 60,
        ],

        'roundrobin' => [
            'transport' => 'roundrobin',
            'mailers' => ['ses', 'postmark'],
            'retry_after' => 60,
        ],

    ],

    // Name and address used globally as the sender of outgoing mail.
    'from' => [
        'address' => env('MAIL_FROM_ADDRESS', 'hello@example.com'),
        'name' => env('MAIL_FROM_NAME', env('APP_NAME', 'Laravel')),
    ],

];
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
<?php
|
||||
|
||||
// Database connection shared by the job-batching and failed-job stores.
$databaseConnection = env('DB_CONNECTION', 'sqlite');

return [

    // Name of the queue connection used by default.
    'default' => env('QUEUE_CONNECTION', 'database'),

    // Drivers: "sync", "database", "beanstalkd", "sqs", "redis",
    // "deferred", "background", "failover", "null".
    'connections' => [

        'sync' => [
            'driver' => 'sync',
        ],

        'database' => [
            'driver' => 'database',
            'connection' => env('DB_QUEUE_CONNECTION'),
            'table' => env('DB_QUEUE_TABLE', 'jobs'),
            'queue' => env('DB_QUEUE', 'default'),
            'retry_after' => (int) env('DB_QUEUE_RETRY_AFTER', 90),
            'after_commit' => false,
        ],

        'beanstalkd' => [
            'driver' => 'beanstalkd',
            'host' => env('BEANSTALKD_QUEUE_HOST', 'localhost'),
            'queue' => env('BEANSTALKD_QUEUE', 'default'),
            'retry_after' => (int) env('BEANSTALKD_QUEUE_RETRY_AFTER', 90),
            'block_for' => 0,
            'after_commit' => false,
        ],

        'sqs' => [
            'driver' => 'sqs',
            'key' => env('AWS_ACCESS_KEY_ID'),
            'secret' => env('AWS_SECRET_ACCESS_KEY'),
            'prefix' => env('SQS_PREFIX', 'https://sqs.us-east-1.amazonaws.com/your-account-id'),
            'queue' => env('SQS_QUEUE', 'default'),
            'suffix' => env('SQS_SUFFIX'),
            'region' => env('AWS_DEFAULT_REGION', 'us-east-1'),
            'after_commit' => false,
        ],

        'redis' => [
            'driver' => 'redis',
            'connection' => env('REDIS_QUEUE_CONNECTION', 'default'),
            'queue' => env('REDIS_QUEUE', 'default'),
            'retry_after' => (int) env('REDIS_QUEUE_RETRY_AFTER', 90),
            'block_for' => null,
            'after_commit' => false,
        ],

        'deferred' => [
            'driver' => 'deferred',
        ],

        'background' => [
            'driver' => 'background',
        ],

        'failover' => [
            'driver' => 'failover',
            'connections' => ['database', 'deferred'],
        ],

    ],

    // Database connection and table that store job batching information.
    'batching' => [
        'database' => $databaseConnection,
        'table' => 'job_batches',
    ],

    // How and where failed jobs are recorded.
    // Supported drivers: "database-uuids", "dynamodb", "file", "null".
    'failed' => [
        'driver' => env('QUEUE_FAILED_DRIVER', 'database-uuids'),
        'database' => $databaseConnection,
        'table' => 'failed_jobs',
    ],

];
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
// Directories whose controllers are scanned for routing attributes.
// A directory may also be used as a key with group configuration, e.g.:
//
//     app_path('Http/Controllers/Api') => [
//         'prefix' => 'api',
//         'middleware' => 'api',
//         // only register routes in files that match the patterns
//         'patterns' => ['*Controller.php'],
//         // do not register routes in files that match the patterns
//         'not_patterns' => [],
//     ],
$controllerDirectories = [
    app_path('Http/Controllers'),
];

return [
    // Routes are registered automatically only when this is `true`.
    'enabled' => true,

    'directories' => $controllerDirectories,

    // Middleware applied to all routes.
    'middleware' => [
        \Illuminate\Routing\Middleware\SubstituteBindings::class,
    ],

    // Implicitly scoped bindings, overridable per route with the
    // `ScopeBindings` attribute:
    //   null  - use the default behaviour
    //   true  - enable implicitly scoped bindings for all routes
    //   false - disable implicitly scoped bindings for all routes
    'scope-bindings' => null,
];
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
<?php
|
||||
|
||||
// Credentials for third party services, read from the environment.
$slackNotifications = [
    'bot_user_oauth_token' => env('SLACK_BOT_USER_OAUTH_TOKEN'),
    'channel' => env('SLACK_BOT_USER_DEFAULT_CHANNEL'),
];

return [

    'postmark' => [
        'key' => env('POSTMARK_API_KEY'),
    ],

    'resend' => [
        'key' => env('RESEND_API_KEY'),
    ],

    'ses' => [
        'key' => env('AWS_ACCESS_KEY_ID'),
        'secret' => env('AWS_SECRET_ACCESS_KEY'),
        'region' => env('AWS_DEFAULT_REGION', 'us-east-1'),
    ],

    'slack' => [
        'notifications' => $slackNotifications,
    ],

];
|
||||
|
|
@ -0,0 +1,233 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
// Cookie name used when SESSION_COOKIE is not set: slugged app name + "-session".
$fallbackCookieName = Str::slug((string) env('APP_NAME', 'laravel')).'-session';

return [

    // Session storage driver.
    // Supported: "file", "cookie", "database", "memcached",
    // "redis", "dynamodb", "array".
    'driver' => env('SESSION_DRIVER', 'database'),

    // Minutes a session may stay idle before it expires, and whether it
    // should expire as soon as the browser is closed.
    'lifetime' => (int) env('SESSION_LIFETIME', 120),
    'expire_on_close' => env('SESSION_EXPIRE_ON_CLOSE', false),

    // Encrypt all session data before it is stored.
    'encrypt' => env('SESSION_ENCRYPT', false),

    // Storage location for the "file" driver.
    'files' => storage_path('framework/sessions'),

    // Connection used by the "database" and "redis" drivers.
    'connection' => env('SESSION_CONNECTION'),

    // Table used by the "database" driver.
    'table' => env('SESSION_TABLE', 'sessions'),

    // Cache store used by cache-backed drivers; must match a defined
    // cache store. Affects: "dynamodb", "memcached", "redis".
    'store' => env('SESSION_STORE'),

    // Odds (2 out of 100) that a given request sweeps old sessions.
    'lottery' => [2, 100],

    // Name of the session cookie.
    'cookie' => env('SESSION_COOKIE', $fallbackCookieName),

    // Path for which the session cookie is regarded as available.
    'path' => env('SESSION_PATH', '/'),

    // Domain and subdomains the session cookie is available to.
    'domain' => env('SESSION_DOMAIN'),

    // When true, the cookie is only sent back over an HTTPS connection.
    'secure' => env('SESSION_SECURE_COOKIE'),

    // When true, JavaScript cannot access the value of the cookie.
    'http_only' => env('SESSION_HTTP_ONLY', true),

    // Cookie behaviour on cross-site requests.
    // See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#samesitesamesite-value
    // Supported: "lax", "strict", "none", null.
    'same_site' => env('SESSION_SAME_SITE', 'lax'),

    // Tie the cookie to the top-level site for a cross-site context.
    'partitioned' => env('SESSION_PARTITIONED_COOKIE', false),

    // Serialization strategy for session data. "php" allows storing PHP
    // objects but can expose the application to "gadget chain"
    // serialization attacks if the APP_KEY is leaked.
    // Supported: "json", "php".
    'serialization' => 'json',

];
|
||||
|
|
@ -0,0 +1 @@
|
|||
*.sqlite*
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
|
||||
namespace Database\Factories;
|
||||
|
||||
use App\Models\User;
|
||||
use Illuminate\Database\Eloquent\Factories\Factory;
|
||||
use Illuminate\Support\Facades\Hash;
|
||||
use Illuminate\Support\Str;
|
||||
|
||||
/**
 * Factory producing User model attribute sets.
 *
 * @extends Factory<User>
 */
class UserFactory extends Factory
{
    /**
     * Hashed password reused by every definition built by the factory.
     */
    protected static ?string $password;

    /**
     * Build the default attribute set for a user.
     *
     * @return array<string, mixed>
     */
    public function definition(): array
    {
        $name = fake()->name();
        $email = fake()->unique()->safeEmail();
        $verifiedAt = now();

        // Hash the default password only the first time it is needed.
        if (! isset(static::$password)) {
            static::$password = Hash::make('password');
        }

        return [
            'name' => $name,
            'email' => $email,
            'email_verified_at' => $verifiedAt,
            'password' => static::$password,
            'remember_token' => Str::random(10),
        ];
    }

    /**
     * State in which the user's email address is not verified.
     */
    public function unverified(): static
    {
        return $this->state(function (array $attributes) {
            return ['email_verified_at' => null];
        });
    }
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the users, password_reset_tokens and sessions tables.
     */
    public function up(): void
    {
        Schema::create('users', function (Blueprint $table) {
            $this->defineUsers($table);
        });

        Schema::create('password_reset_tokens', function (Blueprint $table) {
            $this->definePasswordResetTokens($table);
        });

        Schema::create('sessions', function (Blueprint $table) {
            $this->defineSessions($table);
        });
    }

    /**
     * Drop the tables created by up(), in the same order.
     */
    public function down(): void
    {
        foreach (['users', 'password_reset_tokens', 'sessions'] as $tableName) {
            Schema::dropIfExists($tableName);
        }
    }

    /**
     * Columns of the users table.
     */
    private function defineUsers(Blueprint $table): void
    {
        $table->id();
        $table->string('name');
        $table->string('email')->unique();
        $table->timestamp('email_verified_at')->nullable();
        $table->string('password');
        $table->rememberToken();
        $table->timestamps();
    }

    /**
     * Columns of the password_reset_tokens table, keyed by email.
     */
    private function definePasswordResetTokens(Blueprint $table): void
    {
        $table->string('email')->primary();
        $table->string('token');
        $table->timestamp('created_at')->nullable();
    }

    /**
     * Columns of the sessions table.
     */
    private function defineSessions(Blueprint $table): void
    {
        $table->string('id')->primary();
        $table->foreignId('user_id')->nullable()->index();
        $table->string('ip_address', 45)->nullable();
        $table->text('user_agent')->nullable();
        $table->longText('payload');
        $table->integer('last_activity')->index();
    }
};
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the cache and cache_locks tables.
     */
    public function up(): void
    {
        Schema::create('cache', function (Blueprint $table) {
            $this->defineCache($table);
        });

        Schema::create('cache_locks', function (Blueprint $table) {
            $this->defineCacheLocks($table);
        });
    }

    /**
     * Drop the tables created by up(), in the same order.
     */
    public function down(): void
    {
        foreach (['cache', 'cache_locks'] as $tableName) {
            Schema::dropIfExists($tableName);
        }
    }

    /**
     * Columns of the cache table, keyed by cache key.
     */
    private function defineCache(Blueprint $table): void
    {
        $table->string('key')->primary();
        $table->mediumText('value');
        $table->bigInteger('expiration')->index();
    }

    /**
     * Columns of the cache_locks table, keyed by lock key.
     */
    private function defineCacheLocks(Blueprint $table): void
    {
        $table->string('key')->primary();
        $table->string('owner');
        $table->bigInteger('expiration')->index();
    }
};
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Create the jobs, job_batches and failed_jobs tables.
     */
    public function up(): void
    {
        Schema::create('jobs', function (Blueprint $table) {
            $this->defineJobs($table);
        });

        Schema::create('job_batches', function (Blueprint $table) {
            $this->defineJobBatches($table);
        });

        Schema::create('failed_jobs', function (Blueprint $table) {
            $this->defineFailedJobs($table);
        });
    }

    /**
     * Drop the tables created by up(), in the same order.
     */
    public function down(): void
    {
        foreach (['jobs', 'job_batches', 'failed_jobs'] as $tableName) {
            Schema::dropIfExists($tableName);
        }
    }

    /**
     * Columns of the jobs table.
     */
    private function defineJobs(Blueprint $table): void
    {
        $table->id();
        $table->string('queue')->index();
        $table->longText('payload');
        $table->unsignedSmallInteger('attempts');
        $table->unsignedInteger('reserved_at')->nullable();
        $table->unsignedInteger('available_at');
        $table->unsignedInteger('created_at');
    }

    /**
     * Columns of the job_batches table.
     */
    private function defineJobBatches(Blueprint $table): void
    {
        $table->string('id')->primary();
        $table->string('name');
        $table->integer('total_jobs');
        $table->integer('pending_jobs');
        $table->integer('failed_jobs');
        $table->longText('failed_job_ids');
        $table->mediumText('options')->nullable();
        $table->integer('cancelled_at')->nullable();
        $table->integer('created_at');
        $table->integer('finished_at')->nullable();
    }

    /**
     * Columns of the failed_jobs table, plus a composite lookup index.
     */
    private function defineFailedJobs(Blueprint $table): void
    {
        $table->id();
        $table->string('uuid')->unique();
        $table->string('connection');
        $table->string('queue');
        $table->longText('payload');
        $table->longText('exception');
        $table->timestamp('failed_at')->useCurrent();

        $table->index(['connection', 'queue', 'failed_at']);
    }
};
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<?php
|
||||
|
||||
namespace Database\Seeders;
|
||||
|
||||
use App\Models\User;
|
||||
use Illuminate\Database\Console\Seeds\WithoutModelEvents;
|
||||
use Illuminate\Database\Seeder;
|
||||
|
||||
class DatabaseSeeder extends Seeder
{
    use WithoutModelEvents;

    /**
     * Seed the database with a single, fixed test user.
     */
    public function run(): void
    {
        // For a batch of random users instead: User::factory(10)->create();

        $testUser = [
            'name' => 'Test User',
            'email' => 'test@example.com',
        ];

        User::factory()->create($testUser);
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"$schema": "https://www.schemastore.org/package.json",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"build": "vite build",
|
||||
"dev": "vite"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@tailwindcss/vite": "^4.0.0",
|
||||
"concurrently": "^9.0.1",
|
||||
"laravel-vite-plugin": "^3.1",
|
||||
"tailwindcss": "^4.0.0",
|
||||
"vite": "^8.0.0"
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:noNamespaceSchemaLocation="vendor/phpunit/phpunit/phpunit.xsd"
|
||||
bootstrap="vendor/autoload.php"
|
||||
colors="true"
|
||||
>
|
||||
<testsuites>
|
||||
<testsuite name="Unit">
|
||||
<directory>tests/Unit</directory>
|
||||
</testsuite>
|
||||
<testsuite name="Feature">
|
||||
<directory>tests/Feature</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
<source>
|
||||
<include>
|
||||
<directory>app</directory>
|
||||
</include>
|
||||
</source>
|
||||
<php>
|
||||
<env name="APP_ENV" value="testing"/>
|
||||
<env name="APP_MAINTENANCE_DRIVER" value="file"/>
|
||||
<env name="BCRYPT_ROUNDS" value="4"/>
|
||||
<env name="BROADCAST_CONNECTION" value="null"/>
|
||||
<env name="CACHE_STORE" value="array"/>
|
||||
<env name="DB_CONNECTION" value="sqlite"/>
|
||||
<env name="DB_DATABASE" value=":memory:"/>
|
||||
<env name="DB_URL" value=""/>
|
||||
<env name="MAIL_MAILER" value="array"/>
|
||||
<env name="QUEUE_CONNECTION" value="sync"/>
|
||||
<env name="SESSION_DRIVER" value="array"/>
|
||||
<env name="PULSE_ENABLED" value="false"/>
|
||||
<env name="TELESCOPE_ENABLED" value="false"/>
|
||||
<env name="NIGHTWATCH_ENABLED" value="false"/>
|
||||
</php>
|
||||
</phpunit>
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<IfModule mod_rewrite.c>
|
||||
<IfModule mod_negotiation.c>
|
||||
Options -MultiViews -Indexes
|
||||
</IfModule>
|
||||
|
||||
RewriteEngine On
|
||||
|
||||
# Handle Authorization Header
|
||||
RewriteCond %{HTTP:Authorization} .
|
||||
RewriteRule .* - [E=HTTP_AUTHORIZATION:%{HTTP:Authorization}]
|
||||
|
||||
# Handle X-XSRF-Token Header
|
||||
RewriteCond %{HTTP:x-xsrf-token} .
|
||||
RewriteRule .* - [E=HTTP_X_XSRF_TOKEN:%{HTTP:X-XSRF-Token}]
|
||||
|
||||
# Redirect Trailing Slashes If Not A Folder...
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_URI} (.+)/$
|
||||
RewriteRule ^ %1 [L,R=301]
|
||||
|
||||
# Send Requests To Front Controller...
|
||||
RewriteCond %{REQUEST_FILENAME} !-d
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteRule ^ index.php [L]
|
||||
</IfModule>
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Foundation\Application;
|
||||
use Illuminate\Http\Request;
|
||||
|
||||
define('LARAVEL_START', microtime(true));
|
||||
|
||||
// Determine if the application is in maintenance mode...
|
||||
if (file_exists($maintenance = __DIR__.'/../storage/framework/maintenance.php')) {
|
||||
require $maintenance;
|
||||
}
|
||||
|
||||
// Register the Composer autoloader...
|
||||
require __DIR__.'/../vendor/autoload.php';
|
||||
|
||||
// Bootstrap Laravel and handle the request...
|
||||
/** @var Application $app */
|
||||
$app = require_once __DIR__.'/../bootstrap/app.php';
|
||||
|
||||
$app->handleRequest(Request::capture());
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
User-agent: *
|
||||
Disallow:
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
@import 'tailwindcss';
|
||||
|
||||
@source '../../vendor/laravel/framework/src/Illuminate/Pagination/resources/views/*.blade.php';
|
||||
@source '../../storage/framework/views/*.php';
|
||||
|
||||
@theme {
|
||||
--font-sans: 'Instrument Sans', ui-sans-serif, system-ui, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji',
|
||||
'Segoe UI Symbol', 'Noto Color Emoji';
|
||||
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
//
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,427 @@
|
|||
@extends('layouts.app')
|
||||
@section('title', 'Upload File — AI Markdown Demo')
|
||||
|
||||
@section('content')
|
||||
|
||||
{{-- ── Upload card ──────────────────────────────────────────────── --}}
|
||||
<div class="card shadow-sm mb-4">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title fw-semibold mb-3">Tải lên tài liệu để so sánh</h6>
|
||||
|
||||
{{-- Mode tabs --}}
|
||||
<ul class="nav nav-tabs mb-3" id="modeTabs">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-3 small active" href="#" data-mode="file">
|
||||
<i class="bi bi-file-earmark-text me-1"></i>File
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-3 small" href="#" data-mode="youtube">
|
||||
<i class="bi bi-youtube me-1 text-danger"></i>YouTube
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
{{-- File zone --}}
|
||||
<div id="fileZone">
|
||||
<div class="drop-zone mb-2" id="dropZone">
|
||||
<input type="file" id="fileInput" style="display:none"
|
||||
accept=".pdf,.docx,.xlsx,.pptx,.html,.htm,.csv,.txt,.jpg,.jpeg,.png,.tiff,.tif,.bmp,.md,.epub,.zip,.asciidoc,.adoc,.webp" />
|
||||
<i class="bi bi-file-earmark-text fs-1 text-secondary"></i>
|
||||
<p class="text-muted mt-2 mb-1">Kéo thả hoặc click để chọn file</p>
|
||||
<div id="fileInfo" class="fw-semibold text-primary small"></div>
|
||||
</div>
|
||||
<div class="d-flex flex-wrap gap-1">
|
||||
@foreach(['PDF','DOCX','XLSX','PPTX','HTML','CSV','TXT','JPG/PNG','EPUB','TIFF','ASCIIDoc'] as $ext)
|
||||
<span class="badge bg-secondary-subtle text-secondary">{{ $ext }}</span>
|
||||
@endforeach
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{-- YouTube zone --}}
|
||||
<div id="youtubeZone" style="display:none">
|
||||
<div class="input-group">
|
||||
<span class="input-group-text bg-danger text-white"><i class="bi bi-youtube"></i></span>
|
||||
<input type="url" class="form-control" id="ytUrl" placeholder="https://www.youtube.com/watch?v=..." />
|
||||
<button class="btn btn-outline-secondary" id="ytClear"><i class="bi bi-x"></i></button>
|
||||
</div>
|
||||
<div class="form-text mt-1">
|
||||
<i class="bi bi-info-circle me-1"></i>
|
||||
<b>MarkItDown</b>: yt-dlp |
|
||||
<b>Docling</b>: transcript → markdown |
|
||||
<b>Unlimited-OCR</b>: <span class="text-warning-emphasis">không hỗ trợ URL</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{-- Controls --}}
|
||||
<div class="d-flex flex-wrap align-items-center gap-3 mt-3">
|
||||
<div class="d-flex align-items-center gap-2">
|
||||
<label class="form-label mb-0 small fw-medium">Docling format</label>
|
||||
<select class="form-select form-select-sm" id="dlFormat" style="width:auto">
|
||||
<option value="markdown">Markdown</option>
|
||||
<option value="json">JSON</option>
|
||||
<option value="html">HTML</option>
|
||||
<option value="text">Plain Text</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-check form-switch mb-0">
|
||||
<input class="form-check-input" type="checkbox" id="llmToggle" checked />
|
||||
<label class="form-check-label small fw-medium" for="llmToggle">LLM</label>
|
||||
</div>
|
||||
<button class="btn btn-primary btn-sm" id="btnConvert" disabled>
|
||||
<i class="bi bi-play-fill me-1"></i>Chuyển đổi & So sánh
|
||||
</button>
|
||||
<button class="btn btn-outline-secondary btn-sm" id="btnClear">
|
||||
<i class="bi bi-x-circle me-1"></i>Xoá
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{{-- Prompt panel --}}
|
||||
<div id="promptPanel" class="mt-3">
|
||||
<div class="p-3 rounded border bg-primary-subtle">
|
||||
<div class="d-flex justify-content-between align-items-center mb-2">
|
||||
<span class="small fw-semibold text-primary"><i class="bi bi-pencil-square me-1"></i>Custom LLM Prompt</span>
|
||||
<button class="btn btn-link btn-sm p-0 text-primary" id="btnClearPrompt">Xoá</button>
|
||||
</div>
|
||||
<textarea class="form-control form-control-sm" id="llmPrompt" rows="3"
|
||||
placeholder="Để trống = dùng default prompt..."></textarea>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{-- ── Status row ───────────────────────────────────────────────── --}}
|
||||
<div class="row g-3 mb-4" id="statusRow" style="display:none!important">
|
||||
@foreach([['md','MarkItDown','bg-primary','text-primary'],['dl','Docling','bg-success','text-success'],['uo','Unlimited-OCR','bg-danger','text-danger']] as [$k,$label,$bg,$tc])
|
||||
<div class="col-md-4">
|
||||
<div class="card h-100">
|
||||
<div class="card-body">
|
||||
<h6 class="card-title d-flex align-items-center gap-2">
|
||||
<span class="badge rounded-circle p-1 {{ $bg }}"> </span>{{ $label }}
|
||||
</h6>
|
||||
<div class="d-flex align-items-center gap-2">
|
||||
<div class="spinner-border spinner-border-sm {{ $tc }} status-spinner-{{ $k }}" style="display:none"></div>
|
||||
<small class="text-muted status-text-{{ $k }}">Đang chờ...</small>
|
||||
</div>
|
||||
<div class="row g-2 mt-2 status-stats-{{ $k }}" style="display:none">
|
||||
<div class="col-4"><div class="border rounded text-center py-2">
|
||||
<div class="fw-bold stat-ms-{{ $k }}">—</div>
|
||||
<div class="text-muted" style="font-size:.7rem">ms</div>
|
||||
</div></div>
|
||||
<div class="col-4"><div class="border rounded text-center py-2">
|
||||
<div class="fw-bold stat-len-{{ $k }}">—</div>
|
||||
<div class="text-muted" style="font-size:.7rem">ký tự</div>
|
||||
</div></div>
|
||||
<div class="col-4"><div class="border rounded text-center py-2">
|
||||
<div class="fw-bold stat-lines-{{ $k }}">—</div>
|
||||
<div class="text-muted" style="font-size:.7rem">dòng</div>
|
||||
</div></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@endforeach
|
||||
</div>
|
||||
|
||||
{{-- ── Result cards ─────────────────────────────────────────────── --}}
|
||||
<div class="row g-3 mb-4">
|
||||
@foreach([['md','MarkItDown','bg-primary','text-primary'],['dl','Docling','bg-success','text-success'],['uo','Unlimited-OCR','bg-danger','text-danger']] as [$k,$label,$bg,$tc])
|
||||
<div class="col-md-4">
|
||||
<div class="card shadow-sm h-100">
|
||||
<div class="card-header d-flex justify-content-between align-items-center py-2">
|
||||
<span class="fw-semibold small d-flex align-items-center gap-2">
|
||||
<span class="badge rounded-circle p-1 {{ $bg }}"> </span>
|
||||
{{ $label }}
|
||||
<span class="badge bg-primary-subtle text-primary llm-badge-{{ $k }}" style="display:none;font-size:.65rem">🤖 LLM</span>
|
||||
</span>
|
||||
<div class="d-flex align-items-center gap-1">
|
||||
@if($k === 'md')
|
||||
<button class="btn btn-outline-warning btn-sm py-0 px-2" id="btnCleanup" style="display:none">
|
||||
✨ Làm đẹp
|
||||
</button>
|
||||
@endif
|
||||
<button class="btn btn-outline-secondary btn-sm py-0 px-2 btn-dl-{{ $k }}" style="display:none">
|
||||
<i class="bi bi-download"></i> .md
|
||||
</button>
|
||||
@include('partials.pane-tabs', ['k' => $k])
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body p-0">
|
||||
@include('partials.pane-body', ['k' => $k, 'tc' => $tc])
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@endforeach
|
||||
</div>
|
||||
|
||||
{{-- ── History ──────────────────────────────────────────────────── --}}
|
||||
<h6 class="fw-semibold mb-3">Lịch sử chuyển đổi gần đây</h6>
|
||||
<div class="row g-3">
|
||||
@foreach([['md','MarkItDown','bg-primary'],['dl','Docling','bg-success'],['uo','Unlimited-OCR','bg-danger']] as [$k,$label,$bg])
|
||||
<div class="col-md-4">
|
||||
<div class="card shadow-sm">
|
||||
<div class="card-header py-2 small fw-semibold d-flex align-items-center gap-2">
|
||||
<span class="badge rounded-circle p-1 {{ $bg }}"> </span>{{ $label }}
|
||||
</div>
|
||||
<div class="history-{{ $k }}">
|
||||
<div class="text-center text-muted py-3 small">Chưa có lịch sử</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@endforeach
|
||||
</div>
|
||||
|
||||
@endsection
|
||||
|
||||
@section('scripts')
|
||||
<script>
|
||||
$.ajaxSetup({ headers: { 'X-CSRF-TOKEN': $('meta[name="csrf-token"]').attr('content') } })
|
||||
|
||||
$(function () {
|
||||
|
||||
// ── State ────────────────────────────────────────────────────
|
||||
let mode = 'file'
|
||||
let currentFile = null
|
||||
const results = { md: '', dl: '', uo: '' }
|
||||
const tabs = { md: 'raw', dl: 'raw', uo: 'raw' }
|
||||
|
||||
// ── Init ─────────────────────────────────────────────────────
|
||||
loadHistory()
|
||||
const savedPrompt = localStorage.getItem('llm_prompt')
|
||||
if (savedPrompt) $('#llmPrompt').val(savedPrompt)
|
||||
if (localStorage.getItem('llm_enabled') === '0') $('#llmToggle').prop('checked', false)
|
||||
togglePromptPanel()
|
||||
|
||||
// ── Mode tabs ────────────────────────────────────────────────
|
||||
$('#modeTabs a').on('click', function (e) {
|
||||
e.preventDefault()
|
||||
mode = $(this).data('mode')
|
||||
$('#modeTabs a').removeClass('active')
|
||||
$(this).addClass('active')
|
||||
$('#fileZone').toggle(mode === 'file')
|
||||
$('#youtubeZone').toggle(mode === 'youtube')
|
||||
updateConvertBtn()
|
||||
})
|
||||
|
||||
// ── File input ───────────────────────────────────────────────
|
||||
$('#dropZone').on('click', () => $('#fileInput').click())
|
||||
$('#fileInput').on('change', function () { setFile(this.files[0]) })
|
||||
$('#dropZone').on('dragover', function (e) { e.preventDefault(); $(this).addClass('drag-over') })
|
||||
$('#dropZone').on('dragleave', function () { $(this).removeClass('drag-over') })
|
||||
$('#dropZone').on('drop', function (e) {
|
||||
e.preventDefault(); $(this).removeClass('drag-over')
|
||||
setFile(e.originalEvent.dataTransfer.files[0])
|
||||
})
|
||||
/**
 * Remember the chosen file and show "<name> (<size>)" in the drop zone.
 * Sizes below 1 MiB are shown in KB, everything else in MB (one decimal).
 * A missing file (cancelled dialog, empty drop) is ignored.
 */
function setFile(f) {
  if (!f) return

  const BYTES_PER_MB = 1048576
  let label
  if (f.size < BYTES_PER_MB) {
    label = (f.size / 1024).toFixed(1) + ' KB'
  } else {
    label = (f.size / BYTES_PER_MB).toFixed(1) + ' MB'
  }

  currentFile = f
  $('#fileInfo').text(`${f.name} (${label})`)
  updateConvertBtn()
}
|
||||
|
||||
// ── YouTube ──────────────────────────────────────────────────
|
||||
$('#ytUrl').on('input', updateConvertBtn)
|
||||
$('#ytClear').on('click', function () { $('#ytUrl').val(''); updateConvertBtn() })
|
||||
|
||||
/**
 * Enable the convert button only when the active mode has input:
 * a selected file in "file" mode, a non-blank URL otherwise.
 */
function updateConvertBtn() {
  let ready
  if (mode === 'file') {
    ready = Boolean(currentFile)
  } else {
    ready = $('#ytUrl').val().trim() !== ''
  }
  $('#btnConvert').prop('disabled', !ready)
}
|
||||
|
||||
// ── LLM toggle ───────────────────────────────────────────────
|
||||
$('#llmToggle').on('change', function () {
|
||||
localStorage.setItem('llm_enabled', this.checked ? '1' : '0')
|
||||
togglePromptPanel()
|
||||
})
|
||||
/** Show the custom-prompt panel only while the LLM switch is on. */
function togglePromptPanel() {
  const llmOn = $('#llmToggle').is(':checked')
  $('#promptPanel').toggle(llmOn)
}
|
||||
$('#btnClearPrompt').on('click', function () { $('#llmPrompt').val('') })
|
||||
|
||||
// ── Clear ────────────────────────────────────────────────────
|
||||
// Reset the whole page to its initial state: forget the selected file and
// URL, return every result pane to idle and hide the status row.
$('#btnClear').on('click', function () {
  currentFile = null
  // Clear the native input too, otherwise picking the same file again
  // does not fire a `change` event.
  $('#fileInput').val('')
  $('#fileInfo').text('')
  $('#ytUrl').val('')
  updateConvertBtn()

  // A `for…of` loop instead of a statement starting with `[`: without a
  // semicolon the array literal was parsed as an index into the result of
  // updateConvertBtn(), which threw and skipped everything below.
  for (const k of ['md', 'dl', 'uo']) {
    resetPane(k)
  }
  $('#statusRow').hide()
})
|
||||
|
||||
// ── Convert ──────────────────────────────────────────────────
|
||||
// Send the current input (file or YouTube URL) to all three converters in
// parallel, then render each pane independently as success or error.
$('#btnConvert').on('click', function () {
  const useLlm = $('#llmToggle').is(':checked')
  const prompt = $('#llmPrompt').val().trim() || null
  const dlFmt  = $('#dlFormat').val()
  // Trimmed to match the check that enabled the button.
  const ytUrl  = $('#ytUrl').val().trim()
  const isYoutube = mode === 'youtube'
  const panes = ['md', 'dl', 'uo']

  $('#statusRow').show()
  $('#btnConvert').prop('disabled', true).html('<span class="spinner-border spinner-border-sm me-1"></span>Đang xử lý...')
  panes.forEach(k => startPane(k))

  // Build "<base>?use_llm=…[&extra…][&llm_prompt=…]".
  const mkUrl = (base, extra = {}) => {
    const p = new URLSearchParams({ use_llm: useLlm, ...extra })
    if (prompt) p.set('llm_prompt', prompt)
    return base + '?' + p.toString()
  }

  // Multipart upload of the selected file; options travel in the query string.
  const postFile = (base, extra) => {
    const fd = new FormData()
    fd.append('file', currentFile)
    return $.ajax({ url: mkUrl(base, extra), method: 'POST', data: fd,
      processData: false, contentType: false })
  }

  // JSON POST used by the URL-based endpoints.
  const postJson = (url, payload) =>
    $.ajax({ url, method: 'POST', contentType: 'application/json', data: JSON.stringify(payload) })

  // Turn a failed request into a displayable message. `detail` is not always
  // a string, so anything else is serialised instead of "[object Object]".
  const errText = e => {
    const body = e && e.responseJSON
    const detail = body && (body.detail ?? body.message)
    if (typeof detail === 'string' && detail) return detail
    if (detail) return JSON.stringify(detail)
    return (e && e.statusText) || 'Lỗi không xác định'
  }

  // Common start time, taken right before the requests are dispatched.
  const t0 = performance.now()

  const requests = {
    md: isYoutube
      ? postJson('/api/markitdown/convert-url', { url: ytUrl, use_llm: useLlm, llm_prompt: prompt })
      : postFile('/api/markitdown/convert'),
    dl: isYoutube
      ? postJson('/api/docling/convert-url', { url: ytUrl, output_format: dlFmt, use_llm: useLlm, llm_prompt: prompt })
      : postFile('/api/docling/convert', { output_format: dlFmt }),
    uo: isYoutube
      ? Promise.reject({ responseJSON: { detail: 'Unlimited-OCR không hỗ trợ YouTube URL' } })
      : postFile('/api/unlimited-ocr/convert'),
  }

  // Never rejects: each request settles to { k, ok, data, ms } or { k, ok, err }
  // so one failing backend cannot hide the others' results.
  const settled = k => Promise.resolve(requests[k]).then(
    d => ({ k, ok: true, data: d, ms: Math.round(performance.now() - t0) }),
    e => ({ k, ok: false, err: errText(e) })
  )

  Promise.all(panes.map(settled))
    .then(all => {
      all.forEach(({ k, ok, data, ms, err }) => {
        if (ok) finishPane(k, data, ms)
        else errorPane(k, err)
      })
    })
    .finally(() => {
      // Runs even if a pane renderer threw, so the button never stays disabled.
      $('#btnConvert').prop('disabled', false).html('<i class="bi bi-play-fill me-1"></i>Chuyển đổi & So sánh')
      loadHistory()
      localStorage.setItem('llm_prompt', $('#llmPrompt').val())
    })
})
|
||||
|
||||
// ── Pane helpers ─────────────────────────────────────────────
|
||||
/**
 * Return pane `k` ('md' | 'dl' | 'uo') to its idle state: drop the stored
 * result, select the raw tab in state, and hide every result/status widget.
 */
function resetPane(k) {
  tabs[k] = 'raw'
  results[k] = ''

  const hidden = [
    `.pane-loading-${k}`, `.pane-error-${k}`, `.pane-raw-${k}`, `.pane-preview-${k}`,
    `.status-spinner-${k}`, `.status-stats-${k}`, `.llm-badge-${k}`, `.btn-dl-${k}`,
  ]
  $(hidden.join(', ')).hide()
  $(`.pane-idle-${k}`).show()
  $(`.status-text-${k}`).text('Đang chờ...')

  // The cleanup button exists only on the MarkItDown pane.
  if (k === 'md') {
    $('#btnCleanup').hide()
  }
}
|
||||
|
||||
/**
 * Put pane `k` ('md' | 'dl' | 'uo') into its loading state.
 *
 * Everything from the previous run is discarded first (stored result,
 * rendered text, stats, LLM badge, download/cleanup buttons). Otherwise a
 * run that fails, or runs without the LLM, would still show the old badge
 * and let the user download the old content.
 */
function startPane(k) {
  results[k] = ''
  $(`.pane-raw-${k}`).text('')
  $(`.pane-preview-${k}`).empty()

  $(`.pane-idle-${k}, .pane-error-${k}, .pane-raw-${k}, .pane-preview-${k}`).hide()
  $(`.status-stats-${k}, .llm-badge-${k}, .btn-dl-${k}`).hide()
  if (k === 'md') $('#btnCleanup').hide()

  $(`.pane-loading-${k}`).show()
  $(`.status-spinner-${k}`).show()
  $(`.status-text-${k}`).text('Đang xử lý...')
}
|
||||
|
||||
/**
 * Render a successful conversion in pane `k`.
 *
 * @param {string} k    Pane key: 'md' | 'dl' | 'uo'.
 * @param {object} data Response body; the text is read from `markdown` or
 *                      `content`, and `llm_enabled` drives the LLM badge.
 * @param {number} ms   Elapsed time in milliseconds.
 */
function finishPane(k, data, ms) {
  // An empty response body arrives as undefined/null.
  const payload = data || {}
  const raw = payload.markdown || payload.content || ''
  // NOTE(review): non-string content (e.g. an already-parsed JSON document)
  // is pretty-printed rather than crashing on `.split` — confirm the shape
  // the backends actually return.
  const content = typeof raw === 'string' ? raw : JSON.stringify(raw, null, 2)
  const llmUsed = Boolean(payload.llm_enabled)
  const hasContent = content.length > 0

  results[k] = content
  $(`.pane-loading-${k}`).hide()
  $(`.pane-raw-${k}`).text(content).show()
  $(`.pane-preview-${k}`).html(mdRender(content)).hide()

  // A fresh result always opens on the raw tab.
  tabs[k] = 'raw'
  $(`.tab-raw-${k}`).addClass('active')
  $(`.tab-preview-${k}`).removeClass('active')

  $(`.status-spinner-${k}`).hide()
  $(`.status-text-${k}`).text(`✅ ${ms} ms` + (llmUsed ? ' 🤖' : ''))
  $(`.status-stats-${k}`).show()
  $(`.stat-ms-${k}`).text(ms.toLocaleString())
  $(`.stat-len-${k}`).text(content.length.toLocaleString())
  $(`.stat-lines-${k}`).text(hasContent ? content.split('\n').length : 0)

  // toggle(bool) both shows and hides, so these widgets always reflect
  // this result instead of keeping whatever an earlier run left visible.
  $(`.llm-badge-${k}`).toggle(llmUsed)
  $(`.btn-dl-${k}`).toggle(hasContent)
  if (k === 'md') $('#btnCleanup').toggle(hasContent)
}
|
||||
|
||||
/**
 * Show a failure in pane `k`: stop both loading indicators, display `msg`
 * in the pane's error box and repeat it in the status line.
 */
function errorPane(k, msg) {
  $(`.pane-loading-${k}, .status-spinner-${k}`).hide()

  const $box = $(`.pane-error-${k}`)
  $box.text(msg)
  $box.show()

  $(`.status-text-${k}`).text(`❌ ${msg}`)
}
|
||||
|
||||
// ── Tab switching ─────────────────────────────────────────────
|
||||
$(document).on('click', '[data-tab]', function (e) {
|
||||
e.preventDefault()
|
||||
const k = $(this).data('pane')
|
||||
const t = $(this).data('tab')
|
||||
tabs[k] = t
|
||||
$(`.tab-raw-${k}, .tab-preview-${k}`).removeClass('active')
|
||||
$(`.tab-${t}-${k}`).addClass('active')
|
||||
$(`.pane-raw-${k}, .pane-preview-${k}`).hide()
|
||||
$(`.pane-${t}-${k}`).show()
|
||||
})
|
||||
|
||||
// ── Download ─────────────────────────────────────────────────
|
||||
$(document).on('click', '[class*="btn-dl-"]', function () {
|
||||
const k = $(this).attr('class').match(/btn-dl-(\w+)/)[1]
|
||||
const name = (currentFile?.name || 'output').replace(/\.[^.]+$/, '')
|
||||
const a = document.createElement('a')
|
||||
a.href = URL.createObjectURL(new Blob([results[k]], { type: 'text/markdown' }))
|
||||
a.download = `${name}_${k}.md`
|
||||
a.click(); URL.revokeObjectURL(a.href)
|
||||
})
|
||||
|
||||
// ── Cleanup ──────────────────────────────────────────────────
|
||||
$('#btnCleanup').on('click', function () {
|
||||
if (!results.md) return
|
||||
$(this).prop('disabled', true).html('<span class="spinner-border spinner-border-sm"></span>')
|
||||
$.ajax({
|
||||
url: '/api/markitdown/cleanup',
|
||||
method: 'POST', contentType: 'application/json',
|
||||
data: JSON.stringify({ text: results.md, prompt: $('#llmPrompt').val() || null }),
|
||||
}).done(function (d) {
|
||||
results.md = d.text
|
||||
$(`.pane-raw-md`).text(d.text)
|
||||
$(`.pane-preview-md`).html(mdRender(d.text))
|
||||
}).fail(function (e) {
|
||||
alert('Cleanup thất bại: ' + (e?.responseJSON?.detail || 'Lỗi'))
|
||||
}).always(function () {
|
||||
$('#btnCleanup').prop('disabled', false).text('✨ Làm đẹp')
|
||||
})
|
||||
})
|
||||
|
||||
// ── History ──────────────────────────────────────────────────
|
||||
/**
 * Fetch the latest conversions of each backend and render them into the
 * matching `.history-<k>` container.
 *
 * File names and file types originate from user uploads, so they are
 * HTML-escaped before being placed into markup.
 */
function loadHistory() {
  const EMPTY_HTML = '<div class="text-center text-muted py-3 small">Chưa có lịch sử</div>'

  // Escape text for use as element content, via a detached node.
  const esc = s => $('<div>').text(s == null ? '' : String(s)).html()

  const endpoints = {
    md: '/api/markitdown/history?limit=8',
    dl: '/api/docling/history?limit=8',
    uo: '/api/unlimited-ocr/history?limit=8',
  }

  Object.entries(endpoints).forEach(([k, url]) => {
    $.getJSON(url).done(function (items) {
      const $el = $(`.history-${k}`)
      // Anything that is not a non-empty array is treated as "no history".
      if (!Array.isArray(items) || !items.length) {
        $el.html(EMPTY_HTML)
        return
      }

      const rows = items.map(item => {
        const t   = item.created_at ? new Date(item.created_at).toLocaleTimeString('vi-VN') : ''
        const llm = item.llm_enabled ? '<span class="badge bg-primary-subtle text-primary">🤖 LLM</span>' : ''
        const ft  = item.file_type ? `<span class="badge bg-secondary-subtle text-secondary">${esc(item.file_type)}</span>` : ''
        return `<li class="list-group-item d-flex justify-content-between align-items-center py-2 px-3">
          <span class="small fw-medium text-truncate me-2" style="max-width:60%">${esc(item.filename)}</span>
          <span class="d-flex gap-1 align-items-center flex-shrink-0">
            ${ft} ${llm}
            <span class="text-muted" style="font-size:.7rem">${esc(t)}</span>
          </span>
        </li>`
      }).join('')

      $el.html(`<ul class="list-group list-group-flush">${rows}</ul>`)
    })
  })
}
|
||||
|
||||
})
|
||||
</script>
|
||||
@endsection
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="vi">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>@yield('title', 'AI Markdown Demo')</title>
|
||||
<meta name="csrf-token" content="{{ csrf_token() }}" />
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet" />
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.min.css" rel="stylesheet" />
|
||||
<link href="https://cdn.jsdelivr.net/npm/github-markdown-css@5/github-markdown-light.css" rel="stylesheet" />
|
||||
<style>
|
||||
body { background: #f8f9fa; }
|
||||
.ResultPre {
|
||||
max-height: 460px; overflow: auto; white-space: pre-wrap;
|
||||
word-break: break-word; font-size: .78rem; background: #f8f9fa;
|
||||
}
|
||||
/* github-markdown-css handles all preview styling */
|
||||
.PreviewPane { max-height: 460px; overflow: auto; padding: 1rem; font-size: .85rem; }
|
||||
.drop-zone {
|
||||
border: 2px dashed #dee2e6; border-radius: .5rem;
|
||||
padding: 3rem 1.5rem; text-align: center; cursor: pointer;
|
||||
transition: border-color .2s, background .2s;
|
||||
}
|
||||
.drop-zone:hover, .drop-zone.drag-over { border-color: #0d6efd; background: #f0f6ff; }
|
||||
.FileBrowser { height: 560px; overflow-y: auto; font-size: .82rem; }
|
||||
.file-entry {
|
||||
display: flex; align-items: center; gap: .4rem;
|
||||
padding: .25rem .5rem; border-radius: .3rem; cursor: pointer;
|
||||
user-select: none; overflow: visible; position: relative;
|
||||
}
|
||||
.file-entry > span.text-truncate { min-width:0; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||
.file-entry:hover { background: #e9ecef; }
|
||||
.file-entry.selected { background: #cfe2ff; color: #084298; font-weight: 500; }
|
||||
.file-entry.input-sel { background: #cfe2ff; color: #084298; font-weight: 500; }
|
||||
.file-entry.output-sel { background: #d1e7dd; color: #0a3622; }
|
||||
.act-active { background: rgba(255,255,255,.2) !important; }
|
||||
.tree-actions { display:none; gap:2px; align-items:center; flex-shrink:0; margin-left:auto; }
|
||||
.file-entry:hover .tree-actions { display:flex; }
|
||||
.btn-tree-act {
|
||||
border: none; background: none; padding: 1px 5px; border-radius: 3px;
|
||||
font-size: .75rem; cursor: pointer; color: #6c757d; line-height:1.4;
|
||||
}
|
||||
.btn-tree-act:hover { background: #dee2e6; color: #212529; }
|
||||
.btn-set-output:hover { color: #0a3622; background: #d1e7dd; }
|
||||
.btn-preview-md:hover { color: #084298; background: #cfe2ff; }
|
||||
.path-badge { font-size: .72rem; font-family: monospace; word-break: break-all; color: #6c757d; }
|
||||
.indent-1 { padding-left: 1.4rem; }
|
||||
.indent-2 { padding-left: 2.6rem; }
|
||||
.indent-3 { padding-left: 3.8rem; }
|
||||
.indent-4 { padding-left: 5rem; }
|
||||
</style>
|
||||
@yield('head')
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<nav class="navbar navbar-light bg-white border-bottom px-4 py-2 d-flex justify-content-between">
|
||||
<div class="d-flex align-items-center gap-3">
|
||||
<span class="navbar-brand fw-bold mb-0">
|
||||
MarkItDown <span class="text-muted fw-normal">vs</span> Docling
|
||||
<span class="text-muted fw-normal">vs</span> Unlimited-OCR
|
||||
</span>
|
||||
<ul class="nav nav-pills">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-3 small {{ request()->is('/') ? 'active' : '' }}" href="/">
|
||||
<i class="bi bi-upload me-1"></i>Upload File
|
||||
</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-3 small {{ request()->is('email-convert') ? 'active' : '' }}" href="/email-convert">
|
||||
<i class="bi bi-envelope me-1"></i>Email Convert
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<span class="badge bg-primary-subtle text-primary">Demo — LLM Input Processing</span>
|
||||
</nav>
|
||||
|
||||
<div class="container-xl py-4">
|
||||
@yield('content')
|
||||
</div>
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/jquery@3.7.1/dist/jquery.min.js"></script>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11/build/styles/github.min.css" />
|
||||
<script src="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11/build/highlight.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/markdown-it@14/dist/markdown-it.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
|
||||
<script>
|
||||
const _md = markdownit({
|
||||
html: true, linkify: true, typographer: true, breaks: true,
|
||||
highlight: (str, lang) => {
|
||||
if (lang && hljs.getLanguage(lang)) {
|
||||
try { return '<pre class="hljs"><code>' + hljs.highlight(str, { language: lang, ignoreIllegals: true }).value + '</code></pre>' }
|
||||
catch {}
|
||||
}
|
||||
return '<pre class="hljs"><code>' + _md.utils.escapeHtml(str) + '</code></pre>'
|
||||
}
|
||||
})
|
||||
const mdRender = s => DOMPurify.sanitize(_md.render(s || ''), { USE_PROFILES: { html: true } })
|
||||
</script>
|
||||
@yield('scripts')
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
{{-- @include('partials.pane-body', ['k' => $k, 'tc' => $tc, 'idle' => '...']) --}}
|
||||
@php $idle ??= 'Tải file lên để xem kết quả'; @endphp
|
||||
<div class="text-center text-muted py-5 small pane-idle-{{ $k }}">
|
||||
<i class="bi bi-upload fs-3 d-block mb-2"></i>{{ $idle }}
|
||||
</div>
|
||||
<div class="text-center text-muted py-5 small pane-loading-{{ $k }}" style="display:none">
|
||||
<div class="spinner-border spinner-border-sm mb-2 {{ $tc }}"></div><br>Đang xử lý...
|
||||
</div>
|
||||
<div class="alert alert-danger m-3 small pane-error-{{ $k }}" style="display:none"></div>
|
||||
<pre class="ResultPre p-3 m-0 pane-raw-{{ $k }}" style="display:none"></pre>
|
||||
<div class="PreviewPane markdown-body pane-preview-{{ $k }}" style="display:none"></div>
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
{{-- @include('partials.pane-tabs', ['k' => $k]) --}}
|
||||
<ul class="nav nav-tabs card-header-tabs border-0">
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-2 small active tab-raw-{{ $k }}"
|
||||
href="#" data-pane="{{ $k }}" data-tab="raw">Raw</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="nav-link py-1 px-2 small tab-preview-{{ $k }}"
|
||||
href="#" data-pane="{{ $k }}" data-tab="preview">Preview</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Foundation\Inspiring;
|
||||
use Illuminate\Support\Facades\Artisan;
|
||||
|
||||
Artisan::command('inspire', function () {
|
||||
$this->comment(Inspiring::quote());
|
||||
})->purpose('Display an inspiring quote');
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
<?php
|
||||
|
||||
// Routes are declared via PHP attributes on each controller.
|
||||
// See: spatie/laravel-route-attributes
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
*
|
||||
!private/
|
||||
!public/
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
compiled.php
|
||||
config.php
|
||||
down
|
||||
events.scanned.php
|
||||
maintenance.php
|
||||
routes.php
|
||||
routes.scanned.php
|
||||
schedule-*
|
||||
services.json
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
*
|
||||
!data/
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
*
|
||||
!.gitignore
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
|
||||
namespace Tests\Feature;
|
||||
|
||||
// use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Tests\TestCase;
|
||||
|
||||
class ExampleTest extends TestCase
|
||||
{
|
||||
/**
|
||||
* A basic test example.
|
||||
*/
|
||||
public function test_the_application_returns_a_successful_response(): void
|
||||
{
|
||||
$response = $this->get('/');
|
||||
|
||||
$response->assertStatus(200);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
|
||||
namespace Tests;
|
||||
|
||||
use Illuminate\Foundation\Testing\TestCase as BaseTestCase;
|
||||
|
||||
abstract class TestCase extends BaseTestCase
|
||||
{
|
||||
//
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
|
||||
namespace Tests\Unit;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
class ExampleTest extends TestCase
|
||||
{
|
||||
/**
|
||||
* A basic test example.
|
||||
*/
|
||||
public function test_that_true_is_true(): void
|
||||
{
|
||||
$this->assertTrue(true);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
import { defineConfig } from 'vite';
|
||||
import laravel from 'laravel-vite-plugin';
|
||||
import { bunny } from 'laravel-vite-plugin/fonts';
|
||||
import tailwindcss from '@tailwindcss/vite';
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [
|
||||
laravel({
|
||||
input: ['resources/css/app.css', 'resources/js/app.js'],
|
||||
refresh: true,
|
||||
fonts: [
|
||||
bunny('Instrument Sans', {
|
||||
weights: [400, 500, 600],
|
||||
}),
|
||||
],
|
||||
}),
|
||||
tailwindcss(),
|
||||
],
|
||||
server: {
|
||||
watch: {
|
||||
ignored: ['**/storage/framework/views/**'],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
import os
|
||||
import tempfile
|
||||
import logging
|
||||
from fastapi import UploadFile, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from markitdown import MarkItDown
|
||||
from app.models.ConvertModel import Conversion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import openai as _openai
|
||||
|
||||
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL") or None
|
||||
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llava")
|
||||
CLEANUP_MODEL = os.getenv("CLEANUP_MODEL", "")
|
||||
|
||||
LLM_ACTIVE = False
|
||||
_llm_client = None
|
||||
md_plain = MarkItDown()
|
||||
md = md_plain
|
||||
|
||||
|
||||
def _init_llm(base_url: str | None, model: str) -> bool:
    """(Re)configure the module-level LLM client and MarkItDown converter.

    Args:
        base_url: OpenAI-compatible endpoint; a falsy value disables the LLM.
        model: Model name stored in ``OLLAMA_MODEL`` and passed to MarkItDown.

    Returns:
        True when an LLM-backed converter was installed, False otherwise.
    """
    global OLLAMA_BASE_URL, OLLAMA_MODEL, LLM_ACTIVE, _llm_client, md

    if not base_url:
        # No endpoint configured: fall back to the plain converter.
        OLLAMA_BASE_URL = None
        OLLAMA_MODEL = model
        LLM_ACTIVE = False
        _llm_client = None
        md = md_plain
        return False

    try:
        new_client = _openai.OpenAI(base_url=base_url, api_key="ollama")
        OLLAMA_BASE_URL = base_url
        OLLAMA_MODEL = model
        _llm_client = new_client
        md = MarkItDown(llm_client=new_client, llm_model=model)
        LLM_ACTIVE = True
        logger.info("MarkItDown: LLM enabled via %s (model=%s)", base_url, model)
        return True
    except Exception as exc:
        logger.warning("MarkItDown: LLM init failed (%s)", exc)
        LLM_ACTIVE = False
        return False
|
||||
|
||||
|
||||
_init_llm(OLLAMA_BASE_URL, OLLAMA_MODEL)
|
||||
|
||||
DEFAULT_CLEANUP_PROMPT = """You are a technical document formatter. \
|
||||
The text below was extracted from a multi-column PDF using OCR and is poorly structured: \
|
||||
columns are merged, headers are mixed with values, and content is out of order.
|
||||
|
||||
Your task:
|
||||
1. Identify the logical sections (e.g. PERFORMANCE, MEMORY, STORAGE, CONNECTIVITY, etc.)
|
||||
2. Under each section, format specs as a clean two-column Markdown table: | Spec | Value |
|
||||
3. Keep bullet lists where appropriate (e.g. ports, certifications)
|
||||
4. Remove duplicate lines and OCR artifacts (e.g. stray "---", lone "|", empty rows)
|
||||
5. Preserve all technical values exactly — do not paraphrase specs
|
||||
|
||||
Return ONLY the cleaned Markdown. No code fences, no commentary, no preamble."""
|
||||
|
||||
import re as _re
|
||||
|
||||
def llm_cleanup(text: str, prompt: str | None = None, model: str | None = None) -> str:
    """Ask the configured LLM to restructure extracted text as clean Markdown.

    Args:
        text: Raw extracted text.
        prompt: System prompt; ``DEFAULT_CLEANUP_PROMPT`` when omitted.
        model: Model name; ``OLLAMA_MODEL`` when omitted.

    Returns:
        The cleaned Markdown, or ``text`` unchanged when no client is
        configured, the input is blank, the reply is empty, or the call fails.
    """
    if not _llm_client or not text.strip():
        return text

    try:
        conversation = [
            {"role": "system", "content": prompt or DEFAULT_CLEANUP_PROMPT},
            {"role": "user", "content": text},
        ]
        resp = _llm_client.chat.completions.create(
            model=model or OLLAMA_MODEL,
            messages=conversation,
            temperature=0,
        )
        cleaned = resp.choices[0].message.content or text
        # Strip a leading and a trailing code fence if the reply has them.
        cleaned = _re.sub(r"^```(?:markdown)?\s*\n?", "", cleaned.strip())
        cleaned = _re.sub(r"\n?```\s*$", "", cleaned.strip())
        return cleaned.strip() or text
    except Exception as exc:
        logger.warning("MarkItDown: cleanup failed (%s)", exc)
        return text
|
||||
|
||||
|
||||
ALLOWED_EXTENSIONS = {
|
||||
"pdf", "docx", "xlsx", "pptx",
|
||||
"html", "csv", "txt", "jpg", "jpeg", "png", "zip", "epub"
|
||||
}
|
||||
|
||||
YOUTUBE_PATTERN = _re.compile(
|
||||
r"(https?://)?(www\.)?(youtube\.com/watch|youtu\.be/|youtube\.com/shorts/)"
|
||||
)
|
||||
|
||||
|
||||
async def convert_url(
    url: str,
    db: AsyncSession,
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert a URL with MarkItDown and persist the result.

    Args:
        url: Address handed to the converter.
        db: Async session used to store the ``Conversion`` row.
        use_llm: Per-call opt-in; the LLM is used only if ``LLM_ACTIVE`` too.
        llm_prompt: Optional custom prompt for a one-off converter.

    Returns:
        The stored ``Conversion`` record.

    Raises:
        HTTPException: 500 when conversion or the database write fails.
    """
    use_llm_now = LLM_ACTIVE and use_llm

    converter = md_plain
    if use_llm_now:
        converter = md
        if llm_prompt:
            try:
                converter = MarkItDown(
                    llm_client=_llm_client,
                    llm_model=OLLAMA_MODEL,
                    llm_prompt=llm_prompt,
                )
            except TypeError:
                converter = md

    try:
        result = converter.convert(url)
        actual_llm = use_llm_now
    except Exception as llm_err:
        recoverable = use_llm_now and (
            "500" in str(llm_err) or "InternalServerError" in type(llm_err).__name__
        )
        if not recoverable:
            raise HTTPException(status_code=500, detail=str(llm_err))
        logger.warning("MarkItDown: LLM failed (%s), retrying without LLM", llm_err)
        result = md_plain.convert(url)
        actual_llm = False

    # The last URL path segment (query string removed) becomes the filename.
    last_segment = url.rstrip("/").split("/")[-1]
    slug = last_segment.split("?")[0] or "youtube"
    filename = f"{slug}.md"

    try:
        record = Conversion(
            filename=filename,
            file_type="youtube",
            markdown=result.text_content,
            llm_enabled=actual_llm,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as exc:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
def _allowed_file(filename: str) -> bool:
|
||||
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
async def convert_file(
    file: UploadFile,
    db: AsyncSession,
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert an uploaded file with MarkItDown and persist the result.

    The upload is written to a temporary file, which is removed once the
    conversion and database write are done.

    Args:
        file: Uploaded file; its extension must be in ``ALLOWED_EXTENSIONS``.
        db: Async session used to store the ``Conversion`` row.
        use_llm: Per-call opt-in; the LLM is used only if ``LLM_ACTIVE`` too.
        llm_prompt: Optional custom prompt for a one-off converter.

    Returns:
        The stored ``Conversion`` record.

    Raises:
        HTTPException: 422 for a disallowed extension, 500 for any failure
            during conversion or the database write.
    """
    if not _allowed_file(file.filename):
        allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
        raise HTTPException(
            status_code=422,
            detail=f"File type not allowed. Allowed: {allowed}",
        )

    suffix = os.path.splitext(file.filename)[1]
    file_type = suffix.lstrip(".").lower()

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    # The LLM is used only when enabled globally AND requested for this call.
    use_llm_now = LLM_ACTIVE and use_llm

    converter = md_plain
    if use_llm_now:
        converter = md
        if llm_prompt:
            # A custom prompt needs its own one-off MarkItDown instance.
            try:
                converter = MarkItDown(
                    llm_client=_llm_client,
                    llm_model=OLLAMA_MODEL,
                    llm_prompt=llm_prompt,
                )
            except TypeError:
                # Older markitdown versions may not accept the llm_prompt kwarg.
                converter = md

    try:
        try:
            result = converter.convert(tmp_path)
            actual_llm = use_llm_now
        except Exception as llm_err:
            # Ollama OOM / 500: retry the conversion without the LLM.
            recoverable = use_llm_now and (
                "500" in str(llm_err) or "InternalServerError" in type(llm_err).__name__
            )
            if not recoverable:
                raise
            logger.warning("MarkItDown: LLM failed (%s), retrying without LLM", llm_err)
            result = md_plain.convert(tmp_path)
            actual_llm = False

        record = Conversion(
            filename=file.filename,
            file_type=file_type,
            markdown=result.text_content,
            llm_enabled=actual_llm,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as exc:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
async def get_history(db: AsyncSession, limit: int = 20) -> list[Conversion]:
    """Return up to ``limit`` conversions ordered by ``created_at`` descending."""
    newest_first = (
        select(Conversion)
        .order_by(Conversion.created_at.desc())
        .limit(limit)
    )
    rows = await db.execute(newest_first)
    return rows.scalars().all()
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
"""Central config — single source of truth for LLM/service settings.
|
||||
To change the base_url / model, edit only this file."""
|
||||
import os
|
||||
|
||||
# llama-server (Qwen2.5-VL) via SSH tunnel; Docker reaches the host through host.docker.internal
|
||||
LLM_BASE_URL = "http://host.docker.internal:11436/v1"
|
||||
LLM_MODEL = "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF"
|
||||
CLEANUP_MODEL = os.getenv("CLEANUP_MODEL", "")
|
||||
|
||||
# Vision OCR is consolidated in vision-service (container api-vision). Images → POST {VISION_URL}/ocr
|
||||
VISION_URL = os.getenv("VISION_URL", "http://api-vision:8000")
|
||||
|
|
@ -13,22 +13,26 @@ class UrlRequest(BaseModel):
|
|||
use_llm: bool = True
|
||||
llm_prompt: str | None = None
|
||||
|
||||
class PathRequest(BaseModel):
    """Request body describing a file path to convert and how to convert it."""

    # Path of the file to convert.
    path: str
    # Requested output format.
    output_format: str = "markdown"
    # Whether the caller wants the LLM step applied.
    use_llm: bool = True
    # Optional custom prompt for the LLM step.
    llm_prompt: str | None = None
|
||||
|
||||
class SettingsRequest(BaseModel):
    """Request body carrying LLM settings supplied by the client."""

    # Base URL of the LLM endpoint; None when not supplied.
    ollama_base_url: str | None = None
    # Model name.
    ollama_model: str = "llava"
    # Model name for the cleanup pass; None when not supplied.
    cleanup_model: str | None = None
|
||||
|
||||
class SettingsResponse(BaseModel):
|
||||
llm_enabled: bool
|
||||
ollama_base_url: str | None
|
||||
ollama_model: str
|
||||
llm_base_url: str | None
|
||||
llm_model: str
|
||||
cleanup_model: str | None = None
|
||||
default_prompt: str | None = None
|
||||
|
||||
SUPPORTED_INPUT_FORMATS = sorted([
|
||||
"pdf", "docx", "xlsx", "pptx",
|
||||
"html", "htm", "jpg", "jpeg", "png",
|
||||
"tiff", "tif", "bmp", "md", "txt", "asciidoc", "adoc"
|
||||
"tiff", "tif", "bmp", "md", "txt", "asciidoc", "adoc", "elm"
|
||||
])
|
||||
|
||||
SUPPORTED_OUTPUT_FORMATS = ["markdown", "json", "html", "text"]
|
||||
|
|
@ -38,34 +42,34 @@ SUPPORTED_OUTPUT_FORMATS = ["markdown", "json", "html", "text"]
|
|||
def get_settings():
|
||||
return SettingsResponse(
|
||||
llm_enabled=docling_service.LLM_ACTIVE,
|
||||
ollama_base_url=docling_service.OLLAMA_BASE_URL,
|
||||
ollama_model=docling_service.OLLAMA_MODEL,
|
||||
llm_base_url=docling_service.LLM_BASE_URL,
|
||||
llm_model=docling_service.LLM_MODEL,
|
||||
cleanup_model=docling_service.CLEANUP_MODEL or None,
|
||||
default_prompt=docling_service.DEFAULT_ENRICH_PROMPT,
|
||||
)
|
||||
|
||||
@router.post("/settings", response_model=SettingsResponse)
|
||||
def update_settings(req: SettingsRequest):
|
||||
docling_service._init_llm(req.ollama_base_url or None, req.ollama_model)
|
||||
docling_service._init_llm()
|
||||
docling_service.CLEANUP_MODEL = req.cleanup_model or ""
|
||||
return SettingsResponse(
|
||||
llm_enabled=docling_service.LLM_ACTIVE,
|
||||
ollama_base_url=docling_service.OLLAMA_BASE_URL,
|
||||
ollama_model=docling_service.OLLAMA_MODEL,
|
||||
llm_base_url=docling_service.LLM_BASE_URL,
|
||||
llm_model=docling_service.LLM_MODEL,
|
||||
cleanup_model=docling_service.CLEANUP_MODEL or None,
|
||||
default_prompt=docling_service.DEFAULT_ENRICH_PROMPT,
|
||||
)
|
||||
|
||||
@router.get("/health", response_model=HealthResponse)
|
||||
def health():
|
||||
from app.services.DoclingService import LLM_ACTIVE, OLLAMA_MODEL
|
||||
from app.services.DoclingService import LLM_ACTIVE, LLM_MODEL
|
||||
ocr = "tesseract" if _ocr_available() else "none"
|
||||
return HealthResponse(
|
||||
status="ok",
|
||||
supported_formats=SUPPORTED_INPUT_FORMATS,
|
||||
output_formats=SUPPORTED_OUTPUT_FORMATS,
|
||||
llm_enabled=LLM_ACTIVE,
|
||||
llm_model=OLLAMA_MODEL if LLM_ACTIVE else None,
|
||||
llm_model=LLM_MODEL if LLM_ACTIVE else None,
|
||||
ocr_engine=ocr,
|
||||
)
|
||||
|
||||
|
|
@ -125,6 +129,26 @@ async def get_conversion(conversion_id: int, db: AsyncSession = Depends(get_db))
|
|||
)
|
||||
|
||||
|
||||
@router.post("/convert-path")
async def convert_path(req: PathRequest, db: AsyncSession = Depends(get_db)):
    """Convert a file that already exists under ``/workspace``.

    The requested path is resolved with ``os.path.realpath`` (symlinks and
    ``..`` collapsed) and must lie inside ``/workspace``.

    Args:
        req: Path, output format and LLM options.
        db: Async database session injected by FastAPI.

    Returns:
        A ``ConvertResponse`` built from the stored conversion record.

    Raises:
        HTTPException: 403 when the resolved path is outside ``/workspace``,
            404 when it is not an existing regular file.
    """
    import os
    from fastapi import HTTPException as _HTTPException

    workspace_root = "/workspace"
    abs_path = os.path.realpath(req.path)
    # Match on a path-component boundary: a bare prefix test would also accept
    # sibling directories such as "/workspace-other/secret".
    inside_workspace = (
        abs_path == workspace_root
        or abs_path.startswith(workspace_root + os.sep)
    )
    if not inside_workspace:
        raise _HTTPException(status_code=403, detail="Access denied")
    # isfile() is already False for paths that do not exist.
    if not os.path.isfile(abs_path):
        raise _HTTPException(status_code=404, detail="File not found")

    record = await docling_service.convert_path(
        abs_path,
        db,
        req.output_format,
        use_llm=req.use_llm,
        llm_prompt=req.llm_prompt,
    )
    return ConvertResponse(
        id=record.id,
        filename=record.filename,
        output_format=record.output_format,
        content=record.content,
        page_count=record.page_count,
        llm_enabled=record.llm_enabled,
    )
|
||||
|
||||
|
||||
@router.get("/history", response_model=list[ConversionRecord])
|
||||
async def history(limit: int = 20, db: AsyncSession = Depends(get_db)):
|
||||
records = await docling_service.get_history(db, limit)
|
||||
|
|
@ -0,0 +1,632 @@
|
|||
import os
|
||||
import re
|
||||
import json
|
||||
import tempfile
|
||||
import logging
|
||||
from fastapi import UploadFile, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption, ImageFormatOption
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from app.models.ConvertModel import Conversion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import openai as _openai
|
||||
|
||||
from app.config import LLM_BASE_URL, LLM_MODEL, CLEANUP_MODEL, VISION_URL
|
||||
|
||||
def _build_converter() -> DocumentConverter:
    """Build the Docling converter, using Tesseract CLI OCR when it can be set up.

    If constructing the OCR options raises, a warning is logged and the
    converter is built with OCR disabled.
    """
    try:
        pdf_opts = PdfPipelineOptions(
            do_ocr=True,
            ocr_options=TesseractCliOcrOptions(),
        )
        logger.info("Docling: OCR enabled via Tesseract CLI")
    except Exception as exc:
        logger.warning("Docling: Tesseract unavailable (%s) — OCR disabled", exc)
        pdf_opts = PdfPipelineOptions(do_ocr=False)

    # ImageFormatOption also uses StandardPdfPipeline, so it gets the same
    # pdf_opts to prevent docling from falling back to RapidOCR / PP-OCRv6.
    format_options = {
        InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_opts),
        InputFormat.IMAGE: ImageFormatOption(pipeline_options=pdf_opts),
    }
    return DocumentConverter(format_options=format_options)
|
||||
|
||||
converter = _build_converter()
|
||||
|
||||
_llm_client = None
|
||||
LLM_ACTIVE = False
|
||||
|
||||
|
||||
def _init_llm() -> bool:
    """Create the module-level OpenAI-compatible client from ``LLM_BASE_URL``.

    Returns:
        True when the client was created (``LLM_ACTIVE`` is set), False when
        construction raised (``LLM_ACTIVE`` is cleared).
    """
    global LLM_ACTIVE, _llm_client

    try:
        _llm_client = _openai.OpenAI(base_url=LLM_BASE_URL, api_key="none")
    except Exception as exc:
        logger.warning("Docling: LLM init failed (%s)", exc)
        LLM_ACTIVE = False
        return False

    LLM_ACTIVE = True
    logger.info("Docling: LLM enabled via %s (model=%s)", LLM_BASE_URL, LLM_MODEL)
    return True
|
||||
|
||||
|
||||
_init_llm()
|
||||
|
||||
|
||||
DEFAULT_ENRICH_PROMPT = (
|
||||
"You are a document cleaning assistant. "
|
||||
"Fix OCR errors, normalise whitespace, and improve the Markdown structure. "
|
||||
"Return ONLY the raw Markdown text — no code fences, no commentary, no explanation."
|
||||
)
|
||||
|
||||
|
||||
def _llm_enrich(markdown: str, system_prompt: str | None = None) -> str:
|
||||
"""Text cleanup pass — DISABLED under OCRFlux (OCR-only model always emits its
|
||||
JSON schema regardless of prompt, which corrupts non-image text). Passthrough."""
|
||||
return markdown
|
||||
if not _llm_client or not markdown.strip():
|
||||
return markdown
|
||||
# Rough token estimate: 1 token ≈ 4 chars. Reserve ~800 tokens for system + response.
|
||||
max_chars = (4096 - 800) * 4
|
||||
truncated = markdown[:max_chars]
|
||||
if len(markdown) > max_chars:
|
||||
logger.warning("_llm_enrich: truncated input from %d to %d chars", len(markdown), max_chars)
|
||||
try:
|
||||
resp = _llm_client.chat.completions.create(
|
||||
model=LLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system_prompt or DEFAULT_ENRICH_PROMPT},
|
||||
{"role": "user", "content": truncated},
|
||||
],
|
||||
temperature=0,
|
||||
)
|
||||
result = resp.choices[0].message.content or truncated
|
||||
# llava tends to wrap output in code fences regardless of instructions — strip them
|
||||
result = re.sub(r"^```(?:markdown)?\s*\n?", "", result.strip())
|
||||
result = re.sub(r"\n?```\s*$", "", result.strip())
|
||||
return result.strip() or markdown
|
||||
except Exception as e:
|
||||
logger.warning("Docling: LLM enrichment failed (%s) — returning raw output", e)
|
||||
return markdown
|
||||
|
||||
|
||||
|
||||
def _fetch_youtube_transcript(info: dict) -> str | None:
    """Download the first usable subtitle track from a yt-dlp info dict.

    Languages are tried in the order ``vi`` then ``en``; manual subtitles are
    preferred over automatic captions. VTT payloads have cue timings, cue
    numbers and inline tags removed; other formats are returned as downloaded.
    Returns None when nothing usable is found.
    """
    import urllib.request

    subtitles = info.get("subtitles") or {}
    auto_subtitles = info.get("automatic_captions") or {}

    for lang in ("vi", "en"):
        tracks = subtitles.get(lang) or auto_subtitles.get(lang)
        if not tracks:
            continue
        for track in tracks:
            ext = track.get("ext")
            if ext not in ("json3", "srv3", "ttml", "vtt"):
                continue
            try:
                with urllib.request.urlopen(track["url"], timeout=15) as r:
                    raw = r.read().decode("utf-8", errors="ignore")
            except Exception as e:
                # Best effort: log the failure (previously swallowed silently)
                # and move on to the next track.
                logger.warning("Docling: subtitle download failed (%s/%s): %s", lang, ext, e)
                continue
            if ext == "vtt":
                cleaned = re.sub(r"\d{2}:\d{2}:\d{2}\.\d+ --> .*", "", raw)
                cleaned = re.sub(r"^\d+$", "", cleaned, flags=re.MULTILINE)
                cleaned = re.sub(r"<[^>]+>", "", cleaned)
                transcript = re.sub(r"\n{3,}", "\n\n", cleaned).strip()
            else:
                transcript = raw
            if transcript:
                return transcript
            # The track downloaded but was empty: fall through to the next language.
            break
    return None


async def convert_url(
    url: str,
    db: AsyncSession,
    output_format: str = "markdown",
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> "Conversion":
    """Build a Markdown document from a video URL's metadata and run it through Docling.

    yt-dlp extracts metadata only (nothing is downloaded). The document holds
    the title, channel/duration/date/views, then a transcript (or the
    description, or a placeholder note) and the chapter list.

    Args:
        url: Video URL passed to yt-dlp.
        db: Async session used to store the ``Conversion`` row.
        output_format: ``markdown``, ``json`` or ``html`` export the Docling
            document; any other value stores the generated Markdown as-is.
        use_llm: Whether the enrichment step may run on markdown/text output.
        llm_prompt: Optional system prompt for the enrichment step.

    Returns:
        The stored ``Conversion`` record.

    Raises:
        HTTPException: 500 when yt-dlp is not installed or conversion/storage
            fails; 422 when yt-dlp cannot extract the URL.
    """
    try:
        import yt_dlp  # noqa: PLC0415
    except ImportError as e:
        raise HTTPException(status_code=500, detail="yt-dlp not installed") from e

    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitleslangs": ["vi", "en"],
        "outtmpl": "%(id)s.%(ext)s",
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        raise HTTPException(status_code=422, detail=f"yt-dlp error: {e}") from e

    title = info.get("title", "YouTube Video")
    description = info.get("description", "") or ""
    channel = info.get("channel", info.get("uploader", ""))
    duration = info.get("duration_string", "")
    upload_date = info.get("upload_date", "")
    view_count = info.get("view_count")
    chapters = info.get("chapters") or []

    # Build markdown from available metadata
    lines = [f"# {title}", ""]
    meta_rows = []
    if channel:
        meta_rows.append(f"**Kênh:** {channel}")
    if duration:
        meta_rows.append(f"**Thời lượng:** {duration}")
    if upload_date and len(upload_date) == 8:
        # upload_date is YYYYMMDD; render it as YYYY-MM-DD.
        meta_rows.append(f"**Ngày đăng:** {upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:]}")
    if view_count is not None:
        meta_rows.append(f"**Lượt xem:** {view_count:,}")
    meta_rows.append(f"**URL:** {url}")
    lines.extend(meta_rows)
    lines.append("")

    # Subtitles/transcript, with the description as fallback
    transcript_text = _fetch_youtube_transcript(info)
    if transcript_text:
        lines += ["## Transcript / Phụ đề", "", transcript_text, ""]
    elif description:
        lines += ["## Mô tả", "", description[:3000], ""]
    else:
        lines += ["## Ghi chú", "", "_Không có transcript hoặc mô tả._", ""]

    if chapters:
        lines += ["## Chapters", ""]
        for ch in chapters:
            start = ch.get("start_time", 0)
            m, s = divmod(int(start), 60)
            lines.append(f"- **{m:02d}:{s:02d}** — {ch.get('title', '')}")
        lines.append("")

    markdown_text = "\n".join(lines)

    # Write temp file and run through Docling
    video_id = info.get("id", "youtube")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".md", mode="w", encoding="utf-8") as tmp:
        tmp.write(markdown_text)
        tmp_path = tmp.name

    try:
        result = converter.convert(tmp_path)
        doc = result.document
        page_count = None

        if output_format == "markdown":
            content = doc.export_to_markdown()
        elif output_format == "json":
            content = json.dumps(doc.export_to_dict(), ensure_ascii=False, indent=2)
        elif output_format == "html":
            content = doc.export_to_html()
        else:
            content = markdown_text

        llm_used = False
        if _llm_client and use_llm and output_format in ("markdown", "text"):
            content = _llm_enrich(content, system_prompt=llm_prompt or None)
            llm_used = True

        record = Conversion(
            filename=f"{video_id}.md",
            file_type="youtube",
            output_format=output_format,
            content=content,
            page_count=page_count,
            llm_enabled=llm_used,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as e:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
ALLOWED_EXTENSIONS = {
|
||||
"pdf", "docx", "xlsx", "pptx",
|
||||
"html", "htm", "jpg", "jpeg", "png",
|
||||
"tiff", "tif", "bmp", "md", "txt", "asciidoc", "adoc", "elm"
|
||||
}
|
||||
OUTPUT_FORMATS = {"markdown", "json", "html", "text"}
|
||||
|
||||
|
||||
def _allowed_file(filename: str) -> bool:
|
||||
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
async def convert_file(
    file: UploadFile,
    db: AsyncSession,
    output_format: str = "markdown",
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert an uploaded file with Docling and persist the result.

    Args:
        file: Uploaded file; its extension must be in ``ALLOWED_EXTENSIONS``.
        db: Async session used to store the ``Conversion`` row.
        output_format: One of ``OUTPUT_FORMATS``.
        use_llm: Whether the enrichment step may run on markdown/text output.
        llm_prompt: Optional system prompt for the enrichment step.

    Returns:
        The stored ``Conversion`` record.

    Raises:
        HTTPException: 422 for a disallowed extension or output format, 500
            for any failure during conversion or the database write.
    """
    if not _allowed_file(file.filename):
        allowed = ", ".join(sorted(ALLOWED_EXTENSIONS))
        raise HTTPException(
            status_code=422,
            detail=f"File type not allowed. Allowed: {allowed}",
        )
    if output_format not in OUTPUT_FORMATS:
        supported = ", ".join(sorted(OUTPUT_FORMATS))
        raise HTTPException(
            status_code=422,
            detail=f"Output format not supported. Supported: {supported}",
        )

    suffix = os.path.splitext(file.filename)[1]
    file_type = suffix.lstrip(".").lower()

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    try:
        doc = converter.convert(tmp_path).document

        has_pages = hasattr(doc, "pages") and doc.pages
        page_count = len(doc.pages) if has_pages else None

        if output_format == "markdown":
            content = doc.export_to_markdown()
        elif output_format == "json":
            content = json.dumps(doc.export_to_dict(), ensure_ascii=False, indent=2)
        elif output_format == "html":
            content = doc.export_to_html()
        elif output_format == "text":
            # Plain text = Markdown export with heading and emphasis markers removed.
            content = doc.export_to_markdown()
            for pattern, replacement in (
                (r"#{1,6}\s?", ""),
                (r"\*\*(.+?)\*\*", r"\1"),
                (r"\*(.+?)\*", r"\1"),
            ):
                content = re.sub(pattern, replacement, content)

        # LLM enrichment — only for markdown / text output, and only if requested.
        llm_used = False
        if _llm_client and use_llm and output_format in ("markdown", "text"):
            content = _llm_enrich(content, system_prompt=llm_prompt or None)
            llm_used = True

        record = Conversion(
            filename=file.filename,
            file_type=file_type,
            output_format=output_format,
            content=content,
            page_count=page_count,
            llm_enabled=llm_used,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as exc:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(exc))
    finally:
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
def _is_email_file(path: str, file_type: str) -> bool:
|
||||
"""Detect email files by extension OR by reading the first few bytes."""
|
||||
if file_type in ("eml", "msg"):
|
||||
return True
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
head = f.read(2048).decode("utf-8", errors="ignore")
|
||||
email_headers = (
|
||||
"from:", "to:", "subject:", "date:", "mime-version:", "message-id:",
|
||||
"received:", "return-path:", "delivered-to:", "envelope-to:",
|
||||
"x-mailer:", "content-type:", "x-spam-", "reply-to:",
|
||||
)
|
||||
lines = [l.lower() for l in head.splitlines()[:30]]
|
||||
hits = sum(1 for l in lines if any(l.startswith(h) for h in email_headers))
|
||||
return hits >= 2
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
_IMAGE_MIME = {"image/jpeg", "image/jpg", "image/png", "image/gif", "image/bmp", "image/webp", "image/tiff"}
|
||||
_IMAGE_EXT = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".tiff", ".tif"}
|
||||
_DOCLING_EXT = {".pdf", ".docx", ".xlsx", ".pptx", ".html", ".htm",
|
||||
".jpg", ".jpeg", ".png", ".tiff", ".tif", ".bmp",
|
||||
".md", ".txt", ".asciidoc", ".adoc", ".eml"}
|
||||
|
||||
|
||||
def _eml_to_html(path: str) -> tuple[str, list[tuple[str, bytes]]]:
|
||||
"""Parse email: extract HTML body, embed inline CID images as base64 data-URIs.
|
||||
Returns (html_string, ordered_images) where ordered_images is [(mime, raw_bytes), ...]
|
||||
in document appearance order — used to replace <!-- image --> placeholders later."""
|
||||
import email as _email, email.policy as _policy, re as _re, base64 as _b64
|
||||
|
||||
with open(path, "rb") as f:
|
||||
msg = _email.message_from_binary_file(f, policy=_policy.compat32)
|
||||
|
||||
html_body = None
|
||||
text_body = None
|
||||
cid_uri: dict[str, str] = {} # cid → data-URI
|
||||
cid_raw: dict[str, tuple[str, bytes]] = {} # cid → (mime, bytes)
|
||||
loose: list[tuple[str, bytes, str]] = [] # (mime, bytes, filename)
|
||||
|
||||
for part in msg.walk():
|
||||
ct = part.get_content_type()
|
||||
data = part.get_payload(decode=True)
|
||||
if not data:
|
||||
continue
|
||||
|
||||
if ct in _IMAGE_MIME or ct.startswith("image/"):
|
||||
cid = part.get("Content-ID", "").strip("<>")
|
||||
mime = ct if "/" in ct else "image/jpeg"
|
||||
b64 = _b64.standard_b64encode(data).decode()
|
||||
uri = f"data:{mime};base64,{b64}"
|
||||
if cid:
|
||||
cid_uri[cid] = uri
|
||||
cid_raw[cid] = (mime, data)
|
||||
else:
|
||||
fname = part.get_filename() or ""
|
||||
loose.append((mime, data, fname or "image"))
|
||||
continue
|
||||
|
||||
charset = part.get_content_charset() or "utf-8"
|
||||
decoded = data.decode(charset, errors="replace")
|
||||
if ct == "text/html" and html_body is None:
|
||||
html_body = decoded
|
||||
elif ct == "text/plain" and text_body is None:
|
||||
text_body = decoded
|
||||
|
||||
# Replace cid: src refs, tracking order of appearance
|
||||
inline_images: list[tuple[str, bytes]] = []
|
||||
|
||||
def _replace_cid(html: str) -> str:
|
||||
def _sub(m):
|
||||
cid = m.group(2)
|
||||
if cid in cid_raw:
|
||||
inline_images.append(cid_raw[cid])
|
||||
return m.group(1) + cid_uri[cid] + m.group(3)
|
||||
return m.group(0)
|
||||
return _re.sub(r'(src=["\'])cid:([^\s"\'>\)]+)(["\'])', _sub, html)
|
||||
|
||||
subject = msg.get("Subject", "")
|
||||
sender = msg.get("From", "")
|
||||
date = msg.get("Date", "")
|
||||
header = f"<h2>{subject}</h2><p><b>From:</b> {sender}<br><b>Date:</b> {date}</p><hr>"
|
||||
|
||||
loose_html = ""
|
||||
if loose:
|
||||
parts = []
|
||||
for mime, data, alt in loose:
|
||||
b64 = _b64.standard_b64encode(data).decode()
|
||||
uri = f"data:{mime};base64,{b64}"
|
||||
parts.append(f'<figure><img src="{uri}" alt="{alt}" style="max-width:100%"><figcaption>{alt}</figcaption></figure>')
|
||||
loose_html = "\n<hr><h3>Attachments</h3>\n" + "\n".join(parts)
|
||||
|
||||
if html_body:
|
||||
html_body = _replace_cid(html_body)
|
||||
if "<body" in html_body.lower():
|
||||
result = re.sub(r"(<body[^>]*>)", r"\1" + header, html_body, count=1, flags=re.IGNORECASE)
|
||||
html_str = result.rstrip().rstrip("</body></html>").rstrip() + loose_html + "\n</body></html>"
|
||||
else:
|
||||
html_str = f"<html><body>{header}{html_body}{loose_html}</body></html>"
|
||||
elif text_body:
|
||||
escaped = text_body.replace("&", "&").replace("<", "<").replace(">", ">")
|
||||
html_str = f"<html><body>{header}<pre>{escaped}</pre>{loose_html}</body></html>"
|
||||
else:
|
||||
html_str = f"<html><body>{header}<p><em>No body content found.</em></p>{loose_html}</body></html>"
|
||||
|
||||
ordered_images = inline_images + [(m, d) for m, d, _ in loose]
|
||||
return html_str, ordered_images
|
||||
|
||||
|
||||
def _ocr_image_llm(mime: str, data: bytes) -> str:
    """OCR one image by POSTing it to ``{VISION_URL}/ocr``.

    Upscaling and the prompt live in vision-service. Returns the ``text``
    field of the JSON reply, or '' when the request fails for any reason.
    """
    import httpx

    upload = {"file": ("image", data, mime or "application/octet-stream")}
    try:
        resp = httpx.post(f"{VISION_URL}/ocr", files=upload, timeout=300)
        resp.raise_for_status()
        payload = resp.json()
        return payload.get("text", "")
    except Exception as exc:
        logger.warning("Docling: vision OCR failed (%s)", exc)
        return ""
|
||||
|
||||
|
||||
def _replace_image_placeholders(markdown: str, images: list[tuple[str, bytes]]) -> str:
|
||||
"""Replace Docling <!-- image --> placeholders with OCRFlux output.
|
||||
Each image is OCR'd; falls back to *[Image]* when nothing usable is extracted."""
|
||||
if not images or "<!-- image -->" not in markdown:
|
||||
return markdown
|
||||
img_iter = iter(images)
|
||||
def _sub(_m):
|
||||
try:
|
||||
mime, data = next(img_iter)
|
||||
except StopIteration:
|
||||
return "*[Image]*"
|
||||
return _ocr_image_llm(mime, data) or "*[Image]*"
|
||||
return re.sub(r"<!-- image -->", _sub, markdown)
|
||||
|
||||
|
||||
def _detect_ext(path: str, declared_ext: str) -> str:
|
||||
"""Return best-guess extension for file. Falls back to magic bytes."""
|
||||
if declared_ext and f".{declared_ext}" in _DOCLING_EXT:
|
||||
return f".{declared_ext}"
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
hdr = f.read(16)
|
||||
magic = {
|
||||
b"\x89PNG": ".png", b"\xff\xd8\xff": ".jpg",
|
||||
b"GIF8": ".gif", b"BM": ".bmp",
|
||||
b"II*\x00": ".tiff", b"MM\x00*": ".tiff",
|
||||
b"%PDF": ".pdf",
|
||||
b"PK\x03\x04": ".docx", # zip-based (docx/xlsx/pptx — good enough)
|
||||
}
|
||||
for sig, ext in magic.items():
|
||||
if hdr.startswith(sig):
|
||||
return ext
|
||||
except Exception:
|
||||
pass
|
||||
return f".{declared_ext}" if declared_ext else ""
|
||||
|
||||
|
||||
async def convert_path(
    path: str,
    db: AsyncSession,
    output_format: str = "markdown",
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert a file that already exists on disk and persist the result.

    Emails are first rendered to a temporary HTML file; files with a missing
    or unsupported extension are copied to a temporary file carrying the
    detected extension; standalone images go straight to the vision OCR
    service when the LLM is enabled.  Everything else is converted by Docling.

    Args:
        path: Location of the source file.
        db: Session used to store the resulting ``Conversion`` row.
        output_format: Must be one of ``OUTPUT_FORMATS``.
        use_llm: Allow vision OCR and text enrichment.
        llm_prompt: Optional system prompt for the enrichment step.

    Returns:
        The stored ``Conversion`` record.

    Raises:
        HTTPException: 422 for an unsupported output format or file type,
            500 when email parsing or the conversion itself fails.
    """
    import os as _os
    import shutil as _shutil

    filename = _os.path.basename(path)
    file_type = _os.path.splitext(filename)[1].lstrip(".").lower()

    if output_format not in OUTPUT_FORMATS:
        raise HTTPException(status_code=422, detail=f"Output format not supported: {output_format}")

    actual_path = path
    tmp_paths: list[str] = []  # every temp file created here; removed in the finally below
    email_images: list[tuple[str, bytes]] = []

    try:
        # ── Email / Maildir ──────────────────────────────────────────
        if _is_email_file(path, file_type):
            file_type = "eml"
            try:
                html_content, email_images = _eml_to_html(path)
                tmp_html = tempfile.NamedTemporaryFile(
                    delete=False, suffix=".html", mode="w", encoding="utf-8"
                )
                tmp_paths.append(tmp_html.name)
                with tmp_html:
                    tmp_html.write(html_content)
                actual_path = tmp_html.name
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Email parse error: {e}")

        # ── No / wrong extension → copy to temp file with detected ext ─
        elif not file_type or f".{file_type}" not in _DOCLING_EXT:
            detected = _detect_ext(path, file_type)
            if not detected or detected not in _DOCLING_EXT:
                raise HTTPException(
                    status_code=422,
                    detail=f"Unsupported file format: '{file_type or 'no extension'}'"
                )
            tmp_copy = tempfile.NamedTemporaryFile(delete=False, suffix=detected)
            tmp_paths.append(tmp_copy.name)
            tmp_copy.close()
            _shutil.copy2(path, tmp_copy.name)
            actual_path = tmp_copy.name
            file_type = detected.lstrip(".")

        # ── Standalone image file → OCRFlux directly (skip docling/Tesseract) ─
        if (f".{file_type}" in _IMAGE_EXT and use_llm and _llm_client
                and output_format in ("markdown", "text")):
            try:
                with open(actual_path, "rb") as fh:
                    image_bytes = fh.read()
                # Send the real media type; unknown extensions keep the old
                # image/jpeg default.
                mime = {
                    "png": "image/png",
                    "gif": "image/gif",
                    "bmp": "image/bmp",
                    "webp": "image/webp",
                    "tif": "image/tiff",
                    "tiff": "image/tiff",
                }.get(file_type, "image/jpeg")
                ocr_text = _ocr_image_llm(mime, image_bytes)
                if ocr_text:
                    record = Conversion(
                        filename=filename, file_type=file_type,
                        output_format=output_format, content=ocr_text,
                        page_count=1, llm_enabled=True,
                    )
                    db.add(record)
                    await db.commit()
                    await db.refresh(record)
                    return record
            except Exception as e:
                # Reset the session so a failed commit does not poison the
                # Docling fallback below.
                await db.rollback()
                logger.warning("Docling: OCRFlux image path failed (%s) — falling back", e)

        try:
            result = converter.convert(actual_path)
            doc = result.document
            page_count = len(doc.pages) if hasattr(doc, "pages") and doc.pages else None

            if output_format == "markdown":
                content = doc.export_to_markdown()
            elif output_format == "json":
                content = json.dumps(doc.export_to_dict(), ensure_ascii=False, indent=2)
            elif output_format == "html":
                content = doc.export_to_html()
            else:
                # Plain text: start from markdown and strip the common markup.
                content = doc.export_to_markdown()
                content = re.sub(r"#{1,6}\s?", "", content)
                content = re.sub(r"\*\*(.+?)\*\*", r"\1", content)
                content = re.sub(r"\*(.+?)\*", r"\1", content)

            llm_used = False
            if output_format in ("markdown", "text"):
                images_replaced = False
                # Step 1: replace <!-- image --> placeholders via Vision LLM OCR
                if email_images and use_llm and "<!-- image -->" in content:
                    content = _replace_image_placeholders(content, email_images)
                    images_replaced = True
                    llm_used = bool(_llm_client)
                # Step 2: fallback — any remaining <!-- image --> → *[Image]*
                if "<!-- image -->" in content:
                    content = content.replace("<!-- image -->", "*[Image]*")
                # Step 3: text LLM enrichment — SKIP when we just OCR'd images, because the
                # free-form cleanup model tends to summarise away the extracted tables.
                if _llm_client and use_llm and not images_replaced:
                    content = _llm_enrich(content, system_prompt=llm_prompt or None)
                    llm_used = True

            record = Conversion(
                filename=filename,
                file_type=file_type,
                output_format=output_format,
                content=content,
                page_count=page_count,
                llm_enabled=llm_used,
            )
            db.add(record)
            await db.commit()
            await db.refresh(record)
            return record
        except HTTPException:
            raise
        except Exception as e:
            await db.rollback()
            raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Single cleanup point for every exit path (early return, error, success).
        for tmp_path in tmp_paths:
            try:
                _os.unlink(tmp_path)
            except OSError:
                pass
|
||||
|
||||
|
||||
async def get_conversion(conversion_id: int, db: AsyncSession) -> Conversion:
    """Fetch one stored conversion by primary key.

    Raises:
        HTTPException: 404 when no row has the given id.
    """
    query = select(Conversion).where(Conversion.id == conversion_id)
    found = (await db.execute(query)).scalar_one_or_none()
    if found:
        return found
    raise HTTPException(status_code=404, detail="Conversion not found")
|
||||
|
||||
|
||||
async def get_history(db: AsyncSession, limit: int = 20) -> list[Conversion]:
    """Return up to *limit* conversions, newest first (by ``created_at``)."""
    newest_first = select(Conversion).order_by(Conversion.created_at.desc())
    rows = await db.execute(newest_first.limit(limit))
    return rows.scalars().all()
|
||||
|
||||
|
||||
async def delete_conversion(conversion_id: int, db: AsyncSession) -> dict:
    """Delete one stored conversion and confirm with a message dict.

    Raises:
        HTTPException: 404 when no row has the given id.
    """
    query = select(Conversion).where(Conversion.id == conversion_id)
    target = (await db.execute(query)).scalar_one_or_none()
    if not target:
        raise HTTPException(status_code=404, detail="Conversion not found")

    await db.delete(target)
    await db.commit()
    return {"message": f"Conversion {conversion_id} deleted"}
|
||||
|
|
@ -5,5 +5,6 @@ python-multipart
|
|||
asyncpg
|
||||
sqlalchemy[asyncio]
|
||||
openai
|
||||
httpx
|
||||
yt-dlp
|
||||
ffmpeg
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
"""Central config — single source of truth cho LLM/service settings.
|
||||
To change base_url / model, edit only this file."""
|
||||
import os
|
||||
|
||||
# llama-server (Qwen2.5-VL) via SSH tunnel; Docker reaches the host via host.docker.internal
|
||||
LLM_BASE_URL = "http://host.docker.internal:11436/v1"
|
||||
LLM_MODEL = "ggml-org/Qwen2.5-VL-3B-Instruct-GGUF"
|
||||
CLEANUP_MODEL = os.getenv("CLEANUP_MODEL", "")
|
||||
|
||||
# Vision OCR is consolidated in vision-service (container api-vision). Images → POST {VISION_URL}/ocr
|
||||
VISION_URL = os.getenv("VISION_URL", "http://api-vision:8000")
|
||||
|
|
@ -19,14 +19,12 @@ class CleanupResponse(BaseModel):
|
|||
text: str
|
||||
|
||||
class SettingsRequest(BaseModel):
|
||||
ollama_base_url: str | None = None
|
||||
ollama_model: str = "llava"
|
||||
cleanup_model: str | None = None
|
||||
|
||||
class SettingsResponse(BaseModel):
|
||||
llm_enabled: bool
|
||||
ollama_base_url: str | None
|
||||
ollama_model: str
|
||||
llm_base_url: str | None
|
||||
llm_model: str
|
||||
cleanup_model: str | None = None
|
||||
default_prompt: str | None = None
|
||||
|
||||
|
|
@ -38,7 +36,7 @@ def health():
|
|||
return HealthResponse(
|
||||
status="ok",
|
||||
llm_enabled=markitdown_service.LLM_ACTIVE,
|
||||
llm_model=markitdown_service.OLLAMA_MODEL if markitdown_service.LLM_ACTIVE else None,
|
||||
llm_model=markitdown_service.LLM_MODEL if markitdown_service.LLM_ACTIVE else None,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -66,11 +64,11 @@ async def convert_url(
|
|||
|
||||
@router.get("/models")
|
||||
def list_models():
|
||||
if not markitdown_service.OLLAMA_BASE_URL:
|
||||
if not markitdown_service.LLM_BASE_URL:
|
||||
return {"models": []}
|
||||
try:
|
||||
import httpx, re
|
||||
base = re.sub(r"/v1/?$", "", markitdown_service.OLLAMA_BASE_URL.rstrip("/"))
|
||||
base = re.sub(r"/v1/?$", "", markitdown_service.LLM_BASE_URL.rstrip("/"))
|
||||
resp = httpx.get(f"{base}/api/tags", timeout=5)
|
||||
resp.raise_for_status()
|
||||
names = [m["name"] for m in resp.json().get("models", [])]
|
||||
|
|
@ -82,20 +80,20 @@ def list_models():
|
|||
def get_settings():
|
||||
return SettingsResponse(
|
||||
llm_enabled=markitdown_service.LLM_ACTIVE,
|
||||
ollama_base_url=markitdown_service.OLLAMA_BASE_URL,
|
||||
ollama_model=markitdown_service.OLLAMA_MODEL,
|
||||
llm_base_url=markitdown_service.LLM_BASE_URL,
|
||||
llm_model=markitdown_service.LLM_MODEL,
|
||||
cleanup_model=markitdown_service.CLEANUP_MODEL or None,
|
||||
default_prompt=markitdown_service.DEFAULT_CLEANUP_PROMPT,
|
||||
)
|
||||
|
||||
@router.post("/settings", response_model=SettingsResponse)
|
||||
def update_settings(req: SettingsRequest):
|
||||
markitdown_service._init_llm(req.ollama_base_url or None, req.ollama_model)
|
||||
markitdown_service._init_llm()
|
||||
markitdown_service.CLEANUP_MODEL = req.cleanup_model or ""
|
||||
return SettingsResponse(
|
||||
llm_enabled=markitdown_service.LLM_ACTIVE,
|
||||
ollama_base_url=markitdown_service.OLLAMA_BASE_URL,
|
||||
ollama_model=markitdown_service.OLLAMA_MODEL,
|
||||
llm_base_url=markitdown_service.LLM_BASE_URL,
|
||||
llm_model=markitdown_service.LLM_MODEL,
|
||||
cleanup_model=markitdown_service.CLEANUP_MODEL or None,
|
||||
default_prompt=markitdown_service.DEFAULT_CLEANUP_PROMPT,
|
||||
)
|
||||
|
|
@ -108,6 +106,66 @@ async def cleanup(req: CleanupRequest):
|
|||
return CleanupResponse(text=cleaned)
|
||||
|
||||
|
||||
class PathRequest(BaseModel):
    """Body of ``POST /convert-path``: a server-side file plus LLM options."""

    # Path of the file to convert; the route only accepts paths under /workspace.
    path: str
    # Per-request switch for LLM-assisted conversion.
    use_llm: bool = True
    # Optional custom prompt forwarded to the conversion service.
    llm_prompt: str | None = None
|
||||
|
||||
|
||||
@router.get("/browse")
|
||||
def browse(path: str = Query("/workspace")):
|
||||
import os
|
||||
abs_path = os.path.realpath(path)
|
||||
if not abs_path.startswith("/workspace"):
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
if not os.path.exists(abs_path):
|
||||
raise HTTPException(status_code=404, detail="Path not found")
|
||||
if os.path.isfile(abs_path):
|
||||
return {"type": "file", "path": abs_path, "name": os.path.basename(abs_path)}
|
||||
entries = []
|
||||
try:
|
||||
for name in sorted(os.listdir(abs_path)):
|
||||
full = os.path.join(abs_path, name)
|
||||
entries.append({
|
||||
"name": name,
|
||||
"path": full,
|
||||
"type": "dir" if os.path.isdir(full) else "file",
|
||||
"ext": os.path.splitext(name)[1].lower() if os.path.isfile(full) else None,
|
||||
})
|
||||
except PermissionError:
|
||||
raise HTTPException(status_code=403, detail="Permission denied")
|
||||
return {"type": "dir", "path": abs_path, "entries": entries}
|
||||
|
||||
|
||||
@router.post("/convert-path")
|
||||
async def convert_path(req: PathRequest, db: AsyncSession = Depends(get_db)):
|
||||
import os
|
||||
abs_path = os.path.realpath(req.path)
|
||||
if not abs_path.startswith("/workspace"):
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
if not os.path.exists(abs_path) or not os.path.isfile(abs_path):
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
record = await markitdown_service.convert_path(abs_path, db, use_llm=req.use_llm, llm_prompt=req.llm_prompt)
|
||||
return record
|
||||
|
||||
|
||||
class WriteFileRequest(BaseModel):
    """Body of ``POST /write-file``: where to write and what to write."""

    # Destination path; the route only accepts paths under /workspace.
    path: str
    # Text written to the destination as UTF-8.
    content: str
|
||||
|
||||
|
||||
@router.post("/write-file")
|
||||
def write_file(req: WriteFileRequest):
|
||||
import os
|
||||
abs_path = os.path.realpath(req.path)
|
||||
if not abs_path.startswith("/workspace"):
|
||||
raise HTTPException(status_code=403, detail="Access denied")
|
||||
os.makedirs(os.path.dirname(abs_path), exist_ok=True)
|
||||
with open(abs_path, "w", encoding="utf-8") as f:
|
||||
f.write(req.content)
|
||||
return {"path": abs_path, "bytes": len(req.content.encode())}
|
||||
|
||||
|
||||
@router.get("/history", response_model=list[ConversionRecord])
|
||||
async def history(limit: int = 20, db: AsyncSession = Depends(get_db)):
|
||||
records = await markitdown_service.get_history(db, limit)
|
||||
|
|
@ -0,0 +1,370 @@
|
|||
import os
|
||||
import tempfile
|
||||
import logging
|
||||
from fastapi import UploadFile, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from markitdown import MarkItDown
|
||||
from app.models.ConvertModel import Conversion
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
import openai as _openai
|
||||
|
||||
from app.config import LLM_BASE_URL, LLM_MODEL, CLEANUP_MODEL, VISION_URL
|
||||
|
||||
LLM_ACTIVE = False
|
||||
_llm_client = None
|
||||
md_plain = MarkItDown()
|
||||
md = md_plain
|
||||
|
||||
|
||||
def _init_llm() -> bool:
    """(Re)build the OpenAI-compatible client and the LLM-backed converter.

    Module state (``_llm_client``, ``md``, ``LLM_ACTIVE``) is only switched
    over once both objects were created.  On failure it is reset to the
    plain, LLM-free configuration, so a failed re-initialisation never leaves
    a client behind while ``LLM_ACTIVE`` is False.

    Returns:
        True when the LLM is enabled, False otherwise.
    """
    global LLM_ACTIVE, _llm_client, md
    try:
        client = _openai.OpenAI(base_url=LLM_BASE_URL, api_key="none")
        llm_converter = MarkItDown(llm_client=client, llm_model=LLM_MODEL)
    except Exception as e:
        logger.warning("MarkItDown: LLM init failed (%s)", e)
        _llm_client = None
        md = md_plain
        LLM_ACTIVE = False
        return False

    _llm_client = client
    md = llm_converter
    LLM_ACTIVE = True
    logger.info("MarkItDown: LLM enabled via %s (model=%s)", LLM_BASE_URL, LLM_MODEL)
    return True
|
||||
|
||||
|
||||
_init_llm()
|
||||
|
||||
DEFAULT_CLEANUP_PROMPT = """You are a technical document formatter. \
|
||||
The text below was extracted from a multi-column PDF using OCR and is poorly structured: \
|
||||
columns are merged, headers are mixed with values, and content is out of order.
|
||||
|
||||
Your task:
|
||||
1. Identify the logical sections (e.g. PERFORMANCE, MEMORY, STORAGE, CONNECTIVITY, etc.)
|
||||
2. Under each section, format specs as a clean two-column Markdown table: | Spec | Value |
|
||||
3. Keep bullet lists where appropriate (e.g. ports, certifications)
|
||||
4. Remove duplicate lines and OCR artifacts (e.g. stray "---", lone "|", empty rows)
|
||||
5. Preserve all technical values exactly — do not paraphrase specs
|
||||
|
||||
Return ONLY the cleaned Markdown. No code fences, no commentary, no preamble."""
|
||||
|
||||
import re as _re
|
||||
|
||||
def llm_cleanup(text: str, prompt: str | None = None, model: str | None = None) -> str:
    """Ask the chat model to reformat *text*; return *text* itself on any failure.

    Args:
        text: Extracted text to clean up.
        prompt: System prompt; ``DEFAULT_CLEANUP_PROMPT`` when omitted.
        model: Model name; ``LLM_MODEL`` when omitted.

    Returns:
        The model output with a surrounding code fence removed, or the
        original text when no client is configured, the input is blank, the
        model answers with nothing, or the request fails.
    """
    if not _llm_client or not text.strip():
        return text

    conversation = [
        {"role": "system", "content": prompt or DEFAULT_CLEANUP_PROMPT},
        {"role": "user", "content": text},
    ]
    try:
        completion = _llm_client.chat.completions.create(
            model=model or LLM_MODEL,
            messages=conversation,
            temperature=0,
        )
        cleaned = completion.choices[0].message.content or text
        # Models often wrap the answer in ```markdown ... ``` despite the prompt.
        cleaned = _re.sub(r"^```(?:markdown)?\s*\n?", "", cleaned.strip())
        cleaned = _re.sub(r"\n?```\s*$", "", cleaned.strip())
        return cleaned.strip() or text
    except Exception as e:
        logger.warning("MarkItDown: cleanup failed (%s)", e)
        return text
|
||||
|
||||
|
||||
# ── Vision-LLM image OCR — consolidated in vision-service (container api-vision) ───────
|
||||
def _ocr_image_llm(mime: str, data: bytes) -> str:
    """Send one image to the shared vision-service and return its OCR text.

    The image is uploaded as multipart field ``file`` to ``{VISION_URL}/ocr``
    and the ``text`` field of the JSON reply is returned.  Any failure is
    logged and reported as ``""``.
    """
    import httpx

    upload = {"file": ("image", data, mime or "application/octet-stream")}
    try:
        reply = httpx.post(f"{VISION_URL}/ocr", files=upload, timeout=300)
        reply.raise_for_status()
        return reply.json().get("text", "")
    except Exception as e:
        logger.warning("MarkItDown: vision OCR failed (%s)", e)
        return ""
|
||||
|
||||
|
||||
def _ocr_cid_images(markdown: str, cid_map: dict[str, tuple[str, bytes]]) -> str:
    """Replace ``![alt](cid:...)`` image links with the OCR text of each image.

    *cid_map* maps a Content-ID to ``(mime, raw_bytes)``.  A link whose id is
    unknown, or whose OCR yields nothing, becomes ``*[Image]*``.  The markdown
    is returned untouched when the map is empty or it contains no ``cid:``.
    """
    if not cid_map or "cid:" not in markdown:
        return markdown

    fallback = "*[Image]*"

    def _render(match):
        image = cid_map.get(match.group(1).strip())
        if image:
            return _ocr_image_llm(*image) or fallback
        return fallback

    return _re.sub(r"!\[[^\]]*\]\(cid:([^)]+)\)", _render, markdown)
|
||||
|
||||
|
||||
|
||||
ALLOWED_EXTENSIONS = {
|
||||
"pdf", "docx", "xlsx", "pptx",
|
||||
"html", "csv", "txt", "jpg", "jpeg", "png", "zip", "epub"
|
||||
}
|
||||
|
||||
YOUTUBE_PATTERN = _re.compile(
|
||||
r"(https?://)?(www\.)?(youtube\.com/watch|youtu\.be/|youtube\.com/shorts/)"
|
||||
)
|
||||
|
||||
|
||||
async def convert_url(
    url: str,
    db: AsyncSession,
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert the resource behind *url* with MarkItDown and store the result.

    When the LLM-backed conversion fails with a server-side (500) error the
    conversion is retried once without the LLM.

    Args:
        url: Address handed to MarkItDown.
        db: Session used to store the resulting ``Conversion`` row.
        use_llm: Use the LLM when it is globally active.
        llm_prompt: Optional custom prompt for a one-off LLM converter.

    Returns:
        The stored ``Conversion`` record (``file_type`` is ``"youtube"``).

    Raises:
        HTTPException: 500 when conversion or persistence fails.
    """
    from urllib.parse import parse_qs, urlsplit

    use_llm_now = LLM_ACTIVE and use_llm
    converter = md if use_llm_now else md_plain
    if use_llm_now and llm_prompt:
        try:
            converter = MarkItDown(
                llm_client=_llm_client,
                llm_model=LLM_MODEL,
                llm_prompt=llm_prompt,
            )
        except TypeError:
            # older markitdown versions may not support the llm_prompt kwarg
            converter = md

    try:
        result = converter.convert(url)
        actual_llm = use_llm_now
    except Exception as llm_err:
        if use_llm_now and ("500" in str(llm_err) or "InternalServerError" in type(llm_err).__name__):
            logger.warning("MarkItDown: LLM failed (%s), retrying without LLM", llm_err)
            try:
                result = md_plain.convert(url)
            except Exception as plain_err:
                # Report the retry failure the same way as a first-attempt failure.
                raise HTTPException(status_code=500, detail=str(plain_err))
            actual_llm = False
        else:
            raise HTTPException(status_code=500, detail=str(llm_err))

    # File name: prefer the video id for YouTube watch URLs (their last path
    # segment is always "watch"); otherwise use the last segment of the URL.
    video_ids = parse_qs(urlsplit(url).query).get("v") if YOUTUBE_PATTERN.match(url) else None
    if video_ids:
        slug = video_ids[0]
    else:
        slug = url.rstrip("/").split("/")[-1].split("?")[0] or "youtube"
    filename = f"{slug}.md"

    try:
        record = Conversion(
            filename=filename,
            file_type="youtube",
            markdown=result.text_content,
            llm_enabled=actual_llm,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as e:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _allowed_file(filename: str) -> bool:
|
||||
return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
async def convert_file(
    file: UploadFile,
    db: AsyncSession,
    use_llm: bool = True,
    llm_prompt: str | None = None,
) -> Conversion:
    """Convert an uploaded file with MarkItDown and store the result.

    The upload is spooled to a temporary file that keeps the original suffix,
    converted (with one LLM-free retry after a 500-style LLM failure), saved
    as a ``Conversion`` row, and the temporary file is always removed.

    Raises:
        HTTPException: 422 for a disallowed extension, 500 when conversion
            or persistence fails.
    """
    if not _allowed_file(file.filename):
        raise HTTPException(
            status_code=422,
            detail=f"File type not allowed. Allowed: {', '.join(sorted(ALLOWED_EXTENSIONS))}"
        )

    _, suffix = os.path.splitext(file.filename)
    file_type = suffix.lstrip(".").lower()

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as spooled:
        spooled.write(await file.read())
        tmp_path = spooled.name

    # The LLM is used only when it is enabled globally AND requested per call.
    use_llm_now = LLM_ACTIVE and use_llm
    converter = md if use_llm_now else md_plain
    if use_llm_now and llm_prompt:
        # A custom prompt needs its own one-off converter.
        try:
            converter = MarkItDown(
                llm_client=_llm_client,
                llm_model=LLM_MODEL,
                llm_prompt=llm_prompt,
            )
        except TypeError:
            # older markitdown versions may not support llm_prompt kwarg
            converter = md

    try:
        try:
            result = converter.convert(tmp_path)
            actual_llm = use_llm_now
        except Exception as llm_err:
            # LLM OOM / 500 — fall back to a plain conversion; anything else
            # is handed to the outer handler.
            if not use_llm_now or not (
                "500" in str(llm_err) or "InternalServerError" in type(llm_err).__name__
            ):
                raise
            logger.warning("MarkItDown: LLM failed (%s), retrying without LLM", llm_err)
            result = md_plain.convert(tmp_path)
            actual_llm = False

        record = Conversion(
            filename=file.filename,
            file_type=file_type,
            markdown=result.text_content,
            llm_enabled=actual_llm,
        )
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except Exception as e:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        os.unlink(tmp_path)
|
||||
|
||||
|
||||
def _is_email_file(path: str, file_type: str) -> bool:
    """Guess whether *path* holds an email message.

    ``eml`` / ``msg`` extensions are accepted outright.  Otherwise the first
    2048 bytes are read and the file counts as an email when at least two of
    its first 30 lines start with a well-known mail header.  Unreadable files
    are not emails.
    """
    if file_type in ("eml", "msg"):
        return True

    header_prefixes = (
        "from:", "to:", "subject:", "date:", "mime-version:", "message-id:",
        "received:", "return-path:", "delivered-to:", "envelope-to:",
        "x-mailer:", "content-type:", "x-spam-", "reply-to:",
    )
    try:
        with open(path, "rb") as fh:
            head = fh.read(2048).decode("utf-8", errors="ignore")
    except Exception:
        return False

    hits = 0
    for raw_line in head.splitlines()[:30]:
        # str.startswith accepts a tuple: true if any prefix matches.
        if raw_line.lower().startswith(header_prefixes):
            hits += 1
    return hits >= 2
|
||||
|
||||
|
||||
async def convert_path(path: str, db: AsyncSession, use_llm: bool = True, llm_prompt: str | None = None) -> Conversion:
    """Convert a file that already exists on disk with MarkItDown and store it.

    Standalone images are sent straight to the vision OCR service when the LLM
    is active.  Emails (detected by extension or by their headers) are
    rendered to a temporary HTML file first; their inline ``cid:`` images are
    OCR'd afterwards when the LLM is in use.

    Args:
        path: Location of the source file.
        db: Session used to store the resulting ``Conversion`` row.
        use_llm: Use the LLM when it is globally active.
        llm_prompt: Optional custom prompt for a one-off LLM converter.

    Returns:
        The stored ``Conversion`` record.

    Raises:
        HTTPException: 500 when conversion or persistence fails.
    """
    import email as _email
    import email.policy as _policy
    import html as _html
    import os
    import re as _re
    import tempfile

    filename = os.path.basename(path)
    file_type = os.path.splitext(filename)[1].lstrip(".").lower()

    # Standalone image → OCRFlux directly (bypass MarkItDown's caption prompt,
    # which OCRFlux answers with raw JSON wrapped in "# Description:")
    if file_type in {"png", "jpg", "jpeg", "gif", "bmp", "webp", "tiff", "tif"} \
            and LLM_ACTIVE and use_llm and _llm_client:
        try:
            with open(path, "rb") as fh:
                image_bytes = fh.read()
            # Send the real media type; jpg/jpeg keep the image/jpeg default.
            mime = {
                "png": "image/png",
                "gif": "image/gif",
                "bmp": "image/bmp",
                "webp": "image/webp",
                "tif": "image/tiff",
                "tiff": "image/tiff",
            }.get(file_type, "image/jpeg")
            ocr_text = _ocr_image_llm(mime, image_bytes)
            if ocr_text:
                record = Conversion(filename=filename, file_type=file_type,
                                    markdown=ocr_text, llm_enabled=True)
                db.add(record)
                await db.commit()
                await db.refresh(record)
                return record
        except Exception as e:
            # Reset the session so a failed commit does not poison the
            # MarkItDown fallback below.
            await db.rollback()
            logger.warning("MarkItDown: OCRFlux image path failed (%s) — falling back", e)

    # Email files (with or without .eml extension) — extract HTML body → temp .html for MarkItDown
    tmp_eml_path = None
    actual_path = path
    cid_map: dict[str, tuple[str, bytes]] = {}  # cid → (mime, raw_bytes) for vision OCR
    if _is_email_file(path, file_type):
        file_type = "eml"
        try:
            with open(path, "rb") as f:
                msg = _email.message_from_binary_file(f, policy=_policy.compat32)
            html_body = text_body = None
            for part in msg.walk():
                ct = part.get_content_type()
                payload = part.get_payload(decode=True)
                if not payload:
                    continue
                if ct.startswith("image/"):
                    cid = part.get("Content-ID", "").strip("<>")
                    if cid:
                        cid_map[cid] = (ct, payload)
                    continue
                charset = part.get_content_charset() or "utf-8"
                decoded = payload.decode(charset, errors="replace")
                if ct == "text/html" and html_body is None:
                    html_body = decoded
                elif ct == "text/plain" and text_body is None:
                    text_body = decoded

            # Header values are plain text: escape them so "<addr@host>" in
            # From is shown instead of being parsed (and dropped) as a tag.
            subject = _html.escape(str(msg.get("Subject", "")))
            sender = _html.escape(str(msg.get("From", "")))
            date = _html.escape(str(msg.get("Date", "")))
            header = f"<h2>{subject}</h2><p><b>From:</b> {sender}<br><b>Date:</b> {date}</p><hr>"

            if html_body and "<body" in html_body.lower():
                # A callable replacement keeps backslashes in the header
                # literal; a template string would treat them as escapes.
                html_out = _re.sub(
                    r"(<body[^>]*>)",
                    lambda m: m.group(1) + header,
                    html_body,
                    count=1,
                    flags=_re.IGNORECASE,
                )
            else:
                if html_body:
                    body = html_body
                elif text_body:
                    body = f"<pre>{_html.escape(text_body)}</pre>"
                else:
                    body = "<em>No body</em>"
                html_out = f"<html><body>{header}{body}</body></html>"

            with tempfile.NamedTemporaryFile(
                delete=False, suffix=".html", mode="w", encoding="utf-8"
            ) as tmp_eml:
                # Remember the path before writing so it is removed even if
                # the write fails.
                tmp_eml_path = tmp_eml.name
                tmp_eml.write(html_out)
            actual_path = tmp_eml_path
        except Exception as e:
            logger.warning("Email parse failed, falling back to raw: %s", e)

    use_llm_now = LLM_ACTIVE and use_llm
    converter = md if use_llm_now else md_plain
    if use_llm_now and llm_prompt:
        try:
            converter = MarkItDown(llm_client=_llm_client, llm_model=LLM_MODEL, llm_prompt=llm_prompt)
        except TypeError:
            # older markitdown versions may not support the llm_prompt kwarg
            converter = md

    try:
        try:
            result = converter.convert(actual_path)
            actual_llm = use_llm_now
        except Exception as llm_err:
            if use_llm_now and ("500" in str(llm_err) or "InternalServerError" in type(llm_err).__name__):
                logger.warning("MarkItDown: LLM failed (%s), retrying without LLM", llm_err)
                # Retry on the same input (the rendered HTML for emails).
                result = md_plain.convert(actual_path)
                actual_llm = False
            else:
                raise HTTPException(status_code=500, detail=str(llm_err))

        content = result.text_content
        # Email inline images: replace ![alt](cid:...) links with Vision-LLM OCR text
        if cid_map and use_llm_now and "cid:" in content:
            content = _ocr_cid_images(content, cid_map)
            actual_llm = True

        record = Conversion(filename=filename, file_type=file_type, markdown=content, llm_enabled=actual_llm)
        db.add(record)
        await db.commit()
        await db.refresh(record)
        return record
    except HTTPException:
        raise
    except Exception as e:
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        if tmp_eml_path:
            try:
                os.unlink(tmp_eml_path)
            except OSError:
                pass
|
||||
|
||||
|
||||
async def get_history(db: AsyncSession, limit: int = 20) -> list[Conversion]:
    """Return up to *limit* stored conversions, newest first (by ``created_at``)."""
    newest_first = select(Conversion).order_by(Conversion.created_at.desc())
    rows = await db.execute(newest_first.limit(limit))
    return rows.scalars().all()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue