123 lines
4.3 KiB
Python
123 lines
4.3 KiB
Python
from fastapi import APIRouter, UploadFile, File, Depends, Query
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from app.models.ConvertModel import ConvertResponse, HealthResponse, ConversionRecord
|
|
from app.services import DoclingService as docling_service
|
|
from app.database import get_db
|
|
from pydantic import BaseModel
|
|
|
|
# Router instance on which every endpoint in this module is registered
# through the @router.get / @router.post / @router.delete decorators.
router = APIRouter()
|
|
|
|
class SettingsRequest(BaseModel):
    """Request body accepted by the POST /settings endpoint."""

    # Forwarded to the service's LLM initialiser; falsy values are sent as None.
    ollama_base_url: str | None = None

    # Model name forwarded to the service's LLM initialiser.
    ollama_model: str = "llava"

    # Stored on the service module as CLEANUP_MODEL ("" when omitted).
    cleanup_model: str | None = None
|
|
|
|
class SettingsResponse(BaseModel):
    """Response body returned by the GET and POST /settings endpoints."""

    # Mirrors the service module's LLM_ACTIVE flag.
    llm_enabled: bool

    # Mirrors the service module's OLLAMA_BASE_URL.
    ollama_base_url: str | None

    # Mirrors the service module's OLLAMA_MODEL.
    ollama_model: str

    # Service CLEANUP_MODEL, or None when that value is falsy.
    cleanup_model: str | None = None

    # Mirrors the service module's DEFAULT_ENRICH_PROMPT.
    default_prompt: str | None = None
|
|
|
|
# Input file extensions reported by /health as ``supported_formats``.
# sorted() turns the tuple into an alphabetically ordered list.
SUPPORTED_INPUT_FORMATS = sorted(
    (
        "pdf",
        "docx",
        "xlsx",
        "pptx",
        "html",
        "htm",
        "jpg",
        "jpeg",
        "png",
        "tiff",
        "tif",
        "bmp",
        "md",
        "txt",
        "asciidoc",
        "adoc",
    )
)

# Output formats reported by /health as ``output_formats``.
SUPPORTED_OUTPUT_FORMATS = [
    "markdown",
    "json",
    "html",
    "text",
]
|
|
|
|
|
|
@router.get("/settings", response_model=SettingsResponse)
def get_settings():
    """Return the LLM settings currently held by the Docling service module."""
    svc = docling_service
    # Collect the values first, then unpack them into the response model.
    # A falsy cleanup model (such as an empty string) is reported as None.
    snapshot = {
        "llm_enabled": svc.LLM_ACTIVE,
        "ollama_base_url": svc.OLLAMA_BASE_URL,
        "ollama_model": svc.OLLAMA_MODEL,
        "cleanup_model": svc.CLEANUP_MODEL or None,
        "default_prompt": svc.DEFAULT_ENRICH_PROMPT,
    }
    return SettingsResponse(**snapshot)
|
|
|
|
@router.post("/settings", response_model=SettingsResponse)
def update_settings(req: SettingsRequest):
    """Apply the requested LLM settings and return the resulting configuration."""
    svc = docling_service

    # A falsy base URL (None or "") is handed to the initialiser as None.
    base_url = req.ollama_base_url or None
    svc._init_llm(base_url, req.ollama_model)

    # "No cleanup model" is stored on the service module as an empty string.
    svc.CLEANUP_MODEL = req.cleanup_model or ""

    # Re-read every value from the service module so the response shows
    # what is actually in effect after the update.
    llm_enabled = svc.LLM_ACTIVE
    ollama_base_url = svc.OLLAMA_BASE_URL
    ollama_model = svc.OLLAMA_MODEL
    cleanup_model = svc.CLEANUP_MODEL or None
    default_prompt = svc.DEFAULT_ENRICH_PROMPT
    return SettingsResponse(
        llm_enabled=llm_enabled,
        ollama_base_url=ollama_base_url,
        ollama_model=ollama_model,
        cleanup_model=cleanup_model,
        default_prompt=default_prompt,
    )
|
|
|
|
@router.get("/health", response_model=HealthResponse)
def health():
    """Report service status, supported formats, LLM state and OCR engine."""
    # Use the module alias imported at the top of the file, as the settings
    # endpoints do, instead of re-importing the names inside the function.
    # The flag is read once so both uses below see the same value.
    llm_active = docling_service.LLM_ACTIVE
    ocr_engine = "tesseract" if _ocr_available() else "none"
    return HealthResponse(
        status="ok",
        supported_formats=SUPPORTED_INPUT_FORMATS,
        output_formats=SUPPORTED_OUTPUT_FORMATS,
        llm_enabled=llm_active,
        # The model name is only reported while the LLM is active.
        llm_model=docling_service.OLLAMA_MODEL if llm_active else None,
        ocr_engine=ocr_engine,
    )
|
|
|
|
|
|
def _ocr_available() -> bool:
|
|
import shutil
|
|
return shutil.which("tesseract") is not None
|
|
|
|
|
|
@router.post("/convert", response_model=ConvertResponse)
async def convert(
    file: UploadFile = File(...),
    output_format: str = Query(default="markdown", description="Output format: markdown | json | html | text"),
    use_llm: bool = Query(default=True, description="Run LLM enrichment on extracted text"),
    llm_prompt: str | None = Query(default=None, description="Custom system prompt for LLM enrichment"),
    db: AsyncSession = Depends(get_db),
):
    """Convert an uploaded file and return the resulting conversion record."""
    # NOTE(review): output_format is passed on without being checked against
    # SUPPORTED_OUTPUT_FORMATS here; presumably the service validates it. Confirm.
    record = await docling_service.convert_file(
        file,
        db,
        output_format,
        use_llm=use_llm,
        llm_prompt=llm_prompt,
    )
    # Copy the response fields from the record, attribute by attribute.
    response_fields = (
        "id",
        "filename",
        "output_format",
        "content",
        "page_count",
        "llm_enabled",
    )
    return ConvertResponse(**{name: getattr(record, name) for name in response_fields})
|
|
|
|
|
|
@router.get("/conversions/{conversion_id}", response_model=ConvertResponse)
async def get_conversion(conversion_id: int, db: AsyncSession = Depends(get_db)):
    """Fetch a stored conversion by id and return it as a ConvertResponse."""
    stored = await docling_service.get_conversion(conversion_id, db)
    # NOTE(review): the record is used without a None check; presumably the
    # service raises for unknown ids. Confirm.
    payload = dict(
        id=stored.id,
        filename=stored.filename,
        output_format=stored.output_format,
        content=stored.content,
        page_count=stored.page_count,
        llm_enabled=stored.llm_enabled,
    )
    return ConvertResponse(**payload)
|
|
|
|
|
|
@router.get("/history", response_model=list[ConversionRecord])
async def history(
    limit: int = Query(default=20, ge=0, description="Maximum number of records to return"),
    db: AsyncSession = Depends(get_db),
):
    """List stored conversions as ConversionRecord summaries.

    ``limit`` keeps its default of 20 but is now rejected by request
    validation when negative, so a negative value never reaches the service.
    """
    records = await docling_service.get_history(db, limit)
    return [
        ConversionRecord(
            id=r.id,
            filename=r.filename,
            file_type=r.file_type,
            output_format=r.output_format,
            page_count=r.page_count,
            # The timestamp is sent as its str() form.
            created_at=str(r.created_at),
        )
        for r in records
    ]
|
|
|
|
|
|
@router.delete("/conversions/{conversion_id}")
async def delete_conversion(conversion_id: int, db: AsyncSession = Depends(get_db)):
    """Delete a stored conversion and return the service's result unchanged."""
    outcome = await docling_service.delete_conversion(conversion_id, db)
    return outcome
|