"""HTTP routes for document conversion, conversion history and LLM settings.

Every handler is a thin adapter: it forwards to ``docling_service`` and maps
the result onto the response models from ``app.models.ConvertModel``.
"""

import shutil

from fastapi import APIRouter, Depends, File, Query, UploadFile
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.ConvertModel import ConvertResponse, HealthResponse, ConversionRecord
from app.services import DoclingService as docling_service
from app.database import get_db

router = APIRouter()


class UrlRequest(BaseModel):
    """Request body for ``POST /convert-url``."""

    url: str
    output_format: str = "markdown"
    use_llm: bool = True
    llm_prompt: str | None = None


class SettingsRequest(BaseModel):
    """Request body for ``POST /settings``."""

    ollama_base_url: str | None = None
    ollama_model: str = "llava"
    cleanup_model: str | None = None


class SettingsResponse(BaseModel):
    """LLM settings as currently held by the docling service module."""

    llm_enabled: bool
    ollama_base_url: str | None
    ollama_model: str
    cleanup_model: str | None = None
    default_prompt: str | None = None


SUPPORTED_INPUT_FORMATS = sorted([
    "pdf", "docx", "xlsx", "pptx", "html", "htm",
    "jpg", "jpeg", "png", "tiff", "tif", "bmp",
    "md", "txt", "asciidoc", "adoc",
])

SUPPORTED_OUTPUT_FORMATS = ["markdown", "json", "html", "text"]


def _current_settings() -> SettingsResponse:
    """Build a ``SettingsResponse`` from the service module's current globals."""
    return SettingsResponse(
        llm_enabled=docling_service.LLM_ACTIVE,
        ollama_base_url=docling_service.OLLAMA_BASE_URL,
        ollama_model=docling_service.OLLAMA_MODEL,
        # The service stores "no cleanup model" as a falsy value; expose it as None.
        cleanup_model=docling_service.CLEANUP_MODEL or None,
        default_prompt=docling_service.DEFAULT_ENRICH_PROMPT,
    )


def _to_convert_response(record) -> ConvertResponse:
    """Map a conversion record returned by the service onto ``ConvertResponse``.

    Only the attributes copied below are read from ``record``.
    """
    return ConvertResponse(
        id=record.id,
        filename=record.filename,
        output_format=record.output_format,
        content=record.content,
        page_count=record.page_count,
        llm_enabled=record.llm_enabled,
    )


@router.get("/settings", response_model=SettingsResponse)
def get_settings():
    """Return the current LLM settings."""
    return _current_settings()


@router.post("/settings", response_model=SettingsResponse)
def update_settings(req: SettingsRequest):
    """Re-initialise the LLM with the given settings and return the result."""
    # An empty base URL is normalised to None before being handed to the service.
    docling_service._init_llm(req.ollama_base_url or None, req.ollama_model)
    # The service keeps the cleanup model as a string; "" stands for "unset".
    docling_service.CLEANUP_MODEL = req.cleanup_model or ""
    return _current_settings()


@router.get("/health", response_model=HealthResponse)
def health():
    """Report service status, supported formats, LLM state and OCR engine."""
    # Read through the module alias at call time so changes made by
    # update_settings are reflected here.
    llm_active = docling_service.LLM_ACTIVE
    ocr_engine = "tesseract" if _ocr_available() else "none"
    return HealthResponse(
        status="ok",
        supported_formats=SUPPORTED_INPUT_FORMATS,
        output_formats=SUPPORTED_OUTPUT_FORMATS,
        llm_enabled=llm_active,
        llm_model=docling_service.OLLAMA_MODEL if llm_active else None,
        ocr_engine=ocr_engine,
    )


def _ocr_available() -> bool:
    """Return True when a ``tesseract`` executable is found on PATH."""
    return shutil.which("tesseract") is not None


@router.post("/convert", response_model=ConvertResponse)
async def convert(
    file: UploadFile = File(...),
    output_format: str = Query(default="markdown", description="Output format: markdown | json | html | text"),
    use_llm: bool = Query(default=True, description="Run LLM enrichment on extracted text"),
    llm_prompt: str | None = Query(default=None, description="Custom system prompt for LLM enrichment"),
    db: AsyncSession = Depends(get_db),
):
    """Convert an uploaded file and return the stored conversion."""
    record = await docling_service.convert_file(
        file, db, output_format, use_llm=use_llm, llm_prompt=llm_prompt
    )
    return _to_convert_response(record)


@router.post("/convert-url", response_model=ConvertResponse)
async def convert_url(
    req: UrlRequest,
    db: AsyncSession = Depends(get_db),
):
    """Convert the document at the given URL and return the stored conversion."""
    record = await docling_service.convert_url(
        req.url, db, req.output_format, use_llm=req.use_llm, llm_prompt=req.llm_prompt
    )
    return _to_convert_response(record)


@router.get("/conversions/{conversion_id}", response_model=ConvertResponse)
async def get_conversion(conversion_id: int, db: AsyncSession = Depends(get_db)):
    """Return a previously stored conversion by its id."""
    record = await docling_service.get_conversion(conversion_id, db)
    return _to_convert_response(record)


@router.get("/history", response_model=list[ConversionRecord])
async def history(limit: int = 20, db: AsyncSession = Depends(get_db)):
    """Return summaries of stored conversions, limited to ``limit`` entries."""
    records = await docling_service.get_history(db, limit)
    return [
        ConversionRecord(
            id=r.id,
            filename=r.filename,
            file_type=r.file_type,
            output_format=r.output_format,
            page_count=r.page_count,
            # created_at is stringified here; the response model receives a str.
            created_at=str(r.created_at),
        )
        for r in records
    ]


@router.delete("/conversions/{conversion_id}")
async def delete_conversion(conversion_id: int, db: AsyncSession = Depends(get_db)):
    """Delete a stored conversion and return the service's result unchanged."""
    return await docling_service.delete_conversion(conversion_id, db)