AI-markdown/markitdown-service/app/controllers/ConvertController.py

108 lines
3.8 KiB
Python

from fastapi import APIRouter, UploadFile, File, Depends, Query, Body, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.ConvertModel import ConvertResponse, HealthResponse, ConversionRecord
from app.services import MarkitdownService as markitdown_service
from app.database import get_db
from pydantic import BaseModel
class CleanupRequest(BaseModel):
    """Request body accepted by the ``POST /cleanup`` endpoint."""

    # Text handed to ``markitdown_service.llm_cleanup`` as its first argument.
    text: str
    # Optional prompt forwarded with the text; ``None`` when the client omits it.
    prompt: str | None = None
    # Optional model name forwarded with the text; ``None`` when omitted.
    model: str | None = None
class CleanupResponse(BaseModel):
    """Response body returned by the ``POST /cleanup`` endpoint."""

    # Value produced by ``markitdown_service.llm_cleanup`` for the request.
    text: str
class SettingsRequest(BaseModel):
    """Request body accepted by the ``POST /settings`` endpoint."""

    # Base URL passed to ``markitdown_service._init_llm``; a missing or empty
    # value is forwarded as ``None`` by the endpoint.
    ollama_base_url: str | None = None
    # Model name passed to ``markitdown_service._init_llm``.
    ollama_model: str = "llava"
    # Stored into ``markitdown_service.CLEANUP_MODEL``; a missing or empty
    # value is stored as the empty string by the endpoint.
    cleanup_model: str | None = None
class SettingsResponse(BaseModel):
    """Snapshot of the LLM settings returned by ``GET``/``POST /settings``."""

    # Mirrors ``markitdown_service.LLM_ACTIVE``.
    llm_enabled: bool
    # Mirrors ``markitdown_service.OLLAMA_BASE_URL`` (required, but may be null).
    ollama_base_url: str | None
    # Mirrors ``markitdown_service.OLLAMA_MODEL``.
    ollama_model: str
    # ``markitdown_service.CLEANUP_MODEL``, with falsy values reported as null.
    cleanup_model: str | None = None
    # Mirrors ``markitdown_service.DEFAULT_CLEANUP_PROMPT``.
    default_prompt: str | None = None
# Router on which every endpoint in this module is registered through the
# ``@router.get`` / ``@router.post`` decorators below.
router = APIRouter()
@router.get("/health", response_model=HealthResponse)
def health():
    """Report that the service is up, together with the current LLM state.

    Returns:
        A ``HealthResponse`` whose ``status`` is always ``"ok"``, whose
        ``llm_enabled`` is ``markitdown_service.LLM_ACTIVE``, and whose
        ``llm_model`` is ``markitdown_service.OLLAMA_MODEL`` while the LLM is
        active and ``None`` otherwise.
    """
    llm_active = markitdown_service.LLM_ACTIVE
    if llm_active:
        model_name = markitdown_service.OLLAMA_MODEL
    else:
        # The model name is only reported while the LLM is active.
        model_name = None
    return HealthResponse(
        status="ok",
        llm_enabled=llm_active,
        llm_model=model_name,
    )
@router.post("/convert", response_model=ConvertResponse)
async def convert(
    file: UploadFile = File(...),
    use_llm: bool = Query(default=True, description="Use LLM vision for image understanding"),
    llm_prompt: str | None = Query(default=None, description="Custom prompt for LLM vision"),
    db: AsyncSession = Depends(get_db),
):
    """Convert an uploaded file by delegating to the markitdown service.

    Args:
        file: The uploaded file (required multipart field).
        use_llm: Query flag forwarded to the service as ``use_llm``.
        llm_prompt: Optional query string forwarded to the service as
            ``llm_prompt``.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        Whatever ``markitdown_service.convert_file`` returns; FastAPI
        serializes it using ``ConvertResponse``.
    """
    return await markitdown_service.convert_file(
        file,
        db,
        use_llm=use_llm,
        llm_prompt=llm_prompt,
    )
@router.get("/models")
def list_models():
    """List the model names advertised by the configured Ollama server.

    Returns:
        ``{"models": [...]}`` with the names sorted. The list is empty when no
        base URL is configured. On any failure (import, network, HTTP status,
        unexpected payload) the list is empty and an ``"error"`` key carries
        the exception text; this endpoint is best-effort and never raises.
    """
    configured_url = markitdown_service.OLLAMA_BASE_URL
    if not configured_url:
        return {"models": []}

    try:
        # Imported lazily and inside the ``try`` so that a missing dependency
        # is reported through the "error" key like any other failure.
        import httpx
        import re

        # Drop trailing slashes and a trailing "/v1" segment before appending
        # the "/api/tags" path.
        root = re.sub(r"/v1/?$", "", configured_url.rstrip("/"))
        reply = httpx.get(f"{root}/api/tags", timeout=5)
        reply.raise_for_status()
        payload = reply.json()
        found = [entry["name"] for entry in payload.get("models", [])]
        found.sort()
        return {"models": found}
    except Exception as exc:
        return {"models": [], "error": str(exc)}
@router.get("/settings", response_model=SettingsResponse)
def get_settings():
    """Return the LLM settings currently held by the markitdown service module.

    Returns:
        A ``SettingsResponse`` populated from the service module's
        ``LLM_ACTIVE``, ``OLLAMA_BASE_URL``, ``OLLAMA_MODEL``,
        ``CLEANUP_MODEL`` and ``DEFAULT_CLEANUP_PROMPT`` attributes.
    """
    svc = markitdown_service
    # A falsy cleanup model (e.g. the empty string) is reported as null.
    cleanup_model = svc.CLEANUP_MODEL or None
    return SettingsResponse(
        llm_enabled=svc.LLM_ACTIVE,
        ollama_base_url=svc.OLLAMA_BASE_URL,
        ollama_model=svc.OLLAMA_MODEL,
        cleanup_model=cleanup_model,
        default_prompt=svc.DEFAULT_CLEANUP_PROMPT,
    )
@router.post("/settings", response_model=SettingsResponse)
def update_settings(req: SettingsRequest):
    """Apply new LLM settings and return the resulting state.

    Args:
        req: Requested base URL, model and cleanup model.

    Returns:
        A ``SettingsResponse`` read back from the service module after the
        update has been applied.
    """
    svc = markitdown_service

    # An empty base URL is normalized to None before re-initializing the LLM.
    base_url = req.ollama_base_url or None
    svc._init_llm(base_url, req.ollama_model)

    # The service module stores "no cleanup model" as an empty string.
    svc.CLEANUP_MODEL = req.cleanup_model or ""

    return SettingsResponse(
        llm_enabled=svc.LLM_ACTIVE,
        ollama_base_url=svc.OLLAMA_BASE_URL,
        ollama_model=svc.OLLAMA_MODEL,
        cleanup_model=svc.CLEANUP_MODEL or None,
        default_prompt=svc.DEFAULT_CLEANUP_PROMPT,
    )
@router.post("/cleanup", response_model=CleanupResponse)
async def cleanup(req: CleanupRequest):
    """Run the LLM cleanup pass over the submitted text.

    Args:
        req: The text to clean, plus an optional prompt and model override.

    Returns:
        A ``CleanupResponse`` wrapping the value returned by
        ``markitdown_service.llm_cleanup``.

    Raises:
        HTTPException: 503 when ``markitdown_service.LLM_ACTIVE`` is falsy.
    """
    import asyncio

    if not markitdown_service.LLM_ACTIVE:
        raise HTTPException(status_code=503, detail="LLM not configured")

    # ``llm_cleanup`` is invoked without ``await``, i.e. as a synchronous call.
    # Running it directly inside this coroutine would stall the event loop (and
    # every other request) for the duration of the call, so it is handed to a
    # worker thread instead.
    # NOTE(review): assumes llm_cleanup is a plain blocking function that is
    # safe to call from a worker thread - confirm against the service module.
    cleaned = await asyncio.to_thread(
        markitdown_service.llm_cleanup,
        req.text,
        req.prompt,
        req.model,
    )
    return CleanupResponse(text=cleaned)
@router.get("/history", response_model=list[ConversionRecord])
async def history(limit: int = 20, db: AsyncSession = Depends(get_db)):
    """Return summaries of stored conversions.

    Args:
        limit: Value passed through to ``markitdown_service.get_history``
            (defaults to 20).
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        One ``ConversionRecord`` per row returned by the service, in the order
        the service returned them.
    """

    def _as_record(row):
        # ``created_at`` is stringified with ``str()`` before being handed to
        # the response model.
        return ConversionRecord(
            id=row.id,
            filename=row.filename,
            file_type=row.file_type,
            llm_enabled=row.llm_enabled,
            created_at=str(row.created_at),
        )

    rows = await markitdown_service.get_history(db, limit)
    return [_as_record(row) for row in rows]