AI-markdown/services/markitdown-service/app/controllers/ConvertController.py

182 lines
6.1 KiB
Python

from fastapi import APIRouter, UploadFile, File, Depends, Query, Body, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.ConvertModel import ConvertResponse, HealthResponse, ConversionRecord
from app.services import MarkitdownService as markitdown_service
from app.database import get_db
from pydantic import BaseModel
class UrlRequest(BaseModel):
    """JSON body accepted by ``POST /convert-url``."""

    # URL handed to ``markitdown_service.convert_url``.
    url: str
    # Forwarded to the service as the ``use_llm`` keyword; on by default.
    use_llm: bool = True
    # Forwarded to the service as ``llm_prompt``; ``None`` when omitted.
    llm_prompt: str | None = None
class CleanupRequest(BaseModel):
    """JSON body accepted by ``POST /cleanup``."""

    # Text passed as the first argument to ``markitdown_service.llm_cleanup``.
    text: str
    # Optional prompt passed through to ``llm_cleanup`` unchanged.
    prompt: str | None = None
    # Optional model name passed through to ``llm_cleanup`` unchanged.
    model: str | None = None
class CleanupResponse(BaseModel):
    """Response body returned by ``POST /cleanup``."""

    # Value returned by ``markitdown_service.llm_cleanup``.
    text: str
class SettingsRequest(BaseModel):
    """JSON body accepted by ``POST /settings``."""

    # Stored in ``markitdown_service.CLEANUP_MODEL``; a missing or empty value
    # is stored as the empty string.
    cleanup_model: str | None = None
class SettingsResponse(BaseModel):
    """Snapshot of the service's LLM settings returned by the /settings routes."""

    # Mirrors ``markitdown_service.LLM_ACTIVE``.
    llm_enabled: bool
    # Mirrors ``markitdown_service.LLM_BASE_URL``.
    llm_base_url: str | None
    # Mirrors ``markitdown_service.LLM_MODEL``.
    llm_model: str
    # ``markitdown_service.CLEANUP_MODEL``, or ``None`` when it is empty.
    cleanup_model: str | None = None
    # Mirrors ``markitdown_service.DEFAULT_CLEANUP_PROMPT``.
    default_prompt: str | None = None
# Router on which every endpoint in this module is registered.
router = APIRouter()
@router.get("/health", response_model=HealthResponse)
def health():
    """Report liveness together with the current LLM status.

    The model name is only included while the LLM integration is active;
    otherwise ``llm_model`` is ``None``.
    """
    llm_on = markitdown_service.LLM_ACTIVE
    model_name = markitdown_service.LLM_MODEL if llm_on else None
    return HealthResponse(status="ok", llm_enabled=llm_on, llm_model=model_name)
@router.post("/convert", response_model=ConvertResponse)
async def convert(
    file: UploadFile = File(...),
    use_llm: bool = Query(
        default=True,
        description="Use LLM vision for image understanding",
    ),
    llm_prompt: str | None = Query(
        default=None,
        description="Custom prompt for LLM vision",
    ),
    db: AsyncSession = Depends(get_db),
):
    """Convert an uploaded file by delegating to ``markitdown_service.convert_file``.

    The upload, the database session and both LLM options are passed through
    unchanged; whatever the service returns becomes the response.
    """
    return await markitdown_service.convert_file(
        file,
        db,
        use_llm=use_llm,
        llm_prompt=llm_prompt,
    )
@router.post("/convert-url", response_model=ConvertResponse)
async def convert_url(
    req: UrlRequest,
    db: AsyncSession = Depends(get_db),
):
    """Convert the document at ``req.url`` via ``markitdown_service.convert_url``.

    The LLM options from the request body are forwarded as keyword arguments;
    the service's result is returned as the response.
    """
    llm_options = {"use_llm": req.use_llm, "llm_prompt": req.llm_prompt}
    return await markitdown_service.convert_url(req.url, db, **llm_options)
@router.get("/models")
def list_models():
    """List model names advertised by the configured LLM server.

    Returns ``{"models": []}`` when no base URL is configured. Any failure
    while importing, requesting or parsing is reported best-effort as
    ``{"models": [], "error": <message>}`` rather than raised.
    """
    configured_url = markitdown_service.LLM_BASE_URL
    if not configured_url:
        return {"models": []}

    try:
        import httpx
        import re

        # Strip a trailing "/v1" (optionally followed by "/") so that
        # "/api/tags" is requested relative to the remaining base URL.
        root_url = re.sub(r"/v1/?$", "", configured_url.rstrip("/"))
        response = httpx.get(f"{root_url}/api/tags", timeout=5)
        response.raise_for_status()
        payload = response.json()
        model_names = sorted(entry["name"] for entry in payload.get("models", []))
    except Exception as e:
        return {"models": [], "error": str(e)}
    return {"models": model_names}
@router.get("/settings", response_model=SettingsResponse)
def get_settings():
    """Return the LLM settings currently held by the service module.

    An empty ``CLEANUP_MODEL`` is reported as ``None``.
    """
    svc = markitdown_service
    return SettingsResponse(
        llm_enabled=svc.LLM_ACTIVE,
        llm_base_url=svc.LLM_BASE_URL,
        llm_model=svc.LLM_MODEL,
        cleanup_model=svc.CLEANUP_MODEL or None,
        default_prompt=svc.DEFAULT_CLEANUP_PROMPT,
    )
@router.post("/settings", response_model=SettingsResponse)
def update_settings(req: SettingsRequest):
    """Re-run LLM initialisation, store the cleanup model, and return the settings.

    ``_init_llm()`` is called first and ``CLEANUP_MODEL`` is assigned afterwards,
    so the requested value is what ends up stored. A missing or empty
    ``cleanup_model`` is stored as ``""`` and reported back as ``None``.
    """
    svc = markitdown_service
    svc._init_llm()
    svc.CLEANUP_MODEL = req.cleanup_model or ""

    snapshot = {
        "llm_enabled": svc.LLM_ACTIVE,
        "llm_base_url": svc.LLM_BASE_URL,
        "llm_model": svc.LLM_MODEL,
        "cleanup_model": svc.CLEANUP_MODEL or None,
        "default_prompt": svc.DEFAULT_CLEANUP_PROMPT,
    }
    return SettingsResponse(**snapshot)
@router.post("/cleanup", response_model=CleanupResponse)
async def cleanup(req: CleanupRequest):
    """Run the LLM cleanup over ``req.text`` and return the result.

    Raises:
        HTTPException: 503 when ``markitdown_service.LLM_ACTIVE`` is falsy.
    """
    if markitdown_service.LLM_ACTIVE:
        # NOTE(review): llm_cleanup is called without ``await`` inside an async
        # handler; if it performs blocking I/O it will stall the event loop -
        # confirm against the service implementation.
        result = markitdown_service.llm_cleanup(req.text, req.prompt, req.model)
        return CleanupResponse(text=result)
    raise HTTPException(status_code=503, detail="LLM not configured")
class PathRequest(BaseModel):
    """JSON body accepted by ``POST /convert-path``."""

    # Filesystem path; the handler resolves it and requires it to be a file
    # under "/workspace".
    path: str
    # Forwarded to the service as the ``use_llm`` keyword; on by default.
    use_llm: bool = True
    # Forwarded to the service as ``llm_prompt``; ``None`` when omitted.
    llm_prompt: str | None = None
@router.get("/browse")
def browse(path: str = Query("/workspace")):
    """Describe a file, or list a directory, located under ``/workspace``.

    The path is resolved with ``os.path.realpath`` before the sandbox check,
    so ``..`` segments and symlinks cannot be used to step outside.

    Returns:
        For a file: ``{"type": "file", "path", "name"}``.
        For a directory: ``{"type": "dir", "path", "entries"}`` where each
        entry has ``name``, ``path``, ``type`` ("dir" or "file") and ``ext``
        (lower-cased suffix for regular files, otherwise ``None``).

    Raises:
        HTTPException: 403 when the resolved path is outside ``/workspace`` or
            the directory cannot be read; 404 when the path does not exist.
    """
    import os

    root = "/workspace"
    abs_path = os.path.realpath(path)
    # Compare on a path-component boundary: a plain startswith(root) would
    # also accept siblings such as "/workspace-backup" or "/workspace2".
    if abs_path != root and not abs_path.startswith(root + os.sep):
        raise HTTPException(status_code=403, detail="Access denied")
    if not os.path.exists(abs_path):
        raise HTTPException(status_code=404, detail="Path not found")
    if os.path.isfile(abs_path):
        return {"type": "file", "path": abs_path, "name": os.path.basename(abs_path)}

    entries = []
    try:
        for name in sorted(os.listdir(abs_path)):
            full = os.path.join(abs_path, name)
            entries.append({
                "name": name,
                "path": full,
                "type": "dir" if os.path.isdir(full) else "file",
                "ext": os.path.splitext(name)[1].lower() if os.path.isfile(full) else None,
            })
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail="Permission denied") from exc
    return {"type": "dir", "path": abs_path, "entries": entries}
@router.post("/convert-path")
async def convert_path(req: PathRequest, db: AsyncSession = Depends(get_db)):
    """Convert a file that already exists under ``/workspace``.

    The path is resolved with ``os.path.realpath`` before the sandbox check,
    then handed to ``markitdown_service.convert_path`` together with the LLM
    options from the request body.

    Raises:
        HTTPException: 403 when the resolved path is outside ``/workspace``;
            404 when it is not an existing regular file.
    """
    import os

    root = "/workspace"
    abs_path = os.path.realpath(req.path)
    # Compare on a path-component boundary: a plain startswith(root) would
    # also accept siblings such as "/workspace-backup" or "/workspace2".
    if abs_path != root and not abs_path.startswith(root + os.sep):
        raise HTTPException(status_code=403, detail="Access denied")
    # isfile() is already False for missing paths, so no separate exists() test.
    if not os.path.isfile(abs_path):
        raise HTTPException(status_code=404, detail="File not found")
    record = await markitdown_service.convert_path(
        abs_path, db, use_llm=req.use_llm, llm_prompt=req.llm_prompt
    )
    return record
class WriteFileRequest(BaseModel):
    """JSON body accepted by ``POST /write-file``."""

    # Destination path; the handler resolves it and requires it to be under
    # "/workspace".
    path: str
    # Text written to the destination as UTF-8.
    content: str
@router.post("/write-file")
def write_file(req: WriteFileRequest):
    """Write ``req.content`` as UTF-8 text to a path under ``/workspace``.

    Missing parent directories are created. An existing file is overwritten.

    Returns:
        ``{"path": <resolved path>, "bytes": <UTF-8 byte length of content>}``.

    Raises:
        HTTPException: 403 when the resolved path is outside ``/workspace``;
            400 when the resolved path is an existing directory.
    """
    import os

    root = "/workspace"
    abs_path = os.path.realpath(req.path)
    # Compare on a path-component boundary: a plain startswith(root) would
    # also allow writes to siblings such as "/workspace-backup/x".
    if abs_path != root and not abs_path.startswith(root + os.sep):
        raise HTTPException(status_code=403, detail="Access denied")
    # Opening a directory for writing raises IsADirectoryError, which would
    # otherwise surface as an unhandled 500.
    if os.path.isdir(abs_path):
        raise HTTPException(status_code=400, detail="Path is a directory")
    os.makedirs(os.path.dirname(abs_path), exist_ok=True)
    with open(abs_path, "w", encoding="utf-8") as f:
        f.write(req.content)
    return {"path": abs_path, "bytes": len(req.content.encode())}
@router.get("/history", response_model=list[ConversionRecord])
async def history(limit: int = 20, db: AsyncSession = Depends(get_db)):
    """Return past conversions fetched through ``markitdown_service.get_history``.

    ``limit`` is passed to the service unchanged. Each fetched row is mapped
    onto a ``ConversionRecord``, with ``created_at`` converted via ``str()``.
    """
    rows = await markitdown_service.get_history(db, limit)
    summaries = []
    for row in rows:
        summaries.append(
            ConversionRecord(
                id=row.id,
                filename=row.filename,
                file_type=row.file_type,
                llm_enabled=row.llm_enabled,
                created_at=str(row.created_at),
            )
        )
    return summaries