182 lines
6.1 KiB
Python
182 lines
6.1 KiB
Python
from fastapi import APIRouter, UploadFile, File, Depends, Query, Body, HTTPException
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from app.models.ConvertModel import ConvertResponse, HealthResponse, ConversionRecord
|
|
from app.services import MarkitdownService as markitdown_service
|
|
from app.database import get_db
|
|
from pydantic import BaseModel
|
|
|
|
class UrlRequest(BaseModel):
    # JSON body accepted by POST /convert-url.

    # Location of the resource handed to the conversion service.
    url: str

    # Passed through to the service as ``use_llm``; enabled unless the
    # caller turns it off.
    use_llm: bool = True

    # Passed through to the service as ``llm_prompt``; None when the
    # caller does not supply one.
    llm_prompt: str | None = None
|
|
|
|
class CleanupRequest(BaseModel):
    # JSON body accepted by POST /cleanup. The three fields are handed to
    # the service's ``llm_cleanup`` call positionally, in this order.

    # Text to be cleaned.
    text: str

    # Optional prompt; None when the caller does not supply one.
    prompt: str | None = None

    # Optional model name; None when the caller does not supply one.
    model: str | None = None
|
|
|
|
class CleanupResponse(BaseModel):
    # Response body of POST /cleanup.

    # Result returned by the service's ``llm_cleanup`` call.
    text: str
|
|
|
|
class SettingsRequest(BaseModel):
    # JSON body accepted by POST /settings.

    # New value for the service's CLEANUP_MODEL; a missing or empty value
    # is stored as the empty string by the endpoint.
    cleanup_model: str | None = None
|
|
|
|
class SettingsResponse(BaseModel):
    # Snapshot of the service module's LLM configuration, returned by both
    # GET /settings and POST /settings.

    # Mirrors the service's LLM_ACTIVE flag.
    llm_enabled: bool

    # Mirrors the service's LLM_BASE_URL; required field, but may be None.
    llm_base_url: str | None

    # Mirrors the service's LLM_MODEL.
    llm_model: str

    # The service's CLEANUP_MODEL, or None when it is empty.
    cleanup_model: str | None = None

    # Mirrors the service's DEFAULT_CLEANUP_PROMPT.
    default_prompt: str | None = None
|
|
|
|
# Router on which every endpoint in this module is registered via the
# @router.get / @router.post decorators below.
router = APIRouter()
|
|
|
|
|
|
@router.get("/health", response_model=HealthResponse)
def health():
    # Liveness probe that also reports the LLM configuration state.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    llm_active = markitdown_service.LLM_ACTIVE

    # The model name is only reported while the LLM is active.
    if llm_active:
        model_name = markitdown_service.LLM_MODEL
    else:
        model_name = None

    return HealthResponse(status="ok", llm_enabled=llm_active, llm_model=model_name)
|
|
|
|
|
|
@router.post("/convert", response_model=ConvertResponse)
async def convert(
    file: UploadFile = File(...),
    use_llm: bool = Query(default=True, description="Use LLM vision for image understanding"),
    llm_prompt: str | None = Query(default=None, description="Custom prompt for LLM vision"),
    db: AsyncSession = Depends(get_db),
):
    # Convert an uploaded file by delegating to the service layer and
    # returning whatever record it produces.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    return await markitdown_service.convert_file(
        file,
        db,
        use_llm=use_llm,
        llm_prompt=llm_prompt,
    )
|
|
|
|
|
|
@router.post("/convert-url", response_model=ConvertResponse)
async def convert_url(
    req: UrlRequest,
    db: AsyncSession = Depends(get_db),
):
    # Convert the resource at ``req.url`` by delegating to the service
    # layer; the LLM options travel along unchanged.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    llm_options = {"use_llm": req.use_llm, "llm_prompt": req.llm_prompt}
    return await markitdown_service.convert_url(req.url, db, **llm_options)
|
|
|
|
|
|
@router.get("/models")
def list_models():
    # List the model names advertised by the configured LLM server.
    # Best-effort: any failure yields an empty list plus an "error" string
    # instead of an HTTP error.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    configured_url = markitdown_service.LLM_BASE_URL
    if not configured_url:
        return {"models": []}

    try:
        import httpx
        import re

        # Drop a trailing "/v1" so the tags endpoint is addressed from the
        # server root. NOTE(review): "/api/tags" looks like an Ollama-style
        # endpoint -- confirm against the deployed backend.
        server_root = re.sub(r"/v1/?$", "", configured_url.rstrip("/"))
        response = httpx.get(f"{server_root}/api/tags", timeout=5)
        response.raise_for_status()
        listing = response.json().get("models", [])
        return {"models": sorted(entry["name"] for entry in listing)}
    except Exception as exc:
        return {"models": [], "error": str(exc)}
|
|
|
|
@router.get("/settings", response_model=SettingsResponse)
def get_settings():
    # Report the service module's current LLM configuration.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    svc = markitdown_service
    snapshot = SettingsResponse(
        llm_enabled=svc.LLM_ACTIVE,
        llm_base_url=svc.LLM_BASE_URL,
        llm_model=svc.LLM_MODEL,
        # An empty cleanup model is reported as None.
        cleanup_model=svc.CLEANUP_MODEL or None,
        default_prompt=svc.DEFAULT_CLEANUP_PROMPT,
    )
    return snapshot
|
|
|
|
@router.post("/settings", response_model=SettingsResponse)
def update_settings(req: SettingsRequest):
    # Re-run the service's LLM initialisation, store the requested cleanup
    # model on the service module, and return the resulting configuration.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    svc = markitdown_service

    # Order matters: initialisation runs first, then the override is stored.
    svc._init_llm()
    # A missing or empty value is stored as "".
    svc.CLEANUP_MODEL = req.cleanup_model or ""

    snapshot = SettingsResponse(
        llm_enabled=svc.LLM_ACTIVE,
        llm_base_url=svc.LLM_BASE_URL,
        llm_model=svc.LLM_MODEL,
        # An empty cleanup model is reported as None.
        cleanup_model=svc.CLEANUP_MODEL or None,
        default_prompt=svc.DEFAULT_CLEANUP_PROMPT,
    )
    return snapshot
|
|
|
|
@router.post("/cleanup", response_model=CleanupResponse)
async def cleanup(req: CleanupRequest):
    # Run the service's LLM cleanup over ``req.text`` and return the result.
    #
    # Raises HTTPException(503) when the service reports the LLM as inactive.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    import asyncio

    if not markitdown_service.LLM_ACTIVE:
        raise HTTPException(status_code=503, detail="LLM not configured")

    # ``llm_cleanup`` is a plain synchronous call (it was never awaited).
    # Calling it directly inside a coroutine would stall the event loop,
    # and every other request with it, until it returns, so it runs in a
    # worker thread instead.
    cleaned = await asyncio.to_thread(
        markitdown_service.llm_cleanup, req.text, req.prompt, req.model
    )
    return CleanupResponse(text=cleaned)
|
|
|
|
|
|
class PathRequest(BaseModel):
    # JSON body accepted by POST /convert-path.

    # Filesystem path of the file to convert; the endpoint resolves it and
    # checks it against the workspace before use.
    path: str

    # Passed through to the service as ``use_llm``; enabled unless the
    # caller turns it off.
    use_llm: bool = True

    # Passed through to the service as ``llm_prompt``; None when the
    # caller does not supply one.
    llm_prompt: str | None = None
|
|
|
|
|
|
@router.get("/browse")
def browse(path: str = Query("/workspace")):
    # Describe a file, or list a directory, inside the workspace.
    #
    # Returns {"type": "file", ...} for a file and
    # {"type": "dir", "entries": [...]} for a directory.
    # Raises HTTPException 403 when the resolved path lies outside the
    # workspace or cannot be listed, and 404 when it does not exist.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    import os

    workspace_root = "/workspace"

    # realpath collapses ".." segments and symlinks before the check.
    abs_path = os.path.realpath(path)

    # A bare startswith(workspace_root) would also admit siblings such as
    # "/workspace-old" or "/workspace2/x"; require the root itself or a
    # path strictly below it.
    inside_workspace = abs_path == workspace_root or abs_path.startswith(
        workspace_root + os.sep
    )
    if not inside_workspace:
        raise HTTPException(status_code=403, detail="Access denied")

    if not os.path.exists(abs_path):
        raise HTTPException(status_code=404, detail="Path not found")

    if os.path.isfile(abs_path):
        return {"type": "file", "path": abs_path, "name": os.path.basename(abs_path)}

    # Only the listing itself can raise PermissionError; the isdir/isfile
    # probes below report False instead of raising.
    try:
        names = sorted(os.listdir(abs_path))
    except PermissionError as exc:
        raise HTTPException(status_code=403, detail="Permission denied") from exc

    entries = []
    for name in names:
        full = os.path.join(abs_path, name)
        entries.append({
            "name": name,
            "path": full,
            "type": "dir" if os.path.isdir(full) else "file",
            # Lower-cased extension for regular files, None otherwise.
            "ext": os.path.splitext(name)[1].lower() if os.path.isfile(full) else None,
        })
    return {"type": "dir", "path": abs_path, "entries": entries}
|
|
|
|
|
|
@router.post("/convert-path")
async def convert_path(req: PathRequest, db: AsyncSession = Depends(get_db)):
    # Convert a file that already lives inside the workspace by delegating
    # to the service layer with the resolved path.
    #
    # Raises HTTPException 403 when the resolved path lies outside the
    # workspace and 404 when it is not an existing regular file.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    import os

    workspace_root = "/workspace"

    # realpath collapses ".." segments and symlinks before the check.
    abs_path = os.path.realpath(req.path)

    # A bare startswith(workspace_root) would also admit siblings such as
    # "/workspace-old/secret.txt"; require the root itself or a path
    # strictly below it.
    inside_workspace = abs_path == workspace_root or abs_path.startswith(
        workspace_root + os.sep
    )
    if not inside_workspace:
        raise HTTPException(status_code=403, detail="Access denied")

    # isfile() is already False for a missing path, so no separate
    # existence check is needed.
    if not os.path.isfile(abs_path):
        raise HTTPException(status_code=404, detail="File not found")

    return await markitdown_service.convert_path(
        abs_path, db, use_llm=req.use_llm, llm_prompt=req.llm_prompt
    )
|
|
|
|
|
|
class WriteFileRequest(BaseModel):
    # JSON body accepted by POST /write-file.

    # Destination path; the endpoint resolves it and checks it against the
    # workspace before writing.
    path: str

    # Text written to the destination, encoded as UTF-8.
    content: str
|
|
|
|
|
|
@router.post("/write-file")
def write_file(req: WriteFileRequest):
    # Write ``req.content`` as UTF-8 text to a path inside the workspace,
    # creating missing parent directories and overwriting any existing file.
    #
    # Returns the resolved path and the number of bytes in the encoded
    # content. Raises HTTPException 403 when the resolved path lies outside
    # the workspace.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    import os

    workspace_root = "/workspace"

    # realpath collapses ".." segments and symlinks before the check.
    abs_path = os.path.realpath(req.path)

    # A bare startswith(workspace_root) would also admit siblings such as
    # "/workspace-old/x", allowing writes outside the sandbox; require the
    # root itself or a path strictly below it.
    inside_workspace = abs_path == workspace_root or abs_path.startswith(
        workspace_root + os.sep
    )
    if not inside_workspace:
        raise HTTPException(status_code=403, detail="Access denied")

    os.makedirs(os.path.dirname(abs_path), exist_ok=True)
    with open(abs_path, "w", encoding="utf-8") as f:
        f.write(req.content)

    return {"path": abs_path, "bytes": len(req.content.encode())}
|
|
|
|
|
|
@router.get("/history", response_model=list[ConversionRecord])
async def history(limit: int = 20, db: AsyncSession = Depends(get_db)):
    # Return the records the service yields for ``get_history(db, limit)``,
    # each mapped onto the ConversionRecord response model.
    # (Comment rather than docstring so the generated OpenAPI description
    # stays unchanged.)
    rows = await markitdown_service.get_history(db, limit)

    summaries = []
    for row in rows:
        summary = ConversionRecord(
            id=row.id,
            filename=row.filename,
            file_type=row.file_type,
            llm_enabled=row.llm_enabled,
            # The timestamp is sent as its str() form.
            created_at=str(row.created_at),
        )
        summaries.append(summary)
    return summaries
|