Files
Crumb-Core-v.1/app/routers/admin_vectors.py

154 lines
5.6 KiB
Python

"""
Admin Vectors Router
Direct access to Qdrant vector search for system administrators.
"""
from fastapi import APIRouter, Depends, Body, Request, HTTPException
from fastapi.responses import HTMLResponse
from typing import List, Optional
from deps import current_user, get_db, get_qdrant_client
from config import get_settings
from services.provider_factory import ProviderFactory
from services.rag_service import RAGService
# Router that collects the admin vector endpoints defined in this module.
router = APIRouter()
@router.get("/vectors", name="admin_vectors_dashboard", response_class=HTMLResponse)
def vectors_dashboard(req: Request, user = Depends(current_user)):
    """
    Serve the admin page for searching vectors.

    Outcomes, depending on the resolved ``user``:
      * admin user      -> renders ``pages/admin_vectors.html``
      * non-admin user  -> plain ``403 admin only`` response
      * no user         -> 302 redirect to the login page for the language
                           stored in the session (``de`` when none is set)
    """
    # Happy path first: an authenticated admin gets the rendered page.
    if user and user.get("role") == "admin":
        page_seo = {"title": "Vector Brain", "desc": "Search Qdrant Directly"}
        return req.app.state.render(req, "pages/admin_vectors.html", seo=page_seo)

    # Authenticated, but lacking the admin role.
    if user:
        return HTMLResponse("403 admin only", status_code=403)

    # Anonymous visitor: send them to the language-specific login page.
    from fastapi.responses import RedirectResponse
    session_lang = req.session.get("lang", "de")
    login_url = f"/{session_lang}/login"
    return RedirectResponse(login_url, status_code=302)
@router.post("/vectors/search", name="admin_vectors_search")
def search_vectors(
    req: Request,
    query: str = Body(..., embed=True),
    collection: str = Body("crumbforest_posts", embed=True),
    limit: int = Body(5, embed=True),
    user = Depends(current_user)
):
    """
    Embed a query string and run it against a Qdrant collection.

    Args:
        req: Incoming request (not used by the body; part of the route signature).
        query: Text to embed and search for.
        collection: Name of the Qdrant collection to query.
        limit: Maximum number of hits to return.
        user: Value produced by the ``current_user`` dependency.

    Returns:
        ``{"results": [...]}`` where every item carries ``score``, ``id``,
        ``payload`` and a short ``excerpt``. If embedding or the search
        fails, ``{"error": "<message>"}`` is returned instead (with the
        default 200 status) so existing clients keep working.

    Raises:
        HTTPException: 403 when the caller is anonymous or not an admin.
    """
    import logging

    # ``current_user`` may yield a falsy value for anonymous requests, so it
    # must be checked before calling ``.get`` on it.
    if not user or user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")

    settings = get_settings()
    # No SQL connection is opened here: the search only talks to the
    # embedding provider and to Qdrant.
    qdrant_client = get_qdrant_client()
    try:
        # 1. Embed the query.
        # NOTE: the provider name is hard-coded; it is not read from settings.
        provider = ProviderFactory.create_provider(
            provider_name="openrouter",
            settings=settings
        )
        # Use EmbeddingService for consistency
        from services.embedding_service import EmbeddingService
        embedding_service = EmbeddingService(provider)
        query_vector = embedding_service.embed_texts([query])[0]

        # 2. Search Qdrant directly
        results = qdrant_client.query_points(
            collection_name=collection,
            query=query_vector,
            limit=limit,
            with_payload=True,
            with_vectors=False
        ).points

        # 3. Format
        formatted = []
        for hit in results:
            # A point may have no payload at all; treat that like missing content.
            content = (hit.payload or {}).get("content")
            formatted.append({
                "score": hit.score,
                "id": hit.id,
                "payload": hit.payload,
                "excerpt": content[:200] + "..." if content else "No content",
            })
        return {"results": formatted}
    except Exception as e:
        # Top-level boundary: record the traceback, report the message to the client.
        logging.getLogger(__name__).exception("Vector search failed")
        return {"error": str(e)}
@router.get("/vectors/content/{post_id}")
async def get_vector_content(post_id: int, user = Depends(current_user)):
    """
    Get full content of a specific post.

    Lookup order:
      1. The ``posts`` table (returns ``content``, ``title``, ``slug``, ``sources``).
      2. The ``post_vectors`` table: when the row's ``post_type`` is
         ``"document"``, the file named by ``metadata["file_path"]`` is read
         from disk and returned in the same shape.

    Args:
        post_id: Identifier used for both table lookups.
        user: Value produced by the ``current_user`` dependency.

    Returns:
        A ``JSONResponse`` with ``content``, ``title``, ``slug`` and ``sources``.

    Raises:
        HTTPException: 403 when the caller is anonymous or not an admin,
            404 when neither lookup yields content,
            500 on any unexpected database error.

    NOTE(review): this handler is declared ``async`` but performs blocking
    database and file I/O. It is left ``async`` to keep the interface
    unchanged; consider a plain ``def`` so it runs in the threadpool.
    """
    import json
    import logging
    import os

    # JSONResponse is not imported at module level, so bring it in here.
    from fastapi.responses import JSONResponse

    logger = logging.getLogger(__name__)

    # ``current_user`` may yield a falsy value for anonymous requests.
    if not user or user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")

    db_conn = get_db()
    try:
        from pymysql.cursors import DictCursor

        with db_conn.cursor(DictCursor) as cur:
            # 1. Try SQL posts table
            cur.execute("SELECT content, title, slug, sources FROM posts WHERE id = %s", (post_id,))
            row = cur.fetchone()
            if row:
                return JSONResponse(row)

            # 2. If not in posts, check post_vectors (for file-based docs)
            cur.execute("SELECT metadata, post_type FROM post_vectors WHERE post_id = %s LIMIT 1", (post_id,))
            meta_row = cur.fetchone()

        if meta_row and meta_row.get("post_type") == "document":
            # Best effort: any problem with the metadata or the file falls
            # through to the 404 below instead of failing the request.
            try:
                raw_meta = meta_row["metadata"]
                meta = json.loads(raw_meta) if isinstance(raw_meta, str) else raw_meta
                # The path comes from stored metadata and is opened as-is.
                file_path = meta.get("file_path")
                if file_path and os.path.exists(file_path):
                    with open(file_path, "r", encoding="utf-8") as f:
                        content = f.read()
                    return JSONResponse({
                        "content": content,
                        "title": meta.get("file_name", "Document"),
                        "slug": "file://" + file_path,
                        "sources": "FileSystem"
                    })
            except Exception:
                logger.exception("Error reading doc file")

        raise HTTPException(status_code=404, detail="Content not found in SQL database or FileSystem (Orphaned Vector?)")
    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) through untouched rather
        # than rewriting them as a 500.
        raise
    except Exception as e:
        logger.exception("Error fetching content")
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always hand the connection back, on success and on failure.
        db_conn.close()
@router.get("/vectors/collections", name="admin_vectors_collections")
def list_collections(user = Depends(current_user)):
    """
    List available collections.

    Args:
        user: Value produced by the ``current_user`` dependency.

    Returns:
        ``{"collections": [...]}`` with the de-duplicated collection names in
        sorted order. If the Qdrant call fails,
        ``{"error": "<message>", "collections": []}`` is returned instead.

    Raises:
        HTTPException: 403 when the caller is anonymous or not an admin.
    """
    import logging

    # ``current_user`` may yield a falsy value for anonymous requests, so it
    # must be checked before calling ``.get`` on it.
    if not user or user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")

    client = get_qdrant_client()
    try:
        collections = client.get_collections()
        # Deduplicate and sort
        names = sorted({c.name for c in collections.collections})
        return {"collections": names}
    except Exception as e:
        # Keep the response shape stable for the client, but record the failure.
        logging.getLogger(__name__).exception("Listing Qdrant collections failed")
        return {"error": str(e), "collections": []}