# Crumb-Core-v.1/app/routers/admin_vectors.py

"""
Admin Vectors Router
Direct access to Qdrant vector search for system administrators.
"""
from fastapi import APIRouter, Depends, Body, Request, HTTPException
from fastapi.responses import HTMLResponse
from typing import List, Optional

from deps import current_user, get_db, get_qdrant_client
from config import get_settings
from services.provider_factory import ProviderFactory
from services.rag_service import RAGService

router = APIRouter()

@router.get("/vectors", name="admin_vectors_dashboard", response_class=HTMLResponse)
def vectors_dashboard(req: Request, user = Depends(current_user)):
    """
    Serve the admin page used to query Qdrant directly.

    Visitors without a user are redirected (302) to the login page of the
    language stored in the session (falling back to "de"). Users whose role
    is not "admin" receive a plain-text 403 response.
    """
    from fastapi.responses import RedirectResponse

    # Guard 1: nobody logged in -> send them to the localized login page.
    if not user:
        lang = req.session.get("lang", "de")
        login_target = f"/{lang}/login"
        return RedirectResponse(login_target, status_code=302)

    # Guard 2: logged in, but not an administrator.
    is_admin = user.get("role") == "admin"
    if not is_admin:
        return HTMLResponse("403 admin only", status_code=403)

    page_seo = {"title": "Vector Brain", "desc": "Search Qdrant Directly"}
    render = req.app.state.render
    return render(req, "pages/admin_vectors.html", seo=page_seo)

@router.post("/vectors/search", name="admin_vectors_search")
def search_vectors(
    req: Request,
    query: str = Body(..., embed=True),
    collection: str = Body("crumbforest_posts", embed=True),
    limit: int = Body(5, embed=True),
    user = Depends(current_user)
):
    """
    Embed ``query`` and run a similarity search against a Qdrant collection.

    Args:
        req: Incoming request (currently unused by the handler body).
        query: Free-text search string to embed.
        collection: Name of the Qdrant collection to search.
        limit: Maximum number of hits to return.
        user: Result of the ``current_user`` dependency.

    Returns:
        ``{"results": [...]}`` where each entry carries ``score``, ``id``,
        the raw ``payload`` and an ``excerpt`` (first 200 characters of the
        payload's ``content`` followed by "...", or "No content").
        If embedding or searching fails, ``{"error": "<message>"}`` is
        returned instead (with the default success status code).

    Raises:
        HTTPException: 403 when there is no user or the user is not an admin.
    """
    # `current_user` can yield a falsy value (see the dashboard route), so
    # guard before calling `.get` to avoid an AttributeError / HTTP 500.
    if not user or user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")

    settings = get_settings()
    qdrant_client = get_qdrant_client()

    try:
        # 1. Embed the query.
        # NOTE: the provider name is hard-coded to "openrouter" here; it is
        # not read from settings.
        provider = ProviderFactory.create_provider(
            provider_name="openrouter",
            settings=settings
        )

        # Use EmbeddingService for consistency
        from services.embedding_service import EmbeddingService
        embedding_service = EmbeddingService(provider)
        query_vector = embedding_service.embed_texts([query])[0]

        # 2. Search Qdrant directly
        hits = qdrant_client.query_points(
            collection_name=collection,
            query=query_vector,
            limit=limit,
            with_payload=True,
            with_vectors=False
        ).points

        # 3. Format
        formatted = []
        for hit in hits:
            # A point may have no payload at all; treat that as "no content"
            # instead of failing the whole search.
            content = (hit.payload or {}).get("content")
            formatted.append({
                "score": hit.score,
                "id": hit.id,
                "payload": hit.payload,
                "excerpt": content[:200] + "..." if content else "No content",
            })

        return {"results": formatted}

    except Exception as e:
        # Top-level boundary: report the failure to the admin UI as a JSON
        # error body rather than propagating it.
        print(f"Vector search failed: {e}")
        import traceback
        traceback.print_exc()
        return {"error": str(e)}

def _load_document_payload(raw_metadata):
    """
    Build the response payload for a file-backed document.

    Args:
        raw_metadata: The ``metadata`` column of a ``post_vectors`` row,
            either a JSON string or an already-decoded mapping.

    Returns:
        A dict with ``content``, ``title``, ``slug`` and ``sources`` keys, or
        ``None`` when the metadata has no usable ``file_path`` or the file
        cannot be read.
    """
    import json
    import os

    try:
        meta = json.loads(raw_metadata) if isinstance(raw_metadata, str) else raw_metadata
        file_path = meta.get("file_path")
        if not file_path or not os.path.exists(file_path):
            return None

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        return {
            "content": content,
            "title": meta.get("file_name", "Document"),
            "slug": "file://" + file_path,
            "sources": "FileSystem"
        }
    except (OSError, ValueError, AttributeError, TypeError) as e:
        # Best effort: malformed metadata or an unreadable file is reported
        # to the caller as "not found" rather than as a server error.
        print(f"Error reading doc file: {e}")
        return None


@router.get("/vectors/content/{post_id}")
async def get_vector_content(post_id: int, user = Depends(current_user)):
    """
    Get full content of a specific post.

    Looks the id up in the ``posts`` table first. If no row exists, falls back
    to ``post_vectors`` and, for rows of type "document", reads the file named
    by the ``file_path`` entry of the row's metadata.

    Args:
        post_id: Id used for both the ``posts.id`` and
            ``post_vectors.post_id`` lookups.
        user: Result of the ``current_user`` dependency.

    Returns:
        A JSON response with ``content``, ``title``, ``slug`` and ``sources``.

    Raises:
        HTTPException: 403 when there is no user or the user is not an admin;
            404 when neither the database nor the file system has the
            content; 500 (with the error text as detail) on any other failure.
    """
    # `current_user` can yield a falsy value (see the dashboard route), so
    # guard before calling `.get` to avoid an AttributeError / HTTP 500.
    if not user or user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")

    # Imported locally: the module header only imports HTMLResponse.
    from fastapi.responses import JSONResponse

    # NOTE(review): the DB and file access below are blocking calls inside an
    # `async def` handler; consider making this a plain `def` like the other
    # routes in this module.
    db_conn = get_db()
    try:
        from pymysql.cursors import DictCursor

        with db_conn.cursor(DictCursor) as cur:
            # 1. Try SQL posts table
            cur.execute("SELECT content, title, slug, sources FROM posts WHERE id = %s", (post_id,))
            row = cur.fetchone()
            if row:
                return JSONResponse(row)

            # 2. If not in posts, check post_vectors (for file-based docs)
            cur.execute("SELECT metadata, post_type FROM post_vectors WHERE post_id = %s LIMIT 1", (post_id,))
            meta_row = cur.fetchone()

        if meta_row and meta_row.get("post_type") == "document":
            document = _load_document_payload(meta_row["metadata"])
            if document is not None:
                return JSONResponse(document)

        raise HTTPException(status_code=404, detail="Content not found in SQL database or FileSystem (Orphaned Vector?)")

    except HTTPException:
        # Let the deliberate 404 above through; without this clause the
        # generic handler below would rewrite it into a 500.
        raise
    except Exception as e:
        print(f"Error fetching content: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always release the connection, matching the search route's cleanup.
        db_conn.close()

@router.get("/vectors/collections", name="admin_vectors_collections")
def list_collections(user = Depends(current_user)):
    """
    List available collections.

    Args:
        user: Result of the ``current_user`` dependency.

    Returns:
        ``{"collections": [...]}`` with the de-duplicated collection names in
        sorted order. If the Qdrant call fails, returns
        ``{"error": "<message>", "collections": []}`` instead.

    Raises:
        HTTPException: 403 when there is no user or the user is not an admin.
    """
    # `current_user` can yield a falsy value (see the dashboard route), so
    # guard before calling `.get` to avoid an AttributeError / HTTP 500.
    if not user or user.get("role") != "admin":
        raise HTTPException(status_code=403, detail="Admin access required")

    client = get_qdrant_client()
    try:
        collections = client.get_collections()
        # Deduplicate and sort
        names = sorted({c.name for c in collections.collections})
        return {"collections": names}
    except Exception as e:
        # Keep the "collections" key present so the UI can still render a list.
        return {"error": str(e), "collections": []}