Initial commit: Crumbforest Architecture Refinement v1 (Clean)
This commit is contained in:
202
app/utils/chat_logger.py
Normal file
202
app/utils/chat_logger.py
Normal file
@@ -0,0 +1,202 @@
"""
Chat Logger Utility

DSGVO-compliant logging of chat interactions.
"""

import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
||||
class ChatLogger:
    """
    Logger for chat interactions.

    Appends each interaction as one JSON object per line (JSONL) for easy
    parsing and DSGVO compliance.
    """

    def __init__(self, log_dir: str = "logs", log_file: str = "chat_history.jsonl"):
        """
        Initialize chat logger.

        Args:
            log_dir: Directory for log files
            log_file: Name of log file (JSONL format)
        """
        self.log_dir = Path(log_dir)
        self.log_file = self.log_dir / log_file

        # Create directory and file up front so later appends never fail
        # on a missing path.
        self.log_dir.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    def log_interaction(
        self,
        character_id: str,
        character_name: str,
        user_id: Optional[str],
        user_role: Optional[str],
        question: str,
        answer: str,
        model: str,
        provider: str,
        context_found: bool,
        sources_count: int,
        lang: str = "de",
        session_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Log a chat interaction.

        Args:
            character_id: ID of the character (e.g., "eule")
            character_name: Display name (e.g., "Krümeleule")
            user_id: User ID (if authenticated) or None for anonymous
            user_role: User role (e.g., "admin", "user", "anonymous")
            question: User's question
            answer: Character's answer
            model: AI model used
            provider: Provider name (e.g., "openrouter")
            context_found: Whether RAG context was found
            sources_count: Number of sources used
            lang: Language code
            session_id: Optional session identifier

        Returns:
            Dictionary with log entry (returned even if writing it to
            disk failed, so callers can still inspect it).
        """
        log_entry = {
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # since Python 3.12. The replace() keeps the trailing-"Z" format
            # that earlier log entries used.
            "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "character": {
                "id": character_id,
                "name": character_name
            },
            "user": {
                # Anonymous fallback keeps the schema uniform for parsers.
                "id": user_id or "anonymous",
                "role": user_role or "anonymous"
            },
            "interaction": {
                "question": question,
                "answer": answer,
                "lang": lang
            },
            "rag": {
                "context_found": context_found,
                "sources_count": sources_count
            },
            "ai": {
                "provider": provider,
                "model": model
            },
            "session_id": session_id,
            # Token estimation (rough approximation)
            "tokens_estimated": self._estimate_tokens(question, answer)
        }

        # Append to JSONL file; ensure_ascii=False keeps umlauts readable.
        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
        except Exception as e:
            # Don't raise - logging failure shouldn't break the chat
            print(f"⚠️ Failed to write chat log: {e}")

        return log_entry

    def _estimate_tokens(self, question: str, answer: str) -> int:
        """
        Rough estimation of tokens used.

        Approximation: 1 token ≈ 4 characters for English/German.

        Args:
            question: User's question
            answer: AI's answer

        Returns:
            Estimated token count
        """
        total_chars = len(question) + len(answer)
        return total_chars // 4

    def get_recent_logs(self, limit: int = 100) -> list:
        """
        Get recent log entries.

        Args:
            limit: Maximum number of entries to return

        Returns:
            List of log entries (most recent first); empty list if the
            file is missing, unreadable, or limit is not positive.
        """
        if not self.log_file.exists():
            return []
        # Guard the slice: lines[-0:] would return ALL lines, not none.
        if limit <= 0:
            return []

        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # Parse JSONL, newest first; skip corrupt lines silently.
            logs = []
            for line in reversed(lines[-limit:]):
                try:
                    logs.append(json.loads(line))
                except json.JSONDecodeError:
                    continue

            return logs

        except Exception as e:
            print(f"⚠️ Failed to read chat logs: {e}")
            return []

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about logged interactions.

        Returns:
            Dictionary with statistics: total count, file size (bytes and
            MB), and per-character interaction counts.
        """
        if not self.log_file.exists():
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {}
            }

        try:
            file_size = self.log_file.stat().st_size

            # Stream the file once instead of slurping it with readlines();
            # the log grows without bound, so keep memory flat.
            total = 0
            character_counts: Dict[str, int] = {}
            with open(self.log_file, 'r', encoding='utf-8') as f:
                for line in f:
                    total += 1
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    char_id = entry.get('character', {}).get('id', 'unknown')
                    character_counts[char_id] = character_counts.get(char_id, 0) + 1

            return {
                "total_interactions": total,
                "file_size_bytes": file_size,
                "file_size_mb": round(file_size / (1024 * 1024), 2),
                "characters": character_counts
            }

        except Exception as e:
            print(f"⚠️ Failed to get chat stats: {e}")
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {},
                "error": str(e)
            }
|
||||
Reference in New Issue
Block a user