# Source file: Crumb-Core-v.1/app/utils/chat_logger.py
# (241 lines, 7.6 KiB, Python)
"""
Chat Logger Utility
DSGVO-compliant logging of chat interactions.
"""
import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
class ChatLogger:
    """
    Logger for chat interactions.

    Appends one JSON object per line (JSONL format) so entries are easy
    to parse, rotate and audit for DSGVO compliance.
    """

    # Simple pricing model (blended average for OpenRouter):
    # input ~$5/M, output ~$15/M -> avg ~$10/M = $0.00001 per token.
    PRICE_PER_TOKEN = 0.00001

    def __init__(self, log_dir: str = "logs", log_file: str = "chat_history.jsonl"):
        """
        Initialize chat logger.

        Args:
            log_dir: Directory for log files
            log_file: Name of log file (JSONL format)
        """
        self.log_dir = Path(log_dir)
        self.log_file = self.log_dir / log_file
        # Create the directory and file up front so later appends/reads
        # never fail on a missing path.
        self.log_dir.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ISO-8601 with a 'Z' suffix."""
        # datetime.utcnow() is deprecated since Python 3.12; use an
        # aware datetime and normalize the "+00:00" offset to "Z" so the
        # emitted format matches the previous `utcnow().isoformat() + "Z"`.
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    def log_interaction(
        self,
        character_id: str,
        character_name: str,
        user_id: Optional[str],
        user_role: Optional[str],
        question: str,
        answer: str,
        model: str,
        provider: str,
        context_found: bool,
        sources_count: int,
        lang: str = "de",
        session_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Log a chat interaction.

        Args:
            character_id: ID of the character (e.g., "eule")
            character_name: Display name (e.g., "Krümeleule")
            user_id: User ID (if authenticated) or None for anonymous
            user_role: User role (e.g., "admin", "user", "anonymous")
            question: User's question
            answer: Character's answer
            model: AI model used
            provider: Provider name (e.g., "openrouter")
            context_found: Whether RAG context was found
            sources_count: Number of sources used
            lang: Language code
            session_id: Optional session identifier

        Returns:
            Dictionary with the log entry (returned even if the disk
            write failed, since logging must not break the chat).
        """
        log_entry = {
            "timestamp": self._utc_timestamp(),
            "character": {
                "id": character_id,
                "name": character_name
            },
            "user": {
                "id": user_id or "anonymous",
                "role": user_role or "anonymous"
            },
            "interaction": {
                "question": question,
                "answer": answer,
                "lang": lang
            },
            "rag": {
                "context_found": context_found,
                "sources_count": sources_count
            },
            "ai": {
                "provider": provider,
                "model": model
            },
            "session_id": session_id,
            # Token estimation (rough approximation)
            "tokens_estimated": self._estimate_tokens(question, answer)
        }
        # Append to JSONL file
        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
        except Exception as e:
            # Deliberately broad: logging failure shouldn't break the chat.
            print(f"⚠️ Failed to write chat log: {e}")
        return log_entry

    def _estimate_tokens(self, question: str, answer: str) -> int:
        """
        Rough estimation of tokens used.

        Approximation: 1 token ≈ 4 characters for English/German.

        Args:
            question: User's question
            answer: AI's answer

        Returns:
            Estimated token count
        """
        return (len(question) + len(answer)) // 4

    def get_recent_logs(self, limit: int = 100) -> list:
        """
        Get recent log entries.

        Args:
            limit: Maximum number of entries to return

        Returns:
            List of log entries (most recent first); unparseable lines
            are skipped silently.
        """
        if not self.log_file.exists():
            return []
        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            # Keep only the newest `limit` lines, newest first.
            logs = []
            for line in reversed(lines[-limit:]):
                try:
                    logs.append(json.loads(line))
                except json.JSONDecodeError:
                    continue
            return logs
        except Exception as e:
            print(f"⚠️ Failed to read chat logs: {e}")
            return []

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about logged interactions.

        Returns:
            Dictionary with totals, estimated token usage/cost, RAG hit
            rate, file size, and per-character / per-model / per-role
            token breakdowns. On read failure, a zeroed fallback dict
            with an "error" key is returned instead of raising.
        """
        if not self.log_file.exists():
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {}
            }
        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            file_size = self.log_file.stat().st_size

            character_counts: Dict[str, int] = {}
            total_tokens = 0
            tokens_by_model: Dict[str, int] = {}
            tokens_by_role: Dict[str, int] = {}
            context_found_count = 0

            for line in lines:
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    # Skip corrupt lines; still counted in total below,
                    # matching the original len(lines) semantics.
                    continue
                # Character stats
                char_id = entry.get('character', {}).get('id', 'unknown')
                character_counts[char_id] = character_counts.get(char_id, 0) + 1
                # Token stats
                tokens = entry.get('tokens_estimated', 0)
                total_tokens += tokens
                # Model stats
                model = entry.get('ai', {}).get('model', 'unknown')
                tokens_by_model[model] = tokens_by_model.get(model, 0) + tokens
                # Role stats. Bug fix: this was previously keyed by
                # char_id, silently duplicating the character breakdown;
                # key by the logged user role as the name promises.
                role = entry.get('user', {}).get('role', 'unknown')
                tokens_by_role[role] = tokens_by_role.get(role, 0) + tokens
                # RAG stats
                if entry.get('rag', {}).get('context_found'):
                    context_found_count += 1

            total_interactions = len(lines)
            context_hit_rate = (
                round(context_found_count / total_interactions * 100, 1)
                if total_interactions > 0 else 0
            )
            estimated_cost = round(total_tokens * self.PRICE_PER_TOKEN, 4)
            return {
                "total_interactions": total_interactions,
                "total_tokens_estimated": total_tokens,
                "estimated_cost_usd": estimated_cost,
                "context_found_count": context_found_count,
                "context_hit_rate_percent": context_hit_rate,
                "file_size_bytes": file_size,
                "file_size_mb": round(file_size / (1024 * 1024), 2),
                "characters": character_counts,
                "tokens_by_model": tokens_by_model,
                "tokens_by_role": tokens_by_role,
                "last_updated": self._utc_timestamp()
            }
        except Exception as e:
            print(f"⚠️ Failed to get chat stats: {e}")
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {},
                "error": str(e)
            }