# Source file: Crumb-Core-v.1/app/utils/chat_logger.py
# (241 lines, 7.6 KiB, Python)
"""
Chat Logger Utility
DSGVO-compliant logging of chat interactions.
"""
import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
class ChatLogger:
    """
    Logger for chat interactions.

    Appends one JSON object per line (JSONL format) so entries are easy
    to parse, rotate and audit for DSGVO compliance.
    """

    # Simple pricing model (blended average for OpenRouter):
    # input ~$5/M, output ~$15/M -> avg ~$10/M = $0.00001 per token.
    PRICE_PER_TOKEN = 0.00001

    def __init__(self, log_dir: str = "logs", log_file: str = "chat_history.jsonl"):
        """
        Initialize chat logger.

        Args:
            log_dir: Directory for log files
            log_file: Name of log file (JSONL format)
        """
        self.log_dir = Path(log_dir)
        self.log_file = self.log_dir / log_file
        # Create the directory and file up front so later appends/reads
        # never fail on a missing path.
        self.log_dir.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ISO-8601 with a 'Z' suffix."""
        # datetime.utcnow() is deprecated since Python 3.12; use an
        # aware datetime and normalize the "+00:00" offset to "Z" so the
        # emitted format matches the previous `utcnow().isoformat() + "Z"`.
        return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    def log_interaction(
        self,
        character_id: str,
        character_name: str,
        user_id: Optional[str],
        user_role: Optional[str],
        question: str,
        answer: str,
        model: str,
        provider: str,
        context_found: bool,
        sources_count: int,
        lang: str = "de",
        session_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Log a chat interaction.

        Args:
            character_id: ID of the character (e.g., "eule")
            character_name: Display name (e.g., "Krümeleule")
            user_id: User ID (if authenticated) or None for anonymous
            user_role: User role (e.g., "admin", "user", "anonymous")
            question: User's question
            answer: Character's answer
            model: AI model used
            provider: Provider name (e.g., "openrouter")
            context_found: Whether RAG context was found
            sources_count: Number of sources used
            lang: Language code
            session_id: Optional session identifier

        Returns:
            Dictionary with the log entry (returned even if the disk
            write failed, since logging must not break the chat).
        """
        log_entry = {
            "timestamp": self._utc_timestamp(),
            "character": {
                "id": character_id,
                "name": character_name
            },
            "user": {
                "id": user_id or "anonymous",
                "role": user_role or "anonymous"
            },
            "interaction": {
                "question": question,
                "answer": answer,
                "lang": lang
            },
            "rag": {
                "context_found": context_found,
                "sources_count": sources_count
            },
            "ai": {
                "provider": provider,
                "model": model
            },
            "session_id": session_id,
            # Token estimation (rough approximation)
            "tokens_estimated": self._estimate_tokens(question, answer)
        }
        # Append to JSONL file
        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
        except Exception as e:
            # Deliberately broad: logging failure shouldn't break the chat.
            print(f"⚠️ Failed to write chat log: {e}")
        return log_entry

    def _estimate_tokens(self, question: str, answer: str) -> int:
        """
        Rough estimation of tokens used.

        Approximation: 1 token ≈ 4 characters for English/German.

        Args:
            question: User's question
            answer: AI's answer

        Returns:
            Estimated token count
        """
        return (len(question) + len(answer)) // 4

    def get_recent_logs(self, limit: int = 100) -> list:
        """
        Get recent log entries.

        Args:
            limit: Maximum number of entries to return

        Returns:
            List of log entries (most recent first); unparseable lines
            are skipped silently.
        """
        if not self.log_file.exists():
            return []
        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            # Keep only the newest `limit` lines, newest first.
            logs = []
            for line in reversed(lines[-limit:]):
                try:
                    logs.append(json.loads(line))
                except json.JSONDecodeError:
                    continue
            return logs
        except Exception as e:
            print(f"⚠️ Failed to read chat logs: {e}")
            return []

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about logged interactions.

        Returns:
            Dictionary with totals, estimated token usage/cost, RAG hit
            rate, file size, and per-character / per-model / per-role
            token breakdowns. On read failure, a zeroed fallback dict
            with an "error" key is returned instead of raising.
        """
        if not self.log_file.exists():
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {}
            }
        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            file_size = self.log_file.stat().st_size

            character_counts: Dict[str, int] = {}
            total_tokens = 0
            tokens_by_model: Dict[str, int] = {}
            tokens_by_role: Dict[str, int] = {}
            context_found_count = 0

            for line in lines:
                try:
                    entry = json.loads(line)
                except json.JSONDecodeError:
                    # Skip corrupt lines; still counted in total below,
                    # matching the original len(lines) semantics.
                    continue
                # Character stats
                char_id = entry.get('character', {}).get('id', 'unknown')
                character_counts[char_id] = character_counts.get(char_id, 0) + 1
                # Token stats
                tokens = entry.get('tokens_estimated', 0)
                total_tokens += tokens
                # Model stats
                model = entry.get('ai', {}).get('model', 'unknown')
                tokens_by_model[model] = tokens_by_model.get(model, 0) + tokens
                # Role stats. Bug fix: this was previously keyed by
                # char_id, silently duplicating the character breakdown;
                # key by the logged user role as the name promises.
                role = entry.get('user', {}).get('role', 'unknown')
                tokens_by_role[role] = tokens_by_role.get(role, 0) + tokens
                # RAG stats
                if entry.get('rag', {}).get('context_found'):
                    context_found_count += 1

            total_interactions = len(lines)
            context_hit_rate = (
                round(context_found_count / total_interactions * 100, 1)
                if total_interactions > 0 else 0
            )
            estimated_cost = round(total_tokens * self.PRICE_PER_TOKEN, 4)
            return {
                "total_interactions": total_interactions,
                "total_tokens_estimated": total_tokens,
                "estimated_cost_usd": estimated_cost,
                "context_found_count": context_found_count,
                "context_hit_rate_percent": context_hit_rate,
                "file_size_bytes": file_size,
                "file_size_mb": round(file_size / (1024 * 1024), 2),
                "characters": character_counts,
                "tokens_by_model": tokens_by_model,
                "tokens_by_role": tokens_by_role,
                "last_updated": self._utc_timestamp()
            }
        except Exception as e:
            print(f"⚠️ Failed to get chat stats: {e}")
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {},
                "error": str(e)
            }