Initial commit: Crumbforest Architecture Refinement v1 (Clean)
This commit is contained in:
202
app/utils/chat_logger.py
Normal file
202
app/utils/chat_logger.py
Normal file
@@ -0,0 +1,202 @@
"""
Chat Logger Utility

DSGVO-compliant logging of chat interactions.
"""

import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
||||
class ChatLogger:
    """
    Logger for chat interactions.

    Appends each interaction as one JSON object per line (JSONL) for easy
    parsing and DSGVO compliance.
    """

    def __init__(self, log_dir: str = "logs", log_file: str = "chat_history.jsonl"):
        """
        Initialize chat logger.

        Args:
            log_dir: Directory for log files
            log_file: Name of log file (JSONL format)
        """
        self.log_dir = Path(log_dir)
        self.log_file = self.log_dir / log_file

        # Create directory and file up front so later appends never fail
        # on a missing path.
        self.log_dir.mkdir(parents=True, exist_ok=True)
        if not self.log_file.exists():
            self.log_file.touch()

    def log_interaction(
        self,
        character_id: str,
        character_name: str,
        user_id: Optional[str],
        user_role: Optional[str],
        question: str,
        answer: str,
        model: str,
        provider: str,
        context_found: bool,
        sources_count: int,
        lang: str = "de",
        session_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Log a chat interaction.

        Args:
            character_id: ID of the character (e.g., "eule")
            character_name: Display name (e.g., "Krümeleule")
            user_id: User ID (if authenticated) or None for anonymous
            user_role: User role (e.g., "admin", "user", "anonymous")
            question: User's question
            answer: Character's answer
            model: AI model used
            provider: Provider name (e.g., "openrouter")
            context_found: Whether RAG context was found
            sources_count: Number of sources used
            lang: Language code
            session_id: Optional session identifier

        Returns:
            Dictionary with log entry (returned even if writing it to
            disk failed, so callers can still inspect it).
        """
        log_entry = {
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # since Python 3.12. The replace() keeps the trailing-"Z" format
            # that earlier log entries used.
            "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            "character": {
                "id": character_id,
                "name": character_name
            },
            "user": {
                # Anonymous fallback keeps the schema uniform for parsers.
                "id": user_id or "anonymous",
                "role": user_role or "anonymous"
            },
            "interaction": {
                "question": question,
                "answer": answer,
                "lang": lang
            },
            "rag": {
                "context_found": context_found,
                "sources_count": sources_count
            },
            "ai": {
                "provider": provider,
                "model": model
            },
            "session_id": session_id,
            # Token estimation (rough approximation)
            "tokens_estimated": self._estimate_tokens(question, answer)
        }

        # Append to JSONL file; ensure_ascii=False keeps umlauts readable.
        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
        except Exception as e:
            # Don't raise - logging failure shouldn't break the chat
            print(f"⚠️ Failed to write chat log: {e}")

        return log_entry

    def _estimate_tokens(self, question: str, answer: str) -> int:
        """
        Rough estimation of tokens used.

        Approximation: 1 token ≈ 4 characters for English/German.

        Args:
            question: User's question
            answer: AI's answer

        Returns:
            Estimated token count
        """
        total_chars = len(question) + len(answer)
        return total_chars // 4

    def get_recent_logs(self, limit: int = 100) -> list:
        """
        Get recent log entries.

        Args:
            limit: Maximum number of entries to return

        Returns:
            List of log entries (most recent first); empty list if the
            file is missing, unreadable, or limit is not positive.
        """
        if not self.log_file.exists():
            return []
        # Guard the slice: lines[-0:] would return ALL lines, not none.
        if limit <= 0:
            return []

        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # Parse JSONL, newest first; skip corrupt lines silently.
            logs = []
            for line in reversed(lines[-limit:]):
                try:
                    logs.append(json.loads(line))
                except json.JSONDecodeError:
                    continue

            return logs

        except Exception as e:
            print(f"⚠️ Failed to read chat logs: {e}")
            return []

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about logged interactions.

        Returns:
            Dictionary with statistics: total count, file size (bytes and
            MB), and per-character interaction counts.
        """
        if not self.log_file.exists():
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {}
            }

        try:
            file_size = self.log_file.stat().st_size

            # Stream the file once instead of slurping it with readlines();
            # the log grows without bound, so keep memory flat.
            total = 0
            character_counts: Dict[str, int] = {}
            with open(self.log_file, 'r', encoding='utf-8') as f:
                for line in f:
                    total += 1
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    char_id = entry.get('character', {}).get('id', 'unknown')
                    character_counts[char_id] = character_counts.get(char_id, 0) + 1

            return {
                "total_interactions": total,
                "file_size_bytes": file_size,
                "file_size_mb": round(file_size / (1024 * 1024), 2),
                "characters": character_counts
            }

        except Exception as e:
            print(f"⚠️ Failed to get chat stats: {e}")
            return {
                "total_interactions": 0,
                "file_size": 0,
                "characters": {},
                "error": str(e)
            }
|
||||
Reference in New Issue
Block a user