"""
|
|
Chat Logger Utility
|
|
DSGVO-compliant logging of chat interactions.
|
|
"""
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import Dict, Any, Optional
|
|
|
|
|
|
class ChatLogger:
|
|
"""
|
|
Logger for chat interactions.
|
|
Appends to JSONL file for easy parsing and DSGVO compliance.
|
|
"""
|
|
|
|
def __init__(self, log_dir: str = "logs", log_file: str = "chat_history.jsonl"):
|
|
"""
|
|
Initialize chat logger.
|
|
|
|
Args:
|
|
log_dir: Directory for log files
|
|
log_file: Name of log file (JSONL format)
|
|
"""
|
|
self.log_dir = Path(log_dir)
|
|
self.log_file = self.log_dir / log_file
|
|
|
|
# Ensure log directory exists
|
|
self.log_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Ensure log file exists
|
|
if not self.log_file.exists():
|
|
self.log_file.touch()
|
|
|
|
    def log_interaction(
        self,
        character_id: str,
        character_name: str,
        user_id: Optional[str],
        user_role: Optional[str],
        question: str,
        answer: str,
        model: str,
        provider: str,
        context_found: bool,
        sources_count: int,
        lang: str = "de",
        session_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Log a chat interaction.

        Args:
            character_id: ID of the character (e.g., "eule")
            character_name: Display name (e.g., "Krümeleule")
            user_id: User ID (if authenticated) or None for anonymous
            user_role: User role (e.g., "admin", "user", "anonymous")
            question: User's question
            answer: Character's answer
            model: AI model used
            provider: Provider name (e.g., "openrouter")
            context_found: Whether RAG context was found
            sources_count: Number of sources used
            lang: Language code
            session_id: Optional session identifier

        Returns:
            Dictionary with the log entry
        """
        # Create log entry
        log_entry = {
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "character": {
                "id": character_id,
                "name": character_name
            },
            "user": {
                "id": user_id or "anonymous",
                "role": user_role or "anonymous"
            },
            "interaction": {
                "question": question,
                "answer": answer,
                "lang": lang
            },
            "rag": {
                "context_found": context_found,
                "sources_count": sources_count
            },
            "ai": {
                "provider": provider,
                "model": model
            },
            "session_id": session_id,
            # Token estimation (rough approximation)
            "tokens_estimated": self._estimate_tokens(question, answer)
        }

        # Append to JSONL file
        try:
            with open(self.log_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
        except Exception as e:
            print(f"⚠️ Failed to write chat log: {e}")
            # Don't raise - logging failure shouldn't break the chat

        return log_entry

    def _estimate_tokens(self, question: str, answer: str) -> int:
        """
        Rough estimation of tokens used.
        Approximation: 1 token ≈ 4 characters for English/German.

        Args:
            question: User's question
            answer: AI's answer

        Returns:
            Estimated token count
        """
        total_chars = len(question) + len(answer)
        return total_chars // 4

    def get_recent_logs(self, limit: int = 100) -> list:
        """
        Get recent log entries.

        Args:
            limit: Maximum number of entries to return

        Returns:
            List of log entries (most recent first)
        """
        if not self.log_file.exists():
            return []

        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # Parse JSONL (most recent first)
            logs = []
            for line in reversed(lines[-limit:]):
                try:
                    logs.append(json.loads(line))
                except json.JSONDecodeError:
                    continue

            return logs

        except Exception as e:
            print(f"⚠️ Failed to read chat logs: {e}")
            return []

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about logged interactions.

        Returns:
            Dictionary with statistics
        """
        if not self.log_file.exists():
            return {
                "total_interactions": 0,
                "file_size_bytes": 0,
                "characters": {}
            }

        try:
            # Count lines
            with open(self.log_file, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # Get file size
            file_size = self.log_file.stat().st_size

            # Count stats
            character_counts = {}
            total_tokens = 0
            tokens_by_model = {}
            tokens_by_role = {}
            context_found_count = 0

            # Simple pricing model (blended average for OpenRouter)
            # Input: ~$5/M, Output: ~$15/M -> Avg ~$10/M = $0.00001 per token
            PRICE_PER_TOKEN = 0.00001

            for line in lines:
                try:
                    entry = json.loads(line)

                    # Character stats
                    char_id = entry.get('character', {}).get('id', 'unknown')
                    character_counts[char_id] = character_counts.get(char_id, 0) + 1

                    # Token stats
                    tokens = entry.get('tokens_estimated', 0)
                    total_tokens += tokens

                    # Model stats
                    model = entry.get('ai', {}).get('model', 'unknown')
                    tokens_by_model[model] = tokens_by_model.get(model, 0) + tokens

                    # Role stats (keyed by the user's role, e.g. "admin" or "anonymous")
                    role = entry.get('user', {}).get('role', 'unknown')
                    tokens_by_role[role] = tokens_by_role.get(role, 0) + tokens

                    # RAG stats
                    if entry.get('rag', {}).get('context_found'):
                        context_found_count += 1

                except json.JSONDecodeError:
                    continue

            total_interactions = len(lines)
            context_hit_rate = round((context_found_count / total_interactions * 100), 1) if total_interactions > 0 else 0
            estimated_cost = round(total_tokens * PRICE_PER_TOKEN, 4)

            return {
                "total_interactions": total_interactions,
                "total_tokens_estimated": total_tokens,
                "estimated_cost_usd": estimated_cost,
                "context_found_count": context_found_count,
                "context_hit_rate_percent": context_hit_rate,
                "file_size_bytes": file_size,
                "file_size_mb": round(file_size / (1024 * 1024), 2),
                "characters": character_counts,
                "tokens_by_model": tokens_by_model,
                "tokens_by_role": tokens_by_role,
                "last_updated": datetime.utcnow().isoformat() + "Z"
            }

        except Exception as e:
            print(f"⚠️ Failed to get chat stats: {e}")
            return {
                "total_interactions": 0,
                "file_size_bytes": 0,
                "characters": {},
                "error": str(e)
            }
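

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only): shows how log_interaction,
# get_recent_logs and get_stats fit together. The question/answer strings,
# the model name and the user values below are placeholders, not real data;
# the character and provider identifiers follow the examples given in the
# docstrings above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logger = ChatLogger(log_dir="logs", log_file="chat_history.jsonl")

    entry = logger.log_interaction(
        character_id="eule",
        character_name="Krümeleule",
        user_id=None,                # anonymous user
        user_role=None,
        question="How does the chat logger work?",               # placeholder
        answer="It appends each interaction as a JSONL line.",   # placeholder
        model="example-model",       # placeholder model identifier
        provider="openrouter",
        context_found=True,
        sources_count=2,
    )
    print(f"Logged entry with ~{entry['tokens_estimated']} estimated tokens")
    print(f"Recent entries: {len(logger.get_recent_logs(limit=10))}")
    print(f"Stats: {logger.get_stats()}")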