Initial commit: Crumbforest Architecture Refinement v1 (Clean)
This commit is contained in:
168
app/utils/security.py
Normal file
168
app/utils/security.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""
|
||||
Security utilities for input validation and filtering.
|
||||
"""
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class PromptInjectionFilter:
    """Detect and sanitize potential prompt injection attempts.

    The filter is a heuristic: every entry of ``DANGEROUS_PATTERNS`` is
    compiled case-insensitively and searched for anywhere in the input.
    """

    # Common prompt injection patterns (regular expressions).
    DANGEROUS_PATTERNS = [
        # English
        r"ignore\s+(all\s+)?(previous|prior|above)\s+(instructions?|prompts?|commands?)",
        r"disregard\s+(all\s+)?(previous|prior|above)",
        r"forget\s+(all\s+)?(previous|prior|above)",
        r"you\s+are\s+now",
        r"your\s+new\s+(role|instructions?|prompt)",
        r"system\s+prompt",
        r"tell\s+me\s+your\s+(instructions?|prompt|system)",
        r"what\s+(are|is)\s+your\s+(instructions?|prompt|rules)",

        # German
        r"ignoriere\s+(alle\s+)?(vorherigen?|obigen?)\s+(anweisungen?|prompts?|befehle?)",
        r"vergiss\s+(alle\s+)?(vorherigen?|obigen?)",
        r"du\s+bist\s+jetzt",
        r"deine\s+neue\s+(rolle|anweisung)",
        r"system\s*-?\s*prompt",
        r"sage?\s+mir\s+deine\s+(anweisungen?|prompt|regeln)",

        # Chinese (common in jailbreak attempts)
        r"你是",
        r"现在你是",
        r"忽略之前",

        # Role manipulation.
        # Educational roles (teacher/tutor/guide) are allowed. The optional
        # article and any extra whitespace are checked INSIDE the lookahead;
        # otherwise the regex engine backtracks (article matched as empty,
        # or one space fewer) and "act as a teacher" is flagged anyway.
        # The trailing optional article keeps the matched span the same as
        # before for inputs that are flagged.
        r"act\s+as\s+(?!\s*(?:a\s+)?(?:teacher|tutor|guide))(?:a\s+)?",
        r"pretend\s+to\s+be",
        r"roleplay\s+as",

        # System commands
        r"<\s*system\s*>",
        r"<\s*admin\s*>",
        r"sudo\s+",

        # Attempts to break out of context
        r"\[SYSTEM\]",
        r"\[INST\]",
        r"###\s*Instruction",
    ]

    def __init__(self):
        """Initialize the filter with compiled regex patterns."""
        self.patterns = [
            re.compile(pattern, re.IGNORECASE | re.MULTILINE)
            for pattern in self.DANGEROUS_PATTERNS
        ]

    def is_suspicious(self, text: str) -> bool:
        """
        Check if text contains suspicious prompt injection patterns.

        Args:
            text: User input to check

        Returns:
            True if any compiled pattern is found in the text, False otherwise
        """
        return any(pattern.search(text) for pattern in self.patterns)

    def sanitize(self, text: str, replace_with: str = "[FILTERED]") -> str:
        """
        Sanitize text by replacing suspicious patterns.

        Patterns are applied one after another, in the order of
        ``DANGEROUS_PATTERNS``, each on the result of the previous one.

        Args:
            text: User input to sanitize
            replace_with: Replacement text for suspicious patterns

        Returns:
            Sanitized text
        """
        sanitized = text
        for pattern in self.patterns:
            sanitized = pattern.sub(replace_with, sanitized)
        return sanitized

    def validate(self, text: str, max_length: int = 2000) -> tuple[bool, Optional[str]]:
        """
        Validate user input for length and injection attempts.

        The checks run in this order and the first failure is reported:
        length, injection patterns, excessive repetition.

        Args:
            text: User input to validate
            max_length: Maximum allowed length

        Returns:
            Tuple of (is_valid, error_message); error_message is None when
            the input is valid
        """
        # Check length first so the later checks only see bounded input.
        if len(text) > max_length:
            return False, f"Input too long (max {max_length} characters)"

        # Check for prompt injection
        if self.is_suspicious(text):
            return False, "Input contains suspicious patterns"

        # Check for excessive repetition (potential DoS)
        if self._has_excessive_repetition(text):
            return False, "Input contains excessive repetition"

        return True, None

    def _has_excessive_repetition(
        self, text: str, threshold: int = 50, word_threshold: int = 20
    ) -> bool:
        """
        Check if text has excessive character/word repetition.

        Args:
            text: Text to check
            threshold: Maximum allowed length of a run of one identical
                character; longer runs are reported (expected to be >= 0)
            word_threshold: Maximum allowed length of a run of one identical
                whitespace-separated word; longer runs are reported

        Returns:
            True if excessive repetition detected
        """
        # Character repetition: one character followed by at least
        # `threshold` copies of itself, i.e. a run longer than `threshold`.
        # DOTALL lets "." match newlines too, so runs of blank lines count.
        if re.search(r"(.)\1{%d,}" % threshold, text, re.DOTALL):
            return True

        # Word repetition: single linear pass tracking the current run of
        # identical consecutive words.
        previous = None
        run_length = 0
        for word in text.split():
            run_length = run_length + 1 if word == previous else 1
            if run_length > word_threshold:
                return True
            previous = word

        return False
|
||||
|
||||
|
||||
def sanitize_for_logging(text: str, max_length: int = 500) -> str:
    """
    Sanitize text for safe logging (mask PII hints, then truncate).

    Email addresses, phone-like numbers and 16-digit card-like numbers are
    replaced by ``[EMAIL]``, ``[PHONE]`` and ``[CC]``. The masked text is
    then cut to ``max_length`` characters, with ``"..."`` appended when
    anything was cut off.

    Args:
        text: Text to sanitize
        max_length: Maximum length for logging (not counting the "..." suffix)

    Returns:
        Sanitized text safe for logging
    """
    # Mask PII BEFORE truncating: if the cut happened first, an address or
    # number straddling the limit would be sliced, no longer match its
    # pattern, and leak in part.
    # Note the TLD class is [A-Za-z]; a "|" inside [...] is a literal pipe.
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]', text)
    text = re.sub(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[PHONE]', text)
    text = re.sub(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b', '[CC]', text)

    # Truncate
    if len(text) > max_length:
        text = text[:max_length] + "..."

    return text
|
||||
Reference in New Issue
Block a user