# Crumb-Core-v.1/app/utils/security.py

"""
Security utilities for input validation and filtering.
"""
import re
from typing import Optional


class PromptInjectionFilter:
    """Filter to detect and sanitize potential prompt injection attempts.

    Detection is purely regex based: every entry of ``DANGEROUS_PATTERNS`` is
    compiled case-insensitively and searched for anywhere in the input.
    """

    # Common prompt injection patterns
    DANGEROUS_PATTERNS = [
        # English
        r"ignore\s+(all\s+)?(previous|prior|above)\s+(instructions?|prompts?|commands?)",
        r"disregard\s+(all\s+)?(previous|prior|above)",
        r"forget\s+(all\s+)?(previous|prior|above)",
        r"you\s+are\s+now",
        r"your\s+new\s+(role|instructions?|prompt)",
        r"system\s+prompt",
        r"tell\s+me\s+your\s+(instructions?|prompt|system)",
        r"what\s+(are|is)\s+your\s+(instructions?|prompt|rules)",

        # German
        r"ignoriere\s+(alle\s+)?(vorherigen?|obigen?)\s+(anweisungen?|prompts?|befehle?)",
        r"vergiss\s+(alle\s+)?(vorherigen?|obigen?)",
        r"du\s+bist\s+jetzt",
        r"deine\s+neue\s+(rolle|anweisung)",
        # Also covers "systemprompt" / "system-prompt" (no whitespace required)
        r"system\s*-?\s*prompt",
        r"sage?\s+mir\s+deine\s+(anweisungen?|prompt|regeln)",

        # Chinese (common in jailbreak attempts)
        # NOTE: the first pattern is a substring of the second, so it already
        # matches every input the second one would match.
        r"你是",
        r"现在你是",
        r"忽略之前",

        # Role manipulation
        # Allow educational roles. The optional article has to be checked
        # *inside* the lookahead: with it outside, the regex engine can
        # backtrack to "no article" and the lookahead then sees "a teacher"
        # instead of "teacher", flagging the roles that should be allowed.
        # The leading \s* in the lookahead stops the preceding \s+ from giving
        # back whitespace to dodge the check. The trailing (?:a\s+)? keeps the
        # article part of the match so sanitize() still strips it.
        r"act\s+as\s+(?!\s*(?:a\s+)?(?:teacher|tutor|guide))(?:a\s+)?",
        r"pretend\s+to\s+be",
        r"roleplay\s+as",

        # System commands
        r"<\s*system\s*>",
        r"<\s*admin\s*>",
        r"sudo\s+",

        # Attempts to break out of context
        r"\[SYSTEM\]",
        r"\[INST\]",
        r"###\s*Instruction",
    ]

    def __init__(self):
        """Initialize the filter with compiled regex patterns."""
        self.patterns = [
            re.compile(pattern, re.IGNORECASE | re.MULTILINE)
            for pattern in self.DANGEROUS_PATTERNS
        ]

    def is_suspicious(self, text: str) -> bool:
        """
        Check if text contains suspicious prompt injection patterns.

        Args:
            text: User input to check

        Returns:
            True if any pattern matches anywhere in the text, False otherwise
        """
        return any(pattern.search(text) for pattern in self.patterns)

    def sanitize(self, text: str, replace_with: str = "[FILTERED]") -> str:
        """
        Sanitize text by replacing suspicious patterns.

        Patterns are applied one after another in ``DANGEROUS_PATTERNS``
        order, each on the output of the previous substitution.

        Args:
            text: User input to sanitize
            replace_with: Replacement text for suspicious patterns

        Returns:
            Sanitized text
        """
        sanitized = text
        for pattern in self.patterns:
            sanitized = pattern.sub(replace_with, sanitized)
        return sanitized

    def validate(self, text: str, max_length: int = 2000) -> tuple[bool, Optional[str]]:
        """
        Validate user input for length and injection attempts.

        Checks run in order (length, injection patterns, repetition) and the
        first failing check determines the error message.

        Args:
            text: User input to validate
            max_length: Maximum allowed length

        Returns:
            Tuple of (is_valid, error_message); error_message is None when
            the input is valid
        """
        # Check length first so the regex checks only ever see bounded input
        if len(text) > max_length:
            return False, f"Input too long (max {max_length} characters)"

        # Check for prompt injection
        if self.is_suspicious(text):
            return False, "Input contains suspicious patterns"

        # Check for excessive repetition (potential DoS)
        if self._has_excessive_repetition(text):
            return False, "Input contains excessive repetition"

        return True, None

    def _has_excessive_repetition(
        self, text: str, threshold: int = 50, word_threshold: int = 20
    ) -> bool:
        """
        Check if text has excessive character/word repetition.

        Args:
            text: Text to check
            threshold: Maximum allowed length of a run of one repeated
                character; a longer run (more than ``threshold`` identical
                consecutive characters) is excessive. Newlines are not
                counted because ``.`` does not match them. Expected to be a
                positive integer.
            word_threshold: Maximum allowed length of a run of one repeated
                whitespace-separated word; a longer run is excessive.

        Returns:
            True if excessive repetition detected
        """
        # One character followed by at least `threshold` copies of itself,
        # i.e. a run of threshold + 1 or more identical characters.
        if re.search(r"(.)\1{%d,}" % threshold, text):
            return True

        # Single linear pass tracking the length of the current word run
        previous = None
        run_length = 0
        for word in text.split():
            if word == previous:
                run_length += 1
            else:
                previous = word
                run_length = 1
            if run_length > word_threshold:
                return True

        return False


def sanitize_for_logging(text: str, max_length: int = 500) -> str:
    """
    Sanitize text for safe logging (remove PII hints, truncate).

    Email addresses, 10-digit phone numbers and 16-digit card numbers are
    replaced with ``[EMAIL]``, ``[PHONE]`` and ``[CC]`` placeholders. The
    redacted text is then cut to ``max_length`` characters, with ``"..."``
    appended when anything was cut.

    Args:
        text: Text to sanitize
        max_length: Maximum length for logging (excluding the "..." marker)

    Returns:
        Sanitized text safe for logging
    """
    # Redact BEFORE truncating: cutting first can slice an email/number in
    # half so it no longer matches its pattern and the fragment gets logged.

    # Basic PII patterns (email, phone, credit card)
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', '[EMAIL]', text)
    text = re.sub(r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b', '[PHONE]', text)
    text = re.sub(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b', '[CC]', text)

    # Truncate
    if len(text) > max_length:
        text = text[:max_length] + "..."

    return text