"""Maintenance script: wipe all Qdrant collections and rebuild the indexes.

WARNING: every collection reported by the Qdrant client is deleted before
re-indexing. Intended for dev / repair use, not for routine production runs.

When run as a script, the process exits with status 1 if the rebuild fails.
"""
import sys
import os
import traceback

# Must run before the project imports below so that they resolve
# (presumably '/app' is the application root inside the container).
sys.path.insert(0, '/app')

from deps import get_db, get_qdrant_client
from config import get_settings
from services.provider_factory import ProviderFactory
from services.rag_service import RAGService
from services.document_indexer import DocumentIndexer


def deep_clean():
    """Delete every Qdrant collection, then re-index documents and posts.

    Steps:
      1. Create the embedding provider named by
         ``settings.default_embedding_provider``.
      2. Delete all collections returned by ``qdrant.get_collections()``.
      3. Re-index documents via ``DocumentIndexer.index_all_categories``
         with ``force=True``.
      4. Re-index posts via ``RAGService.index_all_posts`` for the
         locales ``"de"`` and ``"en"``.

    Errors are reported on stdout (message plus traceback) rather than
    propagated, and the database connection is always closed.

    Returns:
        bool: ``True`` if every step completed, ``False`` if any step raised.
    """
    print("🧹 Starting Deep Clean & Re-index...")

    db_conn = get_db()

    try:
        # Acquired inside the try so that a failure here still closes
        # db_conn and is reported like any other error.
        qdrant = get_qdrant_client()
        settings = get_settings()

        # 1. Setup Provider
        provider = ProviderFactory.create_provider(
            provider_name=settings.default_embedding_provider,
            settings=settings
        )
        print(f"✅ Using provider: {provider.provider_name} ({provider.model_name})")

        # 2. Clear Collections (removes orphaned entries).
        # NOTE: destructive -- this drops *every* collection the client
        # reports (e.g. "posts_de", "posts_en", "docs_crumbforest"), not
        # only the ones rebuilt below. Dangerous in production.
        collections = qdrant.get_collections().collections
        for col in collections:
            print(f"🗑️ Deleting collection: {col.name}")
            qdrant.delete_collection(col.name)

        # 3. Re-scan Documents
        print("📂 Indexing Documents (Files)...")
        indexer = DocumentIndexer(db_conn, qdrant, provider)
        # Force re-index to ensure new IDs are used
        doc_results = indexer.index_all_categories(force=True)
        print(f" Indexed {doc_results['total_indexed']} documents.")

        # 4. Re-index Posts (DB)
        print("💾 Indexing Posts (SQL)...")
        rag = RAGService(db_conn, qdrant, provider)
        # Supported locales are hard-coded here; could come from config.
        for loc in ["de", "en"]:
            res = rag.index_all_posts(locale=loc)
            print(f" Locale {loc}: {res['indexed']} posts indexed.")

        print("✨ Deep Clean Complete!")
        return True

    except Exception as e:
        # Top-level boundary of a maintenance script: report and signal
        # failure through the return value instead of crashing.
        print(f"❌ Error: {e}")
        traceback.print_exc()
        return False
    finally:
        db_conn.close()


if __name__ == "__main__":
    # Propagate failure to the shell so callers / CI can detect it.
    sys.exit(0 if deep_clean() else 1)