From 5a81b0fdbc333b7693cb364f8f0e2bd38d435310 Mon Sep 17 00:00:00 2001 From: Branko May Trinkwald Date: Wed, 7 Jan 2026 14:57:32 +0100 Subject: [PATCH] Offline Ollama Vector Dojo --- KEKSHANDBUCH_ZERO_v0.0.md | 35 ++++ README.md | 15 +- crumb-mission-selector.sh | 9 +- .../tools/terminal_dojo/07_knowledge_setup.sh | 120 ++++++++++++ .../terminal_dojo/scripts/ingest_knowledge.py | 175 ++++++++++++++++++ missions/tools/terminal_dojo/terminal_dojo.sh | 12 +- 6 files changed, 357 insertions(+), 9 deletions(-) create mode 100755 missions/tools/terminal_dojo/07_knowledge_setup.sh create mode 100644 missions/tools/terminal_dojo/scripts/ingest_knowledge.py diff --git a/KEKSHANDBUCH_ZERO_v0.0.md b/KEKSHANDBUCH_ZERO_v0.0.md index 0f07396..a41a9c0 100644 --- a/KEKSHANDBUCH_ZERO_v0.0.md +++ b/KEKSHANDBUCH_ZERO_v0.0.md @@ -406,3 +406,38 @@ So bleibt der Flow am Laufen, ohne dass er sich verliert im Groove. **Version:** 0.0 (Zero Release) **Datum:** 2025-12-21 **Status:** Die Crew ist komplett! 🌲✨ + +--- + +## πŸ₯‹ Das Dojo & Das Nullfeld + +Willkommen im **Terminal Dojo** - dem Trainingsraum fΓΌr WaldwΓ€chter und solche, die es werden wollen. + +Hier trainiert der BΓ€r seine Kraft, der Fuchs seinen Flow und die Eule ihr GedΓ€chtnis. Und hier kannst du deine Werkzeuge polieren. + +### Was findest du im Dojo? +Start: `./crumb-mission-selector.sh` -> **Option 6** + +1. **Git Status**: Ist dein Wald synchron mit der Welt? +2. **OpenCode Installer**: Deine Werkstatt (IDE) einrichten. +3. **Ollama Manager**: Die Geister (KI-Modelle) rufen. +4. **Nullfeld (RAG)**: Das GedΓ€chtnis des Waldes. + +### 🌌 Das Nullfeld (The Zero Field) + +*"Ein Elefant vergisst nie, aber das Nullfeld weiß, was der Elefant noch lernen muss."* + +Das Nullfeld ist ein Ort, an dem Wissen in reiner Energie (Vektoren) gespeichert wird. Wenn du Dokumente (wie dieses Handbuch) in den `crumbdocs` Ordner legst und das Ritual (Phase 7) ausfΓΌhrst, wird dieses Wissen Teil des Waldes. + +**Wie nutzt du es?** +Wenn das Wissen im Nullfeld ist, kannst du jeden WaldwΓ€chter danach fragen. Sie werden erst im eigenen Kopf suchen, und dann das Nullfeld befragen. + +```bash +# Frage an die Eule mit Wissen aus dem Handbuch +crew eule "Was steht im Handbuch ΓΌber den Kernel?" + +# Frage an Bugsy +crew bugsy "Wie funktioniert die Qdrant Ingestion?" +``` + +Das Nullfeld verbindet dein Wissen mit der Weisheit der Crew. 
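+
+**Was passiert dabei im Hintergrund?** Eine kleine Skizze zum VerstΓ€ndnis: Wie `crew` das intern genau macht, zeigt dieser Patch nicht. Angenommen sind hier nur die Standard-Ports von Ollama und Qdrant sowie die Collection `crumbforest_knowledge` aus dem Ingestion-Skript. Die Frage wird mit `nomic-embed-text` in einen Vektor verwandelt, und das Nullfeld liefert die Γ€hnlichsten Wissens-Schnipsel zurΓΌck.
+
+```bash
+# Skizze (Annahme: Ollama auf Port 11434, Qdrant auf Port 6333)
+# 1) Frage in einen Vektor verwandeln
+VEC=$(curl -s http://localhost:11434/api/embeddings \
+  -d '{"model": "nomic-embed-text", "prompt": "Was steht im Handbuch ΓΌber den Kernel?"}' | jq -c '.embedding')
+
+# 2) Die Γ€hnlichsten Schnipsel im Nullfeld (Qdrant) suchen
+curl -s http://localhost:6333/collections/crumbforest_knowledge/points/search \
+  -H 'Content-Type: application/json' \
+  -d "{\"vector\": $VEC, \"limit\": 3, \"with_payload\": true}" | jq -r '.result[].payload.text'
+```
+
+Die gefundenen Text-Schnipsel nutzt ein WaldwΓ€chter dann als Kontext fΓΌr seine Antwort.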
🌲✨ diff --git a/README.md b/README.md index aa4586f..38f5f57 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,13 @@ Drei komplette Missionen zum Roboter-Bauen mit der ganzen Crew: - **πŸŒ™ Mond Maschine** - Rainbow Predictor mit Computer Vision - **β˜€οΈ Solar Wasserkocher** - Physik-Simulation & Schleifen-Logik +### πŸ₯‹ Terminal Dojo (Setup & Tools) +Dein Trainings-Center fΓΌr das System: +- **Git Status** - Version Control checken +- **OpenCode Installer** - IDE Setup (optional) +- **Ollama Manager** - Lokale KI-Modelle verwalten +- **Nullfeld (RAG)** - Wissen speichern & abfragen + ## πŸ€– WaldwΓ€chter nutzen ```bash @@ -243,9 +250,11 @@ bash missions/robots/mond_maschine.sh - Bash 3.2+ (macOS default, Bash 4+ empfohlen fΓΌr volle Features) - curl, jq -- OpenRouter API Key -- Optional: Python 3 fΓΌr Robot-Missionen (opencv-python, numpy, ephem) - - Mond Maschine (Computer Vision): Pi 4+ oder Desktop empfohlen +- **Ollama** (optional, fΓΌr lokale Agents & RAG) + - Empfohlen: `nomic-embed-text` (wird automatisch geladen) +- **Python 3** (optional, fΓΌr Robot-Missionen & RAG Ingestion) + - `venv`, `requests`, `qdrant-client` (via Dojo Setup) + - `opencv-python`, `numpy` (fΓΌr Vision Missionen) ## πŸš€ Next Steps diff --git a/crumb-mission-selector.sh b/crumb-mission-selector.sh index 973285b..f82ad7b 100755 --- a/crumb-mission-selector.sh +++ b/crumb-mission-selector.sh @@ -1,11 +1,11 @@ -#!/bin/bash -# 🌲 Crumbforest Mission Doktor v2.1 +#!/usr/bin/env bash +# Crumbforest Mission Doktor v2.1 # Metadata-driven, erweiterbar, Bash 3.2+ kompatibel # Inspiriert vom crumbpages-doktor.sh Konzept # # v2.1 Changes: -# - Added: πŸ₯‹ Dojo (BashPanda GΓΌrtel-System) -# - Added: πŸ”§ Tools (Terminal Dojo) +# - Added: Dojo (BashPanda GΓΌrtel-System) +# - Added: Tools (Terminal Dojo) # - Fixed: Menu numbering set -euo pipefail @@ -20,7 +20,6 @@ MAGENTA='\033[1;35m' DIM='\033[2m' NC='\033[0m' # No Color -# === KONFIGURATION === SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" MISSION_DIR="${SCRIPT_DIR}/missions" ENV_FILE="${SCRIPT_DIR}/.env" diff --git a/missions/tools/terminal_dojo/07_knowledge_setup.sh b/missions/tools/terminal_dojo/07_knowledge_setup.sh new file mode 100755 index 0000000..72b0208 --- /dev/null +++ b/missions/tools/terminal_dojo/07_knowledge_setup.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# ═══════════════════════════════════════════════════════════════════════════ +# πŸ₯‹ Terminal Dojo - Phase 7: Wissen (RAG) +# ═══════════════════════════════════════════════════════════════════════════ +# Ingest local Markdown files into Qdrant +# ═══════════════════════════════════════════════════════════════════════════ + +set -e + +# Farben +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +BOLD='\033[1m' +NC='\033[0m' + +DOCS_DIR="$HOME/Documents/crumbdocs" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +INGEST_SCRIPT="$SCRIPT_DIR/scripts/ingest_knowledge.py" +EMBEDDING_MODEL="nomic-embed-text" + +echo "" +echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}" +echo -e "${BOLD} πŸ₯‹ TERMINAL DOJO - Phase 7: Wissen Laden${NC}" +echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}" +echo "" + +# 1. Check Python & Dependencies +echo -e "${BOLD}1. System-Check${NC}" +if ! 
command -v python3 &> /dev/null; then + echo -e "${RED}Python 3 wird benΓΆtigt!${NC}" + exit 1 +fi +echo -e " ${GREEN}βœ“${NC} Python 3 gefunden" + +# Check packages (using venv) +VENV_DIR="$HOME/.terminal_dojo/venv" + +if [[ ! -d "$VENV_DIR" ]]; then + echo -e " ${YELLOW}β—‹${NC} Erstelle Python Virtual Environment..." + python3 -m venv "$VENV_DIR" +fi + +# Activate venv for checks/install +source "$VENV_DIR/bin/activate" + +if python3 -c "import requests" &> /dev/null; then + echo -e " ${GREEN}βœ“${NC} 'requests' Modul gefunden (in venv)" +else + echo -e " ${YELLOW}β—‹${NC} Installiere AbhΓ€ngigkeit: requests" + pip install requests || echo -e "${RED}Fehler beim Installieren von requests${NC}" +fi + +echo "" + +# 2. Check Ollama & Model +echo -e "${BOLD}2. Embedding Modell (Ollama)${NC}" + +if ! command -v ollama &> /dev/null; then + echo -e "${RED}Ollama nicht gefunden! Bitte Phase 3 ausfΓΌhren.${NC}" + exit 1 +fi + +if ollama list | grep -q "$EMBEDDING_MODEL"; then + echo -e " ${GREEN}βœ“${NC} Modell '$EMBEDDING_MODEL' ist bereit." +else + echo -e "${YELLOW} Lade Modell '$EMBEDDING_MODEL'...${NC} (Beste Such-QualitΓ€t)" + ollama pull "$EMBEDDING_MODEL" + echo -e " ${GREEN}βœ“${NC} Modell geladen!" +fi + +echo "" + +# 3. Crumbdocs Ordner +echo -e "${BOLD}3. Wissens-Quelle${NC}" +echo -e " Ordner: ${CYAN}$DOCS_DIR${NC}" + +if [[ ! -d "$DOCS_DIR" ]]; then + echo -e "${YELLOW} Ordner existiert noch nicht.${NC}" + read -p " Soll ich ihn erstellen? [J/n] " -n 1 -r + echo "" + if [[ $REPLY =~ ^[Nn]$ ]]; then + echo "Abbruch." + exit 0 + fi + mkdir -p "$DOCS_DIR" + echo -e " ${GREEN}βœ“${NC} Ordner erstellt." + echo "" + echo -e "${YELLOW}BITTE BEACHTEN:${NC}" + echo "Kopiere jetzt deine Markdown-Dateien (z.B. Keks-Handbuch) in diesen Ordner:" + echo "open $DOCS_DIR" + echo "" + + # Try to open on Mac + if [[ "$OSTYPE" == "darwin"* ]]; then + open "$DOCS_DIR" 2>/dev/null || true + fi + + read -p "DrΓΌcke Enter, wenn du Dateien hineinkopiert hast..." +else + echo -e " ${GREEN}βœ“${NC} Ordner gefunden." + FILE_COUNT=$(find "$DOCS_DIR" -name "*.md" | wc -l) + echo -e " Dateien: $FILE_COUNT" +fi + +echo "" + +# 4. Run Ingestion +echo -e "${BOLD}4. Wissen verarbeiten (Ingestion)${NC}" +echo -e "${DIM}Dies kann einen Moment dauern...${NC}" +echo "" + +python3 "$INGEST_SCRIPT" "$DOCS_DIR" + +echo "" +echo -e "${GREEN}Fertig! Das Wissen ist nun im Nullfeld (Qdrant) gespeichert.${NC}" +echo -e "Deine Crew kann jetzt Fragen dazu beantworten." 
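+
+# Hinweis (optionale PrΓΌfung, nur eine Skizze): Wie viele Vektoren jetzt im
+# Nullfeld liegen, verrΓ€t Qdrant direkt, z.B. mit:
+#   curl -s http://localhost:6333/collections/crumbforest_knowledge | jq '.result.points_count'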
+echo "" diff --git a/missions/tools/terminal_dojo/scripts/ingest_knowledge.py b/missions/tools/terminal_dojo/scripts/ingest_knowledge.py new file mode 100644 index 0000000..0be2f39 --- /dev/null +++ b/missions/tools/terminal_dojo/scripts/ingest_knowledge.py @@ -0,0 +1,175 @@ +import os +import sys +import glob +import json +import requests +import argparse +from typing import List, Dict, Any +from uuid import uuid4 + +# Configuration +OLLAMA_BASE_URL = "http://localhost:11434" +QDRANT_BASE_URL = "http://localhost:6333" +EMBEDDING_MODEL = "nomic-embed-text" +COLLECTION_NAME = "crumbforest_knowledge" + +def get_embedding(text: str) -> List[float]: + """Generate embedding using Ollama.""" + url = f"{OLLAMA_BASE_URL}/api/embeddings" + payload = { + "model": EMBEDDING_MODEL, + "prompt": text + } + + try: + response = requests.post(url, json=payload) + response.raise_for_status() + return response.json()["embedding"] + except Exception as e: + print(f"Error getting embedding: {e}") + return [] + +def create_collection_if_not_exists(): + """Ensure Qdrant collection exists.""" + url = f"{QDRANT_BASE_URL}/collections/{COLLECTION_NAME}" + + # Check existence and config + try: + response = requests.get(url) + if response.status_code == 200: + # Collection exists, check vector size + config = response.json().get("result", {}).get("config", {}).get("params", {}).get("vectors", {}) + current_size = config.get("size") + + if current_size == 768: + print(f"Collection '{COLLECTION_NAME}' exists and has correct dimension (768).") + return + else: + print(f"Collection '{COLLECTION_NAME}' has wrong dimension ({current_size}). Recreating for nomic-embed-text (768)...") + requests.delete(url) + except Exception as e: + print(f"Error checking collection: {e}") + # Proceed to try verify/create + + print(f"Creating collection '{COLLECTION_NAME}'...") + # Create with 768 dimensions (Nomic Embed Text v1.5) + payload = { + "vectors": { + "size": 768, + "distance": "Cosine" + } + } + try: + requests.put(url, json=payload).raise_for_status() + print("Collection created successfully.") + except Exception as e: + print(f"Error creating collection: {e}") + sys.exit(1) + +def parse_markdown(file_path: str) -> List[Dict[str, Any]]: + """ + Simple Markdown splitter. + Splits by headers (#) to keep semantic context. 
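+
+    Example: a file with the lines "# Title", "text", "## Sub", "more"
+    yields two chunks (headers "Title" and "Sub"); the header line itself
+    stays at the top of each chunk's text. Note that any line starting
+    with '#' counts as a header, including '#' comments inside fenced
+    code blocks.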
+ """ + chunks = [] + + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.split('\n') + current_chunk = [] + current_header = os.path.basename(file_path) + + for line in lines: + if line.startswith('#'): + # Save previous chunk if exists + if current_chunk: + text = '\n'.join(current_chunk).strip() + if text: + chunks.append({ + "text": text, + "header": current_header, + "source": file_path + }) + + # Start new chunk + current_header = line.strip().lstrip('#').strip() + current_chunk = [line] # Keep header in text for context + else: + current_chunk.append(line) + + # Add last chunk + if current_chunk: + text = '\n'.join(current_chunk).strip() + if text: + chunks.append({ + "text": text, + "header": current_header, + "source": file_path + }) + + return chunks + +def ingest_directory(directory: str): + """Scan directory and ingest files.""" + if not os.path.exists(directory): + print(f"Directory not found: {directory}") + return + + files = glob.glob(os.path.join(directory, "**/*.md"), recursive=True) + print(f"Found {len(files)} Markdown files.") + + total_vectors = 0 + + for file_path in files: + print(f"Processing {os.path.basename(file_path)}...") + chunks = parse_markdown(file_path) + + points = [] + for chunk in chunks: + vector = get_embedding(chunk["text"]) + if not vector: + continue + + points.append({ + "id": str(uuid4()), + "vector": vector, + "payload": { + "text": chunk["text"], + "header": chunk["header"], + "source": chunk["source"] + } + }) + + if points: + # Upsert + url = f"{QDRANT_BASE_URL}/collections/{COLLECTION_NAME}/points?wait=true" + payload = {"points": points} + try: + requests.put(url, json=payload).raise_for_status() + total_vectors += len(points) + except Exception as e: + print(f"Error upserting points: {e}") + + print(f"Done! {total_vectors} vectors stored inside '{COLLECTION_NAME}'.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Ingest Markdown files into Qdrant.") + parser.add_argument("directory", help="Directory containing .md files") + args = parser.parse_args() + + # Check services + try: + requests.get(QDRANT_BASE_URL) + except: + print("Error: Qdrant is not running (checked localhost:6333).") + sys.exit(1) + + try: + requests.get(OLLAMA_BASE_URL) + except: + print("Error: Ollama is not running (checked localhost:11434).") + sys.exit(1) + + create_collection_if_not_exists() + ingest_directory(args.directory) diff --git a/missions/tools/terminal_dojo/terminal_dojo.sh b/missions/tools/terminal_dojo/terminal_dojo.sh index c60c4e6..25536e7 100755 --- a/missions/tools/terminal_dojo/terminal_dojo.sh +++ b/missions/tools/terminal_dojo/terminal_dojo.sh @@ -57,6 +57,14 @@ show_status() { else echo -e " ${DIM}β—‹${NC} Qdrant (optional)" fi + + # Knowledge (crumbdocs) + DOCS_DIR="$HOME/Documents/crumbdocs" + if [[ -d "$DOCS_DIR" ]] && [[ $(find "$DOCS_DIR" -maxdepth 1 -name "*.md" 2>/dev/null | wc -l) -gt 0 ]]; then + echo -e " ${GREEN}●${NC} Wissen ($(find "$DOCS_DIR" -name "*.md" | wc -l) Docs)" + else + echo -e " ${DIM}β—‹${NC} Wissen (leer)" + fi # Agents if [[ -d "$HOME/.terminal_dojo/agents" ]]; then @@ -77,6 +85,7 @@ show_menu() { echo -e " ${CYAN}4${NC}) WaldwΓ€chter - Crew-Agents konfigurieren" echo -e " ${CYAN}5${NC}) Qdrant - Crew Memory ${DIM}(optional)${NC}" echo -e " ${CYAN}6${NC}) Testen - Alles ausprobieren!" 
+ echo -e " ${CYAN}7${NC}) Wissen (RAG) - Handbuch laden ✨" echo "" echo -e "${BOLD}Schnellstart:${NC}" echo "" @@ -118,7 +127,7 @@ run_all() { return fi - for phase in 1 2 3 4 5 6; do + for phase in 1 2 3 4 5 7 6; do run_phase $phase done } @@ -158,6 +167,7 @@ while true; do 4) run_phase 4 ;; 5) run_phase 5 ;; 6) run_phase 6 ;; + 7) run_phase 7 ;; a|A) run_all ;; c|C) start_crew ;; q|Q)