Offline Ollama Vector Dojo

This commit is contained in:
Branko May Trinkwald
2026-01-07 14:57:32 +01:00
parent abf7796490
commit 5a81b0fdbc
6 changed files with 357 additions and 9 deletions

View File

@@ -406,3 +406,38 @@ So bleibt der Flow am Laufen, ohne dass er sich verliert im Groove.
**Version:** 0.0 (Zero Release)
**Datum:** 2025-12-21
**Status:** Die Crew ist komplett! 🌲✨
---
## 🥋 Das Dojo & Das Nullfeld
Willkommen im **Terminal Dojo** - dem Trainingsraum für Waldwächter und solche, die es werden wollen.
Hier trainiert der Bär seine Kraft, der Fuchs seinen Flow und die Eule ihr Gedächtnis. Und hier kannst du deine Werkzeuge polieren.
### Was findest du im Dojo?
Start: `./crumb-mission-selector.sh` -> **Option 6**
1. **Git Status**: Ist dein Wald synchron mit der Welt?
2. **OpenCode Installer**: Deine Werkstatt (IDE) einrichten.
3. **Ollama Manager**: Die Geister (KI-Modelle) rufen.
4. **Nullfeld (RAG)**: Das Gedächtnis des Waldes.
### 🌌 Das Nullfeld (The Zero Field)
*"Ein Elefant vergisst nie, aber das Nullfeld weiß, was der Elefant noch lernen muss."*
Das Nullfeld ist ein Ort, an dem Wissen in reiner Energie (Vektoren) gespeichert wird. Wenn du Dokumente (wie dieses Handbuch) in den `crumbdocs` Ordner legst und das Ritual (Phase 7) ausführst, wird dieses Wissen Teil des Waldes.
**Wie nutzt du es?**
Wenn das Wissen im Nullfeld ist, kannst du jeden Waldwächter danach fragen. Sie werden erst im eigenen Kopf suchen und dann das Nullfeld befragen.
```bash
# Frage an die Eule mit Wissen aus dem Handbuch
crew eule "Was steht im Handbuch über den Kernel?"
# Frage an Bugsy
crew bugsy "Wie funktioniert die Qdrant Ingestion?"
```
Das Nullfeld verbindet dein Wissen mit der Weisheit der Crew. 🌲✨

View File

@@ -110,6 +110,13 @@ Drei komplette Missionen zum Roboter-Bauen mit der ganzen Crew:
- **🌙 Mond Maschine** - Rainbow Predictor mit Computer Vision
- **☀️ Solar Wasserkocher** - Physik-Simulation & Schleifen-Logik
### 🥋 Terminal Dojo (Setup & Tools)
Dein Trainings-Center für das System:
- **Git Status** - Version Control checken
- **OpenCode Installer** - IDE Setup (optional)
- **Ollama Manager** - Lokale KI-Modelle verwalten
- **Nullfeld (RAG)** - Wissen speichern & abfragen
## 🤖 Waldwächter nutzen
```bash
@@ -243,9 +250,11 @@ bash missions/robots/mond_maschine.sh
- Bash 3.2+ (macOS default, Bash 4+ empfohlen für volle Features)
- curl, jq
- OpenRouter API Key
- Optional: Python 3 für Robot-Missionen (opencv-python, numpy, ephem)
- Mond Maschine (Computer Vision): Pi 4+ oder Desktop empfohlen
- **Ollama** (optional, für lokale Agents & RAG)
- Empfohlen: `nomic-embed-text` (wird automatisch geladen)
- **Python 3** (optional, für Robot-Missionen & RAG Ingestion)
- `venv`, `requests`, `qdrant-client` (via Dojo Setup)
- `opencv-python`, `numpy` (für Vision Missionen)
## 🚀 Next Steps

View File

@@ -1,11 +1,11 @@
#!/bin/bash
# 🌲 Crumbforest Mission Doktor v2.1
#!/usr/bin/env bash
# Crumbforest Mission Doktor v2.1
# Metadata-driven, erweiterbar, Bash 3.2+ kompatibel
# Inspiriert vom crumbpages-doktor.sh Konzept
#
# v2.1 Changes:
# - Added: 🥋 Dojo (BashPanda Gürtel-System)
# - Added: 🔧 Tools (Terminal Dojo)
# - Added: Dojo (BashPanda Gürtel-System)
# - Added: Tools (Terminal Dojo)
# - Fixed: Menu numbering
set -euo pipefail
@@ -20,7 +20,6 @@ MAGENTA='\033[1;35m'
DIM='\033[2m'
NC='\033[0m' # No Color
# === KONFIGURATION ===
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MISSION_DIR="${SCRIPT_DIR}/missions"
ENV_FILE="${SCRIPT_DIR}/.env"

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env bash
# ═══════════════════════════════════════════════════════════════════════════
# 🥋 Terminal Dojo - Phase 7: Wissen (RAG)
# ═══════════════════════════════════════════════════════════════════════════
# Ingest local Markdown files into Qdrant.
#
# Flow:
#   1. Verify Python 3 and prepare a venv with the 'requests' package.
#   2. Verify Ollama and pull the embedding model if it is missing.
#   3. Make sure the crumbdocs source folder exists (create on demand).
#   4. Run scripts/ingest_knowledge.py to embed and store the documents.
# ═══════════════════════════════════════════════════════════════════════════

# Strict mode: fail on errors, unset variables and broken pipeline stages.
set -euo pipefail

# Farben
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'  # Fix: DIM is used below but was never defined (errors under set -u).
NC='\033[0m'

DOCS_DIR="$HOME/Documents/crumbdocs"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INGEST_SCRIPT="$SCRIPT_DIR/scripts/ingest_knowledge.py"
EMBEDDING_MODEL="nomic-embed-text"

echo ""
echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
echo -e "${BOLD} 🥋 TERMINAL DOJO - Phase 7: Wissen Laden${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════════════════${NC}"
echo ""

# NOTE(review): the status glyphs between the color codes (e.g. "${GREEN}${NC}")
# appear to have been lost in the original; restored as ✓/⚠ — confirm intent.

# 1. Check Python & Dependencies
echo -e "${BOLD}1. System-Check${NC}"
if ! command -v python3 &> /dev/null; then
    echo -e "${RED}Python 3 wird benötigt!${NC}"
    exit 1
fi
echo -e " ${GREEN}✓${NC} Python 3 gefunden"

# Check packages (dedicated venv so we never touch the system Python)
VENV_DIR="$HOME/.terminal_dojo/venv"
if [[ ! -d "$VENV_DIR" ]]; then
    echo -e " ${YELLOW}⚠${NC} Erstelle Python Virtual Environment..."
    python3 -m venv "$VENV_DIR"
fi

# Activate venv for checks/install
source "$VENV_DIR/bin/activate"
if python3 -c "import requests" &> /dev/null; then
    echo -e " ${GREEN}✓${NC} 'requests' Modul gefunden (in venv)"
else
    echo -e " ${YELLOW}⚠${NC} Installiere Abhängigkeit: requests"
    # 'python3 -m pip' is more reliable than a bare 'pip' inside a venv.
    python3 -m pip install requests || echo -e "${RED}Fehler beim Installieren von requests${NC}"
fi
echo ""

# 2. Check Ollama & Model
echo -e "${BOLD}2. Embedding Modell (Ollama)${NC}"
if ! command -v ollama &> /dev/null; then
    echo -e "${RED}Ollama nicht gefunden! Bitte Phase 3 ausführen.${NC}"
    exit 1
fi
if ollama list | grep -q -- "$EMBEDDING_MODEL"; then
    echo -e " ${GREEN}✓${NC} Modell '$EMBEDDING_MODEL' ist bereit."
else
    echo -e "${YELLOW} Lade Modell '$EMBEDDING_MODEL'...${NC} (Beste Such-Qualität)"
    ollama pull "$EMBEDDING_MODEL"
    echo -e " ${GREEN}✓${NC} Modell geladen!"
fi
echo ""

# 3. Crumbdocs Ordner
echo -e "${BOLD}3. Wissens-Quelle${NC}"
echo -e " Ordner: ${CYAN}$DOCS_DIR${NC}"
if [[ ! -d "$DOCS_DIR" ]]; then
    echo -e "${YELLOW} Ordner existiert noch nicht.${NC}"
    read -p " Soll ich ihn erstellen? [J/n] " -n 1 -r
    echo ""
    if [[ $REPLY =~ ^[Nn]$ ]]; then
        echo "Abbruch."
        exit 0
    fi
    mkdir -p "$DOCS_DIR"
    echo -e " ${GREEN}✓${NC} Ordner erstellt."
    echo ""
    echo -e "${YELLOW}BITTE BEACHTEN:${NC}"
    echo "Kopiere jetzt deine Markdown-Dateien (z.B. Keks-Handbuch) in diesen Ordner:"
    echo "open $DOCS_DIR"
    echo ""
    # Best effort: open the folder in Finder on macOS.
    if [[ "$OSTYPE" == "darwin"* ]]; then
        open "$DOCS_DIR" 2>/dev/null || true
    fi
    read -r -p "Drücke Enter, wenn du Dateien hineinkopiert hast..."
else
    echo -e " ${GREEN}✓${NC} Ordner gefunden."
    # tr strips the leading padding BSD wc adds on macOS.
    FILE_COUNT=$(find "$DOCS_DIR" -name "*.md" | wc -l | tr -d ' ')
    echo -e " Dateien: $FILE_COUNT"
fi
echo ""

# 4. Run Ingestion
echo -e "${BOLD}4. Wissen verarbeiten (Ingestion)${NC}"
if [[ ! -f "$INGEST_SCRIPT" ]]; then
    # Fix: fail with a clear message instead of a cryptic python3 error.
    echo -e "${RED}Ingestion-Skript nicht gefunden: $INGEST_SCRIPT${NC}"
    exit 1
fi
echo -e "${DIM}Dies kann einen Moment dauern...${NC}"
echo ""
python3 "$INGEST_SCRIPT" "$DOCS_DIR"

echo ""
echo -e "${GREEN}Fertig! Das Wissen ist nun im Nullfeld (Qdrant) gespeichert.${NC}"
echo -e "Deine Crew kann jetzt Fragen dazu beantworten."
echo ""

View File

@@ -0,0 +1,175 @@
import os
import sys
import glob
import json
import requests
import argparse
from typing import List, Dict, Any
from uuid import uuid4
# Configuration
OLLAMA_BASE_URL = "http://localhost:11434"   # local Ollama HTTP API (used for embeddings)
QDRANT_BASE_URL = "http://localhost:6333"    # local Qdrant HTTP API (vector store)
EMBEDDING_MODEL = "nomic-embed-text"         # embedding model name; collection is created with size 768 below
COLLECTION_NAME = "crumbforest_knowledge"    # Qdrant collection this script writes to
def get_embedding(text: str) -> List[float]:
    """Generate an embedding vector for ``text`` via the local Ollama API.

    Returns:
        The embedding as a list of floats, or an empty list on any failure
        so callers can skip the affected chunk and keep going.
    """
    url = f"{OLLAMA_BASE_URL}/api/embeddings"
    payload = {
        "model": EMBEDDING_MODEL,
        "prompt": text
    }
    try:
        # Fix: a timeout keeps one stalled Ollama call from hanging the
        # whole ingestion run forever.
        response = requests.post(url, json=payload, timeout=120)
        response.raise_for_status()
        return response.json()["embedding"]
    except Exception as e:
        print(f"Error getting embedding: {e}")
        return []
def create_collection_if_not_exists():
    """Ensure the Qdrant collection exists with 768-dimensional vectors.

    If the collection exists with a different vector size it is deleted
    and recreated, since nomic-embed-text produces 768-d vectors and
    Qdrant rejects inserts with a mismatched dimension. Exits the
    process if creation ultimately fails.
    """
    url = f"{QDRANT_BASE_URL}/collections/{COLLECTION_NAME}"
    # Check existence and config
    try:
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            # Collection exists, check vector size
            config = response.json().get("result", {}).get("config", {}).get("params", {}).get("vectors", {})
            current_size = config.get("size")
            if current_size == 768:
                print(f"Collection '{COLLECTION_NAME}' exists and has correct dimension (768).")
                return
            print(f"Collection '{COLLECTION_NAME}' has wrong dimension ({current_size}). Recreating for nomic-embed-text (768)...")
            # Fix: surface a failed delete instead of silently ignoring it —
            # creation below would otherwise fail with a confusing error.
            requests.delete(url, timeout=10).raise_for_status()
    except Exception as e:
        print(f"Error checking collection: {e}")
        # Fall through and attempt creation anyway.
    print(f"Creating collection '{COLLECTION_NAME}'...")
    # Create with 768 dimensions (Nomic Embed Text v1.5)
    payload = {
        "vectors": {
            "size": 768,
            "distance": "Cosine"
        }
    }
    try:
        requests.put(url, json=payload, timeout=10).raise_for_status()
        print("Collection created successfully.")
    except Exception as e:
        print(f"Error creating collection: {e}")
        sys.exit(1)
def parse_markdown(file_path: str) -> List[Dict[str, Any]]:
    """Split a Markdown file into header-delimited chunks.

    Each chunk keeps its header line in the text for context and carries
    the header text and source path as metadata. The first chunk (before
    any header) is labelled with the file's basename.

    Fix over the original: lines inside fenced code blocks (```) are no
    longer mistaken for headers, so '#' comments in shell/Python examples
    do not split a chunk in half.

    Returns:
        List of dicts with keys "text", "header" and "source"; empty
        chunks are dropped.
    """
    chunks: List[Dict[str, Any]] = []
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    def flush(buffer: List[str], header: str) -> None:
        # Store the buffered lines as one chunk, skipping whitespace-only ones.
        text = '\n'.join(buffer).strip()
        if text:
            chunks.append({
                "text": text,
                "header": header,
                "source": file_path
            })

    current_chunk: List[str] = []
    current_header = os.path.basename(file_path)
    in_fence = False  # are we inside a ``` fenced code block?
    for line in content.split('\n'):
        if line.lstrip().startswith("```"):
            in_fence = not in_fence
        if line.startswith('#') and not in_fence:
            # Header line: close the previous chunk, start a new one.
            flush(current_chunk, current_header)
            current_header = line.strip().lstrip('#').strip()
            current_chunk = [line]  # keep header in text for context
        else:
            current_chunk.append(line)
    # Add last chunk
    flush(current_chunk, current_header)
    return chunks
def ingest_directory(directory: str):
    """Embed every Markdown file under ``directory`` and upsert into Qdrant.

    Files are discovered recursively (``**/*.md``). Files are processed
    independently: an unreadable file, a failed embedding or a failed
    upsert is reported and does not abort the rest of the run.
    """
    if not os.path.exists(directory):
        print(f"Directory not found: {directory}")
        return
    files = glob.glob(os.path.join(directory, "**/*.md"), recursive=True)
    print(f"Found {len(files)} Markdown files.")
    total_vectors = 0
    for file_path in files:
        print(f"Processing {os.path.basename(file_path)}...")
        try:
            chunks = parse_markdown(file_path)
        except OSError as e:
            # Fix: one unreadable file no longer aborts the whole ingestion.
            print(f"Error reading {file_path}: {e}")
            continue
        points = []
        for chunk in chunks:
            vector = get_embedding(chunk["text"])
            if not vector:
                continue  # embedding failed; skip this chunk
            points.append({
                "id": str(uuid4()),
                "vector": vector,
                "payload": {
                    "text": chunk["text"],
                    "header": chunk["header"],
                    "source": chunk["source"]
                }
            })
        if points:
            # Upsert; wait=true blocks until Qdrant has indexed the points.
            url = f"{QDRANT_BASE_URL}/collections/{COLLECTION_NAME}/points?wait=true"
            payload = {"points": points}
            try:
                requests.put(url, json=payload, timeout=60).raise_for_status()
                total_vectors += len(points)
            except Exception as e:
                print(f"Error upserting points: {e}")
    print(f"Done! {total_vectors} vectors stored inside '{COLLECTION_NAME}'.")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Ingest Markdown files into Qdrant.")
    parser.add_argument("directory", help="Directory containing .md files")
    args = parser.parse_args()
    # Preflight: both local services must answer before we start embedding.
    # Fix: the original bare 'except:' also swallowed KeyboardInterrupt and
    # SystemExit; narrow it to real connection/HTTP problems and bound the
    # probes with a timeout.
    try:
        requests.get(QDRANT_BASE_URL, timeout=5)
    except requests.exceptions.RequestException:
        print("Error: Qdrant is not running (checked localhost:6333).")
        sys.exit(1)
    try:
        requests.get(OLLAMA_BASE_URL, timeout=5)
    except requests.exceptions.RequestException:
        print("Error: Ollama is not running (checked localhost:11434).")
        sys.exit(1)
    create_collection_if_not_exists()
    ingest_directory(args.directory)

View File

@@ -57,6 +57,14 @@ show_status() {
else
echo -e " ${DIM}${NC} Qdrant (optional)"
fi
# Knowledge (crumbdocs)
DOCS_DIR="$HOME/Documents/crumbdocs"
if [[ -d "$DOCS_DIR" ]] && [[ $(find "$DOCS_DIR" -maxdepth 1 -name "*.md" 2>/dev/null | wc -l) -gt 0 ]]; then
echo -e " ${GREEN}${NC} Wissen ($(find "$DOCS_DIR" -name "*.md" | wc -l) Docs)"
else
echo -e " ${DIM}${NC} Wissen (leer)"
fi
# Agents
if [[ -d "$HOME/.terminal_dojo/agents" ]]; then
@@ -77,6 +85,7 @@ show_menu() {
echo -e " ${CYAN}4${NC}) Waldwächter - Crew-Agents konfigurieren"
echo -e " ${CYAN}5${NC}) Qdrant - Crew Memory ${DIM}(optional)${NC}"
echo -e " ${CYAN}6${NC}) Testen - Alles ausprobieren!"
echo -e " ${CYAN}7${NC}) Wissen (RAG) - Handbuch laden ✨"
echo ""
echo -e "${BOLD}Schnellstart:${NC}"
echo ""
@@ -118,7 +127,7 @@ run_all() {
return
fi
for phase in 1 2 3 4 5 6; do
for phase in 1 2 3 4 5 7 6; do
run_phase $phase
done
}
@@ -158,6 +167,7 @@ while true; do
4) run_phase 4 ;;
5) run_phase 5 ;;
6) run_phase 6 ;;
7) run_phase 7 ;;
a|A) run_all ;;
c|C) start_crew ;;
q|Q)