#!/usr/bin/env sh
# fix_eule.sh - Repair Eule permissions and service config
# Run as root!
#
# What it does, in order:
#   1. Makes sure /etc/profile.d exports HF_HOME for login shells.
#   2. Creates the Hugging Face cache and makes it group-writable.
#   3. Offers to delete the cached MiniLM model to force a re-download.
#   4. Patches eule.service (adds Environment=HF_HOME, drops '--serve'),
#      then reloads systemd and restarts the service if anything changed.
#
# Uses GNU 'sed -i' and systemctl, so it targets a Linux/systemd host.

set -u

CACHE_ROOT="/opt/cache"
CACHE_DIR="$CACHE_ROOT/huggingface"
ENV_FILE="/etc/profile.d/crumbforest_env.sh"
SERVICE_FILE="/etc/systemd/system/eule.service"
SYSTEMD_CHANGED=0
SERVICE_BACKED_UP=0

# Copy the unit file aside once, before the first in-place edit, so every
# modification made by this run can be rolled back from a single backup.
backup_service_file() {
  if [ "$SERVICE_BACKED_UP" -eq 0 ]; then
    cp -p "$SERVICE_FILE" "${SERVICE_FILE}.bak.$(date +%s)" || return 1
    SERVICE_BACKED_UP=1
  fi
  return 0
}

echo "== 🦉 Eule Repair Kit 🦉 =="

if [ "$(id -u)" -ne 0 ]; then
  echo "Error: Must be run as root." >&2
  exit 1
fi

# 1. Environment Variables
echo
echo "--- Fixing Environment Variables ---"
if [ ! -f "$ENV_FILE" ]; then
  echo "Creating $ENV_FILE..."
  echo "export HF_HOME=$CACHE_DIR" > "$ENV_FILE"
  echo "Done. (Users need to re-login or run 'source $ENV_FILE')"
elif grep -Eq '^[[:space:]]*(export[[:space:]]+)?HF_HOME=' "$ENV_FILE"; then
  # Only an actual assignment counts; a commented-out mention does not.
  echo "$ENV_FILE exists and contains HF_HOME. Good."
else
  echo "Appending HF_HOME to $ENV_FILE..."
  echo "export HF_HOME=$CACHE_DIR" >> "$ENV_FILE"
fi

# 2. Fix Cache Permissions
echo
echo "--- Fixing HF Cache Permissions ---"

if [ ! -d "$CACHE_DIR" ]; then
  echo "Creating $CACHE_DIR..."
  if ! mkdir -p "$CACHE_DIR"; then
    echo "Error: could not create $CACHE_DIR." >&2
    exit 1
  fi
fi

# Determine Target Group
TARGET_GROUP="crumbforest"
if ! getent group "$TARGET_GROUP" >/dev/null 2>&1; then
  TARGET_GROUP="users"
  echo "Group 'crumbforest' not found. Using '$TARGET_GROUP'."
fi

echo "Setting ownership to root:$TARGET_GROUP for $CACHE_ROOT..."
echo "Setting permissions to 775 for directories, 664 for files (User/Group R+W)..."
# Root owns the tree; the group gets write access. Capital X adds the
# execute bit only to directories (and files that already had it), so
# cached model blobs are not turned into executables.
if chown -R "root:$TARGET_GROUP" "$CACHE_ROOT" \
  && chmod -R u=rwX,g=rwX,o=rX "$CACHE_ROOT"; then
  echo "Permissions applied."
else
  echo "Error: could not fix ownership/permissions on $CACHE_ROOT." >&2
fi

# 3. Model Cleanup Check
MODEL_DIR="$CACHE_DIR/hub/models--sentence-transformers--all-MiniLM-L6-v2"
# Check both potential locations (hub layout changes)
if [ ! -d "$MODEL_DIR" ]; then
  MODEL_DIR="$CACHE_DIR/sentence-transformers/all-MiniLM-L6-v2"
fi

if [ -d "$MODEL_DIR" ]; then
  echo
  echo "Found existing model at: $MODEL_DIR"
  echo "If you suspect corruption, we can delete it to force a re-download."
  # printf instead of 'echo -n': the latter is not portable under sh.
  printf '%s' "Delete model cache? [y/N] "
  REPLY=""
  # EOF on stdin (non-interactive run) leaves REPLY empty => "no".
  read -r REPLY || true
  case "$REPLY" in
    [yY]*)
      echo "Removing $MODEL_DIR..."
      # ':?' aborts instead of expanding to an empty path.
      rm -rf -- "${MODEL_DIR:?}"
      echo "Deleted."
      ;;
    *)
      echo "Skipping deletion."
      ;;
  esac
fi

# 4. Fix Systemd Service Arguments
echo
echo "--- Fixing Systemd Service ---"

if [ -f "$SERVICE_FILE" ]; then
  # systemd services do not read /etc/profile.d, so the unit itself needs
  # an Environment=HF_HOME=... line.
  if ! grep -q "Environment=.*HF_HOME" "$SERVICE_FILE"; then
    echo "Adding Environment=HF_HOME=... to service file..."
    # Insert under [Service]
    if backup_service_file \
      && sed -i "/^\[Service\]/a Environment=HF_HOME=$CACHE_DIR" "$SERVICE_FILE"; then
      SYSTEMD_CHANGED=1
    else
      echo "Error: could not patch $SERVICE_FILE." >&2
    fi
  fi

  # Match '--serve' only as a whole argument so that e.g. '--server' is
  # neither detected nor mangled.
  if grep -Eq -e ' --serve( |$)' "$SERVICE_FILE"; then
    echo "Found deprecated argument '--serve' in $SERVICE_FILE. Removing..."
    if backup_service_file \
      && sed -i -e 's/ --serve$//' -e 's/ --serve / /g' "$SERVICE_FILE"; then
      SYSTEMD_CHANGED=1
    else
      echo "Error: could not patch $SERVICE_FILE." >&2
    fi
  fi

  if [ "$SYSTEMD_CHANGED" -eq 1 ]; then
    echo "Reloading systemd..."
    systemctl daemon-reload
    echo "Restarting Eule..."
    systemctl restart eule
  else
    echo "Service config looks ok."
  fi
else
  echo "Service file $SERVICE_FILE not found."
fi

echo
echo "Done."
echo "👉 NOW: Run 'source $ENV_FILE' and try 'eule \"hello\"' again."
Sudoers Configuration +echo +echo "--- Configuring Sudoers ---" +SUDOERS_FILE="/etc/sudoers.d/crumbforest-ttyd" + +# Allow direct python execution (Bypass Cake for reliability) +# AND allow potentially the cake wrapper if they fix the alias to use sudo +echo "Creating/Updating $SUDOERS_FILE..." + +# We define the specific command to be safe +CMD_PYTHON="/opt/venvs/crumbforest/bin/python3 /opt/eule/eule_rag.py *" +# Also allow bin/cake for nostalgic reasons if they use absolute path +CMD_CAKE="/var/www/html/bin/cake kruemeleule *" + +cat > "$SUDOERS_FILE" < "Wer seine Logs nicht liest, ist dazu verdammt, sie zu wiederholen." - Ein weiser Admin (2026) + +## Der neue `strato_doctor.sh` + +Wir haben den Doktor in die Facharztausbildung geschickt. Er kann nun nicht mehr nur "Puls fühlen" (Ping/Curl), sondern macht ein komplettes MRT deiner Logdateien. + +### Voraussetzungen + +Damit die Magie funktioniert, müssen die Standard-Pfade stimmen oder via ENV-Variablen gesetzt werden: +- Nginx Access Log: `/var/log/nginx/access.log` (oder `LOG_NGINX_ACCESS` setzen) +- Application Log: `/var/log/application.log` (oder `LOG_APP` setzen) + +### Features & Awk-Magie + +Hier erklären wir die "Zaubersprüche", die wir im Skript verwenden. + +#### 1. Latenz-Verteilung (Das "Gefühl" für Speed) +Wir wollen wissen: Wie viele Requests sind *wirklich* schnell? +Das Skript nutzt `awk` um die Requests in Zeit-Töpfe (Buckets) zu werfen. + +**Der Code-Schnipsel:** +```bash +awk -v treq="$TOTAL_REQ" '{ + if ($(NF-1) < 0.5) a[1]++; + # ... weitere Buckets ... +} END { + # Ausgabe der Prozente +}' +``` +*Erklärung:* `$(NF-1)` greift das vorletzte Feld der Logzeile. In vielen Nginx-Formaten steht dort die `request_time`. + +#### 2. Das 90. Perzentil (P90) +Der Durchschnitt lügt. Wenn 10 Requests 0.1s dauern und einer 100s, ist der Schnitt ~9s. Aber 90% der Leute sind glücklich. +Das P90 sagt uns: "90% aller Requests sind schneller als X Sekunden". + +**Wie wir es berechnen:** +1. 
Alle Zeiten extrahieren. +2. Sortieren (`sort -n`). +3. Den Wert an der Position 90% der Liste nehmen. + +#### 3. Die langsamsten Endpoints +Wer sind die Bremsen im System? +```bash +awk '{ + if ($(NF-1) > 1) { # Nur wer länger als 1s braucht + count[$(NF-3)]++; # Zähle die URL (Feld NF-3) + sum[$(NF-3)]+=$(NF-1) # Addiere die Zeit + } +}' +``` + +#### 4. Fehler-Scanner +Ein einfacher `grep`, der aber Gold wert ist. Wir suchen nach `CRITICAL` und `ERROR` im App-Log und zählen, wer am meisten nervt. + +## Anwendung + +Einfach laufen lassen: +```bash +./strato_doctor.sh +``` + +Oder für Profis mit Custom-Pfaden: +```bash +LOG_NGINX_ACCESS=./mein-access.log ./strato_doctor.sh +``` + +### Troubleshooting (Dr. House Edition) + +**Symptom: 0.00s Average Time?** +Das passiert, wenn Nginx kaputte Zeilen schreibt oder das Log-Format abweicht (z.B. Texte statt Zahlen an vorletzter Stelle). Der neue Doktor filtert das nun rigoros (`grep -E "^[0-9.]+$"`). + +**Symptom: Permission denied (Eule)?** +Der Doktor prüft nun auch, ob `HF_HOME` beschreibbar ist. Falls nicht: +1. `fix_eule.sh` als root ausführen. +2. Das repariert `/opt/cache` Permissions und entfernt alte Flags (`--serve`) aus der Systemd-Unit. + +### Nginx "Grundrauschen" (Das ist normal) + +Wenn du Dinge siehst wie: +- `SSL_do_handshake() failed ... bad key share`: Das sind oft Bots oder alte Browser, die sich nicht auf eine Verschlüsselung einigen konnten. Ignorieren. +- `user "admin" was not found in ...`: Da sucht jemand (Skript/Bot) nach Schwachstellen oder Admin-Logins. Solange sie "not found" sind -> Gut! 🛡️ + + diff --git a/native_crumbcore_v1/strato_doctor.sh b/native_crumbcore_v1/strato_doctor.sh new file mode 100755 index 0000000..b3424c8 --- /dev/null +++ b/native_crumbcore_v1/strato_doctor.sh @@ -0,0 +1,248 @@ +#!/usr/bin/env sh +# STRATO Doctor (Advanced) - System & Log Analysis +# Extends the "lite" version with deep-dive log analytics. 
set -eu

# ---- Config (override via env) ----
QDRANT_URL="${QDRANT_URL:-http://127.0.0.1:6333}"
HTTP_PROBE_URLS="${HTTP_PROBE_URLS:-http://127.0.0.1 http://127.0.0.1:8000}"
PING_TARGETS="${PING_TARGETS:-8.8.8.8 1.1.1.1}"
PORTS="${PORTS:-22 80 443 6333 8000}"
VENVS="${VENVS:-/opt/venvs/crumbforest}"
HF_HOME_SHOW="${HF_HOME:-/opt/cache/huggingface}"

# Log Paths
LOG_NGINX_ACCESS="${LOG_NGINX_ACCESS:-/var/log/nginx/access.log}"
LOG_APP="${LOG_APP:-/var/log/application.log}"

# systemd unit list, captured once and reused by has_unit. Stays empty
# when systemctl is missing or fails.
UNIT_FILES="$(systemctl list-unit-files 2>/dev/null || true)"

# ---- Helpers ----

# has_unit NAME
# Succeeds when the captured unit list contains NAME.service. The match is
# anchored on the full unit name, so "eule" does not match "eule-foo".
has_unit() {
  printf '%s\n' "$UNIT_FILES" | grep -q "^$1\.service"
}

# probe_http_code URL
# Prints the 3-digit HTTP status for URL, or 000 when no status could be
# obtained. curl already writes "000" via -w on a failed connection, so a
# trailing "|| echo 000" would print "000000"; the output is validated
# instead. Timeouts keep a dead endpoint from hanging the whole report.
probe_http_code() {
  phc_code="$(curl -ks --connect-timeout 3 --max-time 10 \
    -o /dev/null -w '%{http_code}' "$1" 2>/dev/null)" || true
  case "$phc_code" in
    [0-9][0-9][0-9]) ;;
    *) phc_code=000 ;;
  esac
  printf '%s\n' "$phc_code"
}

# latency_distribution FILE TOTAL
# Buckets the second-to-last field of every line in FILE (assumed to be the
# nginx request_time in seconds -- verify your log_format) and prints one
# row per bucket with its share of TOTAL. Lines that are too short or whose
# field is not a plain number are skipped instead of crashing awk or being
# counted as "> 5 mins".
latency_distribution() {
  awk -v treq="$2" '
    function pct(n) { return (treq > 0) ? int(n * 100 / treq) : 0 }
    function row(label, n) { printf "%s%d (%d%%)\n", label, n, pct(n) }
    NF >= 2 && $(NF-1) ~ /^[0-9]+(\.[0-9]+)?$/ {
      t = $(NF-1) + 0
      if      (t < 0.5) a[1]++
      else if (t < 1)   a[2]++
      else if (t < 2)   a[3]++
      else if (t < 5)   a[4]++
      else if (t < 10)  a[5]++
      else if (t < 60)  a[6]++
      else if (t < 300) a[7]++
      else              a[8]++
    }
    END {
      row("0 - 500 ms : ", a[1] + 0)
      row("500 ms - 1 sec : ", a[2] + 0)
      row("1 sec - 2 secs : ", a[3] + 0)
      row("2 secs - 5 secs : ", a[4] + 0)
      row("5 secs - 10 secs: ", a[5] + 0)
      row("10 s - 60 s : ", a[6] + 0)
      row("1 m - 5 m : ", a[7] + 0)
      row("> 5 mins : ", a[8] + 0)
    }' "$1"
}

# p90_latency FILE
# Prints "P90: <value> s" using the nearest-rank method: the value at
# 1-based position ceil(0.9 * n) of the sorted latencies. Prints "N/A"
# when FILE has no numeric latency field at all.
p90_latency() {
  awk 'NF >= 2 && $(NF-1) ~ /^[0-9]+(\.[0-9]+)?$/ { print $(NF-1) }' "$1" \
    | LC_ALL=C sort -n \
    | awk '
      { v[NR] = $1 }
      END {
        if (NR == 0) { print "N/A"; exit }
        # ceil(0.9 * NR) in integer arithmetic (no float rounding).
        idx = int((NR * 9 + 9) / 10)
        if (idx < 1) idx = 1
        print "P90: " v[idx] " s"
      }'
}

# slowest_endpoints FILE
# Considers only requests slower than 1s, groups them by the field three
# positions before the end (assumed to be the URL) and prints the five
# groups with the highest average time. Sorting is on column 5, the
# numeric average; column 4 is the literal "Avg:" label.
slowest_endpoints() {
  awk '
    NF >= 4 && $(NF-1) ~ /^[0-9]+(\.[0-9]+)?$/ && $(NF-1) + 0 > 1 {
      hits[$(NF-3)]++
      secs[$(NF-3)] += $(NF-1)
    }
    END {
      for (url in hits)
        printf "%-40s Count: %d Avg: %.2fs\n", url, hits[url], secs[url] / hits[url]
    }' "$1" \
    | LC_ALL=C sort -k5,5nr \
    | head -n 5
}

# ---- Report ----

echo "== System =="
HOST_NAME="$(hostname 2>/dev/null || echo unknown)"
OS="$(grep -o 'PRETTY_NAME=.*' /etc/os-release 2>/dev/null | head -n1 | sed 's/PRETTY_NAME=//; s/^"//; s/"$//')"
KERNEL="$(uname -r 2>/dev/null || echo unknown)"
# "load averages:" (plural) is what BSD/macOS uptime prints.
LOAD="$(uptime 2>/dev/null | awk -F'load averages?:' 'NF>1{gsub(/^ +| +$/,"",$2); print $2; next} {print "n/a"}')"
MEM="$(free -h 2>/dev/null | awk '/^Mem:/ {printf "%s/%s used\n",$3,$2}')"
echo "Host: $HOST_NAME"
echo "OS: ${OS:-n/a}"
echo "Kernel: $KERNEL"
echo "Load: ${LOAD:-n/a}"
echo "Memory: ${MEM:-n/a}"

echo
echo "== Network =="
if command -v ip >/dev/null 2>&1; then
  # Squeeze the column padding of 'ip -br' into single spaces.
  ip -br addr 2>/dev/null | tr -s ' '
fi

if command -v ss >/dev/null 2>&1; then
  echo
  echo "Listening (subset):"
  # Query the socket table once, then test each port against the snapshot.
  LISTENERS="$(ss -lnt 2>/dev/null || true)"
  for P in $PORTS; do
    if printf '%s\n' "$LISTENERS" | grep -q ":$P "; then
      echo " port $P: LISTEN"
    else
      echo " port $P: -"
    fi
  done
fi

echo
echo "== Connectivity =="
for T in $PING_TARGETS; do
  if ping -c1 -W1 "$T" >/dev/null 2>&1; then
    echo "ping $T: ok"
  else
    echo "ping $T: fail"
  fi
done

echo
echo "== HTTP probes =="
if command -v curl >/dev/null 2>&1; then
  for U in $HTTP_PROBE_URLS; do
    echo "$U -> HTTP $(probe_http_code "$U")"
  done
else
  echo "curl: not found"
fi

echo
echo "== Qdrant =="
if command -v curl >/dev/null 2>&1; then
  if curl -fsS --connect-timeout 3 --max-time 10 "$QDRANT_URL/healthz" >/dev/null 2>&1; then
    echo "healthz: ok ($QDRANT_URL)"
  else
    echo "healthz: fail ($QDRANT_URL)"
  fi
  # NOTE(review): confirm that this Qdrant build serves /versions; when it
  # does not, curl -f fails and the line below is simply omitted.
  VERS="$(curl -fsS --connect-timeout 3 --max-time 10 "$QDRANT_URL/versions" 2>/dev/null || true)"
  if [ -n "$VERS" ]; then
    echo "versions: $VERS"
  fi
fi

echo
echo "== Python / HF cache =="
if command -v python3 >/dev/null 2>&1; then
  python3 -V 2>&1
else
  echo "python3: not found"
fi
echo "HF_HOME: ${HF_HOME_SHOW}"
if [ -n "${TRANSFORMERS_CACHE:-}" ]; then
  echo "TRANSFORMERS_CACHE is set (deprecated) -> prefer HF_HOME"
fi
if [ -d "$HF_HOME_SHOW" ]; then
  if [ -w "$HF_HOME_SHOW" ]; then
    echo "HF_HOME writable: yes ($HF_HOME_SHOW)"
  else
    echo "HF_HOME writable: NO ($HF_HOME_SHOW)"
  fi
  # Owner/group/mode line, shown for both outcomes.
  ls -ld -- "$HF_HOME_SHOW" | sed 's/^/ -> /'
else
  echo "HF_HOME does not exist"
fi

echo
echo "== Virtualenvs =="
for V in $VENVS; do
  if [ -d "$V" ]; then
    echo "$V: present"
  else
    echo "$V: missing"
  fi
done

echo
echo "== Containers =="
ENGINE=""
if command -v docker >/dev/null 2>&1; then
  ENGINE="docker"
elif command -v podman >/dev/null 2>&1; then
  ENGINE="podman"
fi
if [ -n "$ENGINE" ]; then
  # Best effort: the binary may exist while the daemon is down.
  "$ENGINE" ps --format "table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || true
else
  echo "no docker/podman"
fi

echo
echo "== systemd services (eule, vector, qdrant, nginx, apache2) =="
for S in eule vector qdrant nginx apache2; do
  if has_unit "$S"; then
    if systemctl is-active --quiet "$S"; then
      echo "$S: active"
      # Show ExecStart for eule to debug arguments. printf is used because
      # piping into xargs fails on unbalanced quotes and would abort the
      # script under set -e.
      if [ "$S" = "eule" ]; then
        printf ' -> ExecStart: %s\n' "$(systemctl show -p ExecStart --value eule 2>/dev/null || true)"
      fi
    else
      echo "$S: inactive"
    fi
  fi
done

echo
echo "== Logs (warnings/errors) =="
for S in eule vector qdrant; do
  if has_unit "$S"; then
    echo "-- $S --"
    # grep exits 1 when nothing matches; that is not an error here.
    journalctl -u "$S" -n 50 --no-pager 2>/dev/null | grep -Ei 'warn|deprecat|error' || true
  fi
done

echo
echo "== Paths =="
for D in /opt /srv /var/www; do
  if [ -d "$D" ]; then
    echo "$D"
    ls -la "$D" 2>/dev/null | head -n 10
  fi
done

echo
echo "=================================="
echo "== 🦉 LOG ANALYSE MODULE 🦉 =="
echo "=================================="

if [ -f "$LOG_NGINX_ACCESS" ]; then
  echo "Analyzing: $LOG_NGINX_ACCESS"

  # Count lines that do not contain "elapsed". grep -c exits 1 on a zero
  # count, hence the "|| true" under set -e.
  TOTAL_REQ="$(grep -vc "elapsed" "$LOG_NGINX_ACCESS" 2>/dev/null || true)"
  TOTAL_REQ="${TOTAL_REQ:-0}"
  echo "Total Requests: $TOTAL_REQ"

  if [ "$TOTAL_REQ" -gt 0 ]; then
    echo
    echo "--- Latency Distribution (Time Range) ---"
    # NOTE: Verify your nginx log_format! The latency is read from the
    # second-to-last field.
    latency_distribution "$LOG_NGINX_ACCESS" "$TOTAL_REQ" 2>/dev/null \
      || echo "Error parsing latency (check log format)"

    echo
    echo "--- 90th Percentile Latency ---"
    p90_latency "$LOG_NGINX_ACCESS" 2>/dev/null || echo "N/A"

    echo
    echo "--- Top 5 Slowest Endpoints (>1s average) ---"
    slowest_endpoints "$LOG_NGINX_ACCESS" 2>/dev/null || true

    echo
    echo "--- HTTP Response Codes ---"
    awk '{print $9}' "$LOG_NGINX_ACCESS" | sort | uniq -c | sort -nr | head -n 10

    echo
    echo "--- User Agents (Top 5) ---"
    awk -F'"' '{print $6}' "$LOG_NGINX_ACCESS" | sort | uniq -c | sort -nr | head -n 5
  fi
else
  echo "Nginx Log not found at $LOG_NGINX_ACCESS"
fi

if [ -f "$LOG_APP" ]; then
  echo
  echo "--- Application Log Errors ($LOG_APP) ---"
  # Blank the first two fields (presumably date and time -- confirm against
  # the app log format) so identical messages collapse in uniq -c.
  grep -Ei 'CRITICAL|ERROR' "$LOG_APP" | awk '{$1=""; $2=""; print $0}' | sort | uniq -c | sort -nr | head -n 10
else
  echo
  echo "App Log not found at $LOG_APP"
fi

echo
echo "Done."