zuhören 2026 <3

This commit is contained in:
2026-01-01 22:22:52 +01:00
parent 7bd3173c24
commit 173e5fa1a1
5 changed files with 517 additions and 21 deletions

21
LICENSE
View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2024 Crumbforest Contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,118 @@
#!/usr/bin/env sh
# fix_eule.sh - Repair Eule permissions and service config.
#
# Fixes three things:
#   1. Ensures HF_HOME is exported system-wide via /etc/profile.d.
#   2. Repairs ownership/permissions of the shared HF model cache.
#   3. Patches the eule systemd unit (adds Environment=HF_HOME, removes
#      the deprecated --serve flag) and restarts the service if changed.
#
# Must be run as root. Targets Linux (GNU sed -i, systemd).
set -u
echo "== 🦉 Eule Repair Kit 🦉 =="
if [ "$(id -u)" -ne 0 ]; then
  # Diagnostics go to stderr so they survive stdout redirection.
  echo "Error: Must be run as root." >&2
  exit 1
fi
CACHE_DIR="/opt/cache/huggingface"

# 1. Environment Variables -------------------------------------------------
echo
echo "--- Fixing Environment Variables ---"
ENV_FILE="/etc/profile.d/crumbforest_env.sh"
if [ ! -f "$ENV_FILE" ]; then
  echo "Creating $ENV_FILE..."
  echo 'export HF_HOME=/opt/cache/huggingface' > "$ENV_FILE"
  echo "Done. (Users need to re-login or run 'source $ENV_FILE')"
else
  if grep -q "HF_HOME" "$ENV_FILE"; then
    echo "$ENV_FILE exists and contains HF_HOME. Good."
  else
    echo "Appending HF_HOME to $ENV_FILE..."
    echo 'export HF_HOME=/opt/cache/huggingface' >> "$ENV_FILE"
  fi
fi

# 2. Fix Cache Permissions -------------------------------------------------
echo
echo "--- Fixing HF Cache Permissions ---"
if [ ! -d "$CACHE_DIR" ]; then
  echo "Creating $CACHE_DIR..."
  # Abort if the cache root cannot be created: everything below needs it.
  mkdir -p "$CACHE_DIR" || { echo "Error: could not create $CACHE_DIR" >&2; exit 1; }
fi
# Prefer the project group; fall back to the generic 'users' group.
TARGET_GROUP="crumbforest"
if ! getent group "$TARGET_GROUP" >/dev/null 2>&1; then
  TARGET_GROUP="users"
  echo "Group 'crumbforest' not found. Using '$TARGET_GROUP'."
fi
echo "Setting ownership to root:$TARGET_GROUP for /opt/cache..."
# Root owns the tree; the group gets write access so service users can cache.
chown -R root:"$TARGET_GROUP" /opt/cache
echo "Setting permissions to 775 (User/Group R+W+X)..."
chmod -R 775 /opt/cache
echo "Permissions applied."

# 3. Model Cleanup Check ---------------------------------------------------
# The HF hub layout changed over time; check both known locations.
MODEL_DIR="$CACHE_DIR/hub/models--sentence-transformers--all-MiniLM-L6-v2"
if [ ! -d "$MODEL_DIR" ]; then
  MODEL_DIR="$CACHE_DIR/sentence-transformers/all-MiniLM-L6-v2"
fi
if [ -d "$MODEL_DIR" ]; then
  echo
  echo "Found existing model at: $MODEL_DIR"
  echo "If you suspect corruption, we can delete it to force a re-download."
  # printf instead of 'echo -n': -n is not portable across POSIX sh builds.
  printf "Delete model cache? [y/N] "
  read -r REPLY
  if echo "$REPLY" | grep -iq "^y"; then
    echo "Removing $MODEL_DIR..."
    rm -rf "$MODEL_DIR"
    echo "Deleted."
  else
    echo "Skipping deletion."
  fi
fi

# 4. Fix Systemd Service Arguments ----------------------------------------
echo
echo "--- Fixing Systemd Service ---"
SERVICE_FILE="/etc/systemd/system/eule.service"
if [ -f "$SERVICE_FILE" ]; then
  # Back up BEFORE the first modification of either kind (the original only
  # backed up for the --serve removal, not the Environment= insert).
  BACKUP_FILE="${SERVICE_FILE}.bak.$(date +%s)"
  BACKED_UP=0
  # The service does not read /etc/profile.d, so HF_HOME must live in the unit.
  if ! grep -q "Environment=.*HF_HOME" "$SERVICE_FILE"; then
    echo "Adding Environment=HF_HOME=... to service file..."
    cp "$SERVICE_FILE" "$BACKUP_FILE"
    BACKED_UP=1
    # Insert directly under the [Service] header (GNU sed -i).
    sed -i '/\[Service\]/a Environment=HF_HOME=/opt/cache/huggingface' "$SERVICE_FILE"
    SYSTEMD_CHANGED=1
  fi
  if grep -q -- "--serve" "$SERVICE_FILE"; then
    echo "Found deprecated argument '--serve' in $SERVICE_FILE. Removing..."
    [ "$BACKED_UP" -eq 1 ] || cp "$SERVICE_FILE" "$BACKUP_FILE"
    sed -i 's/ --serve//g' "$SERVICE_FILE"
    SYSTEMD_CHANGED=1
  fi
  if [ "${SYSTEMD_CHANGED:-0}" -eq 1 ]; then
    echo "Reloading systemd..."
    systemctl daemon-reload
    echo "Restarting Eule..."
    systemctl restart eule
  else
    echo "Service config looks ok."
  fi
else
  echo "Service file $SERVICE_FILE not found."
fi
echo
echo "Done."
echo "👉 NOW: Run 'source $ENV_FILE' and try 'eule \"hello\"' again."

View File

@@ -0,0 +1,64 @@
#!/usr/bin/env sh
# fix_eule_ttyd.sh - Grant TTYD user permissions for Eule (Cake/Python).
#
# 1. Adds the ttyd user to the cache group so it can read HF models.
# 2. Installs a sudoers drop-in allowing exactly the Eule entry points
#    as root without a password — validated with visudo before install,
#    because a broken drop-in can lock everyone out of sudo.
#
# Must be run as root.
set -u
echo "== 🦉 Eule TTYD Fixer 🦉 =="
echo "Note: Baking access for 'crumbmission'..."
if [ "$(id -u)" -ne 0 ]; then
  echo "Error: Must be run as root." >&2
  exit 1
fi
TTYD_USER="crumbmission"
GROUP="crumbforest"

# 1. Group Membership (for reading the model cache) ------------------------
echo
echo "--- Fixing Group Membership ---"
if id "$TTYD_USER" >/dev/null 2>&1; then
  echo "Adding $TTYD_USER to group $GROUP..."
  usermod -aG "$GROUP" "$TTYD_USER"
  echo "Done."
else
  echo "User $TTYD_USER not found. Skipping user fix (are you running this on the right server?)."
fi

# 2. Sudoers Configuration -------------------------------------------------
echo
echo "--- Configuring Sudoers ---"
SUDOERS_FILE="/etc/sudoers.d/crumbforest-ttyd"
echo "Creating/Updating $SUDOERS_FILE..."
# Pin exact commands (never a bare ALL) to keep the grant narrow.
CMD_PYTHON="/opt/venvs/crumbforest/bin/python3 /opt/eule/eule_rag.py *"
# The cake wrapper is also allowed (used by the alternative alias below);
# previously this variable was defined but never written into the file.
CMD_CAKE="/var/www/html/bin/cake kruemeleule *"
# Write to a temp file first and only install it after visudo validates it.
TMP_SUDOERS=$(mktemp) || { echo "Error: mktemp failed" >&2; exit 1; }
cat > "$TMP_SUDOERS" <<EOF
# Crumbforest TTYD Access
# Generated by fix_eule_ttyd.sh
# Allow reading cached models and running the neural engine
$TTYD_USER ALL=(root) NOPASSWD: $CMD_PYTHON
$TTYD_USER ALL=(root) NOPASSWD: /usr/bin/python3 /opt/eule/eule_rag.py *
$TTYD_USER ALL=(root) NOPASSWD: $CMD_CAKE
EOF
if visudo -c -f "$TMP_SUDOERS" >/dev/null 2>&1; then
  # install sets mode 440 atomically (sudo refuses world-writable drop-ins).
  install -m 440 "$TMP_SUDOERS" "$SUDOERS_FILE"
  rm -f "$TMP_SUDOERS"
else
  echo "Error: generated sudoers file failed validation. Not installed." >&2
  rm -f "$TMP_SUDOERS"
  exit 1
fi

# 3. Alias Hint ------------------------------------------------------------
echo
echo "--- Alias Suggestion ---"
echo "The 'eule' alias in TTYD should be updated to:"
echo "alias eule='sudo /opt/venvs/crumbforest/bin/python3 /opt/eule/eule_rag.py'"
echo
echo "(Or if you want to keep Cake: alias eule='cd /var/www/html && sudo bin/cake kruemeleule')"
echo "But direct python is more robust against env/path issues."
echo
echo "Done. TTYD user needs to re-login for group changes to take effect."

View File

@@ -0,0 +1,87 @@
# 🩺 Keks-Handbuch: Log-Analyse für Datendetektive
Willkommen im neuen Kapitel der systemischen Selbstfindung! Da wir den `strato_doctor.sh` nun mit Superkräften ausgestattet haben, wollen wir verstehen, was unter der Haube passiert.
> "Wer seine Logs nicht liest, ist dazu verdammt, sie zu wiederholen." - Ein weiser Admin (2026)
## Der neue `strato_doctor.sh`
Wir haben den Doktor in die Facharztausbildung geschickt. Er kann nun nicht mehr nur "Puls fühlen" (Ping/Curl), sondern macht ein komplettes MRT deiner Logdateien.
### Voraussetzungen
Damit die Magie funktioniert, müssen die Standard-Pfade stimmen oder via ENV-Variablen gesetzt werden:
- Nginx Access Log: `/var/log/nginx/access.log` (oder `LOG_NGINX_ACCESS` setzen)
- Application Log: `/var/log/application.log` (oder `LOG_APP` setzen)
### Features & Awk-Magie
Hier erklären wir die "Zaubersprüche", die wir im Skript verwenden.
#### 1. Latenz-Verteilung (Das "Gefühl" für Speed)
Wir wollen wissen: Wie viele Requests sind *wirklich* schnell?
Das Skript nutzt `awk` um die Requests in Zeit-Töpfe (Buckets) zu werfen.
**Der Code-Schnipsel:**
```bash
awk -v treq="$TOTAL_REQ" '{
if ($(NF-1) < 0.5) a[1]++;
# ... weitere Buckets ...
} END {
# Ausgabe der Prozente
}'
```
*Erklärung:* `$(NF-1)` greift das vorletzte Feld der Logzeile. In vielen Nginx-Formaten steht dort die `request_time`.
#### 2. Das 90. Perzentil (P90)
Der Durchschnitt lügt. Wenn 10 Requests 0.1s dauern und einer 100s, ist der Schnitt ~9s. Aber 90% der Leute sind glücklich.
Das P90 sagt uns: "90% aller Requests sind schneller als X Sekunden".
**Wie wir es berechnen:**
1. Alle Zeiten extrahieren.
2. Sortieren (`sort -n`).
3. Den Wert an der Position 90% der Liste nehmen.
#### 3. Die langsamsten Endpoints
Wer sind die Bremsen im System?
```bash
awk '{
if ($(NF-1) > 1) { # Nur wer länger als 1s braucht
count[$(NF-3)]++; # Zähle die URL (Feld NF-3)
sum[$(NF-3)]+=$(NF-1) # Addiere die Zeit
}
}'
```
#### 4. Fehler-Scanner
Ein einfacher `grep`, der aber Gold wert ist. Wir suchen nach `CRITICAL` und `ERROR` im App-Log und zählen, wer am meisten nervt.
## Anwendung
Einfach laufen lassen:
```bash
./strato_doctor.sh
```
Oder für Profis mit Custom-Pfaden:
```bash
LOG_NGINX_ACCESS=./mein-access.log ./strato_doctor.sh
```
### Troubleshooting (Dr. House Edition)
**Symptom: 0.00s Average Time?**
Das passiert, wenn Nginx kaputte Zeilen schreibt oder das Log-Format abweicht (z.B. Texte statt Zahlen an vorletzter Stelle). Der neue Doktor filtert das nun rigoros (`grep -E "^[0-9.]+$"`).
**Symptom: Permission denied (Eule)?**
Der Doktor prüft nun auch, ob `HF_HOME` beschreibbar ist. Falls nicht:
1. `fix_eule.sh` als root ausführen.
2. Das repariert `/opt/cache` Permissions und entfernt alte Flags (`--serve`) aus der Systemd-Unit.
### Nginx "Grundrauschen" (Das ist normal)
Wenn du Dinge siehst wie:
- `SSL_do_handshake() failed ... bad key share`: Das sind oft Bots oder alte Browser, die sich nicht auf eine Verschlüsselung einigen konnten. Ignorieren.
- `user "admin" was not found in ...`: Da sucht jemand (Skript/Bot) nach Schwachstellen oder Admin-Logins. Solange sie "not found" sind -> Gut! 🛡️

View File

@@ -0,0 +1,248 @@
#!/usr/bin/env sh
# STRATO Doctor (Advanced) - System & Log Analysis
# Extends the "lite" version with deep-dive log analytics.
#
# Every knob below is overridable from the environment, e.g.:
#   LOG_NGINX_ACCESS=./access.log ./strato_doctor.sh
set -eu

# ---- Config (override via env) ----
: "${QDRANT_URL:=http://127.0.0.1:6333}"
: "${HTTP_PROBE_URLS:=http://127.0.0.1 http://127.0.0.1:8000}"
: "${PING_TARGETS:=8.8.8.8 1.1.1.1}"
: "${PORTS:=22 80 443 6333 8000}"
: "${VENVS:=/opt/venvs/crumbforest}"
# Display-only copy; never mutates a user-provided HF_HOME.
HF_HOME_SHOW="${HF_HOME:-/opt/cache/huggingface}"
# Log paths for the analysis module at the bottom of the script.
: "${LOG_NGINX_ACCESS:=/var/log/nginx/access.log}"
: "${LOG_APP:=/var/log/application.log}"
echo "== System =="
# Basic host identity and resource snapshot. Every probe degrades
# gracefully when a tool is missing ("unknown" / "n/a" instead of aborting).
HOSTNAME="$(hostname 2>/dev/null || echo unknown)"
KERNEL="$(uname -r 2>/dev/null || echo unknown)"
# PRETTY_NAME from os-release, with the key prefix and quotes stripped.
OS=$(grep -o 'PRETTY_NAME=.*' /etc/os-release 2>/dev/null | head -n1 | sed 's/PRETTY_NAME=//; s/^"//; s/"$//')
# Everything after "load average:" in uptime output, trimmed.
LOAD="$(uptime 2>/dev/null | awk -F'load average:' 'NF>1{gsub(/^ +| +$/,"",$2); print $2; next} {print "n/a"}')"
# "used/total used" summary from free(1); empty when free is unavailable.
MEM="$(free -h 2>/dev/null | awk '/^Mem:/ {printf "%s/%s used\n",$3,$2}')"
printf 'Host: %s\n' "$HOSTNAME"
printf 'OS: %s\n' "${OS:-n/a}"
printf 'Kernel: %s\n' "$KERNEL"
printf 'Load: %s\n' "${LOAD:-n/a}"
printf 'Memory: %s\n' "${MEM:-n/a}"
echo
echo "== Network =="
# One-line-per-interface summary (iproute2); squeeze repeated spaces.
# NOTE(review): the sed '\+' is a GNU BRE extension — fine on Linux targets.
if command -v ip >/dev/null 2>&1; then
  ip -br addr 2>/dev/null | sed 's/ \+/ /g'
fi
if command -v ss >/dev/null 2>&1; then
  echo
  echo "Listening (subset):"
  # Snapshot the listener table once instead of re-running ss per port.
  LISTENERS="$(ss -lnt 2>/dev/null || true)"
  for P in $PORTS; do
    if printf '%s\n' "$LISTENERS" | grep -q ":$P "; then
      echo " port $P: LISTEN"
    else
      echo " port $P: -"
    fi
  done
fi
echo
echo "== Connectivity =="
# One ICMP packet per target, 1s timeout (GNU/Linux ping flags).
for T in $PING_TARGETS; do
  RESULT=fail
  ping -c1 -W1 "$T" >/dev/null 2>&1 && RESULT=ok
  echo "ping $T: $RESULT"
done
echo
echo "== HTTP probes =="
# curl availability does not change per URL — check it once, and say so
# when missing instead of silently printing nothing.
if command -v curl >/dev/null 2>&1; then
  for U in $HTTP_PROBE_URLS; do
    # -k: tolerate TLS issues, -s: quiet; print only the status (000 = no connection).
    CODE="$(curl -ks -o /dev/null -w '%{http_code}' "$U" || echo 000)"
    echo "$U -> HTTP $CODE"
  done
else
  echo "curl not found; skipping HTTP probes."
fi
echo
echo "== Qdrant =="
if command -v curl >/dev/null 2>&1; then
  # Liveness probe against the Qdrant REST health endpoint.
  STATE=fail
  curl -fsS "$QDRANT_URL/healthz" >/dev/null 2>&1 && STATE=ok
  echo "healthz: $STATE ($QDRANT_URL)"
  # Version info is best-effort; print nothing when unavailable.
  VERS="$(curl -fsS "$QDRANT_URL/versions" 2>/dev/null || true)"
  if [ -n "$VERS" ]; then
    echo "versions: $VERS"
  fi
fi
echo
echo "== Python / HF cache =="
# Interpreter presence/version (stderr folded in: some builds print there).
if command -v python3 >/dev/null 2>&1; then
  python3 -V 2>&1
else
  echo "python3: not found"
fi
echo "HF_HOME: ${HF_HOME_SHOW}"
# TRANSFORMERS_CACHE was superseded by HF_HOME in huggingface tooling.
if [ -n "${TRANSFORMERS_CACHE:-}" ]; then
  echo "TRANSFORMERS_CACHE is set (deprecated) -> prefer HF_HOME"
fi
if [ -d "$HF_HOME_SHOW" ]; then
  if [ -w "$HF_HOME_SHOW" ]; then
    echo "HF_HOME writable: yes ($HF_HOME_SHOW)"
  else
    echo "HF_HOME writable: NO ($HF_HOME_SHOW)"
  fi
  # Owner/group/mode in either case, to aid permission debugging.
  ls -ld "$HF_HOME_SHOW" | sed 's/^/ -> /'
else
  echo "HF_HOME does not exist"
fi
echo
echo "== Virtualenvs =="
# Existence check only — does not validate interpreter or packages.
for VENV_DIR in $VENVS; do
  STATE=missing
  [ -d "$VENV_DIR" ] && STATE=present
  echo "$VENV_DIR: $STATE"
done
echo
echo "== Containers =="
# Prefer docker; fall back to podman (CLI-compatible for 'ps').
ENGINE=""
for CANDIDATE in docker podman; do
  if command -v "$CANDIDATE" >/dev/null 2>&1; then
    ENGINE="$CANDIDATE"
    break
  fi
done
if [ -z "$ENGINE" ]; then
  echo "no docker/podman"
else
  "$ENGINE" ps --format "table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || true
fi
echo
echo "== systemd services (eule, vector, qdrant, nginx, apache2) =="
for S in eule vector qdrant nginx apache2; do
  # Anchor on "<name>.service" so e.g. "eule-backup.service" is not matched.
  if systemctl list-unit-files 2>/dev/null | grep -q "^$S\.service"; then
    if systemctl is-active --quiet "$S"; then
      echo "$S: active"
      # Show ExecStart for eule to debug arguments. No xargs here: unit
      # ExecStart values often contain quotes, which xargs mis-parses.
      if [ "$S" = "eule" ]; then
        echo " -> ExecStart: $(systemctl show -p ExecStart --value eule 2>/dev/null || true)"
      fi
    else
      echo "$S: inactive"
    fi
  fi
done
echo
echo "== Logs (warnings/errors) =="
# Surface recent warning/deprecation/error lines from each service journal.
for S in eule vector qdrant; do
  if systemctl list-unit-files 2>/dev/null | grep -q "^$S\.service"; then
    echo "-- $S --"
    journalctl -u "$S" -n 50 --no-pager 2>/dev/null | grep -Ei 'warn|deprecat|error' || true
  fi
done
echo
echo "== Paths =="
# Quick inventory of the usual deployment roots (first 10 entries each).
for D in /opt /srv /var/www; do
  if [ -d "$D" ]; then
    echo "$D"
    ls -la "$D" 2>/dev/null | head -n 10
  fi
done
echo
echo "=================================="
echo "== 🦉 LOG ANALYSE MODULE 🦉 =="
echo "=================================="
if [ -f "$LOG_NGINX_ACCESS" ]; then
  echo "Analyzing: $LOG_NGINX_ACCESS"
  # Count requests with grep -vc instead of 'cat | grep | wc -l'.
  # grep exits non-zero when the count is 0, which would abort the script
  # under 'set -e'; '|| true' prevents that and the fallback keeps the
  # numeric -gt test below safe.
  TOTAL_REQ=$(grep -vc "elapsed" "$LOG_NGINX_ACCESS" || true)
  TOTAL_REQ=${TOTAL_REQ:-0}
  echo "Total Requests: $TOTAL_REQ"
  if [ "$TOTAL_REQ" -gt 0 ]; then
    echo
    echo "--- Latency Distribution (Time Range) ---"
    # Assumes the request time is the second-to-last field $(NF-1).
    # NOTE(review): this depends on the nginx log_format — verify it!
    awk -v treq="$TOTAL_REQ" '{
      t = $(NF-1)
      if (t < 0.5) a[1]++;
      else if (t < 1) a[2]++;
      else if (t < 2) a[3]++;
      else if (t < 5) a[4]++;
      else if (t < 10) a[5]++;
      else if (t < 60) a[6]++;
      else if (t < 300) a[7]++;
      else a[8]++;
    } END {
      print "0 - 500 ms : " (a[1]+0) " (" int((a[1]+0)*100/treq) "%)"
      print "500 ms - 1 sec : " (a[2]+0) " (" int((a[2]+0)*100/treq) "%)"
      print "1 sec - 2 secs : " (a[3]+0) " (" int((a[3]+0)*100/treq) "%)"
      print "2 secs - 5 secs : " (a[4]+0) " (" int((a[4]+0)*100/treq) "%)"
      print "5 secs - 10 secs: " (a[5]+0) " (" int((a[5]+0)*100/treq) "%)"
      print "10 s - 60 s : " (a[6]+0) " (" int((a[6]+0)*100/treq) "%)"
      print "1 m - 5 m : " (a[7]+0) " (" int((a[7]+0)*100/treq) "%)"
      print "> 5 mins : " (a[8]+0) " (" int((a[8]+0)*100/treq) "%)"
    }' "$LOG_NGINX_ACCESS" 2>/dev/null || echo "Error parsing latency (check log format)"
    echo
    echo "--- 90th Percentile Latency ---"
    # Nearest-rank P90: sorted index ceil(0.9*c)-1. The previous int(c*0.9)
    # over-shot for small samples (c=10 selected the maximum, a[9]).
    awk '{print $(NF-1)}' "$LOG_NGINX_ACCESS" | grep -E "^[0-9.]+$" | sort -n | awk '
    BEGIN {c=0}
    {a[c++]=$1}
    END {
      if (c==0) {print "N/A"; exit}
      idx = int((9*c + 9) / 10) - 1
      print "P90: " a[idx] " s"
    }' 2>/dev/null
    echo
    echo "--- Top 5 Slowest Endpoints (>1s average) ---"
    # Sort on field 5 (the numeric average). The old '-k4' sorted on the
    # literal "Avg:" label, which parses as 0 under -n for every line.
    awk '{
      val = $(NF-1)
      # Accept only numeric latencies; skips garbage tokens like "zh-tw".
      if (val ~ /^[0-9]+(\.[0-9]+)?$/ && val > 1) {
        count[$(NF-3)]++
        sum[$(NF-3)] += val
      }
    } END {
      for (url in count)
        printf "%-40s Count: %d Avg: %.2fs\n", url, count[url], sum[url]/count[url]
    }' "$LOG_NGINX_ACCESS" 2>/dev/null | sort -k5 -nr | head -n 5
    echo
    echo "--- HTTP Response Codes ---"
    # $9 is the status code in the common/combined log format.
    awk '{print $9}' "$LOG_NGINX_ACCESS" 2>/dev/null | sort | uniq -c | sort -nr | head -n 10
    echo
    echo "--- User Agents (Top 5) ---"
    # Splitting on '"' puts the user agent in field 6 (combined format).
    awk -F'"' '{print $6}' "$LOG_NGINX_ACCESS" 2>/dev/null | sort | uniq -c | sort -nr | head -n 5
  fi
else
  echo "Nginx Log not found at $LOG_NGINX_ACCESS"
fi
if [ -f "$LOG_APP" ]; then
  echo
  echo "--- Application Log Errors ($LOG_APP) ---"
  # Use ERE alternation: the previous BRE "\|" form is a GNU grep extension.
  # Blank out the first two (timestamp) fields so repeated messages aggregate.
  grep -iE "CRITICAL|ERROR" -- "$LOG_APP" | awk '{$1=""; $2=""; print $0}' | sort | uniq -c | sort -nr | head -n 10
else
  echo
  echo "App Log not found at $LOG_APP"
fi
echo
echo "Done."