Files
quiz/deploy/bootstrap.sh
ameer 55ecb1b396 fix(stress): port harnesses to v1.2 single-session API + remove WS-batch hang
Local API stress (lib.mjs / api_stress.mjs):
- setupSession now does login -> /admin/api/reset and returns sid="main".
  Drops the dead /admin/api/quizzes + /admin/api/sessions calls left over
  from the multi-quiz codex era.
- bootServer writes the fixture pool (STRESS_POOL by default) to a tmp
  file and passes QUIZ_POOL_PATH so the v1.2 server has a session at
  startup.
- happyPath: drop the post-connect lobby_update wait (race with snapshot
  dispatch) and stop double-driving the lifecycle (next() already opens
  the next question, an explicit open() afterwards is a no-op).
- cross_session: rewritten as "cookie not honored on a non-existent sid"
  since v1.2 hosts a single canonical session.

Live accuracy stress (live_accuracy.mjs):
- Per-student lobby-snapshot timeout (12s) with WS error/close rejection,
  so a stalled handshake no longer hangs Promise.all until the outer
  shell timeout (which produced the exit=124 cycles).
- Open all student WSs in parallel (mirrors what real students do); the
  batch-of-8 throttle was masking the question we wanted answered.
- Instructor WS open also bounded by a 15s race so any failure surfaces
  as actionable error text instead of a silent stall.

Bootstrap (deploy/bootstrap.sh):
- Stage 1 provisions a 2GB swap file (idempotent) with vm.swappiness=10.
  1GB-RAM ECS instances OOM-kill uvicorn under WS-burst start-of-class
  pressure; swap absorbs the spike without affecting steady state.
- Pool seeding prefers examples/demo10_pool.json over the 2-question
  example so a fresh deploy boots with a usable demo.

Pool fixture (examples/demo10_pool.json):
- 10-question generic-knowledge demo pool, gitignore exception added.
2026-05-03 04:16:23 +08:00

148 lines
5.0 KiB
Bash
Executable File

#!/usr/bin/env bash
# Live in-lecture quiz portal — VPS bootstrap.
# Idempotent: safe to re-run on a partially-bootstrapped host.
# Designed for: fresh Ubuntu 24.04 LTS, run as root.
#
# Usage (one-shot, on the VPS):
# curl -fsSL https://gitea.ahkhan.me/apps/quiz/raw/branch/master/deploy/bootstrap.sh | bash
#
# Override via env:
# DOMAIN=quiz.example.org curl ... | bash
# REPO_URL=https://... curl ... | bash
set -euo pipefail
REPO_URL="${REPO_URL:-https://gitea.ahkhan.me/apps/quiz.git}"
APP_DIR="${APP_DIR:-/opt/quiz}"
APP_USER="${APP_USER:-quiz}"
DOMAIN="${DOMAIN:-quiz.ahkhan.me}"
BRANCH="${BRANCH:-master}"
if [ "$(id -u)" != "0" ]; then
echo "bootstrap.sh must run as root" >&2
exit 1
fi
stage() { printf '\n==> Stage %s\n' "$*"; }
stage "1/10: provision 2GB swap (skip if /swapfile already present)"
# 1GB-RAM ECS instances OOM-kill uvicorn during ws-burst peaks (50+
# simultaneous WS handshakes during class start). 2GB swap absorbs
# transient pressure without touching steady-state behavior.
if [ ! -f /swapfile ]; then
fallocate -l 2G /swapfile
chmod 600 /swapfile
mkswap /swapfile >/dev/null
swapon /swapfile
grep -q '^/swapfile ' /etc/fstab || echo '/swapfile none swap sw 0 0' >> /etc/fstab
fi
# vm.swappiness=10 keeps active pages in RAM, only swap under real pressure.
echo 'vm.swappiness=10' > /etc/sysctl.d/99-quiz.conf
sysctl -p /etc/sysctl.d/99-quiz.conf >/dev/null
stage "2/10: apt update + base packages"
apt-get update -q
DEBIAN_FRONTEND=noninteractive apt-get install -y -q \
git curl ca-certificates gnupg \
python3 python3-venv python3-pip \
debian-keyring debian-archive-keyring apt-transport-https
stage "3/10: install Caddy (skip if present)"
if ! command -v caddy >/dev/null 2>&1; then
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' \
| gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/debian.deb.txt' \
| tee /etc/apt/sources.list.d/caddy-stable.list >/dev/null
apt-get update -q
apt-get install -y -q caddy
fi
stage "4/10: create $APP_USER system user (skip if present)"
if ! id "$APP_USER" >/dev/null 2>&1; then
useradd --system --shell /usr/sbin/nologin --home-dir "$APP_DIR" "$APP_USER"
fi
stage "5/10: clone or update repo into $APP_DIR"
if [ -d "$APP_DIR/.git" ]; then
git -C "$APP_DIR" fetch origin
git -C "$APP_DIR" reset --hard "origin/$BRANCH"
else
rm -rf "$APP_DIR"
git clone --branch "$BRANCH" "$REPO_URL" "$APP_DIR"
fi
chown -R "$APP_USER":"$APP_USER" "$APP_DIR"
stage "6/10: build venv + install dependencies"
sudo -u "$APP_USER" -H python3 -m venv "$APP_DIR/.venv"
sudo -u "$APP_USER" -H "$APP_DIR/.venv/bin/pip" install --quiet --upgrade pip
sudo -u "$APP_USER" -H "$APP_DIR/.venv/bin/pip" install --quiet -e "$APP_DIR"
stage "7/10: configure environment (.env)"
ENV_FILE="$APP_DIR/.env"
if [ ! -f "$ENV_FILE" ]; then
if [ -f /root/.quiz.env ]; then
echo "Using /root/.quiz.env"
cp /root/.quiz.env "$ENV_FILE"
else
# Need to prompt for the admin password; reattach TTY if curl|bash
# left stdin pointed at the pipe.
if [ ! -t 0 ] && [ -r /dev/tty ]; then
exec < /dev/tty
fi
if [ ! -t 0 ]; then
echo "ERROR: stdin is not a TTY and /root/.quiz.env is missing." >&2
echo "Either pre-populate /root/.quiz.env or run this script interactively." >&2
exit 1
fi
QUIZ_SECRET_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe(48))')
printf 'Admin password (input hidden): '
read -rs QUIZ_ADMIN_PASSWORD
echo
cat > "$ENV_FILE" <<EOF
QUIZ_DB_PATH=$APP_DIR/quiz.db
QUIZ_POOL_PATH=$APP_DIR/pool.json
QUIZ_SECRET_KEY=$QUIZ_SECRET_KEY
QUIZ_ADMIN_PASSWORD=$QUIZ_ADMIN_PASSWORD
QUIZ_HOST=127.0.0.1
QUIZ_PORT=8001
QUIZ_PUBLIC_URL=https://$DOMAIN
QUIZ_LOG_LEVEL=INFO
EOF
fi
chown "$APP_USER":"$APP_USER" "$ENV_FILE"
chmod 600 "$ENV_FILE"
fi
stage "8/10: seed pool.json (if not already present)"
POOL_FILE="$APP_DIR/pool.json"
if [ ! -f "$POOL_FILE" ]; then
SEED_POOL="$APP_DIR/examples/demo10_pool.json"
[ -f "$SEED_POOL" ] || SEED_POOL="$APP_DIR/examples/pool_example.json"
cp "$SEED_POOL" "$POOL_FILE"
chown "$APP_USER":"$APP_USER" "$POOL_FILE"
echo "Seeded $POOL_FILE from $(basename "$SEED_POOL"). Replace with your real pool when ready."
fi
stage "9/10: install systemd unit"
install -m 644 "$APP_DIR/deploy/quiz.service" /etc/systemd/system/quiz.service
systemctl daemon-reload
systemctl enable quiz.service
systemctl restart quiz.service
stage "10/10: configure Caddy"
sed "s/__DOMAIN__/$DOMAIN/g" "$APP_DIR/deploy/Caddyfile.tpl" > /etc/caddy/Caddyfile
systemctl reload caddy
echo
echo "==> Health check"
sleep 2
if curl -fs http://127.0.0.1:8001/healthz; then
echo
echo
echo "Bootstrap complete. Public URL: https://$DOMAIN"
else
echo
echo "Health check failed. Inspect: journalctl -u quiz.service -n 50"
exit 1
fi