fix(stress): port harnesses to v1.2 single-session API + remove WS-batch hang
Local API stress (lib.mjs / api_stress.mjs):
- setupSession now does login -> /admin/api/reset and returns sid="main". Drops the dead /admin/api/quizzes + /admin/api/sessions calls left over from the multi-quiz codex era.
- bootServer writes the fixture pool (STRESS_POOL by default) to a tmp file and passes QUIZ_POOL_PATH so the v1.2 server has a session at startup.
- happyPath: drop the post-connect lobby_update wait (race with snapshot dispatch) and stop double-driving the lifecycle (next() already opens the next question, an explicit open() afterwards is a no-op).
- cross_session: rewritten as "cookie not honored on a non-existent sid" since v1.2 hosts a single canonical session.

Live accuracy stress (live_accuracy.mjs):
- Per-student lobby-snapshot timeout (12s) with WS error/close rejection, so a stalled handshake no longer hangs Promise.all until the outer shell timeout (which produced the exit=124 cycles).
- Open all student WSs in parallel (mirrors what real students do); the batch-of-8 throttle was masking the question we wanted answered.
- Instructor WS open also bounded by a 15s race so any failure surfaces as actionable error text instead of a silent stall.

Bootstrap (deploy/bootstrap.sh):
- Stage 1 provisions a 2GB swap file (idempotent) with vm.swappiness=10. 1GB-RAM ECS instances OOM-kill uvicorn under WS-burst start-of-class pressure; swap absorbs the spike without affecting steady state.
- Pool seeding prefers examples/demo10_pool.json over the 2-question example so a fresh deploy boots with a usable demo.

Pool fixture (examples/demo10_pool.json):
- 10-question generic-knowledge demo pool, gitignore exception added.
This commit is contained in:
@@ -25,14 +25,29 @@ fi
|
||||
|
||||
# Print a banner announcing a deploy stage.
# Arguments: the stage label; all arguments are joined into one string.
# Outputs:   a blank line followed by "==> Stage <label>" on stdout.
stage() {
  printf '\n%s %s\n' '==> Stage' "$*"
}
|
||||
|
||||
stage "1/9: apt update + base packages"
|
||||
stage "1/10: provision 2GB swap (skip if /swapfile already present)"
|
||||
# 1GB-RAM ECS instances OOM-kill uvicorn during ws-burst peaks (50+
|
||||
# simultaneous WS handshakes during class start). 2GB swap absorbs
|
||||
# transient pressure without touching steady-state behavior.
|
||||
# Provision /swapfile, then make sure it is active and listed in /etc/fstab.
#
# The file is built under a temporary name and renamed only after mkswap has
# run, so an existing /swapfile is always a formatted swap area even when an
# earlier run was interrupted part-way through.
if [ ! -f /swapfile ]; then
  rm -f /swapfile.tmp            # discard leftovers from an interrupted run
  fallocate -l 2G /swapfile.tmp
  chmod 600 /swapfile.tmp        # restrict permissions before formatting
  mkswap /swapfile.tmp >/dev/null
  mv /swapfile.tmp /swapfile     # same-directory rename publishes the finished file
fi
# Activation is checked separately from creation: previously an existing but
# inactive /swapfile was skipped entirely, so swap stayed off after a
# half-finished run. /proc/swaps lists the currently active swap areas.
if [ -f /swapfile ] && ! grep -q '^/swapfile[[:space:]]' /proc/swaps; then
  swapon /swapfile
fi
# Persist across reboots; add the fstab entry only once.
if [ -f /swapfile ]; then
  grep -q '^/swapfile ' /etc/fstab || echo '/swapfile none swap sw 0 0' >> /etc/fstab
fi
|
||||
# vm.swappiness=10 keeps active pages in RAM, only swap under real pressure.
|
||||
echo 'vm.swappiness=10' > /etc/sysctl.d/99-quiz.conf
|
||||
sysctl -p /etc/sysctl.d/99-quiz.conf >/dev/null
|
||||
|
||||
stage "2/10: apt update + base packages"
|
||||
apt-get update -q
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y -q \
|
||||
git curl ca-certificates gnupg \
|
||||
python3 python3-venv python3-pip \
|
||||
debian-keyring debian-archive-keyring apt-transport-https
|
||||
|
||||
stage "2/9: install Caddy (skip if present)"
|
||||
stage "3/10: install Caddy (skip if present)"
|
||||
if ! command -v caddy >/dev/null 2>&1; then
|
||||
curl -1sLf 'https://dl.cloudsmith.io/public/caddy/stable/gpg.key' \
|
||||
| gpg --dearmor -o /usr/share/keyrings/caddy-stable-archive-keyring.gpg
|
||||
@@ -42,12 +57,12 @@ if ! command -v caddy >/dev/null 2>&1; then
|
||||
apt-get install -y -q caddy
|
||||
fi
|
||||
|
||||
stage "3/9: create $APP_USER system user (skip if present)"
|
||||
stage "4/10: create $APP_USER system user (skip if present)"
|
||||
if ! id "$APP_USER" >/dev/null 2>&1; then
|
||||
useradd --system --shell /usr/sbin/nologin --home-dir "$APP_DIR" "$APP_USER"
|
||||
fi
|
||||
|
||||
stage "4/9: clone or update repo into $APP_DIR"
|
||||
stage "5/10: clone or update repo into $APP_DIR"
|
||||
if [ -d "$APP_DIR/.git" ]; then
|
||||
git -C "$APP_DIR" fetch origin
|
||||
git -C "$APP_DIR" reset --hard "origin/$BRANCH"
|
||||
@@ -57,12 +72,12 @@ else
|
||||
fi
|
||||
chown -R "$APP_USER":"$APP_USER" "$APP_DIR"
|
||||
|
||||
stage "5/9: build venv + install dependencies"
|
||||
stage "6/10: build venv + install dependencies"
|
||||
sudo -u "$APP_USER" -H python3 -m venv "$APP_DIR/.venv"
|
||||
sudo -u "$APP_USER" -H "$APP_DIR/.venv/bin/pip" install --quiet --upgrade pip
|
||||
sudo -u "$APP_USER" -H "$APP_DIR/.venv/bin/pip" install --quiet -e "$APP_DIR"
|
||||
|
||||
stage "6/9: configure environment (.env)"
|
||||
stage "7/10: configure environment (.env)"
|
||||
ENV_FILE="$APP_DIR/.env"
|
||||
if [ ! -f "$ENV_FILE" ]; then
|
||||
if [ -f /root/.quiz.env ]; then
|
||||
@@ -98,21 +113,23 @@ EOF
|
||||
chmod 600 "$ENV_FILE"
|
||||
fi
|
||||
|
||||
stage "7/9: seed pool.json (if not already present)"
|
||||
stage "8/10: seed pool.json (if not already present)"
|
||||
POOL_FILE="$APP_DIR/pool.json"
|
||||
if [ ! -f "$POOL_FILE" ]; then
|
||||
cp "$APP_DIR/examples/pool_example.json" "$POOL_FILE"
|
||||
SEED_POOL="$APP_DIR/examples/demo10_pool.json"
|
||||
[ -f "$SEED_POOL" ] || SEED_POOL="$APP_DIR/examples/pool_example.json"
|
||||
cp "$SEED_POOL" "$POOL_FILE"
|
||||
chown "$APP_USER":"$APP_USER" "$POOL_FILE"
|
||||
echo "Seeded $POOL_FILE from examples/pool_example.json — replace with your real pool when ready."
|
||||
echo "Seeded $POOL_FILE from $(basename "$SEED_POOL"). Replace with your real pool when ready."
|
||||
fi
|
||||
|
||||
stage "8/9: install systemd unit"
|
||||
stage "9/10: install systemd unit"
|
||||
install -m 644 "$APP_DIR/deploy/quiz.service" /etc/systemd/system/quiz.service
|
||||
systemctl daemon-reload
|
||||
systemctl enable quiz.service
|
||||
systemctl restart quiz.service
|
||||
|
||||
stage "9/9: configure Caddy"
|
||||
stage "10/10: configure Caddy"
|
||||
sed "s/__DOMAIN__/$DOMAIN/g" "$APP_DIR/deploy/Caddyfile.tpl" > /etc/caddy/Caddyfile
|
||||
systemctl reload caddy
|
||||
|
||||
|
||||
Reference in New Issue
Block a user