fix(stress): port harnesses to v1.2 single-session API + remove WS-batch hang

Local API stress (lib.mjs / api_stress.mjs):
- setupSession now does login -> /admin/api/reset and returns sid="main".
  Drops the dead /admin/api/quizzes + /admin/api/sessions calls left over
  from the multi-quiz codex era.
- bootServer writes the fixture pool (STRESS_POOL by default) to a tmp
  file and passes QUIZ_POOL_PATH so the v1.2 server has a session at
  startup.
- happyPath: drop the post-connect lobby_update wait (race with snapshot
  dispatch) and stop double-driving the lifecycle (next() already opens
  the next question, so an explicit open() afterwards is a no-op).
- cross_session: rewritten as "cookie not honored on a non-existent sid"
  since v1.2 hosts a single canonical session.

Live accuracy stress (live_accuracy.mjs):
- Per-student lobby-snapshot timeout (12s) with WS error/close rejection,
  so a stalled handshake no longer hangs Promise.all until the outer
  shell timeout (which produced the exit=124 cycles).
- Open all student WSs in parallel (mirrors what real students do); the
  batch-of-8 throttle was masking the question we wanted answered.
- Instructor WS open also bounded by a 15s race so any failure surfaces
  as actionable error text instead of a silent stall.

Bootstrap (deploy/bootstrap.sh):
- Stage 1 provisions a 2GB swap file (idempotent) with vm.swappiness=10.
  On 1GB-RAM ECS instances, uvicorn gets OOM-killed under the start-of-class
  WS burst; swap absorbs the spike without affecting steady state.
- Pool seeding prefers examples/demo10_pool.json over the 2-question
  example so a fresh deploy boots with a usable demo.

Pool fixture (examples/demo10_pool.json):
- 10-question generic-knowledge demo pool, gitignore exception added.
This commit is contained in:
ameer
2026-05-03 04:16:23 +08:00
parent 2136286275
commit 55ecb1b396
6 changed files with 226 additions and 64 deletions

View File

@@ -55,13 +55,21 @@ async function happyPath(server) {
}));
const admin = new Admin(server.url, sid, jar);
await admin.connect();
await admin.waitFor("lobby_update");
// Don't wait on lobby_update from the snapshot; that's a race
// (snapshot dispatch can land before the listener attaches). The
// first thing we DO act on (a question_open we triggered) is a
// sufficient liveness signal for the admin WS.
for (let q = 0; q < STRESS_POOL.questions.length; q++) {
// Pre-register waiters so we don't lose the broadcast in the race window
// Pre-register waiters BEFORE triggering the broadcast so we don't
// lose the message in the race window.
const studentOpenWaits = students.map(s => s.waitFor("question_open"));
const adminOpenWait = admin.waitFor("question_open");
admin.open(q, 5);
// v1.2: advance_to_next handles the whole lifecycle (close prev +
// open next). Use open() only for the very first question from
// the lobby state.
if (q === 0) admin.open(q, 5);
else admin.next();
await adminOpenWait;
await Promise.all(studentOpenWaits);
// Each student picks a random answer (mostly correct)
@@ -78,14 +86,15 @@ async function happyPath(server) {
note("happy", `student${i} q${q}: ${e.message}`);
}
}));
const studentClosedWaits = students.map(s => s.waitFor("question_closed", { timeoutMs: 3000 }).catch(() => null));
const adminClosedWait = admin.waitFor("question_closed", { timeoutMs: 3000 });
admin.close();
await adminClosedWait;
await Promise.all(studentClosedWaits);
if (q < STRESS_POOL.questions.length - 1) {
admin.next();
await sleep(150);
// Only manually verify question_closed on the LAST question;
// intermediate closes happen implicitly inside admin.next() and
// do broadcast a question_closed, but we don't need to gate on it.
if (q === STRESS_POOL.questions.length - 1) {
const studentClosedWaits = students.map(s => s.waitFor("question_closed", { timeoutMs: 3000 }).catch(() => null));
const adminClosedWait = admin.waitFor("question_closed", { timeoutMs: 3000 });
admin.close();
await adminClosedWait;
await Promise.all(studentClosedWaits);
}
}
const sessionEndedWait = admin.waitFor("session_ended", { timeoutMs: 3000 });
@@ -247,23 +256,25 @@ async function cookieTampering(server) {
s.disconnect();
}
// Cross-session cookie: cookie from session A should not work on session B.
// Cross-session cookie: in v1.2 the server hosts a SINGLE canonical session
// ("main"), so cross-session reuse isn't a topology that exists at runtime.
// We instead assert the closest single-session analog: a cookie issued for
// sid="main" is rejected when used against a non-existent sid path.
async function crossSessionCookie(server) {
// Purpose: join a student on the session returned by setupSession, then try
// to open a student WebSocket on a DIFFERENT sid path while sending that
// student's cookie, and report (via expect) that the socket never opened.
//
// NOTE(review): this span looks like a diff shown without +/- markers, so
// several statements appear twice (pre-change line followed by post-change
// line). As written, the duplicate const/let declarations cannot coexist in
// one scope — confirm which line of each pair is live before running.

// NOTE(review): first of two `sidA` declarations; also sets up a second
// session (sidB) on the next line. Presumably the pre-change pair.
const { sid: sidA, jar: jarA } = await setupSession(server.url, server.adminPw, STRESS_POOL);
const { sid: sidB } = await setupSession(server.url, server.adminPw, STRESS_POOL);
// NOTE(review): second `sidA` declaration; only one session is set up here.
const { sid: sidA } = await setupSession(server.url, server.adminPw, STRESS_POOL);
const s = new Student(server.url, sidA, "X1", "CrossUser");
await s.join();
// Try to use sidA's cookie to access sidB
const wsUrl = server.url.replace(/^http/, "ws") + `/ws/student/${sidB}`;
let opened = false;
// NOTE(review): second `wsUrl` / `opened` declarations. This variant targets
// a hard-coded sid string instead of sidB and additionally tracks the close
// code delivered to the "close" handler.
const bogusSid = "not-a-real-session";
const wsUrl = server.url.replace(/^http/, "ws") + `/ws/student/${bogusSid}`;
let opened = false, closeCode = null;
// Settle on whichever happens first: open, close, error, or the 1500 ms timer.
// The promise never rejects; the outcome is read from `opened` afterwards.
await new Promise(res => {
// The student's cookie jar is sent as the Cookie header on the upgrade request
// (presumably s.jar.header() returns the serialized header value — verify).
const w = new WebSocket(wsUrl, { headers: { Cookie: s.jar.header() } });
// Reaching "open" is the failure case: record it and close straight away.
w.on("open", () => { opened = true; w.close(); res(); });
// NOTE(review): two "close" handlers follow; the second also records the
// close code. Presumably pre-change and post-change versions of one line.
w.on("close", () => res());
w.on("close", (c) => { closeCode = c; res(); });
w.on("error", () => res());
// Fallback so the test cannot hang if none of the events above fire.
setTimeout(res, 1500);
});
// NOTE(review): two expect() calls with different descriptions and payloads;
// both check the same condition (!opened). Presumably pre-/post-change lines.
expect(!opened, "cross_session", "cookie from sidA rejected when used against sidB", { opened });
expect(!opened, "cross_session", "cookie not honored against non-existent sid", { opened, closeCode });
}
// Duplicate student_id: two browsers join with same student_id (different cookies)