package orchestrator_test

import (
	"context"
	"path/filepath"
	"testing"
	"time"

	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/event"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/orchestrator"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/store"
)

// TestSweepOrphanedAgents is the V102 regression: agents whose parent
// run already reached a terminal status must be marked terminated. They
// were accumulating across server restarts (43 in the real DB at the time
// of fix) because the per-run cleanup didn't survive the restart.
//
// Fixture: run r-dead (status 'killed') owns two 'running' agents and run
// r-alive (status 'running') owns one. The sweep must report 2 swept
// agents, leave both orphans 'terminated', and leave a-live 'running'.
func TestSweepOrphanedAgents(t *testing.T) {
	tmp := t.TempDir()
	st, err := store.Open(filepath.Join(tmp, "harness.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	bus := event.NewBus(st)
	orch := orchestrator.New(st, bus)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// mustExec runs one fixture statement and aborts the test if it fails,
	// so a broken fixture is reported as such instead of surfacing later as
	// a misleading assertion failure.
	mustExec := func(query string, args ...interface{}) {
		t.Helper()
		if _, err := st.DB().Exec(query, args...); err != nil {
			t.Fatalf("fixture setup %q: %v", query, err)
		}
	}

	now := store.FmtTime(store.Now())
	orphans := []string{"a-orphan-1", "a-orphan-2"}

	// Set up: one killed run with 2 leaked agents, one running run with 1 agent.
	mustExec(`INSERT INTO runs(id, started_at, status, failure_category) VALUES('r-dead', ?, 'killed', 'manual')`, now)
	mustExec(`INSERT INTO runs(id, started_at, status) VALUES('r-alive', ?, 'running')`, now)
	for _, id := range orphans {
		mustExec(`INSERT INTO agents(id, run_id, status, spawned_at) VALUES(?, 'r-dead', 'running', ?)`, id, now)
	}
	mustExec(`INSERT INTO agents(id, run_id, status, spawned_at) VALUES('a-live', 'r-alive', 'running', ?)`, now)

	swept, err := orch.SweepOrphanedAgents(ctx)
	if err != nil {
		t.Fatalf("SweepOrphanedAgents: %v", err)
	}
	if swept != 2 {
		t.Errorf("swept = %d, want 2", swept)
	}

	// Orphans now terminated; alive agent untouched.
	for _, id := range orphans {
		var status string
		if err := st.DB().QueryRow(`SELECT status FROM agents WHERE id=?`, id).Scan(&status); err != nil {
			// Keep going so the other agent is still checked.
			t.Errorf("reading status of agent %s: %v", id, err)
			continue
		}
		if status != "terminated" {
			t.Errorf("agent %s status = %q, want \"terminated\"", id, status)
		}
	}
	var liveStatus string
	if err := st.DB().QueryRow(`SELECT status FROM agents WHERE id='a-live'`).Scan(&liveStatus); err != nil {
		t.Fatalf("reading status of agent a-live: %v", err)
	}
	if liveStatus != "running" {
		t.Errorf("live agent status = %q, want \"running\" (sweep too aggressive)", liveStatus)
	}
}

// TestEndRunSucceedsUnderCanceledCtx is the V101 regression: when ctx is
// canceled (the canonical "kill the run" case), EndRun must still write
// the terminal state to the DB. Otherwise the run stays 'running' forever
// even though the user pressed Ctrl-C.
//
// The test creates a run, cancels the context, then calls EndRun with that
// canceled context. It asserts the runs row carries status 'killed' and
// failure_category 'ctx_canceled', and that at least one 'run.killed'
// event was recorded for the run.
func TestEndRunSucceedsUnderCanceledCtx(t *testing.T) {
	tmp := t.TempDir()
	st, err := store.Open(filepath.Join(tmp, "harness.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	bus := event.NewBus(st)
	orch := orchestrator.New(st, bus)

	parentCtx, parentCancel := context.WithCancel(context.Background())
	// Calling a CancelFunc more than once is a no-op; the defer only
	// guarantees release if the test exits before the explicit cancel below.
	defer parentCancel()

	// Without a created run the assertions below would be meaningless, so a
	// CreateRun failure aborts the test.
	if err := orch.CreateRun(parentCtx, "run-cancel", "ctx-cancel test"); err != nil {
		t.Fatalf("CreateRun: %v", err)
	}
	parentCancel() // simulate Ctrl-C

	// EndRun called with a canceled ctx must still write the row.
	outcome := orchestrator.RunOutcome{
		Status:          orchestrator.RunKilled,
		FailureCategory: orchestrator.FailCtxCanceled,
		FailureDetail:   "test cancel",
	}
	if err := orch.EndRun(parentCtx, "run-cancel", outcome); err != nil {
		t.Fatalf("EndRun under canceled ctx: %v", err)
	}

	var status, fcat string
	if err := st.DB().QueryRow(`SELECT status, IFNULL(failure_category,'') FROM runs WHERE id='run-cancel'`).Scan(&status, &fcat); err != nil {
		t.Fatalf("reading run-cancel row: %v", err)
	}
	if status != "killed" {
		t.Errorf("status under canceled ctx = %q, want \"killed\"", status)
	}
	if fcat != "ctx_canceled" {
		t.Errorf("failure_category = %q, want \"ctx_canceled\"", fcat)
	}

	// And the run.killed event must have landed in the events table even
	// though ctx was canceled (audit trail requirement).
	var runKilledEvents int
	if err := st.DB().QueryRow(`SELECT COUNT(*) FROM events WHERE run_id='run-cancel' AND kind='run.killed'`).Scan(&runKilledEvents); err != nil {
		t.Fatalf("counting run.killed events: %v", err)
	}
	if runKilledEvents < 1 {
		t.Errorf("no run.killed event emitted under canceled ctx — audit trail lost")
	}
}

// TestSweepOrphanedRuns is the V93 regression: a 'running' run from an
// interrupted previous session must be terminated by the orphan sweep so
// it doesn't poison cross-run analytics forever.
//
// Fixture: r-orphan started two hours ago and r-fresh started now, both
// 'running'. The sweep is called with a one-hour duration and must report
// 1 swept run, leave r-orphan as 'killed' with failure_category 'stalled',
// and leave r-fresh 'running'.
func TestSweepOrphanedRuns(t *testing.T) {
	tmp := t.TempDir()
	st, err := store.Open(filepath.Join(tmp, "harness.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	bus := event.NewBus(st)
	orch := orchestrator.New(st, bus)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// mustExec runs one fixture statement and aborts the test if it fails.
	mustExec := func(query string, args ...interface{}) {
		t.Helper()
		if _, err := st.DB().Exec(query, args...); err != nil {
			t.Fatalf("fixture setup %q: %v", query, err)
		}
	}

	// Insert an old 'running' run with no recent events.
	oldStart := store.FmtTime(store.Now().Add(-2 * time.Hour))
	mustExec(`INSERT INTO runs(id, started_at, status) VALUES('r-orphan', ?, 'running')`, oldStart)
	// And a recent 'running' run that should NOT be swept.
	recentStart := store.FmtTime(store.Now())
	mustExec(`INSERT INTO runs(id, started_at, status) VALUES('r-fresh', ?, 'running')`, recentStart)

	swept, err := orch.SweepOrphanedRuns(ctx, 1*time.Hour)
	if err != nil {
		t.Fatalf("SweepOrphanedRuns: %v", err)
	}
	if swept != 1 {
		t.Errorf("swept = %d, want 1", swept)
	}

	// r-orphan should now be killed.
	var orphanStatus, orphanCategory string
	if err := st.DB().QueryRow(`SELECT status, IFNULL(failure_category,'') FROM runs WHERE id='r-orphan'`).Scan(&orphanStatus, &orphanCategory); err != nil {
		t.Fatalf("reading r-orphan row: %v", err)
	}
	if orphanStatus != "killed" {
		t.Errorf("orphan run status = %q, want \"killed\"", orphanStatus)
	}
	if orphanCategory != "stalled" {
		t.Errorf("orphan run failure_category = %q, want \"stalled\"", orphanCategory)
	}

	// r-fresh stays running. A dedicated variable is used so that a failed
	// scan cannot leave the orphan's status behind and produce a misleading
	// "sweep was too aggressive" report.
	var freshStatus string
	if err := st.DB().QueryRow(`SELECT status FROM runs WHERE id='r-fresh'`).Scan(&freshStatus); err != nil {
		t.Fatalf("reading r-fresh row: %v", err)
	}
	if freshStatus != "running" {
		t.Errorf("fresh run status = %q, want \"running\" (sweep was too aggressive)", freshStatus)
	}
}

// TestEndRunAbandonsDanglingTasks is the regression test for V18 + the
// abandoned/failed split. When a run is killed (max_wall, max_turns,
// stalled, cost_ceiling, etc.), all non-terminal tasks must transition to
// `abandoned` — NOT `failed`. Abandoned means "the work was interrupted
// by run-end, not by an intrinsic task error"; failed means "the agent
// hit a real error". Without this split, dashboards conflate timed-out-
// but-working with actually-broken, and graders count abandoned tasks
// as silent failures even though no diagnostic exists for them.
//
// Fixture: run-kill with three non-terminal tasks (in_progress,
// awaiting_clarification, awaiting_subtask) and one task already in
// 'failed'. After EndRun with RunKilled/FailMaxWall the test expects the
// three dangling tasks to be 'abandoned', the failed task unchanged,
// exactly three task.abandoned events, and zero task.failed events.
func TestEndRunAbandonsDanglingTasks(t *testing.T) {
	tmp := t.TempDir()
	st, err := store.Open(filepath.Join(tmp, "harness.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	bus := event.NewBus(st)
	orch := orchestrator.New(st, bus)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// The task rows below reference run-kill, so a failed CreateRun
	// invalidates everything that follows.
	if err := orch.CreateRun(ctx, "run-kill", "V18 regression"); err != nil {
		t.Fatalf("CreateRun: %v", err)
	}

	now := store.FmtTime(store.Now())
	// Three tasks in various non-terminal states + one already-failed task
	// (to verify we don't reset terminal rows).
	fixtures := []struct{ id, title, state string }{
		{"t-ip", "t1", "in_progress"},
		{"t-cl", "t2", "awaiting_clarification"},
		{"t-su", "t3", "awaiting_subtask"},
		{"t-ok", "t4", "failed"},
	}
	for _, f := range fixtures {
		_, err := st.DB().Exec(
			`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at) VALUES(?,'run-kill',?,?,0,?,?)`,
			f.id, f.title, f.state, now, now,
		)
		if err != nil {
			t.Fatalf("inserting fixture task %s: %v", f.id, err)
		}
	}

	if err := orch.EndRun(ctx, "run-kill", orchestrator.RunOutcome{
		Status: orchestrator.RunKilled, FailureCategory: orchestrator.FailMaxWall,
	}); err != nil {
		t.Fatalf("EndRun: %v", err)
	}

	// All three dangling tasks must now be 'abandoned' — the run-end
	// disposition for in-flight work. 'failed' is reserved for real
	// per-task errors.
	for _, tid := range []string{"t-ip", "t-cl", "t-su"} {
		var state string
		if err := st.DB().QueryRow(`SELECT state FROM tasks WHERE id=?`, tid).Scan(&state); err != nil {
			// Keep going so the remaining tasks are still checked.
			t.Errorf("reading state of task %s: %v", tid, err)
			continue
		}
		if state != "abandoned" {
			t.Errorf("task %s after EndRun(killed) = %q, want abandoned", tid, state)
		}
	}
	// The already-failed task should NOT have been touched.
	var preExistingFailed string
	if err := st.DB().QueryRow(`SELECT state FROM tasks WHERE id='t-ok'`).Scan(&preExistingFailed); err != nil {
		t.Fatalf("reading state of task t-ok: %v", err)
	}
	if preExistingFailed != "failed" {
		t.Errorf("pre-existing failed task got mutated: state=%q", preExistingFailed)
	}

	// task.abandoned events should exist for the three dangling tasks; the
	// already-failed task should NOT get an abandoned event.
	var abandonedEvents int
	if err := st.DB().QueryRow(`SELECT COUNT(*) FROM events WHERE run_id='run-kill' AND kind='task.abandoned'`).Scan(&abandonedEvents); err != nil {
		t.Fatalf("counting task.abandoned events: %v", err)
	}
	if abandonedEvents != 3 {
		t.Errorf("task.abandoned event count = %d, want 3", abandonedEvents)
	}
	// And no task.failed events were emitted for the run-end cleanup —
	// the only failed event allowed would be from the pre-existing failed
	// row (which we did NOT add an event for in setup, so 0 is correct).
	var failedEvents int
	if err := st.DB().QueryRow(`SELECT COUNT(*) FROM events WHERE run_id='run-kill' AND kind='task.failed'`).Scan(&failedEvents); err != nil {
		t.Fatalf("counting task.failed events: %v", err)
	}
	if failedEvents != 0 {
		t.Errorf("task.failed event count = %d, want 0 (run-kill cleanup uses task.abandoned now)", failedEvents)
	}
}