package runtime_test

// Tier-2 integration tests for the LLM-master rebuild. Each test exercises
// ONE layer of the chain (master delegates, worker self-reports, refusal
// fallback, query_registry shape, post-inbox hook) with a scripted runtime
// so failures localize before they get blamed on full-stack orchestration.
//
// Companion to internal/harness/runtime/introspection_peers_test.go (the
// {{peers}} regression test that found the original root cause).

import (
	"context"
	"path/filepath"
	"sync/atomic"
	"testing"
	"time"

	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/envelope"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/event"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/runtime"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/runtime/scripted"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/store"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/transport"
)

// testRig spins up the minimal store+bus+queue+scripted-runtime needed for
// these tests. Anything more (orchestrator, master, runlive) belongs in a
// higher tier — keeping these tests small is the point.
//
// Build one with newRig; the fields are read directly by the tests below.
type testRig struct {
	// st is the store opened by newRig on a harness.db file inside the
	// test's temp directory.
	st     *store.Store
	// bus is the event bus constructed on top of st.
	bus    *event.Bus
	// queue is the transport queue built from st and bus; newRig sets its
	// BackoffBase and BackoffMax to zero.
	queue  *transport.Queue
	// rt is the scripted runtime whose responses each test installs with
	// SetScript.
	rt     *scripted.Runtime
	// runID is the id of the runs row newRig inserts; helpers reuse it when
	// inserting dependent rows.
	runID  string
	// events receives a copy of every bus event relayed by newRig's
	// background goroutine. It is buffered (256); events are dropped when it
	// is full, so tests assert presence rather than exact counts.
	events chan event.Event
	// stop halts the relay goroutine, cancels the bus subscription and
	// closes the store. newRig registers it with t.Cleanup.
	stop   func()
}

// newRig builds a testRig backed by a fresh store file under t.TempDir(),
// subscribes to the bus, starts a goroutine relaying bus events into
// rig.events, and inserts a runs row for runID with status 'running'.
//
// Teardown (rig.stop) is registered with t.Cleanup BEFORE the runs row is
// inserted, so a failing insert no longer leaks the store, the subscription
// or the relay goroutine. rig.stop is idempotent: calling it manually in a
// test and again from t.Cleanup is safe.
//
// Any setup error aborts the test via t.Fatal.
func newRig(t *testing.T, runID string) *testRig {
	t.Helper()
	st, err := store.Open(filepath.Join(t.TempDir(), "harness.db"))
	if err != nil {
		t.Fatal(err)
	}
	bus := event.NewBus(st)
	q := transport.New(st, bus)
	// Zero out Nack backoff so tests that call HandleOne in a tight loop
	// can re-receive the message immediately instead of waiting the
	// 250ms→500ms→1s default schedule.
	q.BackoffBase = 0
	q.BackoffMax = 0
	rt := scripted.New()

	// Subscribe BEFORE inserting the run row so we don't miss early events.
	ch, cancel := bus.Subscribe(256)

	rig := &testRig{
		st: st, bus: bus, queue: q, rt: rt, runID: runID,
		events: make(chan event.Event, 256),
	}

	stopRelay := make(chan struct{})
	relayDone := make(chan struct{})
	go func() {
		defer close(relayDone)
		for {
			select {
			case <-stopRelay:
				return
			case ev, ok := <-ch:
				if !ok {
					return
				}
				select {
				case rig.events <- ev:
				default:
					// drop — tests assert presence, not exact count
				}
			}
		}
	}()

	var stopped int32
	rig.stop = func() {
		// Guard against a second call: closing stopRelay twice would panic.
		if !atomic.CompareAndSwapInt32(&stopped, 0, 1) {
			return
		}
		close(stopRelay)
		cancel()
		// Wait for the relay goroutine to exit before the store goes away so
		// nothing outlives the test.
		<-relayDone
		// Best-effort close during teardown; there is nothing useful to do
		// with a close error once the test body has finished.
		_ = st.Close()
	}
	t.Cleanup(rig.stop)

	if _, err := st.DB().Exec(`INSERT INTO runs(id, started_at, status) VALUES(?, ?, 'running')`,
		runID, store.FmtTime(store.Now())); err != nil {
		t.Fatal(err)
	}
	return rig
}

// insertAgent seeds a single agents row (status 'running', with spawned_at
// and heartbeat_at both set to the current time) for the rig's run. Per the
// original note, the orchestrator creates this row in production and the
// runtime expects it to exist before Spawn / HandleOne; here the test does
// it inline. A failed insert aborts the test.
func (r *testRig) insertAgent(t *testing.T, id, role, provider string) {
	t.Helper()
	const insertSQL = `INSERT INTO agents(id, run_id, role, provider, status, spawned_at, heartbeat_at)
		 VALUES(?, ?, ?, ?, 'running', ?, ?)`
	stamp := store.FmtTime(store.Now())
	if _, err := r.st.DB().Exec(insertSQL, id, r.runID, role, provider, stamp, stamp); err != nil {
		t.Fatal(err)
	}
}

// waitForEvent consumes rig.events until it sees an event whose Kind equals
// kind and which satisfies predicate (a nil predicate accepts any event of
// that kind). The matching event is returned so callers can inspect its
// payload. Non-matching events are discarded. If timeout elapses first the
// test is failed with t.Fatalf.
func (r *testRig) waitForEvent(t *testing.T, kind string, predicate func(event.Event) bool, timeout time.Duration) event.Event {
	t.Helper()
	expired := time.After(timeout)
	for {
		select {
		case ev := <-r.events:
			if string(ev.Kind) == kind && (predicate == nil || predicate(ev)) {
				return ev
			}
		case <-expired:
			t.Fatalf("timed out waiting for event kind=%q", kind)
			return event.Event{}
		}
	}
}

// hasEventWithin reports whether an event of the given kind that satisfies
// predicate (nil accepts any) shows up on rig.events before timeout elapses.
// It never fails the test itself; for "must NOT fire" checks, assert that it
// returns false. Non-matching events read while waiting are discarded.
func (r *testRig) hasEventWithin(kind string, predicate func(event.Event) bool, timeout time.Duration) bool {
	accept := func(ev event.Event) bool {
		if string(ev.Kind) != kind {
			return false
		}
		return predicate == nil || predicate(ev)
	}

	timedOut := time.After(timeout)
	for {
		select {
		case ev := <-r.events:
			if accept(ev) {
				return true
			}
		case <-timedOut:
			return false
		}
	}
}

// ---------------------------------------------------------------------
// 1. Master emits delegate on user prompt
// ---------------------------------------------------------------------

// TestMasterAgentEmitsDelegateOnUserPrompt is the LLM-mode happy path:
// a from=user delegate envelope arrives at the master; the scripted LLM
// emits a `delegate` tool call; the harness routes it to the worker
// (via ResolveRoleAgent) and the envelope lands in the worker's inbox.
//
// What this guards against:
//   - terminalAction detection wrongly classifying `delegate` as a refusal
//     and nacking the master message
//   - ResolveRoleAgent not being consulted for delegate (would send to
//     literal role string and the queue would drop)
//   - the child task not being created (delegate's preflight)
//
// Every setup step (including Spawn) is checked so a broken fixture fails
// at the step that broke rather than as a later event timeout.
func TestMasterAgentEmitsDelegateOnUserPrompt(t *testing.T) {
	rig := newRig(t, "run-md1")
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	rig.insertAgent(t, "master-1", "master", "scripted")
	rig.insertAgent(t, "fe-worker-1", "fe-worker", "scripted")

	// The master's single scripted turn: delegate to the fe-worker role.
	rig.rt.SetScript("master-1", []*runtime.LLMResponse{{
		Text:   "Routing to fe-worker.",
		Tokens: runtime.TokenUsage{Prompt: 100, Completion: 50},
		ToolCalls: []runtime.ToolCall{{
			Name: "delegate",
			Args: map[string]any{
				"to":     "fe-worker",
				"intent": "self-report",
			},
		}},
	}})
	if _, err := rig.rt.Spawn(ctx, runtime.SpawnSpec{AgentID: "master-1", RunID: rig.runID}); err != nil {
		t.Fatalf("spawn master-1: %v", err)
	}

	master := runtime.NewAgent(rig.rt, rig.st, rig.bus, rig.queue,
		"master-1", "master", rig.runID,
		nil, []string{"delegate", "send_message", "query_registry"}, "scripted", 0)
	// Map the role name used in the tool call to the concrete agent id.
	master.ResolveRoleAgent = func(role string) string {
		if role == "fe-worker" {
			return "fe-worker-1"
		}
		return ""
	}

	// Seed a root task so the delegate tool's "incoming.TaskID required"
	// preflight passes.
	now := store.FmtTime(store.Now())
	if _, err := rig.st.DB().Exec(
		`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at)
		 VALUES(?,?,?,?,0,?,?)`,
		"task-md1-root", rig.runID, "user prompt", "in_progress", now, now,
	); err != nil {
		t.Fatal(err)
	}

	userMsg := &envelope.Envelope{
		ID:      "msg-user-md1",
		RunID:   rig.runID,
		From:    "user",
		To:      "master-1",
		Type:    envelope.TypeDelegate,
		TaskID:  "task-md1-root",
		TTLMs:   60000,
		Payload: envelope.Payload{Intent: "self-report everyone", Expects: envelope.ExpectsReport},
	}
	if err := rig.queue.Send(ctx, userMsg); err != nil {
		t.Fatal(err)
	}

	handled, err := master.HandleOne(ctx)
	if err != nil {
		t.Fatalf("master.HandleOne err=%v", err)
	}
	if !handled {
		t.Fatal("expected master to handle one message")
	}

	// The delegation must be announced on the bus with the RESOLVED agent
	// id, not the role string.
	rig.waitForEvent(t, "agent.delegated", func(ev event.Event) bool {
		to, _ := ev.Payload["to_agent"].(string)
		return to == "fe-worker-1"
	}, 2*time.Second)

	// Negative assertion: the master message must NOT have been nacked
	// (terminalAction detection works).
	nacked := rig.hasEventWithin("agent.handle_one.nacked", func(ev event.Event) bool {
		mid, _ := ev.Payload["message_id"].(string)
		return mid == "msg-user-md1"
	}, 200*time.Millisecond)
	if nacked {
		t.Fatal("master message was nacked despite emitting delegate tool call")
	}

	// And the worker's inbox actually has the envelope (the routing worked
	// end-to-end through the transport, not just the bus event).
	got, err := rig.queue.Receive(ctx, "fe-worker-1")
	if err != nil {
		t.Fatal(err)
	}
	if got == nil {
		t.Fatal("worker inbox empty — delegate envelope was not delivered")
	}
	if got.Type != envelope.TypeDelegate || got.From != "master-1" {
		t.Fatalf("wrong envelope on worker: from=%s type=%s", got.From, got.Type)
	}
}

// ---------------------------------------------------------------------
// 2. Worker self-reports on introspection
// ---------------------------------------------------------------------

// TestWorkerAgentEmitsSelfReportOnIntrospection: the scripted worker
// receives an introspection-shaped delegate and emits a `self_report`
// tool call. Asserts the agent.self_report event fires with the
// expected identity fields. The tool is implicit, so it works even when
// the role allowlist is minimal.
//
// Setup errors (Spawn, task seed) fail the test immediately, matching the
// checked task seed in TestMasterAgentEmitsDelegateOnUserPrompt.
func TestWorkerAgentEmitsSelfReportOnIntrospection(t *testing.T) {
	rig := newRig(t, "run-wsr1")
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	rig.insertAgent(t, "fe-worker-1", "fe-worker", "scripted")

	rig.rt.SetScript("fe-worker-1", []*runtime.LLMResponse{{
		Text:   "Reporting in.",
		Tokens: runtime.TokenUsage{Prompt: 80, Completion: 40},
		ToolCalls: []runtime.ToolCall{{
			Name: "self_report",
			Args: map[string]any{"notes": "ready"},
		}},
	}})
	if _, err := rig.rt.Spawn(ctx, runtime.SpawnSpec{AgentID: "fe-worker-1", RunID: rig.runID}); err != nil {
		t.Fatalf("spawn fe-worker-1: %v", err)
	}

	worker := runtime.NewAgent(rig.rt, rig.st, rig.bus, rig.queue,
		"fe-worker-1", "fe-worker", rig.runID,
		[]string{"client/**/*"},
		// Note: self_report is implicit so we DON'T list it. Tests that
		// allowed() includes implicit tools.
		[]string{"write_file"}, "scripted", 0)
	worker.SubOrg = "frontend"
	worker.ArtifactsDir = filepath.Join(t.TempDir(), "fe-worker-1")

	// Seed the task the delegate envelope refers to.
	now := store.FmtTime(store.Now())
	if _, err := rig.st.DB().Exec(
		`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at)
		 VALUES(?,?,?,?,0,?,?)`,
		"task-wsr1", rig.runID, "self-report", "in_progress", now, now,
	); err != nil {
		t.Fatalf("seed task task-wsr1: %v", err)
	}

	deleg := &envelope.Envelope{
		ID:      "msg-wsr1",
		RunID:   rig.runID,
		From:    "master-1",
		To:      "fe-worker-1",
		Type:    envelope.TypeDelegate,
		TaskID:  "task-wsr1",
		TTLMs:   60000,
		Payload: envelope.Payload{Intent: "identify and self-report", Expects: envelope.ExpectsReport},
	}
	if err := rig.queue.Send(ctx, deleg); err != nil {
		t.Fatal(err)
	}

	if _, err := worker.HandleOne(ctx); err != nil {
		t.Fatalf("worker.HandleOne err=%v", err)
	}

	ev := rig.waitForEvent(t, "agent.self_report", func(ev event.Event) bool {
		aid, _ := ev.Payload["agent_id"].(string)
		return aid == "fe-worker-1"
	}, 2*time.Second)

	// Identity fields come from the agent's configuration; notes comes from
	// the scripted tool-call arguments.
	if got, _ := ev.Payload["role"].(string); got != "fe-worker" {
		t.Errorf("self_report role=%q want fe-worker", got)
	}
	if got, _ := ev.Payload["sub_org"].(string); got != "frontend" {
		t.Errorf("self_report sub_org=%q want frontend", got)
	}
	if got, _ := ev.Payload["notes"].(string); got != "ready" {
		t.Errorf("self_report notes=%q want 'ready'", got)
	}
}

// ---------------------------------------------------------------------
// 3. Refusal fallback fires after threshold
// ---------------------------------------------------------------------

// TestRefusalFallbackFiresAfterThreshold: scripted master returns ZERO
// tool calls on three consecutive user-prompt delegates targeting the
// SAME message id; the third turn should fire RefusalFallbackHook
// exactly once and Ack the message (so it stops bouncing).
//
// Implementation note: the queue's Nack on turn 1 + 2 makes the message
// visible again so the same agent can pick it up. Threshold = 3 (the
// runlive default).
//
// Setup errors (Spawn, task seed) and the final Receive error are checked so
// the "queue is empty" assertion cannot pass merely because Receive failed.
func TestRefusalFallbackFiresAfterThreshold(t *testing.T) {
	rig := newRig(t, "run-rf1")
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	rig.insertAgent(t, "master-1", "master", "scripted")

	// Three no-tool-call turns; if the test misbehaves and a fourth call
	// happens, default-to-empty so we don't panic.
	rig.rt.SetScript("master-1", []*runtime.LLMResponse{
		{Text: "Waiting on team."},
		{Text: "Still waiting."},
		{Text: "Continuing to wait."},
		{Text: "Should not be reached."},
	})
	if _, err := rig.rt.Spawn(ctx, runtime.SpawnSpec{AgentID: "master-1", RunID: rig.runID}); err != nil {
		t.Fatalf("spawn master-1: %v", err)
	}

	master := runtime.NewAgent(rig.rt, rig.st, rig.bus, rig.queue,
		"master-1", "master", rig.runID,
		nil, []string{"delegate", "send_message"}, "scripted", 0)
	master.SetRefusalThreshold(3)

	// Count hook invocations; atomic because the hook is invoked by the
	// agent rather than directly by this test.
	var fallbackCount int64
	master.RefusalFallbackHook = func(ctx context.Context, msg *envelope.Envelope) {
		atomic.AddInt64(&fallbackCount, 1)
	}

	now := store.FmtTime(store.Now())
	if _, err := rig.st.DB().Exec(
		`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at)
		 VALUES(?,?,?,?,0,?,?)`,
		"task-rf1", rig.runID, "user prompt", "in_progress", now, now,
	); err != nil {
		t.Fatalf("seed task task-rf1: %v", err)
	}

	userMsg := &envelope.Envelope{
		ID:      "msg-rf1",
		RunID:   rig.runID,
		From:    "user",
		To:      "master-1",
		Type:    envelope.TypeDelegate,
		TaskID:  "task-rf1",
		TTLMs:   60000,
		Payload: envelope.Payload{Intent: "self-report everyone", Expects: envelope.ExpectsReport},
	}
	if err := rig.queue.Send(ctx, userMsg); err != nil {
		t.Fatal(err)
	}

	// Three Handle cycles. Each non-terminal turn nacks (turns 1+2);
	// turn 3 hits the threshold and fires the fallback + acks.
	for turn := 1; turn <= 3; turn++ {
		if _, err := master.HandleOne(ctx); err != nil {
			t.Fatalf("HandleOne turn %d: %v", turn, err)
		}
	}

	if got := atomic.LoadInt64(&fallbackCount); got != 1 {
		t.Fatalf("RefusalFallbackHook fired %d times, want exactly 1", got)
	}

	// fired event must be present.
	rig.waitForEvent(t, "agent.refusal_fallback_fired", func(ev event.Event) bool {
		mid, _ := ev.Payload["message_id"].(string)
		return mid == "msg-rf1"
	}, 1*time.Second)

	// The fallback must have acked the message: nothing may be left for
	// master-1 to receive. The error is checked so a failing Receive is not
	// mistaken for an empty inbox.
	got, err := rig.queue.Receive(ctx, "master-1")
	if err != nil {
		t.Fatalf("Receive after fallback: %v", err)
	}
	if got != nil {
		t.Fatalf("master message still in queue after fallback ack: %s", got.ID)
	}
}

// ---------------------------------------------------------------------
// 4. query_registry returns the team shape
// ---------------------------------------------------------------------

// TestQueryRegistryReturnsTeamShape calls query_registry through the
// LLM tool path and asserts the emitted agent.query_registry event
// carries the LM-D-enriched fields (provider, heartbeat_at,
// in_flight_task_id). Pre-populates two sibling rows + an in-flight
// task so the in_flight branch isn't dead.
//
// Both task seeds and Spawn are checked: the in-flight seed in particular
// names the owner_agent_id column, and a silent failure there would only
// show up much later as a missing in_flight_task_id.
func TestQueryRegistryReturnsTeamShape(t *testing.T) {
	rig := newRig(t, "run-qr1")
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	rig.insertAgent(t, "master-1", "master", "scripted")
	rig.insertAgent(t, "fe-worker-1", "fe-worker", "claude-code")
	rig.insertAgent(t, "be-worker-1", "be-worker", "codex")

	// In-flight task owned by fe-worker-1; the registry row for that agent
	// is expected to reference it.
	now := store.FmtTime(store.Now())
	if _, err := rig.st.DB().Exec(
		`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at, owner_agent_id)
		 VALUES(?,?,?,?,0,?,?,?)`,
		"task-qr-active", rig.runID, "in-flight work", "in_progress", now, now, "fe-worker-1",
	); err != nil {
		t.Fatalf("seed task task-qr-active: %v", err)
	}

	rig.rt.SetScript("master-1", []*runtime.LLMResponse{{
		Text:   "Listing team.",
		Tokens: runtime.TokenUsage{Prompt: 60, Completion: 30},
		ToolCalls: []runtime.ToolCall{{
			Name: "query_registry",
			Args: map[string]any{"kind": "agents"},
		}},
	}})
	if _, err := rig.rt.Spawn(ctx, runtime.SpawnSpec{AgentID: "master-1", RunID: rig.runID}); err != nil {
		t.Fatalf("spawn master-1: %v", err)
	}

	master := runtime.NewAgent(rig.rt, rig.st, rig.bus, rig.queue,
		"master-1", "master", rig.runID,
		nil, []string{"query_registry", "delegate"}, "scripted", 0)

	// Master needs SOMETHING to handle. Send a non-user delegate so the
	// terminalAction-nack path doesn't fire (this test only cares about
	// query_registry's payload shape, not master's user-prompt behavior).
	if _, err := rig.st.DB().Exec(
		`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at)
		 VALUES(?,?,?,?,0,?,?)`,
		"task-qr-trigger", rig.runID, "trigger", "in_progress", now, now,
	); err != nil {
		t.Fatalf("seed task task-qr-trigger: %v", err)
	}
	trigger := &envelope.Envelope{
		ID:      "msg-qr-trigger",
		RunID:   rig.runID,
		From:    "fe-worker-1",
		To:      "master-1",
		Type:    envelope.TypeReport,
		TaskID:  "task-qr-trigger",
		TTLMs:   60000,
		Payload: envelope.Payload{Intent: "fyi", Expects: envelope.ExpectsNone},
	}
	if err := rig.queue.Send(ctx, trigger); err != nil {
		t.Fatal(err)
	}

	if _, err := master.HandleOne(ctx); err != nil {
		t.Fatalf("HandleOne: %v", err)
	}

	ev := rig.waitForEvent(t, "agent.query_registry", func(ev event.Event) bool {
		k, _ := ev.Payload["kind"].(string)
		return k == "agents"
	}, 2*time.Second)

	results, ok := ev.Payload["results"].([]map[string]any)
	if !ok {
		t.Fatalf("query_registry results not a []map[string]any: %T", ev.Payload["results"])
	}
	if len(results) != 3 {
		t.Fatalf("got %d agents in registry, want 3", len(results))
	}

	// Shape check: every row must carry provider + heartbeat_at; at least
	// one must carry in_flight_task_id (fe-worker-1 owns task-qr-active).
	sawInFlight := false
	for _, row := range results {
		if _, has := row["provider"]; !has {
			t.Errorf("row missing provider: %v", row)
		}
		if _, has := row["heartbeat_at"]; !has {
			t.Errorf("row missing heartbeat_at: %v", row)
		}
		if id, _ := row["agent_id"].(string); id != "fe-worker-1" {
			continue
		}
		if tid, _ := row["in_flight_task_id"].(string); tid == "task-qr-active" {
			sawInFlight = true
		}
	}
	if !sawInFlight {
		t.Errorf("fe-worker-1 row missing in_flight_task_id=task-qr-active; results=%v", results)
	}
}

// ---------------------------------------------------------------------
// 5. PostInboxHook fires after a successful turn
// ---------------------------------------------------------------------

// TestPostInboxHookFiresOnReport: install a PostInboxHook on a scripted
// agent, deliver a Report envelope, run one HandleOne cycle, assert the
// hook was called with the right message + response (the LM-F separation
// of LLM-consumption from side-effect dispatch).
//
// Setup errors (Spawn, task seed) fail the test immediately instead of
// being discarded.
func TestPostInboxHookFiresOnReport(t *testing.T) {
	rig := newRig(t, "run-ph1")
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	rig.insertAgent(t, "master-1", "master", "scripted")

	rig.rt.SetScript("master-1", []*runtime.LLMResponse{{
		Text:   "Acknowledged.",
		Tokens: runtime.TokenUsage{Prompt: 30, Completion: 10},
		// No tool calls — Report doesn't need to fan out. The post-inbox
		// hook is meant to fire even without tool calls (that's how master
		// in LLM mode learns "a worker reported back, complete the task").
	}})
	if _, err := rig.rt.Spawn(ctx, runtime.SpawnSpec{AgentID: "master-1", RunID: rig.runID}); err != nil {
		t.Fatalf("spawn master-1: %v", err)
	}

	master := runtime.NewAgent(rig.rt, rig.st, rig.bus, rig.queue,
		"master-1", "master", rig.runID,
		nil, []string{"delegate", "send_message"}, "scripted", 0)

	// Capture what the hook saw.
	// NOTE(review): seenMsgID/seenText are written without synchronization;
	// that is only safe if HandleOne invokes the hook synchronously before
	// returning — confirm against the agent implementation.
	var hookCalled int64
	var seenMsgID, seenText string
	master.PostInboxHook = func(ctx context.Context, msg *envelope.Envelope, resp *runtime.LLMResponse) {
		atomic.AddInt64(&hookCalled, 1)
		if msg != nil {
			seenMsgID = msg.ID
		}
		if resp != nil {
			seenText = resp.Text
		}
	}

	now := store.FmtTime(store.Now())
	if _, err := rig.st.DB().Exec(
		`INSERT INTO tasks(id, run_id, title, state, attempts, created_at, updated_at)
		 VALUES(?,?,?,?,0,?,?)`,
		"task-ph1", rig.runID, "child", "in_progress", now, now,
	); err != nil {
		t.Fatalf("seed task task-ph1: %v", err)
	}

	report := &envelope.Envelope{
		ID:      "msg-ph1-report",
		RunID:   rig.runID,
		From:    "fe-worker-1",
		To:      "master-1",
		Type:    envelope.TypeReport,
		TaskID:  "task-ph1",
		TTLMs:   60000,
		Payload: envelope.Payload{Intent: "done", Expects: envelope.ExpectsNone},
	}
	if err := rig.queue.Send(ctx, report); err != nil {
		t.Fatal(err)
	}

	if _, err := master.HandleOne(ctx); err != nil {
		t.Fatalf("HandleOne: %v", err)
	}

	if got := atomic.LoadInt64(&hookCalled); got != 1 {
		t.Fatalf("PostInboxHook fired %d times, want 1", got)
	}
	if seenMsgID != "msg-ph1-report" {
		t.Errorf("hook saw msg id %q, want msg-ph1-report", seenMsgID)
	}
	if seenText != "Acknowledged." {
		t.Errorf("hook saw response text %q, want 'Acknowledged.'", seenText)
	}
}
