package evaluator

import (
	"context"
	"path/filepath"
	"testing"

	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/event"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/store"
)

// newReg returns a Registry for use in a single test. The backing store is
// opened at "harness.db" inside the test's temporary directory and is closed
// via t.Cleanup; the event bus is built on top of the same store. Any error
// from opening the store aborts the test immediately.
func newReg(t *testing.T) *Registry {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "harness.db")
	s, openErr := store.Open(dbPath)
	if openErr != nil {
		t.Fatal(openErr)
	}
	t.Cleanup(func() { s.Close() })

	bus := event.NewBus(s)
	return NewRegistry(s, bus)
}

// TestMultiJudgeProducesMultipleEvaluations registers a heuristic evaluator
// and an LLM judge with a fixed callback, runs EvaluateAll against a single
// artifact target, and expects exactly two evaluation IDs back.
func TestMultiJudgeProducesMultipleEvaluations(t *testing.T) {
	r := newReg(t)
	ctx := context.Background()

	// Registration failures are fatal: without both evaluators registered the
	// count assertion below would fail for a reason unrelated to EvaluateAll.
	if err := r.Register(ctx, HeuristicEvaluator{Name: "heur"}); err != nil {
		t.Fatalf("register heur: %v", err)
	}
	judge := LLMJudge{Name: "judge", Callback: func(ctx context.Context, target Target) ([]Score, string, error) {
		return []Score{{Dimension: "correctness", Value: 0.7, Rationale: "good"},
			{Dimension: "overall", Value: 0.75, Rationale: "good"}}, "ok", nil
	}}
	if err := r.Register(ctx, judge); err != nil {
		t.Fatalf("register judge: %v", err)
	}

	ids, err := r.EvaluateAll(ctx, Target{Kind: TargetArtifact, ID: "art-1", Body: "# Title\n\nbody body body body body"})
	if err != nil {
		t.Fatal(err)
	}
	if len(ids) != 2 {
		t.Errorf("got %d evaluations, want 2", len(ids))
	}
}

// TestCalibrationAgainstUserRating evaluates one artifact with two evaluators,
// records a user rating of 1.0 for the same target, and then reads the
// agreed_with_user column from the evaluations table to check that "heur" is
// marked as agreeing with the user and "bad-judge" is not.
func TestCalibrationAgainstUserRating(t *testing.T) {
	r := newReg(t)
	ctx := context.Background()

	// Heuristic says high overall; user agrees → agreed_with_user=1.
	if err := r.Register(ctx, HeuristicEvaluator{Name: "heur"}); err != nil {
		t.Fatalf("register heur: %v", err)
	}
	// Bad judge: rates everything 0.1.
	badJudge := LLMJudge{Name: "bad-judge", Callback: func(ctx context.Context, target Target) ([]Score, string, error) {
		return []Score{{Dimension: "overall", Value: 0.1, Rationale: "always-bad"}}, "ok", nil
	}}
	if err := r.Register(ctx, badJudge); err != nil {
		t.Fatalf("register bad-judge: %v", err)
	}

	body := "# Header\n\n" + repeatChar(' ', 1000)
	target := Target{Kind: TargetArtifact, ID: "art-2", Body: body}
	if _, err := r.EvaluateAll(ctx, target); err != nil {
		t.Fatal(err)
	}
	if err := r.RecordUserRating(ctx, target, 1.0); err != nil {
		t.Fatal(err)
	}

	// Verify: heur agrees, bad-judge does not.
	rows, err := r.St.DB().Query(`SELECT evaluator_id, agreed_with_user FROM evaluations WHERE target_id='art-2' AND evaluator_id<>'user'`)
	if err != nil {
		// Must stop here: rows is nil on error, so the deferred Close below
		// would panic instead of reporting the real failure.
		t.Fatalf("query evaluations: %v", err)
	}
	defer rows.Close()

	agreement := map[string]int{}
	for rows.Next() {
		var evaluatorID string
		var agreed int
		if err := rows.Scan(&evaluatorID, &agreed); err != nil {
			t.Fatalf("scan evaluation row: %v", err)
		}
		agreement[evaluatorID] = agreed
	}
	// Next returns false both at end-of-rows and on error; Err tells them apart.
	if err := rows.Err(); err != nil {
		t.Fatalf("iterate evaluation rows: %v", err)
	}

	if agreement["heur"] != 1 {
		t.Errorf("heur agreement = %v, want 1", agreement["heur"])
	}
	// A missing map key reads as 0, so check presence explicitly; otherwise
	// the "want 0" assertion would pass even if no row was ever written.
	got, ok := agreement["bad-judge"]
	if !ok {
		t.Errorf("no evaluation row found for bad-judge")
	} else if got != 0 {
		t.Errorf("bad-judge agreement = %v, want 0", got)
	}
}

// repeatChar returns a string made of the byte c repeated n times.
// The bytes are copied verbatim, so values >= 0x80 are not UTF-8 encoded.
func repeatChar(c byte, n int) string {
	buf := make([]byte, n)
	for pos := 0; pos < len(buf); pos++ {
		buf[pos] = c
	}
	return string(buf)
}
