// Package cli is the non-interactive CLI runtime. Each LLM call shells out
// to `<cmd> <args> <prompt-via-stdin-or-arg>` and waits for the process to
// exit, capturing clean stdout. No TTY, no TUI rendering, no send-keys
// guessing.
//
// This is the production-grade real-CLI path:
//
//	claude -p "..."   non-interactive Claude Code one-shot mode
//	codex exec "..."  non-interactive Codex one-shot mode
//
// Each turn = one process. No session state shared across turns; the
// caller passes accumulated conversation history in the prompt or via
// --session-id (if the CLI supports it).
//
// Compared to the tmux/send-keys runtime this trades session continuity
// for reliability: every turn produces parseable stdout we can extract a
// harness-out block from. That's the right trade for harness use.
package cli

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/runtime"
	"github.com/flothus/tmux-xterm-research/server-go/internal/harness/runtime/tmux"
)

// Profile describes one non-interactive CLI mode: which executable to run,
// which static flags to pass, and how the prompt is handed to the process.
type Profile struct {
	// Name identifies the provider (e.g. "claude-print", "codex-exec").
	Name string
	// Command is the executable to run.
	Command string
	// Args are static flags placed before the prompt on the command line.
	Args []string
	// PromptVia controls how the prompt is delivered: "arg" (appended as
	// the last positional argument; an empty value is treated the same
	// way) or "stdin" (piped to the process's standard input).
	PromptVia string
	// TokenEstDivisor is the bytes-per-token divisor used to estimate
	// token usage from text length when the parsed output reports zero
	// for both prompt and completion tokens. Values <= 0 fall back to 4.0.
	TokenEstDivisor float64
}

// ClaudePrintProfile is the canonical configuration for `claude -p`.
// The prompt is passed as the final positional argument after -p
// (PromptVia is "arg"), not on stdin.
var ClaudePrintProfile = Profile{
	Name:            "claude-print",
	Command:         "claude",
	Args:            []string{"-p"},
	PromptVia:       "arg",
	TokenEstDivisor: 4.0,
}

// CodexExecProfile is the canonical configuration for `codex exec`.
// Subscription-billed (ChatGPT Plus/Pro) like the TUI — no API tokens.
// The prompt is passed as the final positional argument after `exec`
// (PromptVia is "arg").
var CodexExecProfile = Profile{
	Name:            "codex-exec",
	Command:         "codex",
	Args:            []string{"exec"},
	PromptVia:       "arg",
	TokenEstDivisor: 3.6,
}

// GeminiPrintProfile is the canonical non-interactive configuration for
// the Google `gemini` CLI. When the user has cached OAuth from a prior
// interactive login, this uses the same Code Assist free-tier quota as
// the TUI (Google's docs state quotas are "shared across interactive and
// agent modes"). If OAuth isn't cached the CLI falls back to
// GEMINI_API_KEY / GOOGLE_API_KEY (paid Gemini API). Pre-flight should
// verify OAuth is present if you want subscription billing.
//
// The prompt is passed as the final positional argument after -p
// (PromptVia is "arg").
var GeminiPrintProfile = Profile{
	Name:            "gemini-print",
	Command:         "gemini",
	Args:            []string{"-p"},
	PromptVia:       "arg",
	TokenEstDivisor: 4.0,
}

// Runtime implements runtime.Runtime by shelling out per turn. Agents are
// bookkeeping entries only; a subprocess exists just for the duration of
// one CallLLM invocation.
type Runtime struct {
	// Profile selects the executable, its static flags, and the prompt
	// delivery mode used for every invocation.
	Profile Profile
	// Timeout caps how long one process invocation may run.
	Timeout time.Duration
	// LogDir, when set, is the directory where per-agent transcript logs
	// are written ("<LogDir>/<agentID>.log"). Same path the tmux pipe-pane
	// log uses, so the UI's `/api/harness/agents/{id}/pane` endpoint works
	// uniformly across runtimes — without this, headless / cli print-mode
	// agents have no live-pane view because the endpoint only reads tmux
	// log files. Empty disables logging (tests / older callers stay silent).
	LogDir string

	// mu guards agents and the agentState values it points to.
	mu      sync.Mutex
	// agents maps agent ID to its per-agent state; entries are added by
	// Spawn.
	agents  map[string]*agentState
}

// NewRuntime returns a Runtime bound to profile p, with a 120-second
// per-invocation timeout and an empty agent registry. LogDir is left empty,
// so transcript logging stays off until the caller sets it.
func NewRuntime(p Profile) *Runtime {
	rt := new(Runtime)
	rt.Profile = p
	rt.Timeout = 120 * time.Second
	rt.agents = make(map[string]*agentState)
	return rt
}

// agentState is the per-agent bookkeeping kept by Runtime. Access is
// serialized through Runtime.mu.
type agentState struct {
	// systemPrompt is the prompt supplied at Spawn; buildPrompt emits it
	// only while history is still empty.
	systemPrompt string
	// history records each completed exchange as a user turn followed by
	// an assistant turn.
	history      []turn
	// status is "running" after Spawn and "terminated" after Terminate.
	status       string
}

// turn is one entry in an agent's recorded conversation history.
type turn struct {
	role    string // "user" or "assistant"
	content string // full prompt text (user) or raw CLI stdout (assistant)
}

// PreFlight checks that the configured CLI can be invoked end-to-end
// without stalling on an interactive modal (update prompt, EULA, auth
// wizard). It first resolves the command on PATH, then runs
// `<cli> --version` under a 5-second deadline derived from ctx.
//
// It returns nil on a clean exit. Otherwise the error says whether the
// binary is missing, the probe hit the deadline (the signature of a CLI
// blocked on a modal the harness cannot see), or the probe exited with an
// error; in the last case the trimmed combined output is included.
func (r *Runtime) PreFlight(ctx context.Context) error {
	bin := r.Profile.Command
	if _, lookErr := exec.LookPath(bin); lookErr != nil {
		return fmt.Errorf("cli: %s not on PATH: %w", bin, lookErr)
	}

	probeCtx, stop := context.WithTimeout(ctx, 5*time.Second)
	defer stop()

	output, runErr := exec.CommandContext(probeCtx, bin, "--version").CombinedOutput()
	switch {
	case probeCtx.Err() == context.DeadlineExceeded:
		// Checked before runErr: a deadline kill also yields a run error,
		// and the timeout explanation is the more useful one.
		return fmt.Errorf("cli: %s --version timed out after 5s (CLI likely waiting on an interactive modal — update prompt, auth flow, EULA)", bin)
	case runErr != nil:
		return fmt.Errorf("cli: %s --version exit: %w: %s", bin, runErr, strings.TrimSpace(string(output)))
	default:
		return nil
	}
}

// Spawn registers the agent described by spec and returns its ID. No
// subprocess is started here — that happens per CallLLM. The only
// up-front check is that the configured command resolves on PATH.
//
// Registering an ID that already exists replaces the previous state,
// including its recorded history.
func (r *Runtime) Spawn(ctx context.Context, spec runtime.SpawnSpec) (string, error) {
	if _, err := exec.LookPath(r.Profile.Command); err != nil {
		return "", fmt.Errorf("cli: %s not on PATH: %w", r.Profile.Command, err)
	}
	r.mu.Lock()
	// Runtime's configuration fields are exported, so it may be built as a
	// struct literal instead of via NewRuntime; create the map lazily so
	// the write below cannot panic on a nil map.
	if r.agents == nil {
		r.agents = make(map[string]*agentState)
	}
	r.agents[spec.AgentID] = &agentState{
		systemPrompt: spec.Prompt,
		status:       "running",
	}
	r.mu.Unlock()
	// Seed the per-agent transcript log so the UI's live-pane viewer has
	// something to show before the first turn fires. Same file the tmux
	// pane endpoint reads.
	r.appendLog(spec.AgentID, fmt.Sprintf("[harness] spawned via cli runtime (cmd=%s)\n[harness] system prompt:\n%s\n", r.Profile.Command, spec.Prompt))
	return spec.AgentID, nil
}

// appendLog appends body to the transcript file "<LogDir>/<agentID>.log",
// creating the directory and file when they do not exist yet. It is
// strictly best-effort: with LogDir empty it does nothing, and any
// filesystem failure (permission denied, disk full) is dropped silently,
// because the log is observability rather than correctness. The file path
// matches the tmux pipe-pane format so the same /api/harness/agents/
// {id}/pane endpoint serves both runtimes uniformly.
func (r *Runtime) appendLog(agentID, body string) {
	dir := r.LogDir
	if dir == "" {
		return
	}

	// Deliberately ignored: if the directory could not be created, the
	// open below typically fails as well and we bail out there.
	_ = os.MkdirAll(dir, 0o755)

	const flags = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	logFile, err := os.OpenFile(filepath.Join(dir, agentID+".log"), flags, 0o644)
	if err != nil {
		return
	}
	defer logFile.Close()

	// Write errors are ignored for the same best-effort reason.
	_, _ = logFile.WriteString(body)
}

// CallLLM runs one subprocess turn for agentID: it builds the prompt via
// buildPrompt, invokes the profile's CLI, captures stdout, and parses the
// harness-out block from it.
//
// The prompt is appended as the last argument when Profile.PromptVia is
// "arg" or empty, and piped on stdin when it is "stdin"; any other value
// delivers the prompt nowhere. Each invocation is bounded by r.Timeout; a
// non-positive Timeout falls back to 120s instead of producing an
// already-expired context that would fail every call immediately.
//
// Returns runtime.ErrUnknownAgent (with a nil response) when agentID was
// never spawned. When the CLI fails or its output cannot be parsed, the
// error is returned together with a response carrying the raw stdout and
// ParseFailures set to 1.
//
// A successful exchange is appended to the agent's history. Note that
// buildPrompt only consults the history length (to decide whether to emit
// the system prompt); earlier turns are not replayed into the prompt here.
func (r *Runtime) CallLLM(ctx context.Context, agentID string, req runtime.LLMRequest) (*runtime.LLMResponse, error) {
	// Used when r.Timeout is unset; same value NewRuntime configures.
	const fallbackTimeout = 120 * time.Second

	r.mu.Lock()
	state, ok := r.agents[agentID]
	if !ok {
		r.mu.Unlock()
		return nil, runtime.ErrUnknownAgent
	}
	// V70: buildPrompt reads state.history; another goroutine could
	// be appending to it concurrently. Hold the lock across the read so
	// the slice header isn't raced.
	prompt := buildPrompt(state, req)
	r.mu.Unlock()

	timeout := r.Timeout
	if timeout <= 0 {
		timeout = fallbackTimeout
	}
	cctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// Copy the static args so appending the prompt never writes into the
	// profile's backing array.
	args := append([]string{}, r.Profile.Args...)
	if r.Profile.PromptVia == "arg" || r.Profile.PromptVia == "" {
		args = append(args, prompt)
	}
	cmd := exec.CommandContext(cctx, r.Profile.Command, args...)
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr
	if r.Profile.PromptVia == "stdin" {
		cmd.Stdin = strings.NewReader(prompt)
	}

	// Log the outbound prompt before invoking the CLI so the UI's live
	// pane shows the prompt even if the call hangs/errors before producing
	// stdout. The format mirrors what xterm renders cleanly for tmux logs.
	r.appendLog(agentID, fmt.Sprintf("\n[harness] >> turn started %s\n%s\n[harness] >> awaiting CLI...\n", time.Now().UTC().Format(time.RFC3339), prompt))

	if err := cmd.Run(); err != nil {
		errOut := stderr.String()
		partial := &runtime.LLMResponse{
			Text:          stdout.String(),
			ParseFailures: 1,
		}
		// A deadline kill otherwise surfaces only as an opaque exec error
		// (typically "signal: killed"); name it explicitly, as PreFlight
		// does, so a hung CLI is distinguishable from a crashed one.
		if errors.Is(cctx.Err(), context.DeadlineExceeded) {
			r.appendLog(agentID, fmt.Sprintf("[harness] << CLI deadline exceeded (per-call timeout %s): %v\n%s\n", timeout, err, errOut))
			return partial, fmt.Errorf("cli: %s deadline exceeded (per-call timeout %s): %w: %s", r.Profile.Command, timeout, err, errOut)
		}
		r.appendLog(agentID, fmt.Sprintf("[harness] << CLI error: %v\n%s\n", err, errOut))
		return partial, fmt.Errorf("cli: %s exit: %w: %s", r.Profile.Command, err, errOut)
	}

	out := stdout.String()
	// Mirror stdout to the per-agent transcript log so the live-pane UI
	// renders the CLI's response in real time — without this, headless and
	// cli-print agents look "silent" in the dashboard even though they
	// produced full responses.
	r.appendLog(agentID, fmt.Sprintf("[harness] << CLI response (%d bytes)\n%s\n", len(out), out))

	// Record the exchange. This happens before parsing, so a turn whose
	// output fails to parse still counts as history.
	r.mu.Lock()
	state.history = append(state.history,
		turn{role: "user", content: prompt},
		turn{role: "assistant", content: out},
	)
	r.mu.Unlock()

	resp, err := parseHarnessOut(out, prompt, r.Profile.TokenEstDivisor)
	if err != nil {
		return &runtime.LLMResponse{Text: out, ParseFailures: 1}, err
	}
	return resp, nil
}

// Terminate flips the agent's status to "terminated". There is no
// subprocess to kill because every turn's process has already exited.
// Unknown agent IDs are ignored, and the call always returns nil. The ctx
// and reason arguments are accepted but not used.
func (r *Runtime) Terminate(ctx context.Context, agentID, reason string) error {
	r.mu.Lock()
	defer r.mu.Unlock()

	state, known := r.agents[agentID]
	if !known {
		return nil
	}
	state.status = "terminated"
	return nil
}

// Health reports the recorded status string for agentID, or
// runtime.ErrUnknownAgent when no such agent has been spawned. The ctx
// argument is accepted but not used.
func (r *Runtime) Health(ctx context.Context, agentID string) (string, error) {
	r.mu.Lock()
	defer r.mu.Unlock()

	state, found := r.agents[agentID]
	if !found {
		return "", runtime.ErrUnknownAgent
	}
	return state.status, nil
}

// buildPrompt renders the text sent to the CLI for one turn, in order:
//
//  1. the agent's system prompt, only while no history has been recorded;
//  2. either a "[harness-turn]" summary of req.IncomingMessage or, when
//     there is no incoming message, the free-form req.Prompt;
//  3. the list of available tools, if any;
//  4. a correction note when the previous response failed to parse;
//  5. the harness-out output contract, always.
//
// The caller must prevent concurrent mutation of state while this runs.
func buildPrompt(state *agentState, req runtime.LLMRequest) string {
	var out strings.Builder

	firstTurn := len(state.history) == 0
	if firstTurn && state.systemPrompt != "" {
		out.WriteString(state.systemPrompt + "\n\n")
	}

	// An incoming message takes precedence over a free-form prompt.
	switch msg := req.IncomingMessage; {
	case msg != nil:
		out.WriteString("[harness-turn]\n")
		fmt.Fprintf(&out, "Inbox message id=%s type=%s from=%s task_id=%s\n",
			msg.ID, msg.Type, msg.From, msg.TaskID)
		if intent := msg.Payload.Intent; intent != "" {
			out.WriteString("intent: " + intent + "\n")
		}
		if expects := string(msg.Payload.Expects); expects != "" {
			out.WriteString("expects: " + expects + "\n")
		}
		for _, ref := range msg.Payload.ContextRefs {
			out.WriteString("context_ref: " + ref + "\n")
		}
	case req.Prompt != "":
		out.WriteString(req.Prompt + "\n")
	}

	if tools := req.AvailableTools; len(tools) > 0 {
		out.WriteString("\navailable tools: " + strings.Join(tools, ", ") + "\n")
	}

	if parseErr := req.PriorParseError; parseErr != "" {
		// L8: corrective context for the next attempt.
		out.WriteString("\n[correction] your previous response failed parsing: ")
		out.WriteString(parseErr)
		out.WriteString("\nemit EXACTLY one fenced harness-out JSON block, no surrounding prose.\n")
	}

	// Output contract, appended to every prompt.
	out.WriteString("\nReply ONLY with one fenced harness-out block of JSON. Schema:\n")
	out.WriteString("```harness-out\n{\"text\":\"...\",\"tool_calls\":[{\"name\":\"<tool>\",\"args\":{...}}],\"tokens\":{\"prompt\":0,\"completion\":0}}\n```\n")
	out.WriteString("Always emit the block, even on a no-op turn. No prose outside it.\n")
	return out.String()
}

// parseHarnessOut extracts the harness-out block from stdout and decodes
// it into an LLMResponse. It either yields a clean response or an error;
// retrying with a "reformat" prompt is the caller's responsibility.
//
// When the decoded block reports zero for both prompt and completion
// tokens, usage is estimated from byte lengths: len(prompt)/divisor and
// len(block)/divisor. A divisor <= 0 is replaced by 4.0.
func parseHarnessOut(stdout, prompt string, divisor float64) (*runtime.LLMResponse, error) {
	block, found := tmux.ExtractBlock(stdout)
	if !found {
		return nil, errors.New("cli: no harness-out block in stdout")
	}

	var decoded struct {
		Text      string             `json:"text"`
		ToolCalls []runtime.ToolCall `json:"tool_calls"`
		Tokens    runtime.TokenUsage `json:"tokens"`
	}
	if err := jsonUnmarshalRelaxed([]byte(block), &decoded); err != nil {
		return nil, fmt.Errorf("cli: malformed JSON in harness-out: %w", err)
	}

	usage := decoded.Tokens
	if usage.Prompt == 0 && usage.Completion == 0 {
		// Nothing reported: estimate from text size. A non-positive
		// divisor would yield +Inf (or a negative count) and an undefined
		// int conversion, so substitute the default.
		perToken := divisor
		if perToken <= 0 {
			perToken = 4.0
		}
		usage.Prompt = int(float64(len(prompt)) / perToken)
		usage.Completion = int(float64(len(block)) / perToken)
	}

	resp := &runtime.LLMResponse{
		Text:      decoded.Text,
		ToolCalls: decoded.ToolCalls,
		Tokens:    usage,
	}
	return resp, nil
}

// jsonUnmarshalRelaxed decodes raw into v. It is the intended home for a
// tolerance pass over common LLM mistakes (trailing commas etc.), but no
// such pass exists yet: for Phase O it simply forwards to jsonStrict, so
// malformed input is rejected. Extend here later if real outputs need it.
func jsonUnmarshalRelaxed(raw []byte, v any) error {
	return jsonStrict(raw, v)
}

// jsonStrict decodes raw into v by delegating to jsonImpl, which is
// defined elsewhere in the package (presumably a thin wrapper over
// encoding/json — confirm there). Separated so we can swap in a tolerant
// parser later without touching call sites.
func jsonStrict(raw []byte, v any) error {
	return jsonImpl(raw, v)
}
