// Package headless is the HTTP-driven Anthropic SDK adapter. Real LLM calls // go through this runtime; it implements runtime.Runtime. // // We talk to the Anthropic Messages API directly (POST /v1/messages) rather // than depending on a third-party Go SDK — keeps deps minimal and lets us // observe the exact wire format for trace recording. Tools are forwarded as // the API's tool-use schema; the response's tool_use blocks become // runtime.ToolCall entries. // // Real-network tests live in headless_smoke_test.go and are gated on // ANTHROPIC_API_KEY so CI/offline runs stay deterministic. package headless import ( "bytes" "context" "encoding/json" "errors" "fmt" "io" "net/http" "os" "path/filepath" "sync" "time" "github.com/flothus/tmux-xterm-research/server-go/internal/harness/runtime" ) const ( defaultEndpoint = "https://api.anthropic.com/v1/messages" defaultModel = "claude-haiku-4-5-20251001" apiVersion = "2023-06-01" ) // Runtime is an HTTP-backed Anthropic Messages client implementing // runtime.Runtime. type Runtime struct { APIKey string Endpoint string Model string MaxTokens int HTTPClient *http.Client // MaxParseRetries controls how many reformat attempts we make when the // LLM produces malformed structured output. Plan §13. MaxParseRetries int // LogDir, when set, is the directory where per-agent transcript logs // are written ("/.log"). Same path the tmux pipe-pane // log uses, so the dashboard's `/api/harness/agents/{id}/pane` endpoint // works uniformly across runtimes — headless agents are otherwise // invisible in the live-pane viewer because they never spawn a tmux // pane. Empty disables logging. LogDir string mu sync.Mutex agents map[string]*agentState } type agentState struct { role string tools []string zone []string runID string systemPrompt string // history is the running message list we send back as `messages` in each // API call. For Phase A we accumulate user+assistant turns. // // mu guards history under concurrent CallLLM (V69). Without it, two // parallel calls on the same agent would race on slice append. mu sync.Mutex history []apiMessage status string } // New constructs a Runtime. APIKey may be passed directly or via // ANTHROPIC_API_KEY environment variable (handled by NewFromEnv). func New(apiKey string) *Runtime { return &Runtime{ APIKey: apiKey, Endpoint: defaultEndpoint, Model: defaultModel, MaxTokens: 2048, HTTPClient: &http.Client{Timeout: 60 * time.Second}, MaxParseRetries: 2, agents: map[string]*agentState{}, } } // NewFromEnv constructs a Runtime using ANTHROPIC_API_KEY. Returns nil and // an error if the env var is unset — call sites use this to skip integration // tests gracefully. func NewFromEnv() (*Runtime, error) { key := os.Getenv("ANTHROPIC_API_KEY") if key == "" { return nil, errors.New("ANTHROPIC_API_KEY not set") } return New(key), nil } // Spawn registers the agent with the runtime and seeds its history with the // system prompt baked into spec.Prompt. func (r *Runtime) Spawn(ctx context.Context, spec runtime.SpawnSpec) (string, error) { r.mu.Lock() r.agents[spec.AgentID] = &agentState{ role: spec.Role, tools: spec.Tools, zone: spec.ZoneScope, runID: spec.RunID, status: "running", systemPrompt: spec.Prompt, } r.mu.Unlock() // Seed the per-agent transcript log so the live-pane viewer shows // the system prompt before any turn fires. Same file the tmux pane // endpoint reads. r.appendLog(spec.AgentID, fmt.Sprintf("[harness] spawned via headless runtime (model=%s)\n[harness] system prompt:\n%s\n", r.Model, spec.Prompt)) return spec.AgentID, nil } // appendLog writes one entry to the per-agent transcript log. Best-effort: // LogDir empty or write errors are silently swallowed — the log is // observability, not correctness. Path format matches the tmux pipe-pane // log so the same /api/harness/agents/{id}/pane endpoint serves both // runtimes. func (r *Runtime) appendLog(agentID, body string) { if r.LogDir == "" { return } _ = os.MkdirAll(r.LogDir, 0o755) path := filepath.Join(r.LogDir, agentID+".log") f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) if err != nil { return } defer f.Close() _, _ = f.WriteString(body) } // CallLLM makes a real Anthropic Messages API call and parses the response. // On parse failure, retries up to MaxParseRetries by appending a reformat // instruction. func (r *Runtime) CallLLM(ctx context.Context, agentID string, req runtime.LLMRequest) (*runtime.LLMResponse, error) { r.mu.Lock() state, ok := r.agents[agentID] r.mu.Unlock() if !ok { return nil, runtime.ErrUnknownAgent } // Build user content from the incoming envelope. For Phase A we just // stuff the intent into a text block; later phases will expand this. userText := req.Prompt if req.IncomingMessage != nil { userText = fmt.Sprintf("[%s] %s\n\nIntent: %s", req.IncomingMessage.Type, userText, req.IncomingMessage.Payload.Intent, ) } // V69: every history mutation goes through state.mu so concurrent // CallLLM invocations on the same agent don't race on slice append. state.mu.Lock() state.history = append(state.history, apiMessage{ Role: "user", Content: []apiBlock{{Type: "text", Text: userText}}, }) state.mu.Unlock() // Mirror the outbound prompt to the transcript log before the HTTP // call so the live-pane viewer shows the prompt even if the API call // hangs or errors. r.appendLog(agentID, fmt.Sprintf("\n[harness] >> turn started %s\n%s\n[harness] >> awaiting Anthropic API...\n", time.Now().UTC().Format(time.RFC3339), userText)) var resp *runtime.LLMResponse var lastErr error parseFailures := 0 for attempt := 0; attempt <= r.MaxParseRetries; attempt++ { raw, err := r.callOnce(ctx, state, req.AvailableTools) if err != nil { r.appendLog(agentID, fmt.Sprintf("[harness] << API error: %v\n", err)) lastErr = err break } parsed, parseErr := parseResponse(raw) if parseErr == nil { parsed.ParseFailures = parseFailures resp = parsed // Add assistant turn to history. state.mu.Lock() state.history = append(state.history, apiMessage{ Role: "assistant", Content: rawToBlocks(raw), }) state.mu.Unlock() // Render the assistant turn into the transcript log: text + // any tool calls, in the order they came back. var renderedSB bytes.Buffer fmt.Fprintf(&renderedSB, "[harness] << API response (tokens prompt=%d completion=%d)\n", parsed.Tokens.Prompt, parsed.Tokens.Completion) if parsed.Text != "" { renderedSB.WriteString(parsed.Text) renderedSB.WriteString("\n") } for _, tc := range parsed.ToolCalls { argJSON, _ := json.Marshal(tc.Args) fmt.Fprintf(&renderedSB, "[tool_call] %s args=%s\n", tc.Name, string(argJSON)) } r.appendLog(agentID, renderedSB.String()) break } parseFailures++ r.appendLog(agentID, fmt.Sprintf("[harness] << parse failure (attempt %d): %v\n", attempt+1, parseErr)) state.mu.Lock() state.history = append(state.history, apiMessage{ Role: "user", Content: []apiBlock{{Type: "text", Text: "Your previous output was malformed. Please reformat as the requested structured output. Error: " + parseErr.Error(), }}, }) state.mu.Unlock() lastErr = parseErr } if resp == nil { return &runtime.LLMResponse{ParseFailures: parseFailures}, lastErr } return resp, nil } // Terminate marks the agent terminated. func (r *Runtime) Terminate(ctx context.Context, agentID, reason string) error { r.mu.Lock() defer r.mu.Unlock() if s, ok := r.agents[agentID]; ok { s.status = "terminated" } return nil } // Health returns the runtime's view of an agent's status. func (r *Runtime) Health(ctx context.Context, agentID string) (string, error) { r.mu.Lock() defer r.mu.Unlock() s, ok := r.agents[agentID] if !ok { return "", runtime.ErrUnknownAgent } return s.status, nil } // --- API wire types --- type apiMessage struct { Role string `json:"role"` Content []apiBlock `json:"content"` } type apiBlock struct { Type string `json:"type"` Text string `json:"text,omitempty"` ID string `json:"id,omitempty"` Name string `json:"name,omitempty"` Input map[string]any `json:"input,omitempty"` } type apiRequest struct { Model string `json:"model"` MaxTokens int `json:"max_tokens"` Messages []apiMessage `json:"messages"` Tools []apiTool `json:"tools,omitempty"` System string `json:"system,omitempty"` } type apiTool struct { Name string `json:"name"` Description string `json:"description"` InputSchema map[string]any `json:"input_schema"` } type apiResponse struct { Content []apiBlock `json:"content"` Usage apiUsage `json:"usage"` Error *apiError `json:"error,omitempty"` } type apiUsage struct { InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` } type apiError struct { Type string `json:"type"` Message string `json:"message"` } func (r *Runtime) callOnce(ctx context.Context, state *agentState, tools []string) (*apiResponse, error) { // V69: snapshot history under the lock so a concurrent append doesn't // race the slice header read. Send the snapshot in the API call. state.mu.Lock() historySnap := append([]apiMessage(nil), state.history...) systemPrompt := state.systemPrompt state.mu.Unlock() body := apiRequest{ Model: r.Model, MaxTokens: r.MaxTokens, Messages: historySnap, System: systemPrompt, } for _, name := range tools { t := toolSchema(name) if t != nil { body.Tools = append(body.Tools, *t) } } raw, err := json.Marshal(body) if err != nil { return nil, err } httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, r.Endpoint, bytes.NewReader(raw)) if err != nil { return nil, err } httpReq.Header.Set("Content-Type", "application/json") httpReq.Header.Set("x-api-key", r.APIKey) httpReq.Header.Set("anthropic-version", apiVersion) httpResp, err := r.HTTPClient.Do(httpReq) if err != nil { return nil, err } defer httpResp.Body.Close() body2, _ := io.ReadAll(httpResp.Body) if httpResp.StatusCode/100 != 2 { return nil, fmt.Errorf("headless: api error %d: %s", httpResp.StatusCode, string(body2)) } var out apiResponse if err := json.Unmarshal(body2, &out); err != nil { return nil, fmt.Errorf("headless: response decode: %w", err) } if out.Error != nil { return nil, fmt.Errorf("headless: api error: %s %s", out.Error.Type, out.Error.Message) } return &out, nil } // parseResponse extracts text + tool calls from an apiResponse. func parseResponse(r *apiResponse) (*runtime.LLMResponse, error) { if r == nil { return nil, errors.New("nil response") } out := &runtime.LLMResponse{ Tokens: runtime.TokenUsage{Prompt: r.Usage.InputTokens, Completion: r.Usage.OutputTokens}, } for _, b := range r.Content { switch b.Type { case "text": if out.Text != "" { out.Text += "\n" } out.Text += b.Text case "tool_use": out.ToolCalls = append(out.ToolCalls, runtime.ToolCall{ ID: b.ID, Name: b.Name, Args: b.Input, }) } } return out, nil } func rawToBlocks(r *apiResponse) []apiBlock { return r.Content } // toolSchema returns the Anthropic-tool-use JSON Schema for a known harness // tool. Returns nil for unknown tools (silently dropped from the request). func toolSchema(name string) *apiTool { switch name { case "introspect": return &apiTool{ Name: "introspect", Description: "Return information about your role, parents, peers, open tasks, and where to store outputs. Call this first on spawn.", InputSchema: map[string]any{ "type": "object", "properties": map[string]any{}, }, } case "write_file": return &apiTool{ Name: "write_file", Description: "Write a file inside your zone scope. Calls outside your zone are rejected by the harness.", InputSchema: map[string]any{ "type": "object", "properties": map[string]any{ "path": map[string]any{"type": "string"}, "content": map[string]any{"type": "string"}, }, "required": []string{"path", "content"}, }, } case "request_clarification": return &apiTool{ Name: "request_clarification", Description: "Ask a question that blocks your current task. Routed upward and replied to with type=answer.", InputSchema: map[string]any{ "type": "object", "properties": map[string]any{ "question": map[string]any{"type": "string"}, "to": map[string]any{"type": "string"}, }, "required": []string{"question"}, }, } case "send_message": return &apiTool{ Name: "send_message", Description: "Send a typed envelope to another agent. Use for reports, queries, escalations.", InputSchema: map[string]any{ "type": "object", "properties": map[string]any{ "to": map[string]any{"type": "string"}, "type": map[string]any{"type": "string"}, "payload": map[string]any{"type": "object"}, }, "required": []string{"to", "type"}, }, } } return nil }