Skip to content

Testing Agents

mock() returns scripted responses in order. You test the same agent with the same tools — at $0 API cost, with deterministic results.

import { mock } from 'agentfootprint';

// mock() scripts the LLM: responses are consumed in order, one per LLM call.
const scriptedResponses = [
  // Response 1: LLM decides to call a tool
  { content: '', toolCalls: [{ id: 'tc1', name: 'search', arguments: { query: 'AI trends' } }] },
  // Response 2: LLM generates final answer using tool results
  { content: 'The latest AI trends include...' },
];
const provider = mock(scriptedResponses);

Verify the agent calls the right tools and uses the results:

import { describe, it, expect } from 'vitest';
import { Agent, mock, defineTool } from 'agentfootprint';
import { agentObservability } from 'agentfootprint/observe';
// Real tool handler — mock() replaces only the LLM, so this still executes
// and returns a canned shipped-order record as a JSON string.
const lookupOrder = defineTool({
  id: 'lookup_order',
  description: 'Look up an order',
  inputSchema: {
    type: 'object',
    properties: { orderId: { type: 'string' } },
    required: ['orderId'],
  },
  handler: async ({ orderId }) => {
    const order = { orderId, status: 'shipped', amount: 299 };
    return { content: JSON.stringify(order) };
  },
});
describe('support agent', () => {
  it('looks up the order and responds with status', async () => {
    const observer = agentObservability();

    const agent = Agent.create({
      provider: mock([
        // First LLM call: request the lookup_order tool
        { content: '', toolCalls: [{ id: 'tc1', name: 'lookup_order', arguments: { orderId: 'ORD-1003' } }] },
        // Second LLM call: final answer built from the tool result
        { content: 'Your order ORD-1003 has shipped!' },
      ]),
    })
      .system('You are a support agent.')
      .tool(lookupOrder)
      .recorder(observer)
      .build();

    const outcome = await agent.run('Check order ORD-1003');

    // Final answer reflects the tool data
    expect(outcome.content).toContain('shipped');
    // Two iterations: one tool call plus one final response
    expect(outcome.iterations).toBe(2);
    // Recorder captured both LLM calls and the single tool invocation
    expect(observer.tokens().totalCalls).toBe(2);
    expect(observer.tools().byTool['lookup_order'].calls).toBe(1);
  });
});

Verify the execution trace contains expected entries:

it('narrative includes tool call details', async () => {
const agent = Agent.create({ provider: mock([...]) })
.tool(lookupOrder)
.build();
await agent.run('Check order ORD-1003');
const narrative = agent.getNarrative();
expect(narrative.some(line => line.includes('lookup_order'))).toBe(true);
expect(narrative.some(line => line.includes('shipped'))).toBe(true);
});

Verify conditional instructions activate based on decision scope:

import { defineInstruction, AgentPattern } from 'agentfootprint/instructions';

// A conditional instruction is injected into the prompt only when its
// activeWhen predicate matches the current decision scope.
it('refund instruction activates for cancelled orders', async () => {
  const refundInstruction = defineInstruction({
    id: 'refund-handling',
    activeWhen: (d) => d.orderStatus === 'cancelled',
    prompt: 'Offer refund. Timeline: 3-5 days.',
  });

  const agent = Agent.create({
    provider: mock([
      // Call 1: tool call
      { content: '', toolCalls: [{ id: 'tc1', name: 'lookup_order', arguments: { orderId: 'ORD-1003' } }] },
      // Call 2: response (should mention refund since instruction activated)
      { content: 'Your order was cancelled. I can process a refund in 3-5 days.' },
    ]),
  })
    .tool(lookupOrder)
    .instruction(refundInstruction)
    .decision({ orderStatus: null })
    .pattern(AgentPattern.Dynamic)
    .build();

  const reply = await agent.run('Check order ORD-1003');
  expect(reply.content).toContain('refund');
});
import type { AgentStreamEvent } from 'agentfootprint';
it('emits tool_start and tool_end events', async () => {
const events: AgentStreamEvent[] = [];
const agent = Agent.create({ provider: mock([...]) })
.tool(lookupOrder)
.build();
await agent.run('Check order', {
onEvent: (event) => events.push(event),
});
const toolStarts = events.filter(e => e.type === 'tool_start');
const toolEnds = events.filter(e => e.type === 'tool_end');
expect(toolStarts).toHaveLength(1);
expect(toolEnds).toHaveLength(1);
expect(toolStarts[0].toolName).toBe('lookup_order');
});
import { ExplainRecorder } from 'agentfootprint/explain';
it('LLM response is grounded in tool results', async () => {
const explain = new ExplainRecorder();
const agent = Agent.create({ provider: mock([...]) })
.tool(lookupOrder)
.recorder(explain)
.build();
await agent.run('Check order ORD-1003');
const report = explain.explain();
// Sources should contain the order data
expect(report.sources.length).toBeGreaterThan(0);
// Claims should reference data from sources
expect(report.claims.length).toBeGreaterThan(0);
});
  • One mock response per LLM call. If the agent makes 2 LLM calls (tool + response), provide 2 mock responses.
  • Tool handlers still execute. mock() replaces the LLM, not your tools. Tool handlers run normally.
  • Use agentObservability() to assert on token counts, tool usage, and cost.
  • Use .getNarrativeEntries() for structured assertions instead of string matching on .getNarrative().