How to Build an AI Agent From Scratch (2026): Under 200 Lines of Code
At its core, an AI agent is surprisingly simple: it's a loop where an LLM decides what to do, does it, observes the result, and repeats until the task is done. You don't need a framework. You don't need 1,000 lines of code. Here's the full implementation.
What Is an AI Agent?
A standard LLM call looks like this:
user: write me a summary of X
model: here is the summary...
An agent loop looks like this:
user: research the current state of fusion energy and write a report
model: I'll search for recent news about fusion energy.
[calls search tool]
[reads results]
model: I'll also look up the latest ITER progress.
[calls search tool again]
[reads results]
model: Now I have enough context. Here's the report...
The key difference: the model decides what tools to call and when, based on what it needs to complete the task.
The Core Loop
Every agent implementation has the same fundamental structure:
1. Give the model a set of available tools
2. Model responds — either with a tool call or a final answer
3. If it's a tool call: execute it, add the result to the conversation, and go back to step 2
4. If it's a final answer: return it
Here's the minimal implementation:
import anthropic
import json
from typing import Any
# Module-level SDK client shared by every example below
# (the SDK reads ANTHROPIC_API_KEY from the environment — see the setup notes).
client = anthropic.Anthropic()
def run_agent(task: str, tools: list[dict], tool_functions: dict[str, callable]) -> str:
    """Run an agent loop until the model returns a final answer.

    Args:
        task: The user's task, sent as the opening user message.
        tools: Tool definitions in the Anthropic tools schema.
        tool_functions: Maps each tool name to the Python callable that executes it.

    Returns:
        The model's final text answer ("" if the final turn had no text block).

    Raises:
        RuntimeError: If the model stops for a reason other than `end_turn`
            or `tool_use` (e.g. `max_tokens`) — previously this fell through
            and re-called the API with unchanged messages, looping forever.
    """
    messages = [{"role": "user", "content": task}]
    while True:
        response = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=4096,
            tools=tools,
            messages=messages
        )

        # Model is done — return the text response.
        if response.stop_reason == "end_turn":
            for block in response.content:
                if hasattr(block, 'text'):
                    return block.text
            # end_turn with no text block: previously fell through into an
            # infinite re-query loop; return empty instead.
            return ""

        # Anything other than tool_use here (e.g. "max_tokens") cannot make
        # progress — fail loudly rather than loop forever.
        if response.stop_reason != "tool_use":
            raise RuntimeError(f"Unexpected stop_reason: {response.stop_reason}")

        # Model wants to use a tool — record its turn in the conversation.
        messages.append({"role": "assistant", "content": response.content})

        # Execute each tool call in this turn.
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                print(f"  → Calling {block.name}({block.input})")
                try:
                    result = tool_functions[block.name](**block.input)
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": str(result)
                    })
                except Exception as e:
                    # Feed the error back to the model so it can recover
                    # (retry, rephrase, or pick another tool).
                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": f"Error: {str(e)}",
                        "is_error": True
                    })

        # Tool results go back to the model as a user turn.
        messages.append({"role": "user", "content": tool_results})
That's the entire agent loop. 40 lines. Everything else is tooling and polish.
Building a Real Research Agent
You can build an agent that can search the web, fetch URLs, and write structured reports.
Step 1: Define the tools
# Tool definitions in the Anthropic tools format: each entry pairs a name and
# a model-facing description with a JSON Schema describing its arguments.
# The description text is what the model reads when deciding which tool to call.
tools = [
    {
        "name": "web_search",
        "description": "Search the web for current information. Returns titles, snippets, and URLs.",
        "input_schema": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "The search query"
                }
            },
            "required": ["query"]
        }
    },
    {
        "name": "fetch_url",
        "description": "Fetch the content of a URL. Returns the main text content.",
        "input_schema": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "The URL to fetch"
                }
            },
            "required": ["url"]
        }
    },
    {
        "name": "save_report",
        "description": "Save the final research report to a file.",
        "input_schema": {
            "type": "object",
            "properties": {
                "filename": {"type": "string"},
                "content": {"type": "string"}
            },
            "required": ["filename", "content"]
        }
    }
]
Step 2: Implement the tool functions
import httpx
from bs4 import BeautifulSoup
def web_search(query: str) -> str:
    """Search the web via SerpAPI and return up to 5 results as text.

    Reads the API key from the SERPAPI_KEY environment variable.

    Returns:
        One "Title/URL/Snippet" stanza per result, or "No results found".

    Raises:
        KeyError: If SERPAPI_KEY is unset.
        httpx.HTTPStatusError: If SerpAPI returns an HTTP error.
    """
    import os
    api_key = os.environ["SERPAPI_KEY"]
    # timeout added for consistency with fetch_url below — a bare get()
    # would otherwise hang the whole agent loop on a slow endpoint.
    response = httpx.get(
        "https://serpapi.com/search",
        params={"q": query, "api_key": api_key, "num": 5},
        timeout=10
    )
    # Fail loudly on HTTP errors (e.g. a rejected API key) instead of
    # silently reporting "No results found" from an error payload.
    response.raise_for_status()
    data = response.json()
    results = []
    for r in data.get("organic_results", [])[:5]:
        results.append(f"Title: {r['title']}\nURL: {r['link']}\nSnippet: {r.get('snippet', '')}\n")
    return "\n".join(results) if results else "No results found"
def fetch_url(url: str) -> str:
    """Fetch a URL and return its visible text content.

    Non-content elements (scripts, styles, nav, footer) are stripped, and
    the result is capped at 8000 characters. Any failure — network, parse,
    or otherwise — is reported as a string rather than raised, so the
    agent loop can show it to the model.
    """
    try:
        page = httpx.get(url, timeout=10, follow_redirects=True)
        parsed = BeautifulSoup(page.text, 'html.parser')
        # Drop non-content elements before extracting text.
        for element in parsed(["script", "style", "nav", "footer"]):
            element.decompose()
        body_text = parsed.get_text(separator="\n", strip=True)
        # Cap at 8000 chars to avoid blowing up the model's context window.
        return body_text[:8000]
    except Exception as e:
        return f"Failed to fetch URL: {e}"
def save_report(filename: str, content: str) -> str:
    """Save the report text to `filename` and return a confirmation.

    Returns:
        "Saved to <filename>" so the model can see where the file went.
    """
    with open(filename, 'w') as f:
        f.write(content)
    # Bug fix: the confirmation string previously never interpolated the
    # filename, so the model could not report the saved path.
    return f"Saved to {filename}"
# Dispatch table: tool name (exactly as declared in `tools`) -> implementation.
tool_functions = {
    "web_search": web_search,
    "fetch_url": fetch_url,
    "save_report": save_report
}
Step 3: Wire it together with a system prompt
# System prompt steering the agent's tool-use strategy; passed via `system=`
# on the API call rather than as a conversation message.
SYSTEM_PROMPT = """You are a research agent. When given a research task:
1. Search for relevant information using web_search
2. Fetch full content from the most relevant URLs
3. Synthesize findings into a structured report
4. Save the report using save_report
Always cite your sources. Be thorough but concise."""
def run_research_agent(task: str) -> str:
    """Run the research agent on `task` and return its final answer."""
    messages = [
        {"role": "user", "content": task}
    ]
    # Bug fix: the original made a client.messages.create() call here and
    # then discarded the response — a wasted (and billed) API round-trip.
    # The loop makes its own API calls; SYSTEM_PROMPT should be threaded
    # through to it (TODO(review): confirm run_agent_loop accepts the
    # system prompt and tools in this order).
    return run_agent_loop(messages, tools, tool_functions)
if __name__ == "__main__":
    # Example run: researches a topic and (via the save_report tool)
    # writes the report to battery-research.md; the final text is printed.
    result = run_research_agent(
        "Research the current state of battery technology for EVs. "
        "Focus on 2025-2026 developments. Save as battery-research.md"
    )
    print(result)
Adding Memory
Agents become dramatically more useful with memory. There are three patterns:
1. In-context memory (simplest)
Just keep the conversation history. Works for single sessions.
# The messages list IS the memory — conversation builds up naturally
messages = []
messages.append({"role": "user", "content": "My name is Alice"})
# ... model responds ...
# NOTE(review): in real code the assistant's reply must be appended here
# too — the messages list alternates user/assistant turns, as in the
# agent loop above.
messages.append({"role": "user", "content": "What's my name?"})
# Model knows: Alice
2. Summarization memory
When context gets long, summarize old history:
# History length (in messages) beyond which we start compressing.
MAX_MESSAGES = 20

def maybe_compress_history(messages: list) -> list:
    """Compress old conversation history once it exceeds MAX_MESSAGES.

    Everything except the last 10 messages is summarized by a cheap model;
    the returned list is [summary turn, assistant ack, last 10 messages].
    Below the threshold, `messages` is returned unchanged.
    """
    if len(messages) <= MAX_MESSAGES:
        return messages
    # Summarize old messages. Bug fix: default=str is required — the
    # history can contain SDK content-block objects (appended verbatim as
    # response.content in the agent loop), which json.dumps cannot
    # serialize on its own and would raise TypeError.
    summary_response = client.messages.create(
        model="claude-haiku-3-5",  # cheap model for summarization
        max_tokens=1024,
        messages=[{
            "role": "user",
            "content": f"Summarize this conversation history in 300 words, preserving key facts:\n\n{json.dumps(messages[:-10], default=str)}"
        }]
    )
    summary = summary_response.content[0].text
    # Replace the old history with the summary + the recent tail.
    return [
        {"role": "user", "content": f"[Previous conversation summary: {summary}]"},
        {"role": "assistant", "content": "Understood, I have context from our previous conversation."},
        *messages[-10:]  # Keep last 10 messages verbatim
    ]
3. External memory (production-grade)
Store facts in a vector database, retrieve relevant ones per query:
from openai import OpenAI # Just for embeddings
import numpy as np
class SimpleVectorMemory:
    """Minimal in-memory vector store: embed on store, cosine-rank on recall."""

    def __init__(self):
        self.memories = []     # raw text entries
        self.embeddings = []   # parallel list of embedding vectors
        self.embed_client = OpenAI()

    def _embed(self, text: str):
        # Single embedding call shared by store() and recall().
        response = self.embed_client.embeddings.create(
            model="text-embedding-3-small",
            input=text
        )
        return response.data[0].embedding

    def store(self, text: str):
        """Embed `text` and append it to the store."""
        # Embed first so a failed API call leaves both lists untouched.
        vector = self._embed(text)
        self.memories.append(text)
        self.embeddings.append(vector)

    def recall(self, query: str, top_k: int = 3) -> list[str]:
        """Return the `top_k` stored texts most cosine-similar to `query`."""
        query_vec = self._embed(query)
        query_norm = np.linalg.norm(query_vec)
        scores = []
        for vec in self.embeddings:
            scores.append(np.dot(query_vec, vec) / (query_norm * np.linalg.norm(vec)))
        ranked = np.argsort(scores)[-top_k:][::-1]
        return [self.memories[i] for i in ranked]
Error Handling and Robustness
Production agents need these patterns:
import time
from functools import wraps
def with_retry(max_retries: int = 3, backoff: float = 1.0):
    """Decorator for retrying failed tool calls.

    Retries up to `max_retries` attempts with exponential backoff
    (backoff, 2*backoff, 4*backoff, ...). The final failure re-raises
    the original exception.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            failures = 0
            while True:
                try:
                    return func(*args, **kwargs)
                except Exception as exc:
                    failures += 1
                    if failures >= max_retries:
                        raise
                    delay = backoff * (2 ** (failures - 1))
                    print(f" Retry {failures}/{max_retries} after {delay}s: {exc}")
                    time.sleep(delay)
        return wrapper
    return decorator
# Apply to tool functions
@with_retry(max_retries=3)
def web_search(query: str) -> str:
    # ... implementation
    # (placeholder body — the full web_search implementation appears
    # earlier in the article)
    pass
# Guard against infinite loops
MAX_ITERATIONS = 20

def run_agent_safe(task: str, tools: list[dict], tool_functions: dict) -> str:
    """Agent loop with a hard cap on iterations.

    Bug fix: the original signature contained a literal `...` in the
    parameter list, which is a SyntaxError in Python — the parameters
    are now spelled out.

    Raises:
        RuntimeError: When the cap is reached without a final answer.
    """
    iterations = 0
    while iterations < MAX_ITERATIONS:
        iterations += 1
        # ... agent loop ...
    raise RuntimeError(f"Agent exceeded {MAX_ITERATIONS} iterations")
Complete Agent: Under 200 Lines
Here's the full working implementation:
#!/usr/bin/env python3
"""
Minimal AI research agent. Requires:
pip install anthropic httpx beautifulsoup4
export ANTHROPIC_API_KEY=...
export SERPAPI_KEY=...
"""
import os, json, time
import anthropic, httpx
from bs4 import BeautifulSoup
from functools import wraps
client = anthropic.Anthropic()  # shared API client for the whole script
MAX_ITER = 20  # hard cap on loop rounds — bounds API spend and runaway loops
def retry(n=3):
    """Retry decorator with exponential backoff (1s, 2s, 4s, ...).

    On the final failure the error is RETURNED as a string rather than
    raised, so the agent loop can pass it to the model as a tool result.
    """
    def decorator(fn):
        @wraps(fn)
        def wrapped(*args, **kwargs):
            for attempt in range(n):
                try:
                    return fn(*args, **kwargs)
                except Exception as exc:
                    if attempt == n - 1:
                        return f"Error: {exc}"
                    time.sleep(2 ** attempt)
        return wrapped
    return decorator
@retry()
def web_search(query: str) -> str:
    """Search via SerpAPI; return up to 5 results as title/link/snippet stanzas."""
    params = {"q": query, "api_key": os.environ["SERPAPI_KEY"], "num": 5}
    resp = httpx.get("https://serpapi.com/search", params=params, timeout=10)
    hits = resp.json().get("organic_results", [])
    stanzas = [f"{hit['title']}\n{hit['link']}\n{hit.get('snippet','')}" for hit in hits[:5]]
    return "\n\n".join(stanzas)
@retry()
def fetch_url(url: str) -> str:
    """Fetch a URL and return its visible text, capped at 6000 chars."""
    r = httpx.get(url, timeout=10, follow_redirects=True)
    soup = BeautifulSoup(r.text, 'html.parser')
    # Idiom fix: a plain for loop, not a list comprehension — decompose()
    # is called purely for its side effect, so building a throwaway list
    # of None values was misleading.
    for t in soup(["script", "style", "nav"]):
        t.decompose()
    return soup.get_text(separator="\n", strip=True)[:6000]
def save_report(filename: str, content: str) -> str:
    """Write `content` to `filename`; confirm the path so the model can cite it."""
    with open(filename, 'w') as f:
        f.write(content)
    # Bug fix: the confirmation previously never interpolated the filename.
    return f"Saved: {filename}"
# Tool schemas (Anthropic format), kept terse — the description strings are
# what the model reads when deciding which tool to call.
TOOLS = [
    {"name":"web_search","description":"Search the web",
     "input_schema":{"type":"object","properties":{"query":{"type":"string"}},"required":["query"]}},
    {"name":"fetch_url","description":"Fetch URL content",
     "input_schema":{"type":"object","properties":{"url":{"type":"string"}},"required":["url"]}},
    {"name":"save_report","description":"Save report to file",
     "input_schema":{"type":"object","properties":{"filename":{"type":"string"},"content":{"type":"string"}},"required":["filename","content"]}},
]
# Dispatch table: tool name -> implementation (names must match TOOLS).
FNS = {"web_search":web_search,"fetch_url":fetch_url,"save_report":save_report}
def agent(task: str) -> str:
    """Run the research agent loop on `task`, capped at MAX_ITER rounds.

    Returns the model's final text answer, "" if the final turn carried
    no text block, or "Max iterations reached" when the cap is hit.
    """
    msgs = [{"role": "user", "content": task}]
    system = "You are a research agent. Search, fetch, synthesize, then save your report."
    for _ in range(MAX_ITER):
        r = client.messages.create(model="claude-sonnet-4-5", max_tokens=8192,
                                   system=system, tools=TOOLS, messages=msgs)
        if r.stop_reason == "end_turn":
            # Bug fix: next() now has a default — an end_turn response with
            # no text block previously raised StopIteration.
            return next((b.text for b in r.content if hasattr(b, 'text')), "")
        msgs.append({"role": "assistant", "content": r.content})
        results = []
        for b in r.content:
            if b.type == "tool_use":
                # Bug fix: preview the first argument defensively — the input
                # dict may be empty, and its first value may not be a string,
                # so the old list(...)[0][:60] could raise.
                preview = str(next(iter(b.input.values()), ""))[:60]
                print(f"→ {b.name}({preview}...)")
                out = FNS[b.name](**b.input)
                results.append({"type": "tool_result", "tool_use_id": b.id, "content": str(out)})
        msgs.append({"role": "user", "content": results})
    return "Max iterations reached"
if __name__ == "__main__":
    import sys
    # Join all CLI args into one task string; fall back to a demo task
    # when the script is run with no arguments.
    task = " ".join(sys.argv[1:]) or "Research the latest LLM pricing trends in 2026. Save as llm-pricing.md"
    print(agent(task))
What to Build Next
Once you have this working, the natural extensions are:
- Parallel tool execution: Call multiple tools simultaneously instead of serially
- Sub-agents: Have the main agent delegate to specialized sub-agents
- Streaming: Stream the response for better UX
- Observability: Log every tool call and result for debugging
- Cost limits: Track token usage and stop if spending exceeds budget
Frameworks like LangGraph and Mastra add these features on top of exactly this same core loop. Understanding the loop first makes those frameworks much easier to use correctly.
The fundamentals don't change: decide, act, observe, repeat. Everything else is scaffolding.