Monitor and evaluate CrewAI agent performance using Patronus AI’s comprehensive evaluation platform for LLM outputs and agent behaviors.
uv add patronus
export PATRONUS_API_KEY="your_patronus_api_key"
PatronusEvalTool
, which allows agents to select the most appropriate evaluator and criteria:
from crewai import Agent, Task, Crew
from crewai_tools import PatronusEvalTool
# Initialize the tool
patronus_eval_tool = PatronusEvalTool()
# Define an agent that uses the tool
coding_agent = Agent(
role="Coding Agent",
goal="Generate high quality code and verify that the output is code",
backstory="An experienced coder who can generate high quality python code.",
tools=[patronus_eval_tool],
verbose=True,
)
# Example task to generate and evaluate code
generate_code_task = Task(
description="Create a simple program to generate the first N numbers in the Fibonacci sequence. Select the most appropriate evaluator and criteria for evaluating your output.",
expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
agent=coding_agent,
)
# Create and run the crew
crew = Crew(agents=[coding_agent], tasks=[generate_code_task])
result = crew.kickoff()
PatronusPredefinedCriteriaEvalTool
, which uses predefined evaluator and criteria:
from crewai import Agent, Task, Crew
from crewai_tools import PatronusPredefinedCriteriaEvalTool
# Initialize the tool with predefined criteria
patronus_eval_tool = PatronusPredefinedCriteriaEvalTool(
evaluators=[{"evaluator": "judge", "criteria": "contains-code"}]
)
# Define an agent that uses the tool
coding_agent = Agent(
role="Coding Agent",
goal="Generate high quality code",
backstory="An experienced coder who can generate high quality python code.",
tools=[patronus_eval_tool],
verbose=True,
)
# Example task to generate code
generate_code_task = Task(
description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
agent=coding_agent,
)
# Create and run the crew
crew = Crew(agents=[coding_agent], tasks=[generate_code_task])
result = crew.kickoff()
PatronusLocalEvaluatorTool
, which uses custom function evaluators:
from crewai import Agent, Task, Crew
from crewai_tools import PatronusLocalEvaluatorTool
from patronus import Client, EvaluationResult
import random
# Initialize the Patronus client
client = Client()
# Register a custom evaluator
@client.register_local_evaluator("random_evaluator")
def random_evaluator(**kwargs):
score = random.random()
return EvaluationResult(
score_raw=score,
pass_=score >= 0.5,
explanation="example explanation",
)
# Initialize the tool with the custom evaluator
patronus_eval_tool = PatronusLocalEvaluatorTool(
patronus_client=client,
evaluator="random_evaluator",
evaluated_model_gold_answer="example label",
)
# Define an agent that uses the tool
coding_agent = Agent(
role="Coding Agent",
goal="Generate high quality code",
backstory="An experienced coder who can generate high quality python code.",
tools=[patronus_eval_tool],
verbose=True,
)
# Example task to generate code
generate_code_task = Task(
description="Create a simple program to generate the first N numbers in the Fibonacci sequence.",
expected_output="Program that generates the first N numbers in the Fibonacci sequence.",
agent=coding_agent,
)
# Create and run the crew
crew = Crew(agents=[coding_agent], tasks=[generate_code_task])
result = crew.kickoff()
PatronusEvalTool
does not require any parameters during initialization. It automatically fetches available evaluators and criteria from the Patronus API.
PatronusPredefinedCriteriaEvalTool
accepts the following parameters during initialization:
[{"evaluator": "judge", "criteria": "contains-code"}]
.PatronusLocalEvaluatorTool
accepts the following parameters during initialization:
PatronusEvalTool
and PatronusPredefinedCriteriaEvalTool
, the following parameters are required when calling the tool:
PatronusLocalEvaluatorTool
, the same parameters are required, but the evaluator and gold answer are specified during initialization.
Was this page helpful?