Files
tw2/examples/evaluation/ace_bench/main.py
jimi a842f1861f
Some checks failed
Pre-commit / run (ubuntu-latest) (push) Has been cancelled
Deploy Sphinx documentation to Pages / build_en (ubuntu-latest, 3.10) (push) Has been cancelled
Deploy Sphinx documentation to Pages / build_zh (ubuntu-latest, 3.10) (push) Has been cancelled
Python Unittest Coverage / test (macos-15, 3.10) (push) Has been cancelled
Python Unittest Coverage / test (macos-15, 3.11) (push) Has been cancelled
Python Unittest Coverage / test (macos-15, 3.12) (push) Has been cancelled
Python Unittest Coverage / test (ubuntu-latest, 3.10) (push) Has been cancelled
Python Unittest Coverage / test (ubuntu-latest, 3.11) (push) Has been cancelled
Python Unittest Coverage / test (ubuntu-latest, 3.12) (push) Has been cancelled
Python Unittest Coverage / test (windows-latest, 3.10) (push) Has been cancelled
Python Unittest Coverage / test (windows-latest, 3.11) (push) Has been cancelled
Python Unittest Coverage / test (windows-latest, 3.12) (push) Has been cancelled
chore: initial import of standalone agentscope project
2026-03-02 18:21:40 +08:00

131 lines
3.5 KiB
Python

# -*- coding: utf-8 -*-
"""Example of running ACEBench evaluation with AgentScope."""
import os
import asyncio
from argparse import ArgumentParser
from typing import Callable
from agentscope.message import Msg
from agentscope.model import DashScopeChatModel
from agentscope.formatter import DashScopeChatFormatter
from agentscope.agent import ReActAgent
from agentscope.evaluate import (
ACEBenchmark,
Task,
SolutionOutput,
RayEvaluator,
FileEvaluatorStorage,
ACEPhone,
)
from agentscope.tool import Toolkit
async def react_agent_solution(
    ace_task: Task,
    pre_hook: Callable,
) -> SolutionOutput:
    """Run a ReAct agent on the given ACEBench task.

    Args:
        ace_task (`Task`):
            Task to run in ACEBench. Its ``metadata`` must provide
            ``"tools"`` (an iterable of ``(tool, json_schema)`` pairs) and
            ``"phone"`` (the ``ACEPhone`` whose state is reported).
        pre_hook (`Callable`):
            The pre-hook function to save the agent's pre-print messages.

    Returns:
        `SolutionOutput`:
            ``output`` holds the state returned by the phone's
            ``get_current_state()`` after the run, and ``trajectory`` holds
            the ``tool_use`` / ``tool_result`` content blocks collected from
            the agent's memory. ``success`` is set to ``True`` whenever the
            function returns; errors raised by the agent are not caught
            here and propagate to the caller.
    """
    # Equip tool functions
    toolkit = Toolkit()
    for tool, json_schema in ace_task.metadata["tools"]:
        # Register the tool function with the given json schema
        toolkit.register_tool_function(tool, json_schema=json_schema)

    # Create a ReAct agent
    agent = ReActAgent(
        name="Friday",
        # Adjacent literals are concatenated verbatim, so each fragment must
        # end with its own separating space.
        sys_prompt="You are a helpful assistant named Friday. "
        "Your target is to solve the given task with your tools. "
        "Try to solve the task as best as you can.",
        model=DashScopeChatModel(
            api_key=os.environ.get("DASHSCOPE_API_KEY"),
            model_name="qwen3-max",
            stream=False,
        ),
        formatter=DashScopeChatFormatter(),
        toolkit=toolkit,
    )
    agent.register_instance_hook(
        "pre_print",
        "save_logging",
        pre_hook,
    )

    # Execute the agent to solve the task
    msg_input = Msg("user", ace_task.input, role="user")
    # Print the input by the running agent to call the pre-print hook
    await agent.print(msg_input)
    await agent(msg_input)

    # Obtain the trajectory (tool calls and their results) from the
    # agent's memory, preserving message order
    memory_msgs = await agent.memory.get_memory()
    traj = [
        block
        for msg in memory_msgs
        for block in msg.get_content_blocks(["tool_use", "tool_result"])
    ]

    # Obtain the final state reported by the task's phone object
    phone: ACEPhone = ace_task.metadata["phone"]
    final_state = phone.get_current_state()

    # Wrap into a SolutionOutput
    return SolutionOutput(
        success=True,
        output=final_state,
        trajectory=traj,
    )
async def main() -> None:
    """Parse the command-line options and run the ACEBench evaluation.

    Command-line options:
        --data_dir (required): forwarded to ``ACEBenchmark`` as ``data_dir``.
        --result_dir (required): forwarded to ``FileEvaluatorStorage`` as
            ``save_dir``.
        --n_workers (int, default 1): forwarded to ``RayEvaluator`` as
            ``n_workers``.
    """
    cli = ArgumentParser()
    # Both directory options are required strings; declare them together
    for flag, description in (
        ("--data_dir", "Where to save the dataset."),
        ("--result_dir", "Where to save the evaluation results."),
    ):
        cli.add_argument(flag, type=str, required=True, help=description)
    cli.add_argument(
        "--n_workers",
        type=int,
        default=1,
        help="The number of ray workers to use for evaluation.",
    )
    options = cli.parse_args()

    # Build the evaluator's collaborators first (benchmark, then storage)
    benchmark = ACEBenchmark(data_dir=options.data_dir)
    storage = FileEvaluatorStorage(save_dir=options.result_dir)

    # RayEvaluator is used here; GeneralEvaluator is the alternative that is
    # more suitable for local debugging
    evaluator = RayEvaluator(
        name="ACEbench evaluation",
        benchmark=benchmark,
        n_repeat=1,  # how many times to repeat
        storage=storage,
        n_workers=options.n_workers,  # how many workers to use
    )

    # Run the evaluation with the ReAct agent solution defined in this file
    await evaluator.run(react_agent_solution)
# Only start the evaluation when this file is executed as a script, so that
# importing the module does not parse CLI arguments or start an event loop.
if __name__ == "__main__":
    asyncio.run(main())