chore: initialize sandbox and overwrite remote content

2026-03-02 22:32:27 +08:00
commit a64378956a
584 changed files with 93604 additions and 0 deletions
--- a/docs/tutorial/en/Makefile
+++ b/docs/tutorial/en/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+# conf.py and index.rst sit next to this Makefile, so the Sphinx source
+# directory is the current directory rather than a "source" subdirectory.
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# "Makefile" is declared phony so the catch-all rule below never tries to
+# rebuild the Makefile itself.
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/tutorial/en/build.sh
+++ b/docs/tutorial/en/build.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Build the English HTML docs with Sphinx, then strip build-time
+# temporary files from the HTML output.
+
+set -e
+
+# Run from this script's directory so the relative paths below (including
+# the "rm -rf" targets) always refer to the docs tree, whatever the caller's
+# working directory is.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Clean old build files
+rm -rf build/ doctrees/
+
+# Build the html
+sphinx-build -M html ./ build
+
+# Remove temporary files (double insurance)
+rm -rf build/html/.doctrees
+rm -f build/html/.buildinfo
+find build/html -name "*.pickle" -delete
+# "-delete" fails on a non-empty __pycache__ directory (which would abort the
+# script under "set -e"), so prune each match and remove it recursively.
+find build/html -name "__pycache__" -prune -exec rm -rf {} +
+find build/html -name "*.pyc" -delete
+
+echo "✅ English docs built successfully, temporary files cleaned"
--- a/docs/tutorial/en/conf.py
+++ b/docs/tutorial/en/conf.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "AgentScope"
+copyright = "2025, Alibaba"
+author = "Alibaba Tongyi Lab"
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "myst_parser",
+    "sphinx_gallery.gen_gallery",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.napoleon",
+]
+
+myst_enable_extensions = [
+    "colon_fence",
+]
+
+# Sphinx-Gallery settings: example scripts are read from ``src`` and the
+# generated gallery is written to ``tutorial``.
+sphinx_gallery_conf = {
+    "download_all_examples": False,
+    "examples_dirs": [
+        "src",
+    ],
+    "gallery_dirs": [
+        "tutorial",
+    ],
+    # Raw string: ``\.`` is a regex escape, not a valid Python string escape
+    # (a plain string literal triggers an invalid-escape warning).
+    "filename_pattern": r"src/.*\.py",
+    "example_extensions": [".py"],
+}
+
+templates_path = ["../_templates"]
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+
+languages = ["en", "zh_CN"]
+language = "en"
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "furo"
+html_title = (
+    "<span style='font-weight: 700; color: #2196f3;'>AgentScope</span>"
+)
+html_logo = "../_static/images/logo.svg"
+html_favicon = "../_static/images/logo.svg"
+html_static_path = ["../_static"]
+html_css_files = [
+    "css/gallery.css",
+]
+
+html_js_files = [
+    "language_switch.js",
+]
+
+html_theme_options = {
+    "footer_icons": [
+        {
+            "name": "GitHub",
+            "url": "https://github.com/agentscope-ai/agentscope",
+            "html": """
+                <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 16 16">
+                    <path fill-rule="evenodd" d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path>
+                </svg>
+            """,
+            "class": "",
+        },
+        {
+            "name": "Discord",
+            "url": "https://discord.gg/eYMpfnkG8h",
+            "html": """
+                <svg stroke="currentColor" fill="currentColor" stroke-width="0" t="1753331148815" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5721" width="200" height="200">
+                    <path d="M723.903423 359.138018c-69.65045-52.952793-136.256577-51.476757-136.256576-51.476757l-6.088649 7.564685c83.027027 25.738378 121.127207 62.085766 121.127207 62.085766a387.459459 387.459459 0 0 0-145.297297-46.956397 418.179459 418.179459 0 0 0-98.340901 1.752793 73.801802 73.801802 0 0 1-7.564684 1.476036 357.385225 357.385225 0 0 0-110.702703 30.258739 278.786306 278.786306 0 0 0-28.782703 13.653333S353.049369 339.488288 440.873514 313.657658l-4.612613-6.088649s-66.513874-1.476036-136.164324 51.476757A654.252973 654.252973 0 0 0 230.630631 642.167928s40.867748 71.126486 148.341621 73.801802c0 0 16.697658-22.694054 31.827027-40.867748-62.085766-18.45045-84.77982-57.565405-84.77982-57.565405a130.998198 130.998198 0 0 0 13.653334 7.564684s0 1.568288 1.476036 1.568289c1.476036 1.476036 3.044324 1.476036 4.52036 3.044324a238.748829 238.748829 0 0 0 34.779099 16.605405 513.199279 513.199279 0 0 0 71.218739 21.218018 350.558559 350.558559 0 0 0 125.555315 0 329.894054 329.894054 0 0 0 69.650451-21.218018A247.328288 247.328288 0 0 0 702.685405 618.09009s-24.262342 39.391712-87.824144 57.565405c13.653333 18.45045 31.827027 39.299459 31.827027 39.29946 107.473874-2.952072 148.341622-73.801802 146.773334-72.602523a654.990991 654.990991 0 0 0-69.558199-283.214414zM421.131532 596.77982a54.705586 54.705586 0 0 1 0-109.042162 54.705586 54.705586 0 0 1 0 109.042162z m177.124324 0a54.705586 54.705586 0 1 1 49.908468-54.521081 52.491532 52.491532 0 0 1-49.908468 54.521081z" p-id="5722"></path><path d="M512 1024A512 512 0 1 1 1024 512 512.645766 512.645766 0 0 1 512 1024z m0-972.892252a461.261261 461.261261 0 1 0 461.261261 461.261261 461.261261 461.261261 0 0 0-461.261261-461.261261z" p-id="5723"></path>
+                </svg>
+            """,
+            "class": "",
+        },
+        {
+            "name": "DingTalk",
+            "url": "https://qr.dingtalk.com/action/joingroup?code=v1,k1,OmDlBXpjW+I2vWjKDsjvI9dhcXjGZi3bQiojOq3dlDw=&_dt_no_comment=1&origin=11",
+            "html": """
+                <svg stroke="currentColor" fill="currentColor" stroke-width="0" viewBox="0 0 1024 1024">
+                    <path d="M512 0C229.205333 0 0 229.205333 0 512s229.205333 512 512 512 512-229.205333 512-512S794.794667 0 512 0z m237.312 480.810667c-1.109333 4.48-3.712 11.093333-7.424 18.986666h0.128l-0.426667 0.682667c-21.504 46.037333-77.610667 136.106667-77.610666 136.106667l-0.298667-0.597334-16.384 28.501334h79.018667l-150.912 200.917333 34.304-136.533333h-62.208l21.589333-90.282667c-17.493333 4.224-38.101333 10.026667-62.592 17.92 0 0-33.109333 19.370667-95.317333-37.333333 0 0-41.984-36.992-17.578667-46.165334 10.410667-3.925333 50.304-8.917333 81.706667-13.226666 42.410667-5.674667 68.48-8.789333 68.48-8.789334s-130.773333 2.005333-161.792-2.901333c-30.976-4.906667-70.4-56.704-78.805334-102.186667 0 0-12.970667-25.002667 27.904-13.226666 40.917333 11.818667 210.005333 46.08 210.005334 46.08S321.109333 411.434667 306.517333 394.922667c-14.634667-16.469333-43.093333-89.770667-39.424-134.869334 0 0 1.621333-11.221333 13.098667-8.192 0 0 162.602667 74.282667 273.792 114.986667 111.104 40.704 207.786667 61.397333 195.328 114.005333z" opacity=".65" p-id="6077"></path>
+                </svg>
+            """,
+            "class": "",
+        },
+    ],
+    "light_css_variables": {
+        "color-brand-primary": "#2196f3",
+        "color-brand-content": "#2196f3",
+        "color-admonition-background": "#f8f9fa",
+    },
+    "dark_css_variables": {
+        "color-link": "#2196f3",
+        "color-link--hover": "#2196f3",
+        "color-brand-primary": "#64b5f6",
+        "color-brand-content": "#64b5f6",
+    },
+}
+
+source_suffix = [".md", ".rst"]
+
+
+# -- Options for API documentation -------------------------------------------
+
+autodoc_member_order = "bysource"
+autodoc_typehints = "description"
+autodoc_class_signature = "separated"
+autodoc_default_options = {
+    "special-members": "__call__",
+}
+
+add_module_names = False
+python_display_short_literal_types = True
+
+
+def skip_member(app, what, name, obj, skip, options):
+    """Handler for the ``autodoc-skip-member`` event.
+
+    Returns ``False`` for the allow-listed member names below so they are
+    not skipped; for every other name the incoming ``skip`` value is
+    returned unchanged.
+    """
+    always_documented = (
+        "__call__",
+        "_format",
+        "_format_agent_message",
+        "_format_tool_sequence",
+    )
+    return False if name in always_documented else skip
+
+
+def setup(app):
+    # Register skip_member as the handler for the "autodoc-skip-member" event.
+    app.connect("autodoc-skip-member", skip_member)
--- a/docs/tutorial/en/index.rst
+++ b/docs/tutorial/en/index.rst
@@ -0,0 +1,76 @@
+.. AgentScope Doc documentation master file, created by
+   sphinx-quickstart on Thu Aug  8 15:07:21 2024.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to AgentScope's documentation!
+==========================================
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Tutorial
+
+   tutorial/quickstart_installation
+   tutorial/quickstart_key_concept
+   tutorial/quickstart_message
+   tutorial/quickstart_agent
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Workflow
+
+   tutorial/workflow_conversation
+   tutorial/workflow_multiagent_debate
+   tutorial/workflow_concurrent_agents
+   tutorial/workflow_routing
+   tutorial/workflow_handoffs
+
+.. toctree::
+   :maxdepth: 1
+   :caption: FAQ
+
+   tutorial/faq
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Model and Context
+
+   tutorial/task_model
+   tutorial/task_prompt
+   tutorial/task_token
+   tutorial/task_memory
+   tutorial/task_long_term_memory
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Tool
+
+   tutorial/task_tool
+   tutorial/task_mcp
+   tutorial/task_agent_skill
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Agent
+
+   tutorial/task_agent
+   tutorial/task_state
+   tutorial/task_hook
+   tutorial/task_middleware
+   tutorial/task_a2a
+   tutorial/task_realtime
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Features
+
+   tutorial/task_pipeline
+   tutorial/task_plan
+   tutorial/task_rag
+   tutorial/task_studio
+   tutorial/task_tracing
+   tutorial/task_eval
+   tutorial/task_eval_openjudge
+   tutorial/task_embedding
+   tutorial/task_tts
+   tutorial/task_tuner
--- a/docs/tutorial/en/make.bat
+++ b/docs/tutorial/en/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+REM conf.py and index.rst live next to this script, so the source directory
+REM is the script directory itself (the working directory after pushd).
+set SOURCEDIR=.
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	REM Undo the pushd above before leaving early.
+	popd
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
--- a/docs/tutorial/en/src/README.md
+++ b/docs/tutorial/en/src/README.md
--- a/docs/tutorial/en/src/faq.py
+++ b/docs/tutorial/en/src/faq.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+"""
+.. _faq:
+
+FAQ
+========================================
+
+About AgentScope
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*What is AgentScope?*
+    AgentScope is a multi-agent framework, aiming to provide a simple yet efficient way to build LLM-empowered agent applications.
+
+*What is the difference between AgentScope v1.0 and v0.x?*
+    AgentScope v1.0 is a complete refactoring of the framework, equipped with new features and improvements. Refer to the changelog in the GitHub repository for detailed changes.
+
+
+About Model
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*How to integrate my own model with AgentScope?*
+    Create your own model by inheriting ``agentscope.model.ChatModelBase`` and implement the ``__call__`` method.
+
+*What models are supported by AgentScope?*
+    Currently, AgentScope has built-in support for the DashScope, Gemini, OpenAI, Anthropic, and Ollama APIs; in addition, ``OpenAIChatModel`` is compatible with DeepSeek and vLLM models.
+
+*How to monitor the token usage in AgentScope?*
+    In AgentScope Studio, we provide visualization of token usage and tracing. Refer to the :ref:`studio` section for more details.
+
+
+About Agent
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*How to create my own agent?*
+    You can choose to use the ``ReActAgent`` class directly, or create your own agent by inheriting from ``AgentBase`` or ``ReActAgentBase`` classes. Refer to the :ref:`agent` section for more details.
+
+
+*How to forward the (streaming) output of agents to my own frontend or application?*
+    Use the pre hook of the ``print`` function to forward printing messages. Refer to the :ref:`hook` section.
+
+
+About Tools
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*How many tools are provided by AgentScope?*
+    AgentScope provides a set of built-in tools, including ``execute_python_code``, ``execute_shell_command``, ``write_text_file``, etc. You can find them under the ``agentscope.tool`` module.
+
+
+About Reporting Bugs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*How can I report a bug in AgentScope?*
+    If you encounter a bug while using AgentScope, please report it by opening an issue on our GitHub repository.
+
+*How can I report a security bug in AgentScope?*
+    If you discover a security issue in AgentScope, please report it to us through the `Alibaba Security Response Center (ASRC) <https://security.alibaba.com/>`_.
+
+"""
--- a/docs/tutorial/en/src/quickstart_agent.py
+++ b/docs/tutorial/en/src/quickstart_agent.py
@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+"""
+.. _react-agent:
+
+Create ReAct Agent
+====================
+
+AgentScope provides out-of-the-box ReAct agent ``ReActAgent`` under ``agentscope.agent`` that can be used directly.
+
+It supports the following features at the same time:
+
+- ✨ Basic features
+    - Support **hooks** around ``reply``, ``observe``, ``print``, ``_reasoning`` and ``_acting`` functions
+    - Support structured output
+- ✋ Realtime Steering
+    - Support user **interrupt**
+    - Support customized **interruption handling**
+- 🛠️ Tools
+    - Support both **sync/async** tool functions
+    - Support **streaming** tool response
+    - Support **stateful** tools management
+    - Support **parallel** tool calls
+    - Support **MCP** server
+- 💾 Memory
+    - Support **agent-controlled** long-term memory management
+    - Support static long-term memory management
+
+.. tip:: Refer to the :ref:`agent` section for more details about these
+ features. In quickstart, we focus on how to create a ReAct agent and run it.
+
+"""
+
+from agentscope.agent import ReActAgent, AgentBase
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+import asyncio
+import os
+
+from agentscope.tool import Toolkit, execute_python_code
+
+
+# %%
+# Creating ReAct Agent
+# ------------------------------
+# To improve the flexibility, the ``ReActAgent`` class exposes the following parameters in its constructor:
+#
+# .. list-table:: Initialization parameters of ``ReActAgent`` class
+#   :header-rows: 1
+#
+#   * - Parameter
+#     - Further Reading
+#     - Description
+#   * - ``name`` (required)
+#     -
+#     - The name of the agent
+#   * - ``sys_prompt`` (required)
+#     -
+#     - The system prompt of the agent
+#   * - ``model`` (required)
+#     - :ref:`model`
+#     - The model used by the agent to generate responses
+#   * - ``formatter`` (required)
+#     - :ref:`prompt`
+#     - The prompt construction strategy, which should be consistent with the model
+#   * - ``toolkit``
+#     - :ref:`tool`
+#     - The toolkit to register/call tool functions.
+#   * - ``memory``
+#     - :ref:`memory`
+#     - The short-term memory used to store the conversation history
+#   * - ``long_term_memory``
+#     - :ref:`long-term-memory`
+#     - The long-term memory
+#   * - ``long_term_memory_mode``
+#     - :ref:`long-term-memory`
+#     - The mode of the long-term memory:
+#
+#       - ``agent_control``: allow agent to control the long-term memory by itself
+#       - ``static_control``: retrieving and recording from/to long-term memory will happen in the beginning/end of each reply.
+#       - ``both``: activate the above two modes at the same time
+#   * - ``enable_meta_tool``
+#     - :ref:`tool`
+#     - Whether to enable the meta tool, which allows the agent to manage tools by itself
+#   * - ``parallel_tool_calls``
+#     - :ref:`agent`
+#     - Whether to allow parallel tool calls
+#   * - ``max_iters``
+#     -
+#     - The maximum number of iterations for the agent to generate a response
+#   * - ``plan_notebook``
+#     - :ref:`plan`
+#     - The plan notebook to manage the plans
+#   * - ``print_hint_msg``
+#     -
+#     - Whether to print the hint message generated by the plan notebook at each step
+#
+# Taking DashScope API as example, we create an agent object as follows:
+
+
+async def creating_react_agent() -> None:
+    """Build a ReAct agent backed by DashScope and send it one user message."""
+    # Register the Python execution tool in the agent's toolkit
+    tools = Toolkit()
+    tools.register_tool_function(execute_python_code)
+
+    # The API key is read from the DASHSCOPE_API_KEY environment variable
+    chat_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=True,
+        enable_thinking=False,
+    )
+
+    jarvis = ReActAgent(
+        name="Jarvis",
+        sys_prompt="You're a helpful assistant named Jarvis",
+        model=chat_model,
+        formatter=DashScopeChatFormatter(),
+        toolkit=tools,
+        memory=InMemoryMemory(),
+    )
+
+    request = Msg(
+        name="user",
+        content="Hi! Jarvis, run Hello World in Python.",
+        role="user",
+    )
+
+    await jarvis(request)
+
+
+asyncio.run(creating_react_agent())
+
+# %%
+# Creating From Scratch
+# --------------------------------
+# If you want to create an agent from scratch, AgentScope provides two base classes for you to inherit from:
+#
+# .. list-table::
+#   :header-rows: 1
+#
+#   * - Class
+#     - Abstract Methods
+#     - Description
+#   * - ``AgentBase``
+#     - | ``reply``
+#       | ``observe``
+#       | ``handle_interrupt``
+#     - - The base class for all agents, supporting pre- and post- hooks around ``reply``, ``observe`` and ``print`` functions.
+#       - Implement the realtime steering within the ``__call__`` method.
+#   * - ``ReActAgentBase``
+#     - | ``reply``
+#       | ``observe``
+#       | ``handle_interrupt``
+#       | ``_reasoning``
+#       | ``_acting``
+#     - Add two abstract functions ``_reasoning`` and ``_acting`` on the basis of ``AgentBase``, as well as their hooks.
+#
+# Please refer to the :ref:`agent` section for more details about the agent class.
+#
+# Taking the ``AgentBase`` class as an example, we can create a custom agent
+# class by inheriting from it and implementing the ``reply`` method.
+
+
+class MyAgent(AgentBase):
+    """A minimal custom agent built directly on ``AgentBase``."""
+
+    def __init__(self) -> None:
+        """Set up the name, system prompt, model, formatter and memory."""
+        super().__init__()
+
+        self.name = "Friday"
+        self.sys_prompt = "You're a helpful assistant named Friday."
+        self.model = DashScopeChatModel(
+            model_name="qwen-max",
+            api_key=os.environ["DASHSCOPE_API_KEY"],
+            stream=False,
+        )
+        self.formatter = DashScopeChatFormatter()
+        self.memory = InMemoryMemory()
+
+    async def reply(self, msg: Msg | list[Msg] | None) -> Msg:
+        """Store the input, query the model and return its answer."""
+        await self.memory.add(msg)
+
+        # Build the prompt: the system prompt first, then the stored memory
+        system_msg = Msg("system", self.sys_prompt, "system")
+        history = await self.memory.get_memory()
+        prompt = await self.formatter.format([system_msg, *history])
+
+        # Query the model and wrap its content in an assistant message
+        response = await self.model(prompt)
+        reply_msg = Msg(
+            name=self.name,
+            content=response.content,
+            role="assistant",
+        )
+
+        # Keep the answer in memory, display it, then hand it back
+        await self.memory.add(reply_msg)
+        await self.print(reply_msg)
+        return reply_msg
+
+    async def observe(self, msg: Msg | list[Msg] | None) -> None:
+        """Record the incoming message(s) in memory without replying."""
+        await self.memory.add(msg)
+
+    async def handle_interrupt(self) -> Msg:
+        """Build the response used to handle an interrupt."""
+        # A fixed reply is enough for this example
+        interrupted_reply = Msg(
+            name=self.name,
+            content="I noticed you interrupted me, how can I help you?",
+            role="assistant",
+        )
+        return interrupted_reply
+
+
+async def run_custom_agent() -> None:
+    """Instantiate ``MyAgent`` and send it a single user question."""
+    question = Msg(
+        name="user",
+        content="Who are you?",
+        role="user",
+    )
+    friday = MyAgent()
+    await friday(question)
+
+
+asyncio.run(run_custom_agent())
+
+# %%
+#
+# Further Reading
+# ---------------------
+# - :ref:`agent`
+# - :ref:`model`
+# - :ref:`prompt`
+# - :ref:`tool`
+#
--- a/docs/tutorial/en/src/quickstart_installation.py
+++ b/docs/tutorial/en/src/quickstart_installation.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+"""
+.. _installation:
+
+Installation
+============================
+
+AgentScope requires Python 3.10 or higher. You can install it from PyPI or from source.
+
+From PyPI
+----------------
+.. code-block:: bash
+
+    pip install agentscope
+
+From Source
+----------------
+To install AgentScope from source, clone the repository from
+GitHub and install it with the following commands:
+
+.. code-block:: bash
+
+    git clone -b main https://github.com/agentscope-ai/agentscope
+    cd agentscope
+    pip install -e .
+
+To ensure AgentScope is installed successfully, check by executing the following code:
+"""
+
+import agentscope
+
+print(agentscope.__version__)
+
+# %%
+# Extra Dependencies
+# ----------------------------
+#
+# To satisfy the requirements of different functionalities, AgentScope provides
+# extra dependencies that can be installed based on your needs.
+#
+# - full: Including extra dependencies for model APIs and tool functions
+# - dev: Development dependencies, including testing and documentation tools
+#
+# For example, when installing the full dependencies, the installation command varies depending on your operating system.
+#
+# For Windows users:
+#
+# .. code-block:: bash
+#
+#       pip install agentscope[full]
+#
+# For Mac and Linux users:
+#
+# .. code-block:: bash
+#
+#       pip install agentscope\[full\]
--- a/docs/tutorial/en/src/quickstart_key_concept.py
+++ b/docs/tutorial/en/src/quickstart_key_concept.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""
+.. _key-concepts:
+
+Key Concepts
+====================================
+
+This chapter establishes key concepts from an engineering
+perspective to introduce AgentScope's design.
+
+.. note:: The goal of introducing the key concepts in AgentScope is to clarify what practical problems AgentScope addresses and how it supports developers, rather than to offer formal definitions.
+
+State
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In AgentScope, state management is a fundamental building block that maintains snapshots of objects' runtime data.
+
+AgentScope separates object initialization from state management, allowing
+objects to be restored to different states after initialization through the
+``load_state_dict`` and ``state_dict`` methods.
+
+In AgentScope, agent, memory, long-term memory and toolkit are all stateful
+objects. AgentScope links the state management of these objects together by supporting nested state management.
+
+Message
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In AgentScope, message is the fundamental data structure,
+used to
+
+- exchange information between agents,
+- display information in the user interface,
+- store information in memory,
+- act as a unified medium between AgentScope and different LLM APIs.
+
+Tool
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A tool in AgentScope refers to a callable object, whether it is a
+
+- function,
+- partial function,
+- instance method,
+- class method,
+- static method, or
+- callable instance with ``__call__`` method.
+
+Besides, the callable object can be either
+
+- async or sync,
+- streaming or non-streaming.
+
+So feel free to use any callable object as a tool in AgentScope.
+
+Agent
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In AgentScope, the agent behaviors are abstracted into three core functions in
+``AgentBase`` class:
+
+- ``reply``: Handle incoming message(s) and generate a response message.
+- ``observe``: Receive message(s) from the environment or other agents without returning a response.
+- ``print``: Display message(s) to the target terminal, web interface, etc.
+
+To support realtime steering, an additional ``handle_interrupt`` function is
+provided to handle user interrupts during the agent's reply process.
+
+Additionally, ReAct agent is the most important agent in AgentScope, where
+the agent's reply process is divided into two stages:
+
+- reasoning: thinking and generating tool calls by calling the LLM
+- acting: executing the tool functions.
+
+Thus, we provide two additional core functions in ``ReActAgentBase`` class,
+``_reasoning`` and ``_acting``.
+
+Formatter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Formatter is the core component for LLM compatibility in AgentScope,
+responsible for converting message objects into the required format for
+LLM APIs.
+
+Besides, additional functionality such as prompt engineering, truncation,
+and message validation can also be implemented in the formatter.
+
+Within the formatter, the "multi-agent" (or "multi-identity") concept differs
+from the common multi-agent orchestration concept.
+It focuses on the scenario where multiple identities are involved in the
+given messages, so that the commonly used ``role`` field (usually "user",
+"assistant" or "system") in LLM APIs cannot distinguish them.
+
+Therefore, AgentScope provides multi-agent formatter to handle
+this scenario, usually used in games, multi-person chats, and social
+simulations.
+
+.. note:: Multi-agent workflow **!=** multi-agent in formatter.
+ For example, even if the following code snippet may involve multiple
+ agents (the ``tool_agent`` and the ``tool_function`` caller), the input query
+ is wrapped into a **user** message, so the ``role`` field can still distinguish
+ between them.
+
+ .. code-block:: python
+
+    async def tool_function(query: str) -> str:
+        \"\"\"Tool function calling another agent\"\"\"
+        msg = Msg("user", query, role="user")
+        tool_agent = Agent(name="Programmer")
+        return await tool_agent(msg)
+
+ Understanding this distinction helps developers better grasp AgentScope's formatter design.
+
+
+Long-Term Memory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Although AgentScope provides different base classes for short- and
+long-term memory, there is no strict distinction between them.
+
+In our view, everything should be **requirement-driven**. As long as your
+needs are excellently met, developers can completely use just one powerful
+memory system.
+
+To ensure the flexibility of AgentScope, we provide a two-mode long-term
+memory system, allowing the agent to manage (record and retrieve) the
+long-term memory on its own.
+"""
--- a/docs/tutorial/en/src/quickstart_message.py
+++ b/docs/tutorial/en/src/quickstart_message.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+"""
+.. _message:
+
+Create Message
+====================
+
+Message is the core concept in AgentScope, used to support multimodal data, tools API, information storage/exchange and prompt construction.
+
+A message consists of four fields:
+
+- ``name``,
+- ``role``,
+- ``content``, and
+- ``metadata``
+
+The types and descriptions of these fields are as follows:
+
+.. list-table:: The fields in a message object
+    :header-rows: 1
+
+    * - Field
+      - Type
+      - Description
+    * - name
+      - ``str``
+      - The name/identity of the message sender
+    * - role
+      - | ``Literal[``
+        |     ``"system",``
+        |     ``"assistant",``
+        |     ``"user"``
+        | ``]``
+      - The role of the message sender, which must be one of "system", "assistant", or "user".
+    * - content
+      - ``str | list[ContentBlock]``
+      - The data of the message, which can be a string or a list of blocks.
+    * - metadata
+      - ``dict[str, JSONSerializableObject] | None``
+      - A dict containing additional metadata about the message, usually used for structured output.
+
+.. tip:: - In applications with multiple identities, the ``name`` field is used to distinguish between different identities.
+ - The ``metadata`` field is recommended for structured output, which won't be included in the prompt construction.
+
+Next, we introduce the supported blocks in the ``content`` field by their corresponding scenarios.
+"""
+
+from agentscope.message import (
+    Msg,
+    Base64Source,
+    TextBlock,
+    ThinkingBlock,
+    ImageBlock,
+    AudioBlock,
+    VideoBlock,
+    ToolUseBlock,
+    ToolResultBlock,
+)
+import json
+
+# %%
+# Creating Textual Message
+# -----------------------------
+# Creating a message object by providing the ``name``, ``role``, and ``content`` fields.
+#
+
+msg = Msg(
+    name="Jarvis",
+    role="assistant",
+    content="Hi! How can I help you?",
+)
+
+print(f"The name of the sender: {msg.name}")
+print(f"The role of the sender: {msg.role}")
+print(f"The content of the message: {msg.content}")
+
+# %%
+# Creating Multimodal Message
+# --------------------------------------
+# The message class supports multimodal content by providing different content blocks:
+#
+# .. list-table:: Multimodal content blocks in AgentScope
+#     :header-rows: 1
+#
+#     * - Class
+#       - Description
+#       - Example
+#     * - TextBlock
+#       - Pure text data
+#       - .. code-block:: python
+#
+#             TextBlock(
+#                type="text",
+#                text="Hello, world!"
+#             )
+#     * - ImageBlock
+#       - The image data
+#       - .. code-block:: python
+#
+#             ImageBlock(
+#                type="image",
+#                source=URLSource(
+#                    type="url",
+#                    url="https://example.com/image.jpg"
+#                )
+#             )
+#     * - AudioBlock
+#       - The audio data
+#       - .. code-block:: python
+#
+#             AudioBlock(
+#                type="audio",
+#                source=URLSource(
+#                    type="url",
+#                    url="https://example.com/audio.mp3"
+#                )
+#             )
+#     * - VideoBlock
+#       - The video data
+#       - .. code-block:: python
+#
+#             VideoBlock(
+#                type="video",
+#                source=URLSource(
+#                    type="url",
+#                    url="https://example.com/video.mp4"
+#                )
+#             )
+#
+# For ``ImageBlock``, ``AudioBlock`` and ``VideoBlock``, you can also use a base64-encoded string as the source:
+#
+
+msg = Msg(
+    name="Jarvis",
+    role="assistant",
+    content=[
+        TextBlock(
+            type="text",
+            text="This is a multimodal message with base64 encoded data.",
+        ),
+        ImageBlock(
+            type="image",
+            source=Base64Source(
+                type="base64",
+                media_type="image/jpeg",
+                data="/9j/4AAQSkZ...",
+            ),
+        ),
+        AudioBlock(
+            type="audio",
+            source=Base64Source(
+                type="base64",
+                media_type="audio/mpeg",
+                data="SUQzBAAAAA...",
+            ),
+        ),
+        VideoBlock(
+            type="video",
+            source=Base64Source(
+                type="base64",
+                media_type="video/mp4",
+                data="AAAAIGZ0eX...",
+            ),
+        ),
+    ],
+)
+
+# %%
+# Creating Thinking Message
+# --------------------------------------
+# The ``ThinkingBlock`` is to support reasoning models, containing the thinking process of the model.
+#
+
+msg_thinking = Msg(
+    name="Jarvis",
+    role="assistant",
+    content=[
+        ThinkingBlock(
+            type="thinking",
+            thinking="I'm building an example for thinking block in AgentScope.",
+        ),
+        TextBlock(
+            type="text",
+            text="This is an example for thinking block.",
+        ),
+    ],
+)
+
+# %%
+# .. _tool-block:
+#
+# Creating Tool Use/Result Message
+# --------------------------------------
+# The ``ToolUseBlock`` and ``ToolResultBlock`` are to support tools API:
+#
+
+msg_tool_call = Msg(
+    name="Jarvis",
+    role="assistant",
+    content=[
+        ToolUseBlock(
+            type="tool_use",
+            id="343",
+            name="get_weather",
+            input={
+                "location": "Beijing",
+            },
+        ),
+    ],
+)
+
+msg_tool_res = Msg(
+    name="system",
+    role="system",
+    content=[
+        ToolResultBlock(
+            type="tool_result",
+            id="343",
+            name="get_weather",
+            output="The weather in Beijing is sunny with a temperature of 25°C.",
+        ),
+    ],
+)
+
+
+# %%
+# .. tip:: Refer to the :ref:`tool` section for more information about tools API in AgentScope.
+#
+# Serialization and Deserialization
+# ------------------------------------------------
+# A message object can be serialized and deserialized by the ``to_dict`` and ``from_dict`` methods, respectively.
+
+serialized_msg = msg.to_dict()
+
+print(type(serialized_msg))
+print(json.dumps(serialized_msg, indent=4))
+
+# %%
+# Deserialize a message from the serialized dictionary.
+
+new_msg = Msg.from_dict(serialized_msg)
+
+print(type(new_msg))
+print(f'The sender of the message: "{new_msg.name}"')
+print(f'The role of the sender: "{new_msg.role}"')
+print(f'The content of the message: "{json.dumps(new_msg.content, indent=4)}"')
+
+# %%
+# Property Functions
+# ------------------------------------------------
+# To ease the use of message object, AgentScope provides these functions:
+#
+# .. list-table:: Functions of the message object
+#   :header-rows: 1
+#
+#   * - Function
+#     - Parameters
+#     - Description
+#   * - get_text_content
+#     - \-
+#     - Gather content from all ``TextBlock`` into a single string (separated by "\\n").
+#   * - get_content_blocks
+#     - ``block_type``
+#     - Return a list of content blocks of the specified type. If ``block_type`` is not provided, return content in blocks format.
+#   * - has_content_blocks
+#     - ``block_type``
+#     - Check whether the message has content blocks of the specified type. The ``str`` content is considered as a ``TextBlock`` type.
--- a/docs/tutorial/en/src/task_a2a.py
+++ b/docs/tutorial/en/src/task_a2a.py
@@ -0,0 +1,212 @@
+# -*- coding: utf-8 -*-
+"""
+.. _a2a:
+
+A2A Agent
+============================
+
+A2A (Agent-to-Agent) is an open standard protocol for enabling interoperable communication between different AI agents.
+
+AgentScope provides support for the A2A protocol at two levels: obtaining Agent Card information and connecting to remote agents. The related APIs are as follows:
+
+.. list-table:: A2A Related Classes
+    :header-rows: 1
+
+    * - Class
+      - Description
+    * - ``A2AAgent``
+      - Agent class for communicating with remote A2A agents
+    * - ``A2AChatFormatter``
+      - Formatter for converting between AgentScope messages and A2A message/task formats
+    * - ``AgentCardResolverBase``
+      - Base class for Agent Card resolvers
+    * - ``FileAgentCardResolver``
+      - Resolver for loading Agent Cards from local JSON files
+    * - ``WellKnownAgentCardResolver``
+      - Resolver for fetching Agent Cards from the well-known path of a URL
+    * - ``NacosAgentCardResolver``
+      - Resolver for fetching Agent Cards from the Nacos Agent Registry
+
+This section demonstrates how to create an ``A2AAgent`` and communicate with remote A2A agents.
+
+.. note:: Note that A2A support is an **experimental feature** and may change in future versions. Due to limitations of the A2A protocol itself, ``A2AAgent`` cannot fully align with local agents like ``ReActAgent``, including:
+
+ - Only supports chatbot scenarios, i.e., only supports conversations between one user and one agent (does not affect handoff/router usage patterns)
+ - Does not support real-time interruption during conversations
+ - Does not support agentic structured output
+ - In the current implementation, messages received by the ``observe`` method are stored locally and sent to the remote agent together when the ``reply`` method is called. Therefore, if several ``observe`` calls are made without a subsequent ``reply`` call, those messages will not be seen by the remote agent
+
+
+"""
+
+from a2a.types import AgentCard, AgentCapabilities
+from v2.nacos import ClientConfig
+
+from agentscope.a2a import WellKnownAgentCardResolver, NacosAgentCardResolver
+from agentscope.agent import A2AAgent, UserAgent
+from agentscope.message import Msg, TextBlock
+from agentscope.tool import ToolResponse
+
+# %%
+# Obtaining Agent Cards
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# First, we need to obtain an Agent Card to connect to the corresponding agent. An Agent Card contains information such as the agent's name, description, capabilities, and connection details.
+#
+# Manually Creating Agent Card
+# --------------------------------
+#
+# If you know all the information of an Agent Card, you can directly create an Agent Card object from ``a2a.types.AgentCard``.
+#
+
+# Create an Agent Card object
+agent_card = AgentCard(  # every field is filled in by hand; nothing is fetched remotely
+    name="Friday",  # Agent name
+    description="A fun chatting companion",  # Agent description
+    url="http://localhost:8000",  # Agent's RPC service URL
+    version="1.0.0",  # Agent version
+    capabilities=AgentCapabilities(  # Agent capability configuration
+        push_notifications=False,
+        state_transition_history=True,
+        streaming=True,
+    ),
+    default_input_modes=["text/plain"],  # Supported input formats
+    default_output_modes=["text/plain"],  # Supported output formats
+    skills=[],  # Agent skill list (left empty in this example)
+)
+
+# %%
+#
+# Fetching from Remote Services
+# --------------------------------
+# AgentScope also supports fetching from the standard path of remote services (well-known server).
+# Here's an example using ``WellKnownAgentCardResolver`` to fetch an Agent Card from the standard path of a remote service:
+#
+
+
+async def agent_card_from_well_known_website() -> AgentCard:
+    """Example of fetching an Agent Card from the well-known path of a remote service."""
+    # Build a resolver that targets the remote service's base URL
+    resolver = WellKnownAgentCardResolver(
+        base_url="http://localhost:8000",
+    )
+    # Await the resolver's asynchronous lookup and return the resulting Agent Card
+    return await resolver.get_agent_card()
+
+
+# %%
+# Loading Agent Cards from Local Files
+# ----------------------------------------
+#
+# The ``FileAgentCardResolver`` class supports loading Agent Cards from local JSON files, suitable for configuration file management scenarios.
+# An example of an Agent Card in JSON format is shown below:
+#
+# .. code-block:: json
+#     :caption: Example Agent Card JSON file content
+#
+#     {
+#         "name": "RemoteAgent",
+#         "url": "http://localhost:8000",
+#         "description": "Remote A2A Agent",
+#         "version": "1.0.0",
+#         "capabilities": {},
+#         "default_input_modes": ["text/plain"],
+#         "default_output_modes": ["text/plain"],
+#         "skills": []
+#     }
+#
+# You can easily load this file using ``FileAgentCardResolver``:
+#
+
+
+async def agent_card_from_file() -> AgentCard:
+    """Example of loading an Agent Card from a local JSON file."""
+    from agentscope.a2a import FileAgentCardResolver  # imported locally; the module-level import does not include it
+
+    # Point the resolver at the JSON file that holds the Agent Card
+    resolver = FileAgentCardResolver(
+        file_path="./agent_card.json",  # JSON file path, relative to the working directory
+    )
+    # Await the resolver's asynchronous load and return the resulting Agent Card
+    return await resolver.get_agent_card()
+
+
+# %%
+# Fetching Agent Cards from Nacos Registry
+# ----------------------------------------
+#
+# Nacos is an open-source dynamic service discovery, configuration management, and service management platform. In version 3.1.0, it introduced the Agent Registry feature, supporting distributed registration, discovery, and version management of A2A agents.
+#
+# .. important:: The prerequisite for using ``NacosAgentCardResolver`` is that the user has deployed a Nacos server version 3.1.0 or higher. For deployment and registration procedures, please refer to the `official documentation <https://nacos.io/docs/latest/quickstart/quick-start>`_.
+#
+
+
+async def agent_card_from_nacos() -> AgentCard:
+    """Example of fetching an Agent Card from the Nacos registry."""
+
+    # Create a resolver that looks the agent up by name in the Nacos registry
+    resolver = NacosAgentCardResolver(
+        remote_agent_name="my-remote-agent",  # Agent name registered in Nacos
+        nacos_client_config=ClientConfig(
+            server_addresses="http://localhost:8848",  # Nacos server address
+            # Other optional ``ClientConfig`` items can be added here
+        ),
+    )
+    # Await the resolver's asynchronous lookup and return the resulting Agent Card
+    return await resolver.get_agent_card()
+
+
+# %%
+# Building an A2A Agent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The ``A2AAgent`` class provided by AgentScope is used to communicate with remote A2A agents, and its usage is similar to regular agents.
+
+agent = A2AAgent(agent_card=agent_card)  # uses the manually created Agent Card from above
+
+# %%
+# Using ``A2AAgent``, developers can build chatbot scenario conversations, or encapsulate it as a tool function to build more complex application scenarios such as handoff/router.
+# Currently, the format protocol conversion supported by ``A2AAgent`` is handled by ``agentscope.formatter.A2AChatFormatter``, which supports:
+#
+# - Converting AgentScope's ``Msg`` messages to A2A protocol's ``Message`` format
+# - Converting A2A protocol responses back to AgentScope's ``Msg`` format
+# - Converting A2A protocol's ``Task`` responses to AgentScope's ``Msg`` format
+# - Supporting multiple content types such as text, images, audio, and video
+#
+
+
+async def a2a_in_chatbot() -> None:
+    """Example of chatting using A2AAgent."""
+
+    user = UserAgent("user")
+
+    msg = None  # nothing to hand to the user agent on the first turn
+    while True:  # alternate user turn / agent turn until the user's text is exactly "exit"
+        msg = await user(msg)
+        if msg.get_text_content() == "exit":
+            break
+        msg = await agent(msg)  # ``agent`` is the module-level A2AAgent created above
+
+
+# %%
+# Or encapsulate it as a tool function for invocation:
+
+
+async def create_worker(query: str) -> ToolResponse:
+    """Complete a given task through a sub-agent
+
+    Args:
+        query (`str`):
+            Description of the task to be completed by the sub-agent
+    """
+    res = await agent(  # delegate the task to the module-level A2AAgent
+        Msg("user", query, "user"),
+    )
+    return ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text=res.get_text_content(),  # only the text content of the reply is forwarded
+            ),
+        ],
+    )
--- a/docs/tutorial/en/src/task_agent.py
+++ b/docs/tutorial/en/src/task_agent.py
@@ -0,0 +1,426 @@
+# -*- coding: utf-8 -*-
+"""
+.. _agent:
+
+Agent
+=========================
+
+In this tutorial, we first focus on introducing the ReAct agent in AgentScope,
+then we briefly introduce how to customize your own agent from scratch.
+
+ReAct Agent
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In AgentScope, the ``ReActAgent`` class integrates various features into a final implementation, including
+
+.. list-table:: Features of ``ReActAgent``
+    :header-rows: 1
+
+    * - Feature
+      - Reference
+    * - Support realtime steering
+      -
+    * - Support memory compression
+      -
+    * - Support parallel tool calls
+      -
+    * - Support structured output
+      -
+    * - Support fine-grained MCP control
+      - :ref:`mcp`
+    * - Support agent-controlled tools management (Meta tool)
+      - :ref:`tool`
+    * - Support self-controlled long-term memory
+      - :ref:`long-term-memory`
+    * - Support automatic state management
+      - :ref:`state`
+
+
+Due to limited space, in this tutorial we only demonstrate the first four
+features of ``ReActAgent`` class, leaving the others to the corresponding sections
+listed above.
+
+"""
+
+import asyncio
+import json
+import os
+from datetime import datetime
+import time
+
+from pydantic import BaseModel, Field
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import TextBlock, Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit, ToolResponse
+
+
+# %%
+# Realtime Steering
+# ---------------------------------------
+#
+# Realtime steering allows users to interrupt the agent's reply at any time,
+# which is implemented based on the asyncio cancellation mechanism.
+#
+# Specifically, when calling the ``interrupt`` method of the agent, it will
+# cancel the current reply task, and execute the ``handle_interrupt`` method
+# for postprocessing.
+#
+# .. hint:: With the feature of supporting streaming tool results in
+#  :ref:`tool`, users can interrupt the tool execution if it takes too long or
+#  deviates from user expectations by Ctrl+C in the terminal or calling the
+#  ``interrupt`` method of the agent in your code.
+#
+# The interruption logic has been implemented in the ``AgentBase`` class as a
+# basic feature, leaving a ``handle_interrupt`` method for users to customize the
+# post-processing of interruption as follows:
+#
+# .. code-block:: python
+#
+#     # code snippet of AgentBase
+#     class AgentBase:
+#         ...
+#         async def __call__(self, *args: Any, **kwargs: Any) -> Msg:
+#             ...
+#             reply_msg: Msg | None = None
+#             try:
+#                 self._reply_task = asyncio.current_task()
+#                 reply_msg = await self.reply(*args, **kwargs)
+#
+#             except asyncio.CancelledError:
+#                 # Catch the interruption and handle it by the handle_interrupt method
+#                 reply_msg = await self.handle_interrupt(*args, **kwargs)
+#
+#             ...
+#
+#         @abstractmethod
+#         async def handle_interrupt(self, *args: Any, **kwargs: Any) -> Msg:
+#             pass
+#
+#
+# In ``ReActAgent`` class, we return a fixed message "I noticed that you have
+# interrupted me. What can I do for you?" as follows:
+#
+# .. figure:: ../../_static/images/interruption_en.gif
+#     :width: 100%
+#     :align: center
+#     :class: bordered-image
+#     :alt: Example of interruption
+#
+#     Example of interruption
+#
+# You can override it with your own implementation, for example, calling the LLM
+# to generate a simple response to the interruption.
+#
+#
+# Memory Compression
+# ----------------------------------------
+# As conversations grow longer, the token count in memory can exceed model context
+# limits or slow down inference. ``ReActAgent`` provides an automatic memory compression
+# feature to address this issue.
+#
+# **Basic Usage**
+#
+# To enable memory compression, provide a ``CompressionConfig`` instance when initializing
+# the ``ReActAgent``:
+#
+# .. code-block:: python
+#
+#     from agentscope.agent import ReActAgent
+#     from agentscope.token import CharTokenCounter
+#
+#     agent = ReActAgent(
+#         name="Assistant",
+#         sys_prompt="You are a helpful assistant.",
+#         model=model,
+#         formatter=formatter,
+#         compression_config=ReActAgent.CompressionConfig(
+#             enable=True,
+#             agent_token_counter=CharTokenCounter(),  # The token counter for the agent
+#             trigger_threshold=10000,  # Trigger compression when exceeding 10000 tokens
+#             keep_recent=3,            # Keep the most recent 3 messages uncompressed
+#         ),
+#     )
+#
+# When memory compression is enabled, the agent monitors the token count in its memory.
+# Once it exceeds the ``trigger_threshold``, the agent automatically:
+#
+# 1. Identifies messages that haven't been compressed yet (via ``exclude_mark``)
+# 2. Keeps the most recent ``keep_recent`` messages uncompressed (to preserve recent context)
+# 3. Sends older messages to an LLM to generate a structured summary
+# 4. Marks the compressed messages with ``MemoryMark.COMPRESSED`` (via ``update_messages_mark``)
+# 5. Stores the summary in memory (via ``update_compressed_summary``)
+#
+# .. important:: The compression uses a **marking mechanism** rather than replacing messages. Old messages are marked as compressed and excluded from future retrievals via ``exclude_mark=MemoryMark.COMPRESSED``, while the generated summary is stored separately and retrieved when needed. This approach preserves the original messages and allows flexible memory management. For more details about the mark functionality, please refer to :ref:`memory`.
+#
+# By default, the compressed summary is structured into five key fields:
+#
+# - **task_overview**: The user's core request and success criteria
+# - **current_state**: What has been completed so far, including files and outputs
+# - **important_discoveries**: Technical constraints, decisions, errors, and failed approaches
+# - **next_steps**: Specific actions needed to complete the task
+# - **context_to_preserve**: User preferences, domain details, and promises made
+#
+# **Customizing Compression**
+#
+# You can customize how compression works by specifying ``summary_schema``,
+# ``summary_template``, and ``compression_prompt`` parameters.
+#
+# - **compression_prompt**: Guides the LLM on how to generate the summary
+# - **summary_schema**: Defines the structure of the compressed summary using a Pydantic model
+# - **summary_template**: Formats how the compressed summary is presented back to the agent
+#
+# Here's an example of customizing the compression:
+#
+# .. code-block:: python
+#
+#     from pydantic import BaseModel, Field
+#
+#     # Define a custom summary structure
+#     class CustomSummary(BaseModel):
+#         main_topic: str = Field(
+#             max_length=200,
+#             description="The main topic of the conversation"
+#         )
+#         key_points: str = Field(
+#             max_length=400,
+#             description="Important points discussed"
+#         )
+#         pending_tasks: str = Field(
+#             max_length=200,
+#             description="Tasks that remain to be done"
+#         )
+#
+#     # Create agent with custom compression configuration
+#     agent = ReActAgent(
+#         name="Assistant",
+#         sys_prompt="You are a helpful assistant.",
+#         model=model,
+#         formatter=formatter,
+#         compression_config=ReActAgent.CompressionConfig(
+#             enable=True,
+#             agent_token_counter=CharTokenCounter(),
+#             trigger_threshold=10000,
+#             keep_recent=3,
+#             # Custom schema for structured summary
+#             summary_schema=CustomSummary,
+#             # Custom prompt to guide compression
+#             compression_prompt=(
+#                 "<system-hint>Please summarize the above conversation "
+#                 "focusing on the main topic, key discussion points, "
+#                 "and any pending tasks.</system-hint>"
+#             ),
+#             # Custom template to format the summary
+#             summary_template=(
+#                 "<system-info>Conversation Summary:\n"
+#                 "Main Topic: {main_topic}\n\n"
+#                 "Key Points:\n{key_points}\n\n"
+#                 "Pending Tasks:\n{pending_tasks}"
+#                 "</system-info>"
+#             ),
+#         ),
+#     )
+#
+# The ``summary_template`` uses the fields defined in ``summary_schema`` as placeholders
+# (e.g., ``{main_topic}``, ``{key_points}``). After the LLM generates the structured summary,
+# these placeholders will be replaced with the actual values.
+#
+# .. note:: The agent ensures that tool use and tool result pairs are kept together during compression to maintain the integrity of the conversation flow.
+#
+# .. tip:: You can use a smaller, faster model for compression by specifying a different ``compression_model`` and ``compression_formatter`` to reduce costs and latency.
+#
+#
+#
+# Parallel Tool Calls
+# ----------------------------------------
+# ``ReActAgent`` supports parallel tool calls by providing a ``parallel_tool_calls``
+# argument in its constructor.
+# When multiple tool calls are generated, and ``parallel_tool_calls`` is set to ``True``,
+# they will be executed in parallel by the ``asyncio.gather`` function.
+#
+# .. note:: The parallel tool execution in ``ReActAgent`` is implemented based on ``asyncio.gather``. Therefore, to maximize the effect of parallel tool execution, both the tool function itself and the logic within it must be asynchronous.
+#
+# .. note:: When running, please ensure that parallel tool calling is supported at the model level and the corresponding parameters are set correctly (can be passed through ``generate_kwargs``). For example, for the DashScope API, you need to set ``parallel_tool_calls`` to ``True``, otherwise parallel tool calling will not be possible.
+
+
+# prepare a tool function
+async def example_tool_function(tag: str) -> ToolResponse:
+    """A sample example tool function"""
+    start_time = datetime.now().strftime("%H:%M:%S.%f")  # wall-clock start, with microseconds
+
+    # Sleep for 3 seconds to simulate a long-running task (awaited, so other tasks can run meanwhile)
+    await asyncio.sleep(3)
+
+    end_time = datetime.now().strftime("%H:%M:%S.%f")  # wall-clock end, same format as the start
+    return ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text=f"Tag {tag} started at {start_time} and ended at {end_time}. ",
+            ),
+        ],
+    )
+
+
+toolkit = Toolkit()
+toolkit.register_tool_function(example_tool_function)
+
+# Create a ReAct agent that is allowed to execute several tool calls in parallel
+agent = ReActAgent(
+    name="Jarvis",
+    sys_prompt="You're a helpful assistant named Jarvis.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],  # raises KeyError if the variable is not set
+        # Model-level switch: preset the generation kwargs to enable parallel tool calls
+        generate_kwargs={
+            "parallel_tool_calls": True,
+        },
+    ),
+    memory=InMemoryMemory(),
+    formatter=DashScopeChatFormatter(),
+    toolkit=toolkit,
+    parallel_tool_calls=True,  # agent-level switch: run the generated tool calls in parallel
+)
+
+
+async def example_parallel_tool_calls() -> None:
+    """Example of parallel tool calls"""
+    # Ask for both tool calls in a single turn so that the agent can execute them in parallel
+    await agent(
+        Msg(
+            "user",
+            "Generate two tool calls of the 'example_tool_function' function with tag as 'tag1' and 'tag2' AT ONCE so that they can execute in parallel.",
+            "user",
+        ),
+    )
+
+
+asyncio.run(example_parallel_tool_calls())  # run the coroutine to completion on a new event loop
+
+# %%
+# Structured Output
+# ----------------------------------------
+# To generate a structured output, the ``ReActAgent`` instance receives a child class
+# of the ``pydantic.BaseModel`` as the ``structured_model`` argument in its ``__call__`` function.
+# Then we can get the structured output from the ``metadata`` field of the returned message.
+#
+#
+# Taking introducing Einstein as an example:
+#
+
+# Create a ReAct agent for the structured output example
+agent = ReActAgent(
+    name="Jarvis",
+    sys_prompt="You're a helpful assistant named Jarvis.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],  # raises KeyError if the variable is not set
+        # Preset the generation kwargs to enable parallel tool calls
+        generate_kwargs={
+            "parallel_tool_calls": True,
+        },
+    ),
+    memory=InMemoryMemory(),
+    formatter=DashScopeChatFormatter(),
+    toolkit=Toolkit(),  # fresh toolkit; no tool function is registered on it here
+    parallel_tool_calls=True,
+)
+
+
+# Pydantic schema passed as ``structured_model`` to describe the structured output
+class Model(BaseModel):
+    name: str = Field(description="The name of the person")
+    description: str = Field(
+        description="A one-sentence description of the person",
+    )
+    age: int = Field(description="The age")
+    honor: list[str] = Field(description="A list of honors of the person")
+
+
+async def example_structured_output() -> None:
+    """The example structured output"""
+    res = await agent(
+        Msg(
+            "user",
+            "Introduce Einstein",
+            "user",
+        ),
+        structured_model=Model,  # schema the structured output should follow
+    )
+    print("\nThe structured output:")
+    print(json.dumps(res.metadata, indent=4))  # the structured output is read from ``metadata``
+
+
+asyncio.run(example_structured_output())  # run the coroutine to completion on a new event loop
+
+# %%
+# Customizing Agent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope provides two base classes, ``AgentBase`` and ``ReActAgentBase``, which
+# differ in the abstract methods they define and the hooks they support.
+# Specifically, the ``ReActAgentBase`` extends ``AgentBase`` with additional ``_reasoning`` and ``_acting``
+# abstract methods, as well as their pre- and post- hooks.
+#
+# Developers can choose to inherit from either of these base classes based on their needs.
+# We summarize the agent under ``agentscope.agent`` module as follows:
+#
+# .. list-table:: Agent classes in AgentScope
+#     :header-rows: 1
+#
+#     * - Class
+#       - Abstract Method
+#       - Support Hooks
+#       - Description
+#     * - ``AgentBase``
+#       - | ``reply``
+#         | ``observe``
+#         | ``print``
+#         | ``handle_interrupt``
+#       - | pre\_/post_reply
+#         | pre\_/post_observe
+#         | pre\_/post_print
+#       - The base class for all agents, providing the basic interface and hooks.
+#     * - ``ReActAgentBase``
+#       - | ``reply``
+#         | ``observe``
+#         | ``print``
+#         | ``handle_interrupt``
+#         | ``_reasoning``
+#         | ``_acting``
+#       - | pre\_/post_reply
+#         | pre\_/post_observe
+#         | pre\_/post_print
+#         | pre\_/post_reasoning
+#         | pre\_/post_acting
+#       - The abstract class for ReAct agent, extending ``AgentBase`` with reasoning and acting abstract methods and their hooks.
+#     * - ``ReActAgent``
+#       - \-
+#       - | pre\_/post_reply
+#         | pre\_/post_observe
+#         | pre\_/post_print
+#         | pre\_/post_reasoning
+#         | pre\_/post_acting
+#       - An implementation of ``ReActAgentBase``
+#     * - ``UserAgent``
+#       -
+#       -
+#       - A special agent that represents the user, used to interact with the agent
+#     * - ``A2AAgent``
+#       - \-
+#       - | pre\_/post_reply
+#         | pre\_/post_observe
+#         | pre\_/post_print
+#       - Agent for communicating with remote A2A agents, see :ref:`a2a`
+#
+#
+#
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# - :ref:`tool`
+# - :ref:`hook`
+# - :ref:`a2a`
+#
--- a/docs/tutorial/en/src/task_agent_skill.py
+++ b/docs/tutorial/en/src/task_agent_skill.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+"""
+.. _agent_skill:
+
+Agent Skill
+============================
+
+`Agent skill <https://claude.com/blog/skills>`_ is an approach proposed by
+Anthropic to improve agent capabilities on specific tasks.
+
+AgentScope provides built-in support for Agent Skills through the ``Toolkit``
+class, allowing users to easily register and manage agent skills.
+
+The related APIs are as follows:
+
+.. list-table:: Agent skill API in ``Toolkit`` class
+    :header-rows: 1
+
+    * - API
+      - Description
+    * - ``register_agent_skill``
+      - Register agent skills from a given directory.
+    * - ``remove_agent_skill``
+      - Remove a registered agent skill by name.
+    * - ``get_agent_skill_prompt``
+      - Get the prompt for all registered agent skills, which can be
+        attached to the system prompt for the agent.
+
+In this section we demonstrate how to register agent skills and use them in an
+ReAct agent.
+"""
+import os
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit
+
+# %%
+# Registering Agent Skills
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# First, we need to prepare an agent skill directory, which follows the
+# requirements specified in the `Anthropic blog <https://claude.com/blog/skills>`_.
+#
+# .. note:: The skill directory must contain a ``SKILL.md`` file containing
+#  YAML frontmatter and instructions.
+#
+# Here, we fake an example skill directory ``sample_skill`` with the following files:
+#
+# .. code-block:: markdown
+#
+#   ---
+#   name: sample_skill
+#   description: A sample agent skill for demonstration.
+#   ---
+#
+#   # Sample Skill
+#   ...
+#
+
+os.makedirs("sample_skill", exist_ok=True)  # no error if the directory already exists
+with open("sample_skill/SKILL.md", "w", encoding="utf-8") as f:  # "w" overwrites any previous file
+    f.write(  # minimal SKILL.md: YAML frontmatter followed by a Markdown body
+        """---
+name: sample_skill
+description: A sample agent skill for demonstration.
+---
+
+# Sample Skill
+...
+""",
+    )
+
+# %%
+# Then, we can register the skill using the ``register_agent_skill`` API of
+# the ``Toolkit`` class.
+#
+
+toolkit = Toolkit()
+
+toolkit.register_agent_skill("sample_skill")  # the skill directory created above
+
+# %%
+# After that, we can get the prompt for all registered agent skills using the
+# ``get_agent_skill_prompt`` API
+
+agent_skill_prompt = toolkit.get_agent_skill_prompt()  # prompt covering all registered agent skills
+print("Agent Skill Prompt:")
+print(agent_skill_prompt)
+
+# %%
+# Of course, we can customize the prompt template when creating the ``Toolkit``
+# instance.
+
+toolkit = Toolkit(
+    # The instruction that introduces the agent/LLM to how the skills are used
+    agent_skill_instruction="<system-info>You're provided a collection of skills, each in a directory and described by a SKILL.md file.</system-info>\n",
+    # The template used to format each skill's prompt; it must contain the
+    # {name}, {description}, and {dir} fields
+    agent_skill_template="- {name}({dir}): {description}",
+)
+
+toolkit.register_agent_skill("sample_skill")  # register again: this is a new ``Toolkit`` instance
+agent_skill_prompt = toolkit.get_agent_skill_prompt()
+print("Customized Agent Skill Prompt:")
+print(agent_skill_prompt)
+
+# %%
+# Integrating Agent Skills with ReActAgent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The ``ReActAgent`` class in AgentScope will attach the agent skill prompt to
+# the system prompt automatically.
+#
+# We can create a ReAct agent with the registered agent skills as follows:
+#
+# .. important:: When using agent skills, the agent must be equipped with text
+#  file reading or shell command tools to access the skill instructions in
+#  ``SKILL.md`` files.
+#
+
+agent = ReActAgent(
+    name="Friday",
+    sys_prompt="You are a helpful assistant named Friday.",
+    model=DashScopeChatModel(
+        model_name="qwen3-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],  # raises KeyError if the variable is not set
+    ),
+    memory=InMemoryMemory(),
+    formatter=DashScopeChatFormatter(),
+    toolkit=toolkit,  # the toolkit holding the registered agent skill
+)
+
+print("Agent's System Prompt with Agent Skills:")
+print(agent.sys_prompt)
--- a/docs/tutorial/en/src/task_embedding.py
+++ b/docs/tutorial/en/src/task_embedding.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+"""
+.. _embedding:
+
+Embedding
+=========================
+
+In AgentScope, the embedding module provides a unified interface for vector representation generation, which features:
+
+- Support **caching embeddings** to avoid redundant API calls
+- Support **multiple embedding providers** with a consistent API
+
+AgentScope has built-in embedding classes for the following API providers:
+
+.. list-table::
+    :header-rows: 1
+
+    * - Provider
+      - Class
+    * - OpenAI
+      - ``OpenAITextEmbedding``
+    * - Gemini
+      - ``GeminiTextEmbedding``
+    * - DashScope
+      - ``DashScopeTextEmbedding``, ``DashScopeMultiModalEmbedding``
+    * - Ollama
+      - ``OllamaTextEmbedding``
+
+All classes inherit from ``EmbeddingModelBase``, implementing the ``__call__`` method and generating an ``EmbeddingResponse`` object with the embeddings and usage information.
+The ``DashScopeMultiModalEmbedding`` supports multi-modal embeddings for text, images, and videos.
+
+Taking the DashScope embedding class as an example, you can use it as follows:
+"""
+
+import asyncio
+import os
+import tempfile
+
+from agentscope.embedding import DashScopeTextEmbedding, FileEmbeddingCache
+
+
+async def example_dashscope_embedding() -> None:
+    """Example usage of DashScope text embedding."""
+    texts = [
+        "What is the capital of France?",
+        "Paris is the capital city of France.",
+    ]
+
+    # Initialize the DashScope text embedding instance
+    embedding_model = DashScopeTextEmbedding(
+        model_name="text-embedding-v2",
+        api_key=os.getenv("DASHSCOPE_API_KEY"),  # ``os.getenv`` yields None if the variable is not set
+    )
+
+    # Call the model on the whole list of texts and await the response
+    response = await embedding_model(texts)
+
+    print("The embedding ID: ", response.id)
+    print("The embedding create at: ", response.created_at)
+    print("The embedding usage: ", response.usage)
+    print("The embedding:")
+    print(response.embeddings)
+
+
+asyncio.run(example_dashscope_embedding())  # run the coroutine to completion on a new event loop
+
+# %%
+# You can customize your embedding model by subclassing ``EmbeddingModelBase`` and implementing the ``__call__`` method.
+#
+# Embedding Cache
+# ---------------------
+# AgentScope provides a base class ``EmbeddingCacheBase`` for caching embeddings, as well as a file-based implementation ``FileEmbeddingCache``.
+# It works as follows in the embedding module:
+#
+# .. image:: ../../_static/images/embedding_cache.png
+#   :align: center
+#   :width: 90%
+#
+# To use caching, just pass an instance of ``FileEmbeddingCache`` (or your custom cache) to the embedding model's constructor as follows:
+#
+
+
+async def example_embedding_cache() -> None:
+    """Demonstrate embedding with cache functionality."""
+    # Example texts
+    texts = [
+        "What is the capital of France?",
+        "Paris is the capital city of France.",
+    ]
+
+    # Create a temporary directory for cache demonstration
+    # In real applications, you might want to use a persistent directory
+    cache_dir = tempfile.mkdtemp(prefix="embedding_cache_")
+    print(f"Using cache directory: {cache_dir}")
+
+    # Initialize the embedding model with cache
+    # We limit the cache to 100 files and 10MB for demonstration purposes
+    embedder = DashScopeTextEmbedding(
+        model_name="text-embedding-v3",
+        api_key=os.getenv("DASHSCOPE_API_KEY"),
+        embedding_cache=FileEmbeddingCache(
+            cache_dir=cache_dir,
+            max_file_number=100,
+            max_cache_size=10,  # Maximum cache size in MB
+        ),
+    )
+
+    # First call - will fetch from API and store in cache
+    print("\n=== First API Call (No Cache Hit) ===")
+    start_time = asyncio.get_event_loop().time()
+    response1 = await embedder(texts)
+    elapsed_time1 = asyncio.get_event_loop().time() - start_time
+    print(f"Source: {response1.source}")  # Should be 'api'
+    print(f"Time taken: {elapsed_time1:.4f} seconds")
+    print(f"Tokens used: {response1.usage.tokens}")
+
+    # Second call with the same texts - should use cache
+    print("\n=== Second API Call (Cache Hit Expected) ===")
+    start_time = asyncio.get_event_loop().time()
+    response2 = await embedder(texts)
+    elapsed_time2 = asyncio.get_event_loop().time() - start_time
+    print(f"Source: {response2.source}")  # Should be 'cache'
+    print(f"Time taken: {elapsed_time2:.4f} seconds")
+    print(
+        f"Tokens used: {response2.usage.tokens}",
+    )  # Should be 0 for cached results
+    print(
+        f"Speed improvement: {elapsed_time1 / elapsed_time2:.1f}x faster with cache",
+    )
+
+
+asyncio.run(example_embedding_cache())
--- a/docs/tutorial/en/src/task_eval.py
+++ b/docs/tutorial/en/src/task_eval.py
@@ -0,0 +1,257 @@
+# -*- coding: utf-8 -*-
+"""
+.. _eval:
+
+Evaluation
+=========================
+
+AgentScope provides a built-in evaluation framework for assessing agent performance across different tasks and benchmarks, featuring:
+
+- `Ray <https://github.com/ray-project/ray>`_-based parallel and distributed evaluation
+- Support for resuming an evaluation after interruption
+- 🚧 Visualization of evaluation results
+
+.. note:: We are continuously integrating new benchmarks into AgentScope:
+
+ - ✅ `ACEBench <https://github.com/ACEBench/ACEBench>`_
+ - 🚧 `GAIA <https://huggingface.co/datasets/gaia-benchmark/GAIA/tree/main>`_ Benchmark
+
+
+Overview
+---------------------------
+
+The AgentScope evaluation framework consists of several key components:
+
+- **Benchmark**: Collections of tasks for systematic evaluation
+    - **Task**: Individual evaluation units with inputs, ground truth, and metrics
+        - **Metric**: Measurement functions that assess solution quality
+- **Evaluator**: Engine that runs evaluation, aggregates results, and analyzes performance
+    - **Evaluator Storage**: Persistent storage for recording and retrieving evaluation results
+- **Solution**: The user-defined solution
+
+.. figure:: ../../_static/images/evaluation.png
+    :width: 90%
+    :alt: AgentScope Evaluation Framework
+
+    *AgentScope Evaluation Framework*
+
+The current implementation in AgentScope includes:
+
+- Evaluator:
+    - ``RayEvaluator``: A ray-based evaluator that supports parallel and distributed evaluation.
+    - ``GeneralEvaluator``: A general evaluator that runs tasks sequentially, friendly for debugging.
+- Benchmark:
+    - ``ACEBench``: A benchmark for evaluating agent capabilities.
+
+We have provided a toy example in our `GitHub repository <https://github.com/agentscope-ai/agentscope/tree/main/examples/evaluation/ace_bench>`_ with ``RayEvaluator`` and the agent multistep tasks in ACEBench.
+
+Core Components
+---------------
+We are going to build a simple toy math question benchmark to demonstrate
+how to use the AgentScope evaluation module.
+"""
+
+# Raw records of the toy benchmark. Each record carries a unique "id", the
+# "question" text, a numeric "ground_truth", and a "tags" dict.
+TOY_BENCHMARK = [
+    {
+        "id": "math_problem_1",
+        "question": "What is 2 + 2?",
+        "ground_truth": 4.0,
+        "tags": {
+            "difficulty": "easy",
+            "category": "math",
+        },
+    },
+    {
+        "id": "math_problem_2",
+        "question": "What is 12345 + 54321 + 6789 + 9876?",
+        "ground_truth": 83331,
+        "tags": {
+            "difficulty": "medium",
+            "category": "math",
+        },
+    },
+]
+
+# %%
+# From Tasks, Solutions and Metrics to Benchmark
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# - A ``SolutionOutput`` contains all information generated by the agent, including the trajectory and final output.
+# - A ``Metric`` represents a single evaluation callable instance that compares the generated solution (e.g., trajectory or final output) to the ground truth.
+#
+# In the toy example, we define a metric that simply checks whether the ``output`` field in the solution matches the ground truth.
+
+from agentscope.evaluate import (
+    SolutionOutput,
+    MetricBase,
+    MetricResult,
+    MetricType,
+)
+
+
+class CheckEqual(MetricBase):
+    """Toy metric that scores 1.0 when the solution output equals the ground truth."""
+
+    def __init__(
+        self,
+        ground_truth: float,
+    ):
+        """Register the metric metadata and remember the expected answer.
+
+        Args:
+            ground_truth: The value the solution's ``output`` is compared with.
+        """
+        super().__init__(
+            name="math check number equal",
+            metric_type=MetricType.NUMERICAL,
+            description="Toy metric checking if two numbers are equal",
+            categories=[],
+        )
+        self.ground_truth = ground_truth
+
+    async def __call__(
+        self,
+        solution: SolutionOutput,
+    ) -> MetricResult:
+        """Compare ``solution.output`` with the stored ground truth.
+
+        Args:
+            solution: The solution produced for the task.
+
+        Returns:
+            A ``MetricResult`` with result 1.0 and message "Correct" on a
+            match, otherwise result 0.0 and message "Incorrect".
+        """
+        is_match = solution.output == self.ground_truth
+        return MetricResult(
+            name=self.name,
+            result=1.0 if is_match else 0.0,
+            message="Correct" if is_match else "Incorrect",
+        )
+
+
+# %%
+# - A ``Task`` is a unit in the benchmark that includes all information for the agent to execute and evaluate (e.g., input/query and its ground truth).
+# - A ``Benchmark`` organizes multiple tasks for systematic evaluation.
+
+from typing import Generator
+from agentscope.evaluate import (
+    Task,
+    BenchmarkBase,
+)
+
+
+class ToyBenchmark(BenchmarkBase):
+    """Benchmark that wraps the records of ``TOY_BENCHMARK`` as ``Task`` objects."""
+
+    def __init__(self):
+        """Register the benchmark metadata and build the task list."""
+        super().__init__(
+            name="Toy bench",
+            description="A toy benchmark for demonstrating the evaluation module.",
+        )
+        self.dataset = self._load_data()
+
+    @staticmethod
+    def _load_data() -> list[Task]:
+        """Convert every record in ``TOY_BENCHMARK`` into a ``Task``.
+
+        Each task gets a single ``CheckEqual`` metric bound to the record's
+        ground truth.
+        """
+        return [
+            Task(
+                id=record["id"],
+                input=record["question"],
+                ground_truth=record["ground_truth"],
+                tags=record.get("tags", {}),
+                metrics=[
+                    CheckEqual(record["ground_truth"]),
+                ],
+                metadata={},
+            )
+            for record in TOY_BENCHMARK
+        ]
+
+    def __iter__(self) -> Generator[Task, None, None]:
+        """Yield the tasks in dataset order."""
+        yield from self.dataset
+
+    def __getitem__(self, index: int) -> Task:
+        """Return the task stored at ``index``."""
+        return self.dataset[index]
+
+    def __len__(self) -> int:
+        """Return the number of tasks in the benchmark."""
+        return len(self.dataset)
+
+
+# %%
+# Evaluators
+# ~~~~~~~~~~
+#
+# Evaluators manage the evaluation process. They can automatically iterate through the
+# tasks in the benchmark and feed each task into a solution-generation function,
+# where developers need to define the logic for running agents and retrieving
+# the execution result and trajectory. Below is an example of
+# running ``GeneralEvaluator`` with our toy benchmark. If there is a large
+# benchmark and the developer wants to get the evaluation more efficiently
+# through parallelization, ``RayEvaluator`` is available as a built-in solution
+# as well.
+
+
+import os
+import asyncio
+from typing import Callable
+from pydantic import BaseModel
+
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.agent import ReActAgent
+
+from agentscope.evaluate import (
+    GeneralEvaluator,
+    FileEvaluatorStorage,
+)
+
+
+# Structured-output schema handed to the agent as ``structured_model``; the
+# solution function below reads the ``answer_as_number`` key from the reply
+# metadata. Documented with comments rather than a class docstring so the
+# class definition itself stays untouched.
+class ToyBenchAnswerFormat(BaseModel):
+    answer_as_number: float
+
+
+async def toy_solution_generation(
+    task: Task,
+    pre_hook: Callable,
+) -> SolutionOutput:
+    """Solve one benchmark task with a ReAct agent and wrap its answer.
+
+    Args:
+        task: The task to solve; its ``input`` is sent as the user message.
+        pre_hook: Callable registered on the agent as the ``pre_print``
+            instance hook under the name "save_logging".
+
+    Returns:
+        A ``SolutionOutput`` marked successful whose ``output`` is the
+        ``answer_as_number`` entry of the reply metadata (``None`` if the key
+        is absent) and whose trajectory is empty.
+    """
+    chat_model = DashScopeChatModel(
+        api_key=os.environ.get("DASHSCOPE_API_KEY"),
+        model_name="qwen-max",
+        stream=False,
+    )
+    system_prompt = (
+        "You are a helpful assistant named Friday. "
+        "Your target is to solve the given task with your tools. "
+        "Try to solve the task as best as you can."
+    )
+    agent = ReActAgent(
+        name="Friday",
+        sys_prompt=system_prompt,
+        model=chat_model,
+        formatter=DashScopeChatFormatter(),
+    )
+    agent.register_instance_hook("pre_print", "save_logging", pre_hook)
+
+    # Ask for a structured reply so the numeric answer can be read back
+    # from the reply metadata.
+    reply = await agent(
+        Msg("user", task.input, role="user"),
+        structured_model=ToyBenchAnswerFormat,
+    )
+    answer = reply.metadata.get("answer_as_number", None)
+    return SolutionOutput(
+        success=True,
+        output=answer,
+        trajectory=[],
+    )
+
+
+async def main() -> None:
+    """Run the toy solution over the toy benchmark with ``GeneralEvaluator``."""
+    benchmark = ToyBenchmark()
+    # File-based evaluator storage configured with save_dir "./results"
+    storage = FileEvaluatorStorage(save_dir="./results")
+
+    evaluator = GeneralEvaluator(
+        name="Toy benchmark evaluation",
+        benchmark=benchmark,
+        n_repeat=1,  # Repeat how many times
+        storage=storage,
+        n_workers=1,  # How many workers to use
+    )
+
+    # Run the evaluation
+    await evaluator.run(toy_solution_generation)
+
+
+asyncio.run(main())
--- a/docs/tutorial/en/src/task_eval_openjudge.py
+++ b/docs/tutorial/en/src/task_eval_openjudge.py
@@ -0,0 +1,341 @@
+# -*- coding: utf-8 -*-
+"""
+Evaluation with OpenJudge
+=========================
+
+This guide introduces how to use `OpenJudge <https://github.com/agentscope-ai/OpenJudge>`_ graders as AgentScope metrics to evaluate your multi-agent applications.
+OpenJudge is a comprehensive evaluation system designed to assess the quality of LLM applications. By integrating OpenJudge into AgentScope, you can extend AgentScope's native evaluation capabilities from basic execution checks to deep, semantic quality analysis.
+
+
+.. note::
+   Install dependencies before running:
+
+   .. code-block:: bash
+
+       pip install agentscope py-openjudge
+
+
+Overview
+--------
+While AgentScope provides a robust ``MetricBase`` for defining evaluation logic, implementing complex, semantic-level metrics (like "Hallucination Detection" or "Response Relevance") often requires
+significant effort in prompt engineering and pipeline construction.
+
+Integrating OpenJudge brings three dimensions of capability extension to AgentScope:
+
+1.  **Enhance Evaluation Depth**: Move beyond simple success/failure checks to multi-dimensional assessments (Accuracy, Safety, Tone, etc.).
+2.  **Leverage Verified Graders**: Instantly access 50+ pre-built, expert-level graders without writing custom evaluation prompts; see the `OpenJudge documentation <https://agentscope-ai.github.io/OpenJudge/built_in_graders/overview/>`__ for details.
+3.  **Closed-loop Iteration**: Seamlessly embed OpenJudge into AgentScope's ``Benchmark``, obtaining quantitative scores and qualitative reasoning.
+
+
+How to Evaluate with OpenJudge
+------------------------------
+
+We are going to build a simple QA benchmark to demonstrate how to use the AgentScope evaluation module by integrating OpenJudge's graders.
+"""
+
+# %%
+QA_BENCHMARK_DATASET = [
+    {
+        "id": "qa_task_1",
+        "question": "What are the health benefits of regular exercise?",
+        "reference_output": "Regular exercise improves cardiovascular health, strengthens muscles and bones, "
+        "helps maintain a healthy weight, and can improve mental health by reducing anxiety and depression.",
+        "ground_truth": "Answers should cover physical and mental health benefits",
+        "difficulty": "medium",
+        "category": "health",
+    },
+    {
+        "id": "qa_task_2",
+        "question": "Describe the main causes of climate change.",
+        "reference_output": "Climate change is primarily caused by increased concentrations of greenhouse gases "
+        "in the atmosphere due to human activities like burning fossil fuels, deforestation, and industrial processes.",
+        "ground_truth": "Answers should mention greenhouse gases and human activities",
+        "difficulty": "hard",
+        "category": "environment",
+    },
+    {
+        "id": "qa_task_3",
+        "question": "What is the significance of the Turing Test in AI?",
+        "reference_output": "The Turing Test, proposed by Alan Turing, is a measure of a machine's ability to exhibit"
+        " intelligent behavior equivalent to, or indistinguishable from, that of a human.",
+        "ground_truth": "Should mention Alan Turing, purpose of the test, and its implications for AI",
+        "difficulty": "hard",
+        "category": "technology",
+    },
+]
+
+
+# %% [markdown]
+# AgentScope Metric vs. OpenJudge Grader
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# To make OpenJudge compatible with AgentScope, we need an adapter that inherits from
+# AgentScope's ``MetricBase`` and acts as a bridge to OpenJudge's ``BaseGrader``.
+#
+# * **AgentScope Metric**: A generic unit of evaluation that accepts a ``SolutionOutput`` and returns a ``MetricResult``.
+# * **OpenJudge Grader**: A specialized evaluation unit (e.g., ``RelevanceGrader``) that requires specific, semantic inputs (like ``query``, ``response``, ``context``), and returns a ``GraderResult``.
+#
+# This "Adapter" allows you to plug *any* OpenJudge grader into your AgentScope benchmark seamlessly.
+#
+
+# %%
+from openjudge.graders.base_grader import BaseGrader
+from openjudge.graders.schema import GraderScore, GraderError
+from openjudge.utils.mapping import parse_data_with_mapper
+from agentscope.evaluate import (
+    MetricBase,
+    MetricType,
+    MetricResult,
+    SolutionOutput,
+)
+
+
+class OpenJudgeMetric(MetricBase):
+    """
+    Adapter that exposes an OpenJudge grader through AgentScope's metric interface.
+    """
+
+    def __init__(
+        self,
+        grader_cls: type[BaseGrader],
+        data: dict,
+        mapper: dict,
+        name: str | None = None,
+        description: str | None = None,
+        **grader_kwargs,
+    ):
+        """Build the wrapped grader and register the metric metadata.
+
+        Args:
+            grader_cls: The OpenJudge grader class to instantiate.
+            data: Static task data, exposed to the mapper under the "data" key.
+            mapper: Mapping passed to ``parse_data_with_mapper`` to derive the
+                grader's keyword arguments.
+            name: Metric name; falls back to the grader's ``name``.
+            description: Metric description; falls back to the grader's
+                ``description``.
+            **grader_kwargs: Forwarded unchanged to ``grader_cls``.
+        """
+        # The grader is created first because its name and description are
+        # the fallbacks for the metric's own name and description.
+        self.grader = grader_cls(**grader_kwargs)
+
+        metric_name = name or self.grader.name
+        metric_description = description or self.grader.description
+        super().__init__(
+            name=metric_name,
+            metric_type=MetricType.NUMERICAL,
+            description=metric_description,
+        )
+
+        self.data = data
+        self.mapper = mapper
+
+    async def __call__(self, solution: SolutionOutput) -> MetricResult:
+        """Run the wrapped OpenJudge grader on the agent solution.
+
+        Args:
+            solution: The solution produced for the task.
+
+        Returns:
+            A ``MetricResult`` holding the grader's score and reason when the
+            grader returns a ``GraderScore``. In every other case (failed
+            solution, ``GraderError``, unknown result type, or any exception
+            raised while grading) the result is 0.0 and the message explains
+            why.
+        """
+
+        def zero_score(message: str) -> MetricResult:
+            # Shared shape of all non-scoring outcomes.
+            return MetricResult(
+                name=self.name,
+                result=0.0,
+                message=message,
+            )
+
+        if not solution.success:
+            return zero_score("Solution failed")
+
+        try:
+            # Merge the static task data with the dynamic agent output, then
+            # let the mapper pick out the arguments the grader expects.
+            grader_inputs = parse_data_with_mapper(
+                {
+                    "data": self.data,
+                    "solution": {
+                        "output": solution.output,
+                        "meta": solution.meta,
+                        "trajectory": getattr(solution, "trajectory", []),
+                    },
+                },
+                self.mapper,
+            )
+
+            outcome = await self.grader.aevaluate(**grader_inputs)
+
+            if isinstance(outcome, GraderScore):
+                # Keep the detailed reasoning provided by OpenJudge
+                return MetricResult(
+                    name=self.name,
+                    result=outcome.score,
+                    message=outcome.reason or "",
+                )
+            if isinstance(outcome, GraderError):
+                return zero_score(f"Error: {outcome.error}")
+            return zero_score("Unknown result type")
+
+        except Exception as exc:
+            # Any failure while grading is reported as a zero score
+            # instead of propagating.
+            return zero_score(f"Exception: {str(exc)}")
+
+
+# %% [markdown]
+# From OpenJudge's Graders to AgentScope's Benchmark
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# OpenJudge provides a rich collection of built-in graders. In this example, we select two
+# common graders suitable for Question-Answering tasks:
+#
+# * **RelevanceGrader**: Evaluates whether the agent's response directly addresses the user's query.
+# * **CorrectnessGrader**: Verifies the factual accuracy of the response against a provided ground truth.
+#
+# .. tip::
+#    OpenJudge offers 50+ built-in graders covering diverse dimensions like **Hallucination**, **Safety**, **Code Quality**,
+#    and **JSON Formatting**. Please refer to the `OpenJudge Documentation <https://agentscope-ai.github.io/OpenJudge/built_in_graders/overview/>`_
+#    for the full list of available graders.
+#
+# .. note::
+#    Ensure you have set your ``DASHSCOPE_API_KEY`` environment variable before running the example below.
+
+# %%
+import os
+from typing import Generator
+from openjudge.graders.common.relevance import RelevanceGrader
+from openjudge.graders.common.correctness import CorrectnessGrader
+from agentscope.evaluate import (
+    Task,
+    BenchmarkBase,
+)
+
+
+class QABenchmark(BenchmarkBase):
+    """A benchmark for QA tasks using OpenJudge metrics."""
+
+    def __init__(self):
+        """Register the benchmark metadata and build the task list."""
+        super().__init__(
+            name="QA Quality Benchmark",
+            description="Benchmark to evaluate QA systems using OpenJudge grader classes",
+        )
+        self.dataset = self._load_data()
+
+    def _load_data(self) -> list[Task]:
+        """Turn every record of ``QA_BENCHMARK_DATASET`` into a ``Task``.
+
+        Each task carries two ``OpenJudgeMetric`` instances (relevance and
+        correctness) that share the same mapper and model configuration.
+
+        Returns:
+            The list of tasks, in dataset order.
+        """
+        tasks = []
+        # Configuration for LLM-based graders
+        # Ensure DASHSCOPE_API_KEY is set in your environment variables
+        model_config = {
+            "model": "qwen3-32b",
+            "api_key": os.environ.get("DASHSCOPE_API_KEY"),
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+        }
+
+        for data in QA_BENCHMARK_DATASET:
+            # Define the Mapping: Left is OpenJudge key, Right is the path
+            # into the combined data built by ``OpenJudgeMetric``, where
+            # "data" is the raw dataset record and "solution" is the agent
+            # output. The record stores the question under "question", so
+            # the query must be read from there.
+            mapper = {
+                "query": "data.question",
+                "response": "solution.output",
+                "context": "data.ground_truth",
+                "reference_response": "data.reference_output",
+            }
+
+            # Instantiate Metrics via Wrapper
+            metrics = [
+                OpenJudgeMetric(
+                    grader_cls=RelevanceGrader,
+                    data=data,
+                    mapper=mapper,
+                    name="Relevance",
+                    model=model_config,
+                ),
+                OpenJudgeMetric(
+                    grader_cls=CorrectnessGrader,
+                    data=data,
+                    mapper=mapper,
+                    name="Correctness",
+                    model=model_config,
+                ),
+            ]
+
+            # Create Task
+            task = Task(
+                id=data["id"],
+                input=data["question"],
+                ground_truth=data["ground_truth"],
+                metrics=metrics,
+            )
+
+            tasks.append(task)
+
+        return tasks
+
+    def __iter__(self) -> Generator[Task, None, None]:
+        """Iterate over the benchmark."""
+        yield from self.dataset
+
+    def __getitem__(self, index: int) -> Task:
+        """Get a task by index."""
+        return self.dataset[index]
+
+    def __len__(self) -> int:
+        """Get the length of the benchmark."""
+        return len(self.dataset)
+
+
+# %% [markdown]
+# Run Evaluation
+# ~~~~~~~~~~~~~~
+# Finally, use AgentScope's ``GeneralEvaluator`` to run the benchmark on a QA agent.
+# The results will include both the **Quantitative Score** and the **Qualitative Reasoning**
+# from the OpenJudge graders.
+
+# %%
+
+from typing import Callable
+
+from agentscope.agent import ReActAgent
+from agentscope.evaluate import GeneralEvaluator
+from agentscope.evaluate import FileEvaluatorStorage
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.message import Msg
+from agentscope.model import OpenAIChatModel
+
+
+async def qa_agent(task: Task, pre_hook: Callable) -> SolutionOutput:
+    """Solution function that generates answers to QA tasks.
+
+    Args:
+        task: The QA task; its ``input`` is sent to the agent as the user
+            message.
+        pre_hook: Callable supplied by the evaluator; registered on the agent
+            as the ``pre_print`` instance hook under the name "save_logging".
+
+    Returns:
+        A ``SolutionOutput`` marked successful, holding the agent's reply
+        content as ``output`` and the question/reply pair as the trajectory.
+    """
+    # Imported locally because this file's import block only brings in
+    # ``OpenAIChatModel``.
+    from agentscope.model import DashScopeChatModel
+
+    # The API key is a DashScope key and the agent uses
+    # ``DashScopeChatFormatter``, so the model must be the DashScope chat
+    # model as well. An OpenAI chat model without a DashScope-compatible base
+    # URL would not be addressed to DashScope at all.
+    model = DashScopeChatModel(
+        model_name="qwen3-32b",
+        api_key=os.getenv("DASHSCOPE_API_KEY"),
+    )
+
+    # Create a QA agent
+    agent = ReActAgent(
+        name="QAAgent",
+        sys_prompt="You are an expert at answering questions. Provide clear, accurate, and comprehensive answers.",
+        model=model,
+        formatter=DashScopeChatFormatter(),
+    )
+    # Hand the evaluator-provided hook to the agent so it is actually used.
+    agent.register_instance_hook(
+        "pre_print",
+        "save_logging",
+        pre_hook,
+    )
+
+    # Generate response
+    msg_input = Msg(name="User", content=task.input, role="user")
+    response = await agent(msg_input)
+    # NOTE(review): ``content`` is forwarded to the graders as-is; confirm it
+    # is plain text for this agent rather than a list of content blocks.
+    response_text = response.content
+
+    return SolutionOutput(
+        success=True,
+        output=response_text,
+        trajectory=[
+            task.input,
+            response_text,
+        ],  # Store the interaction trajectory
+    )
+
+
+async def main() -> None:
+    """Evaluate ``qa_agent`` on ``QABenchmark`` with ``GeneralEvaluator``.
+
+    This coroutine is only defined here; nothing in this file calls it. Run
+    it from an event loop (e.g. ``asyncio.run(main())``) to start the
+    evaluation.
+    """
+    evaluator = GeneralEvaluator(
+        name="OpenJudge Integration Demo",
+        benchmark=QABenchmark(),
+        # Repeat how many times
+        n_repeat=1,
+        storage=FileEvaluatorStorage(
+            save_dir="./results",
+        ),
+        # How many workers to use
+        n_workers=1,
+    )
+
+    await evaluator.run(qa_agent)
--- a/docs/tutorial/en/src/task_hook.py
+++ b/docs/tutorial/en/src/task_hook.py
@@ -0,0 +1,291 @@
+# -*- coding: utf-8 -*-
+"""
+.. _hook:
+
+Agent Hooks
+===========================
+
+Hooks are extension points in AgentScope that allow developers to customize agent behaviors at specific execution points, providing a flexible way to modify or extend the agent's functionality without changing its core implementation.
+
+In AgentScope, hooks are implemented around the agent's core functions:
+
+
+.. list-table:: Supported hook types in AgentScope
+    :header-rows: 1
+
+    * - Agent Class
+      - Core Function
+      - Hook Types
+      - Description
+    * - | ``AgentBase`` &
+        | its child classes
+      - ``reply``
+      - | ``pre_reply``
+        | ``post_reply``
+      - The hooks before/after agent replying to a message
+    * -
+      - ``print``
+      - | ``pre_print``
+        | ``post_print``
+      - The hooks before/after printing a message to the target output (e.g., terminal, web interface)
+    * -
+      - ``observe``
+      - | ``pre_observe``
+        | ``post_observe``
+      - The hooks before/after observing a message from the environment or other agents
+    * - | ``ReActAgentBase`` &
+        | its child classes
+      - | ``reply``
+        | ``print``
+        | ``observe``
+      - | ``pre_reply``
+        | ``post_reply``
+        | ``pre_print``
+        | ``post_print``
+        | ``pre_observe``
+        | ``post_observe``
+      -
+    * -
+      - ``_reasoning``
+      - | ``pre_reasoning``
+        | ``post_reasoning``
+      - The hooks before/after the agent's reasoning process
+    * -
+      - ``_acting``
+      - | ``pre_acting``
+        | ``post_acting``
+      - The hooks before/after the agent's acting process
+
+.. tip:: Since hooks in AgentScope are implemented using a metaclass, they support inheritance.
+
+To simplify the usage, AgentScope provides unified signatures for all hooks.
+
+"""
+import asyncio
+from typing import Any, Type
+
+from agentscope.agent import ReActAgentBase, AgentBase
+from agentscope.message import Msg
+
+
+# %%
+# Hook Signature
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# AgentScope provides unified hook signatures for all pre- and post-hooks as follows:
+#
+# **Pre-Hook Signature**
+#
+# .. list-table:: The signature of all pre-hooks
+#   :header-rows: 1
+#
+#   * -
+#     - Name
+#     - Description
+#   * - Arguments
+#     - ``self: AgentBase | ReActAgentBase``
+#     - The agent instance
+#   * -
+#     - ``kwargs: dict[str, Any]``
+#     - | The input arguments of the target
+#       | function, or the modified arguments
+#       | from the most recent non-None return
+#       | value of previous hooks
+#   * - Returns
+#     - ``dict[str, Any] | None``
+#     - The modified arguments or None
+#
+# .. note:: All positional arguments and keyword arguments of the core function are passed as a single ``kwargs`` dict to the hook functions
+#
+# A pre-hook template is defined as follows:
+#
+
+
+def pre_hook_template(
+    self: AgentBase | ReActAgentBase,
+    kwargs: dict[str, Any],
+) -> dict[str, Any] | None:  # The modified arguments or None
+    """Pre hook template.
+
+    Args:
+        self: The agent instance the hook is attached to.
+        kwargs: The arguments of the target function, collected into a
+            single dict.
+
+    Returns:
+        The modified arguments, or ``None`` to leave them unchanged.
+    """
+    pass
+
+
+# %%
+# **Post-Hook Signature**
+#
+# For post hooks, an additional ``output`` argument is added to the signature, which represents the output of the target function.
+# If the core function has no output, the ``output`` argument will be ``None``.
+#
+# .. list-table:: The signature of all post-hooks
+#   :header-rows: 1
+#
+#   * -
+#     - Name
+#     - Description
+#   * - Arguments
+#     - ``self: AgentBase | ReActAgentBase``
+#     - The agent instance
+#   * -
+#     - ``kwargs: dict[str, Any]``
+#     - | A dict that contains all the arguments
+#       | of the target function
+#   * -
+#     - ``output: Any``
+#     - | The output of the target function or
+#       | the most recent non-None return value
+#       | from previous hooks
+#   * - Returns
+#     - ``Any``
+#     - The modified output or None
+#
+
+
+def post_hook_template(
+    self: AgentBase | ReActAgentBase,
+    kwargs: dict[str, Any],
+    output: Any,  # The output of the target function
+) -> Any:  # The modified output
+    """Post hook template.
+
+    Args:
+        self: The agent instance the hook is attached to.
+        kwargs: The arguments of the target function, collected into a
+            single dict.
+        output: The output of the target function.
+
+    Returns:
+        The modified output, or ``None`` to leave it unchanged.
+    """
+    pass
+
+
+# %%
+# Hook Management
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# AgentScope provides both instance- and class-level hooks, depending on the effective scope of the hooks.
+# They execute in the following order:
+#
+# .. image:: ../../_static/images/sequential_hook.png
+#   :width: 90%
+#   :align: center
+#   :alt: Hooks in AgentScope
+#   :class: bordered-image
+#
+# AgentScope provides built-in methods to manage hooks at both instance and class levels as follows:
+#
+# .. list-table:: Hook management methods in AgentScope
+#   :header-rows: 1
+#
+#   * - Level
+#     - Method
+#     - Description
+#   * - Instance-level
+#     - ``register_instance_hook``
+#     - | Register a hook for the current object with
+#       | given hook type and name.
+#   * -
+#     - ``remove_instance_hook``
+#     - | Remove a hook for the current object with
+#       | given hook type and name.
+#   * -
+#     - ``clear_instance_hooks``
+#     - | Clear all hooks for the current object with
+#       | given hook type.
+#   * - Class-level
+#     - ``register_class_hook``
+#     - | Register a hook for all objects of the class
+#       | with given hook type and name.
+#   * -
+#     - ``remove_class_hook``
+#     - | Remove a hook for all objects of the class
+#       | with given hook type and name.
+#   * -
+#     - ``clear_class_hooks``
+#     - | Clear all hooks for all objects of the
+#       | class with given hook type.
+#
+# When using hooks, you MUST follow these rules:
+#
+# .. important:: **Execution Order**
+#
+#  - Hooks are executed in registration order
+#  - Multiple hooks can be chained together
+#
+#  **Return Value Handling**
+#
+#  - For pre-hooks: Non-None return values are passed to the next hook or core function
+#
+#    - When a hook returns None, the next hook will use the most recent non-None return value from previous hooks
+#    - If all previous hooks return None, the next hook receives a copy of the original arguments
+#    - The final non-None return value (or original arguments if all hooks return None) is passed to the core function
+#
+#  - For post-hooks: Works the same way as pre-hooks.
+#
+#  **Important**: Never call the core function (reply/print/observe/_reasoning/_acting) within a hook to avoid infinite loops
+#
+# Taking the following agent as an example, we can see how to register, remove and clear hooks:
+#
+
+
+# Create a simple test agent class
+class TestAgent(AgentBase):
+    """A test agent for demonstrating hooks."""
+
+    async def reply(self, msg: Msg) -> Msg:
+        """Reply to the message by returning it unchanged.
+
+        Returning the input as-is makes any modification done by the
+        pre-reply hooks directly visible in the result.
+        """
+        return msg
+
+
+# %%
+# We create an instance-level hook and a class-level hook to modify the message content before replying.
+#
+
+
+# Create two pre-reply hooks
+def instance_pre_reply_hook(
+    self: AgentBase,
+    kwargs: dict[str, Any],
+) -> dict[str, Any]:
+    """Append "[instance-pre-reply]" to the message content before replying.
+
+    The message object found under ``kwargs["msg"]`` is modified in place and
+    returned inside a new kwargs dict.
+    """
+    tagged_msg = kwargs["msg"]
+    tagged_msg.content += "[instance-pre-reply]"
+
+    # Hand back a fresh dict carrying the modified message
+    updated_kwargs = dict(kwargs)
+    updated_kwargs["msg"] = tagged_msg
+    return updated_kwargs
+
+
+def cls_pre_reply_hook(
+    self: AgentBase,
+    kwargs: dict[str, Any],
+) -> dict[str, Any]:
+    """Append "[cls-pre-reply]" to the message content before replying.
+
+    The message object found under ``kwargs["msg"]`` is modified in place and
+    returned inside a new kwargs dict.
+    """
+    tagged_msg = kwargs["msg"]
+    tagged_msg.content += "[cls-pre-reply]"
+
+    # Hand back a fresh dict carrying the modified message
+    updated_kwargs = dict(kwargs)
+    updated_kwargs["msg"] = tagged_msg
+    return updated_kwargs
+
+
+# Register class hook
+TestAgent.register_class_hook(
+    hook_type="pre_reply",
+    hook_name="test_pre_reply",
+    hook=cls_pre_reply_hook,
+)
+
+# Register instance hook
+agent = TestAgent()
+agent.register_instance_hook(
+    hook_type="pre_reply",
+    hook_name="test_pre_reply",
+    hook=instance_pre_reply_hook,
+)
+
+
+async def example_test_hook() -> None:
+    """An example function to test the hooks.
+
+    Sends one user message through the module-level ``agent``, prints the
+    content of the reply, and then clears the class-level hooks of
+    ``TestAgent``.
+    """
+    msg = Msg(
+        name="user",
+        content="Hello, world!",
+        role="user",
+    )
+    res = await agent(msg)
+    print("Response content:", res.content)
+    # Called without a hook type; presumably this clears the class-level
+    # hooks of every type -- confirm against ``clear_class_hooks``.
+    TestAgent.clear_class_hooks()
+
+
+asyncio.run(example_test_hook())
+
+# %%
+# We can see that a "[instance-pre-reply]" and a "[cls-pre-reply]" are added to the message content.
+#
--- a/docs/tutorial/en/src/task_long_term_memory.py
+++ b/docs/tutorial/en/src/task_long_term_memory.py
@@ -0,0 +1,435 @@
+# -*- coding: utf-8 -*-
+"""
+.. _long-term-memory:
+
+Long-Term Memory
+========================
+
+In AgentScope, we provide a basic class for long-term memory (``LongTermMemoryBase``) and an implementation based on the `mem0 <https://github.com/mem0ai/mem0>`_ library (``Mem0LongTermMemory``).
+Together with the design of ``ReActAgent`` class in :ref:`agent` section, we provide two long-term memory modes:
+
+- ``agent_control``: the agent autonomously manages long-term memory by tool calls, and
+- ``static_control``: the developer explicitly controls long-term memory operations.
+
+Developers can also use the ``both`` mode, which activates both memory management modes.
+
+.. hint:: These memory modes are suitable for different usage scenarios. Developers can choose the appropriate mode based on their needs.
+
+Using mem0 Long-Term Memory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. note:: We provide an example of using mem0 long-term memory in the GitHub repository under the ``examples/long_term_memory/mem0`` directory.
+
+"""
+
+import os
+import asyncio
+
+from agentscope.message import Msg
+from agentscope.memory import InMemoryMemory
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit
+
+
+# Imports used to build the mem0 long-term memory
+from agentscope.memory import Mem0LongTermMemory
+from agentscope.embedding import DashScopeTextEmbedding
+
+# Create mem0 long-term memory instance; both models read DASHSCOPE_API_KEY
+long_term_memory = Mem0LongTermMemory(
+    agent_name="Friday",
+    user_name="user_123",
+    model=DashScopeChatModel(
+        model_name="qwen-max-latest",
+        api_key=os.environ.get("DASHSCOPE_API_KEY"),
+        stream=False,
+    ),
+    embedding_model=DashScopeTextEmbedding(
+        model_name="text-embedding-v2",
+        api_key=os.environ.get("DASHSCOPE_API_KEY"),
+    ),
+    on_disk=False,
+)
+
+# %%
+# The ``Mem0LongTermMemory`` class provides two main methods for long-term memory operations:
+# ``record`` and ``retrieve``.
+# They take a list of messages as input and record/retrieve information from long-term memory.
+#
+# As an example, we first store a user preference and then retrieve related information from long-term memory.
+#
+
+
+# Basic usage example
+async def basic_usage():
+    """Basic usage example"""
+    # Record memory
+    await long_term_memory.record(
+        [Msg("user", "I like staying in homestays", "user")],
+    )
+
+    # Retrieve memory
+    results = await long_term_memory.retrieve(
+        [Msg("user", "My accommodation preferences", "user")],
+    )
+    print(f"Retrieval results: {results}")
+
+
+asyncio.run(basic_usage())
+
+# %%
+# Integration with ReAct Agent
+# ----------------------------------------
+# In AgentScope, the ``ReActAgent`` class receives a ``long_term_memory``
+# parameter in its constructor, as well as a ``long_term_memory_mode`` parameter
+# that specifies the long-term memory mode.
+#
+# If ``long_term_memory_mode`` is set to ``agent_control`` or ``both``, two
+# tool functions ``record_to_memory`` and ``retrieve_from_memory`` will be
+# registered in the agent's toolkit, allowing the agent to autonomously
+# manage long-term memory through tool calls.
+#
+# .. note:: To achieve the best results, the ``"agent_control"`` mode may require
+#  additional instructions in the system prompt.
+#
+
+# Create a ReAct agent that uses the mem0 long-term memory created above
+agent = ReActAgent(
+    name="Friday",
+    sys_prompt="You are an assistant with long-term memory capabilities.",
+    model=DashScopeChatModel(
+        api_key=os.environ.get("DASHSCOPE_API_KEY"),
+        model_name="qwen-max-latest",
+    ),
+    formatter=DashScopeChatFormatter(),
+    toolkit=Toolkit(),
+    memory=InMemoryMemory(),
+    long_term_memory=long_term_memory,
+    long_term_memory_mode="static_control",  # Developer-controlled mode
+)
+
+
+async def record_preferences():
+    """ReAct agent integration example"""
+    # Conversation example
+    msg = Msg(
+        "user",
+        "When I travel to Hangzhou, I like staying in homestays",
+        "user",
+    )
+    await agent(msg)
+
+
+asyncio.run(record_preferences())
+
+# %%
+# Then we clear the short-term memory and ask the agent about the user's preferences.
+#
+
+
+async def retrieve_preferences():
+    """Retrieve user preferences from long-term memory"""
+    # Clear short-term memory
+    await agent.memory.clear()
+    # The agent will remember previous conversations
+    msg2 = Msg("user", "What are my preferences? Answer briefly.", "user")
+    await agent(msg2)
+
+
+asyncio.run(retrieve_preferences())
+
+
+# %%
+# Using ReMe Long-Term Memory
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# .. note:: We provide an example of using ReMe long-term memory in the GitHub repository under the ``examples/long_term_memory/reme`` directory.
+#
+# .. code-block:: python
+#     :caption: Example of ReMe long-term memory setup
+#
+#     from agentscope.memory import ReMePersonalLongTermMemory
+#
+#     # Create ReMe personal long-term memory instance
+#     reme_long_term_memory = ReMePersonalLongTermMemory(
+#         agent_name="Friday",
+#         user_name="user_123",
+#         model=DashScopeChatModel(
+#             model_name="qwen3-max",
+#             api_key=os.environ.get("DASHSCOPE_API_KEY"),
+#             stream=False,
+#         ),
+#         embedding_model=DashScopeTextEmbedding(
+#             model_name="text-embedding-v4",
+#             api_key=os.environ.get("DASHSCOPE_API_KEY"),
+#             dimensions=1024,
+#         ),
+#     )
+#
+#
+# The ``ReMePersonalLongTermMemory`` class provides four main methods for long-term memory operations.
+# They include ``record_to_memory`` and ``retrieve_from_memory`` for tool calls,
+# as well as ``record`` and ``retrieve`` for direct calls.
+#
+# As an example, we use ``record_to_memory`` to record user preferences.
+#
+# .. code-block:: python
+#     :caption: Example of recording to ReMe long-term memory
+#
+#     async def test_record_to_memory():
+#         """Test record_to_memory tool function interface"""
+#         async with reme_long_term_memory:
+#             result = await reme_long_term_memory.record_to_memory(
+#                 thinking="The user is sharing their travel preferences and habits",
+#                 content=[
+#                     "I prefer to stay in homestays when traveling to Hangzhou",
+#                     "I like to visit the West Lake in the morning",
+#                     "I enjoy drinking Longjing tea",
+#                 ],
+#             )
+#             # Extract result text
+#             result_text = " ".join(
+#                 block.get("text", "")
+#                 for block in result.content
+#                 if block.get("type") == "text"
+#             )
+#             print(f"Recording result: {result_text}")
+#
+#
+#
+# Then we use ``retrieve_from_memory`` to retrieve related memories.
+#
+# .. code-block:: python
+#     :caption: Example of retrieving from ReMe long-term memory
+#
+#     async def test_retrieve_from_memory():
+#         """Test retrieve_from_memory tool function interface"""
+#         async with reme_long_term_memory:
+#             # First record some content
+#             await reme_long_term_memory.record_to_memory(
+#                 thinking="User is sharing travel preferences",
+#                 content=[
+#                     "I prefer to stay in homestays when traveling to Hangzhou",
+#                 ],
+#             )
+#
+#             # Then retrieve
+#             result = await reme_long_term_memory.retrieve_from_memory(
+#                 keywords=["Hangzhou travel", "tea preference"],
+#             )
+#             retrieved_text = " ".join(
+#                 block.get("text", "")
+#                 for block in result.content
+#                 if block.get("type") == "text"
+#             )
+#             print(f"Retrieved memories: {retrieved_text}")
+#
+#
+# Besides the tool function interface, we can also use the ``record`` method to directly record message conversations.
+#
+# .. code-block:: python
+#     :caption: Example of direct recording to ReMe long-term memory
+#
+#     async def test_record_direct():
+#         """Test record direct recording method"""
+#         async with reme_long_term_memory:
+#             await reme_long_term_memory.record(
+#                 msgs=[
+#                     Msg(
+#                         role="user",
+#                         content="I work as a software engineer and prefer remote work",
+#                         name="user",
+#                     ),
+#                     Msg(
+#                         role="assistant",
+#                         content="Understood! You're a software engineer who values remote work flexibility.",
+#                         name="assistant",
+#                     ),
+#                     Msg(
+#                         role="user",
+#                         content="I usually start my day at 9 AM with a cup of coffee",
+#                         name="user",
+#                     ),
+#                 ],
+#             )
+#             print("Successfully recorded conversation messages")
+#
+#
+# Similarly, we use the ``retrieve`` method to retrieve related memories.
+#
+# .. code-block:: python
+#     :caption: Example of direct retrieval from ReMe long-term memory
+#
+#     async def test_retrieve_direct():
+#         """Test retrieve direct retrieval method"""
+#         async with reme_long_term_memory:
+#             # First record some content
+#             await reme_long_term_memory.record(
+#                 msgs=[
+#                     Msg(
+#                         role="user",
+#                         content="I work as a software engineer and prefer remote work",
+#                         name="user",
+#                     ),
+#                 ],
+#             )
+#
+#             # Then retrieve
+#             memories = await reme_long_term_memory.retrieve(
+#                 msg=Msg(
+#                     role="user",
+#                     content="What do you know about my work preferences?",
+#                     name="user",
+#                 ),
+#             )
+#             print(
+#                 f"Retrieved memories: {memories if memories else 'No memories found'}",
+#             )
+#
+#
+# Integration with ReAct Agent
+# ----------------------------------------
+# In AgentScope, the ``ReActAgent`` class receives a ``long_term_memory``
+# parameter in its constructor, as well as a ``long_term_memory_mode`` parameter.
+#
+# If ``long_term_memory_mode`` is set to ``agent_control`` or ``both``,
+# ``record_to_memory`` and ``retrieve_from_memory`` tool functions will be
+# registered, allowing the agent to autonomously manage long-term memory through tool calls.
+#
+# .. note:: To achieve the best results, the ``"agent_control"`` mode may require
+#  additional instructions in the system prompt.
+#
+# .. code-block:: python
+#     :caption: Example of ReAct agent with ReMe long-term memory
+#
+#     # Create ReAct agent with long-term memory (agent_control mode)
+#     async def test_react_agent_with_reme():
+#         """Test ReActAgent integration with ReMe personal memory"""
+#         async with reme_long_term_memory:
+#             agent_with_reme = ReActAgent(
+#                 name="Friday",
+#                 sys_prompt=(
+#                     "You are a helpful assistant named Friday with long-term memory capabilities. "
+#                     "\n\n## Memory Management Guidelines:\n"
+#                     "1. **Recording Memories**: When users share personal information, preferences, "
+#                     "habits, or facts about themselves, ALWAYS record them using `record_to_memory` "
+#                     "for future reference.\n"
+#                     "\n2. **Retrieving Memories**: BEFORE answering questions about the user's preferences, "
+#                     "past information, or personal details, you MUST FIRST call `retrieve_from_memory` "
+#                     "to check if you have any relevant stored information. Do NOT rely solely on the "
+#                     "current conversation context.\n"
+#                     "\n3. **When to Retrieve**: Call `retrieve_from_memory` when:\n"
+#                     "   - User asks questions like 'what do I like?', 'what are my preferences?', "
+#                     "'what do you know about me?'\n"
+#                     "   - User asks about their past behaviors, habits, or preferences\n"
+#                     "   - User refers to information they mentioned before\n"
+#                     "   - You need context about the user to provide personalized responses\n"
+#                     "\nAlways check your memory first before claiming you don't know something about the user."
+#                 ),
+#                 model=DashScopeChatModel(
+#                     model_name="qwen3-max",
+#                     api_key=os.environ.get("DASHSCOPE_API_KEY"),
+#                     stream=False,
+#                 ),
+#                 formatter=DashScopeChatFormatter(),
+#                 toolkit=Toolkit(),
+#                 memory=InMemoryMemory(),
+#                 long_term_memory=reme_long_term_memory,
+#                 long_term_memory_mode="agent_control",  # Use agent_control mode
+#             )
+#
+#             # User shares preferences
+#             msg = Msg(
+#                 role="user",
+#                 content="When I travel to Hangzhou, I prefer to stay in a homestay",
+#                 name="user",
+#             )
+#             response = await agent_with_reme(msg)
+#             print(f"Agent response: {response.get_text_content()}")
+#
+#             # Clear short-term memory to test long-term memory
+#             await agent_with_reme.memory.clear()
+#
+#             # Query preferences
+#             msg2 = Msg(
+#                 role="user",
+#                 content="what preference do I have?",
+#                 name="user",
+#             )
+#             response2 = await agent_with_reme(msg2)
+#             print(f"Agent response: {response2.get_text_content()}")
+#
+#
+# Then we clear the short-term memory and ask the agent about the user's preferences.
+#
+# .. code-block:: python
+#     :caption: Example of retrieving preferences with ReAct agent and ReMe long-term memory
+#
+#     async def retrieve_reme_preferences():
+#         """Retrieve user preferences from long-term memory"""
+#         async with reme_long_term_memory:
+#             # Create agent (reusing for demonstration completeness)
+#             agent_with_reme = ReActAgent(
+#                 name="Friday",
+#                 sys_prompt="You are an assistant with long-term memory capabilities.",
+#                 model=DashScopeChatModel(
+#                     api_key=os.environ.get("DASHSCOPE_API_KEY"),
+#                     model_name="qwen3-max",
+#                     stream=False,
+#                 ),
+#                 formatter=DashScopeChatFormatter(),
+#                 toolkit=Toolkit(),
+#                 memory=InMemoryMemory(),
+#                 long_term_memory=reme_long_term_memory,
+#                 long_term_memory_mode="agent_control",
+#             )
+#
+#             # Clear short-term memory
+#             await agent_with_reme.memory.clear()
+#             # The agent will remember previous conversations
+#             msg2 = Msg("user", "What are my preferences? Answer briefly.", "user")
+#             await agent_with_reme(msg2)
+#
+# Customizing Long-Term Memory
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope provides the ``LongTermMemoryBase`` base class, which defines the basic interface for long-term memory.
+#
+# Developers can inherit from ``LongTermMemoryBase`` to implement custom long-term
+# memory systems according to their needs:
+#
+# .. list-table:: Long-term memory classes in AgentScope
+#     :header-rows: 1
+#
+#     * - Class
+#       - Abstract Methods
+#       - Description
+#     * - ``LongTermMemoryBase``
+#       - | ``record``
+#         | ``retrieve``
+#         | ``record_to_memory``
+#         | ``retrieve_from_memory``
+#       - - For ``"static_control"`` mode, you must implement the ``record`` and ``retrieve`` methods.
+#         - For ``"agent_control"`` mode, the ``record_to_memory`` and ``retrieve_from_memory`` methods must be implemented.
+#     * - ``Mem0LongTermMemory``
+#       - | ``record``
+#         | ``retrieve``
+#         | ``record_to_memory``
+#         | ``retrieve_from_memory``
+#       - Long-term memory implementation based on the mem0 library, supporting vector storage and retrieval.
+#     * - ``ReMePersonalLongTermMemory``
+#       - | ``record``
+#         | ``retrieve``
+#         | ``record_to_memory``
+#         | ``retrieve_from_memory``
+#       - Personal memory implementation based on the ReMe framework, providing powerful memory management and retrieval capabilities.
+#
+#
+#
+#
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# - :ref:`memory` - Basic memory system
+# - :ref:`agent` - ReAct agent
+# - :ref:`tool` - Tool system
--- a/docs/tutorial/en/src/task_mcp.py
+++ b/docs/tutorial/en/src/task_mcp.py
@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+"""
+.. _mcp:
+
+MCP
+=========================
+
+The tutorial covers the following features of AgentScope in support of the MCP (Model Context Protocol):
+
+- Support both **HTTP** (streamable HTTP and SSE) and **StdIO** MCP servers
+- Provide both **stateful** and **stateless** MCP clients
+- Provide both **server-level** and **function-level** MCP tool management
+
+Here the stateful/stateless distinction refers to whether the client maintains a persistent session with the MCP server or not.
+The table below summarizes the supported MCP client types and protocols:
+
+.. list-table:: Supported MCP client types and protocols
+    :header-rows: 1
+
+    * - Client Type
+      - HTTP (Streamable HTTP and SSE)
+      - StdIO
+    * - Stateful Client
+      - ``HttpStatefulClient``
+      - ``StdIOStatefulClient``
+    * - Stateless Client
+      - ``HttpStatelessClient``
+      -
+
+"""
+import asyncio
+import json
+import os
+
+from agentscope.mcp import HttpStatefulClient, HttpStatelessClient
+from agentscope.tool import Toolkit
+
+# %%
+# MCP Client
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In AgentScope, MCP clients are responsible for
+#
+# - connecting to the MCP server,
+# - obtaining tool functions from the server, and
+# - calling tool functions in the MCP server.
+#
+# There are two types of MCP clients in AgentScope: **Stateful** and **Stateless**.
+# They only differ in how to manage the session with the MCP server.
+#
+# - Stateful Client: The stateful MCP client **maintains a persistent session** with the MCP server within its lifetime. The developers should explicitly call ``connect()`` and ``close()`` methods to manage the connection lifecycle.
+# - Stateless Client: The stateless MCP client creates a new session when calling the tool function, and destroys the session right after the tool function call, which is much more lightweight.
+#
+# .. note:: - The StdIO MCP server only has a stateful client. When ``connect()`` is called, it will start the MCP server locally and then connect to it.
+#  - For stateful clients, developers must ensure the client is connected when calling the tool functions.
+#  - When multiple ``HttpStatefulClient`` or ``StdIOStatefulClient`` instances are connected, they should be closed in Last In First Out (LIFO) order to prevent errors.
+#
+# Taking Gaode map MCP server as an example, the creation of stateful and stateless clients are very similar:
+#
+
+stateful_client = HttpStatefulClient(
+    # The name that identifies this MCP client
+    name="mcp_services_stateful",
+    transport="streamable_http",
+    url=f"https://mcp.amap.com/mcp?key={os.environ['GAODE_API_KEY']}",
+)
+
+stateless_client = HttpStatelessClient(
+    # The name that identifies this MCP client (used below to remove it)
+    name="mcp_services_stateless",
+    transport="streamable_http",
+    url=f"https://mcp.amap.com/mcp?key={os.environ['GAODE_API_KEY']}",
+)
+
+# %%
+# Both stateful and stateless clients provide the following methods:
+#
+# .. list-table:: MCP Client Methods
+#    :header-rows: 1
+#
+#    * - Method
+#      - Description
+#    * - ``list_tools``
+#      - List all tools available in the MCP server.
+#    * - ``get_callable_function``
+#      - Get a callable function object from the MCP server by its name.
+#
+# MCP as Tool
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope provides fine-grained management of MCP tools, including both server-level and function-level management.
+#
+# Server-Level Management
+# --------------------------------
+# You can register all tools from an MCP server into ``Toolkit`` as follows.
+#
+# .. tip:: Optionally, you can specify a group name to organize the tools. Refer to :ref:`tool` section for group-wise tools management.
+#
+
+toolkit = Toolkit()
+
+
+async def example_register_stateless_mcp() -> None:
+    """Example of registering MCP tools from a stateless client."""
+    # Register all tools from the MCP server
+    await toolkit.register_mcp_client(
+        stateless_client,
+        # group_name="map_services",  # Optional group name
+    )
+
+    print(
+        "Total number of MCP tools registered:",
+        len(toolkit.get_json_schemas()),
+    )
+
+    maps_geo = next(
+        tool
+        for tool in toolkit.get_json_schemas()
+        if tool["function"]["name"] == "maps_geo"
+    )
+    print("\nThe example ``maps_geo`` function:")
+    print(
+        json.dumps(
+            maps_geo,
+            indent=4,
+            ensure_ascii=False,
+        ),
+    )
+
+
+asyncio.run(example_register_stateless_mcp())
+
+# %%
+# To remove the registered tools, you can use the ``remove_tool_function`` to remove a specific tool function, or ``remove_mcp_clients`` to remove all tools from a specific MCP.
+#
+
+
+async def example_remove_mcp_tools() -> None:
+    """Example of removing MCP tools."""
+    print(
+        "Total number of tools before removal: ",
+        len(toolkit.get_json_schemas()),
+    )
+
+    # Remove a specific tool function by its name
+    toolkit.remove_tool_function("maps_geo")
+    print("Number of tools: ", len(toolkit.get_json_schemas()))
+
+    # Remove all tools from the MCP client by its name
+    await toolkit.remove_mcp_clients(client_names=["mcp_services_stateless"])
+    print("Number of tools: ", len(toolkit.get_json_schemas()))
+
+
+asyncio.run(example_remove_mcp_tools())
+
+# %%
+# Function-Level Management
+# --------------------------------
+# We notice the demand for more fine-grained control over MCP tools, such as post-processing the tool results, or using them to create a more complex tool function.
+#
+# Therefore, AgentScope supports obtaining the callable function object from MCP by its name, so that you can
+#
+# - call it directly,
+# - wrap it into your own function, or use it any way you like.
+#
+# Additionally, you can specify whether to wrap the tool result into ``ToolResponse`` object in AgentScope, so that you can use it seamlessly with the ``Toolkit``.
+# If you set ``wrap_tool_result=False``, the raw result type ``mcp.types.CallToolResult`` will be returned.
+#
+# Taking the ``maps_geo`` function as an example, you can obtain it as a callable function object as follows:
+#
+
+
+async def example_function_level_usage() -> None:
+    """Example of using function-level MCP tool."""
+    func_obj = await stateless_client.get_callable_function(
+        func_name="maps_geo",
+        # Whether to wrap the tool result into ToolResponse in AgentScope
+        wrap_tool_result=True,
+    )
+
+    # You can obtain its name, description and json schema
+    print("Function name:", func_obj.name)
+    print("Function description:", func_obj.description)
+    print(
+        "Function JSON schema:",
+        json.dumps(func_obj.json_schema, indent=4, ensure_ascii=False),
+    )
+
+    # Call the function object directly
+    res = await func_obj(
+        address="Tiananmen Square",
+        city="Beijing",
+    )
+    print("\nFunction call result:")
+    print(res)
+
+
+asyncio.run(example_function_level_usage())
+
+# %%
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# For more details, see:
+#
+# - :ref:`tool`
+# - :ref:`agent`
+#
--- a/docs/tutorial/en/src/task_memory.py
+++ b/docs/tutorial/en/src/task_memory.py
@@ -0,0 +1,446 @@
+# -*- coding: utf-8 -*-
+"""
+.. _memory:
+
+Memory
+========================
+
+The memory module in AgentScope is responsible for
+
+- storing the messages and
+- managing them with specific marks
+  in different storage implementations.
+
+The **mark** is a string label associated with each message in the memory,
+which can be used to categorize, filter, and retrieve messages based on their
+context or purpose.
+
+It's powerful for high-level memory management in agents. For example,
+in the ``ReActAgent`` class, the hint messages are stored with the
+mark "hint", and the memory compression functionality is also implemented
+based on marks.
+
+.. note:: The memory module only provides storage and management
+ functionalities. The algorithm logic such as compression is implemented in
+ the agent level.
+
+Currently, AgentScope provides the following memory storage implementations:
+
+.. list-table:: The built-in memory storage implementations in AgentScope
+    :header-rows: 1
+
+    * - Memory Class
+      - Description
+    * - ``InMemoryMemory``
+      - A simple in-memory implementation of memory storage.
+    * - ``AsyncSQLAlchemyMemory``
+      - An asynchronous SQLAlchemy-based implementation of memory storage, which supports various databases such as SQLite, PostgreSQL, MySQL, etc.
+    * - ``RedisMemory``
+      - A Redis-based implementation of memory storage.
+
+.. tip:: If you're interested in contributing new memory storage implementations, please refer to the
+ `Contribution Guide <https://github.com/agentscope-ai/agentscope/blob/main/CONTRIBUTING.md#types-of-contributions>`_.
+
+All the above memory classes inherit from the base class ``MemoryBase``, and
+provide the following methods to manage the messages in the memory:
+
+.. list-table:: The methods provided by the memory classes
+    :header-rows: 1
+
+    * - Method
+      - Description
+    * - ``add(
+            memories: Msg | list[Msg] | None,
+            marks: str | list[str] | None = None,
+        ) -> None``
+      - Add ``Msg`` object(s) to the memory storage with the given mark(s) (if provided).
+    * - ``delete(msg_ids: list[str]) -> int``
+      - Delete messages from the memory storage by their IDs.
+    * - ``delete_by_mark(mark: str | list[str]) -> int``
+      - Delete messages from the memory by their marks.
+    * - ``size() -> int``
+      - Get the size of the memory storage.
+    * - ``clear() -> None``
+      - Clear the memory storage.
+    * - ``get_memory(
+            mark: str | None = None,
+            exclude_mark: str | None = None,
+        ) -> list[Msg]``
+      - Get the messages from the memory by mark (if provided). Otherwise, get all messages. If the ``update_compressed_summary`` method is used to store a compressed summary, it will be attached to the head of the returned messages.
+    * - ``update_messages_mark(
+            new_mark: str | None,
+            old_mark: str | None = None,
+            msg_ids: list[str] | None = None,
+        ) -> int``
+      - A unified method to update marks of messages in the storage (add, remove, or change marks).
+    * - ``update_compressed_summary(
+            summary: str,
+        ) -> None``
+      - Update the summary attribute stored in the memory.
+"""
+import asyncio
+import json
+
+import fakeredis
+from sqlalchemy.ext.asyncio import create_async_engine
+
+from agentscope.memory import (
+    InMemoryMemory,
+    AsyncSQLAlchemyMemory,
+    RedisMemory,
+)
+from agentscope.message import Msg
+
+
+# %%
+# In-Memory Memory
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The in-memory memory provides a simple way to store messages in memory.
+# Together with the :ref:`state` module, it can persist the memory content across
+# different users and sessions.
+
+
+async def in_memory_example():
+    """An example of using InMemoryMemory to store messages in memory."""
+    memory = InMemoryMemory()
+    await memory.add(
+        Msg("Alice", "Generate a report about AgentScope", "user"),
+    )
+
+    # Add a hint message with the mark "hint"
+    await memory.add(
+        [
+            Msg(
+                "system",
+                "<system-hint>Create a plan first to collect information and "
+                "generate the report step by step.</system-hint>",
+                "system",
+            ),
+        ],
+        marks="hint",
+    )
+
+    msgs = await memory.get_memory(mark="hint")
+    print("The messages with mark 'hint':")
+    for msg in msgs:
+        print(f"- {msg}")
+
+    # All the stored messages can be exported and loaded via ``state_dict`` and ``load_state_dict`` methods.
+    state = memory.state_dict()
+    print("The state dict of the memory:")
+    print(json.dumps(state, indent=2))
+
+    # delete messages by mark
+    deleted_count = await memory.delete_by_mark("hint")
+    print(f"Deleted {deleted_count} messages with mark 'hint'.")
+
+    print("The state dict of the memory after deletion:")
+    state = memory.state_dict()
+    print(json.dumps(state, indent=2))
+
+
+asyncio.run(in_memory_example())
+
+# %%
+# Relational Database Memory
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope provides a unified interface to work with relational databases via SQLAlchemy, supporting
+#
+# - various databases such as SQLite, PostgreSQL, MySQL, etc.
+# - user and session management, and
+# - connection pooling in the production environment
+#
+# Specifically, here we use a memory backed by SQLite as an example.
+
+
+async def sqlalchemy_example() -> None:
+    """An example of using AsyncSQLAlchemyMemory to store messages in a SQLite database."""
+
+    # Create an async SQLAlchemy engine first
+    engine = create_async_engine("sqlite+aiosqlite:///./test_memory.db")
+
+    # Then create the memory with the engine
+    memory = AsyncSQLAlchemyMemory(
+        engine_or_session=engine,
+        # Optionally specify user_id and session_id
+        user_id="user_1",
+        session_id="session_1",
+    )
+
+    await memory.add(
+        Msg("Alice", "Generate a report about AgentScope", "user"),
+    )
+
+    await memory.add(
+        [
+            Msg(
+                "system",
+                "<system-hint>Create a plan first to collect information and "
+                "generate the report step by step.</system-hint>",
+                "system",
+            ),
+        ],
+        marks="hint",
+    )
+
+    msgs = await memory.get_memory(mark="hint")
+    print("The messages with mark 'hint':")
+    for msg in msgs:
+        print(f"- {msg}")
+
+    # Close the engine when done
+    await memory.close()
+
+
+asyncio.run(sqlalchemy_example())
+
+# %%
+# Optionally, you can also use the ``AsyncSQLAlchemyMemory`` as an async context manager, and the session will be closed automatically when exiting the context.
+
+
+async def sqlalchemy_context_example() -> None:
+    """Show AsyncSQLAlchemyMemory driven through ``async with``."""
+    db_engine = create_async_engine("sqlite+aiosqlite:///./test_memory.db")
+    sql_memory = AsyncSQLAlchemyMemory(
+        engine_or_session=db_engine,
+        user_id="user_1",
+        session_id="session_1",
+    )
+
+    # Work with whatever object the context manager hands back
+    async with sql_memory as memory:
+        request = Msg("Alice", "Generate a report about AgentScope", "user")
+        await memory.add(request)
+
+        # Without a mark filter, every stored message is returned
+        history = await memory.get_memory()
+        print("All messages in the memory:")
+        for stored_msg in history:
+            print(f"- {stored_msg}")
+
+
+asyncio.run(sqlalchemy_context_example())
+
+# %%
+# In a production environment, e.g., with FastAPI, connection pooling can be enabled as follows:
+#
+# .. code-block:: python
+#    :caption: SQLAlchemy Memory with Connection Pooling in FastAPI
+#
+#     from typing import AsyncGenerator
+#
+#     from fastapi import FastAPI, Depends
+#     from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
+#
+#     from agentscope.agent import ReActAgent
+#     from agentscope.pipeline import stream_printing_messages
+#
+#
+#     app = FastAPI()
+#
+#     # Create an async SQLAlchemy engine with connection pooling
+#     engine = create_async_engine(
+#         "sqlite+aiosqlite:///./test_memory.db",
+#         pool_size=10,
+#         max_overflow=20,
+#         pool_timeout=30,
+#         # ...  The other pool settings
+#     )
+#
+#     # Create a session maker
+#     async_session_maker = async_sessionmaker(
+#         engine,
+#         expire_on_commit=False,
+#         autocommit=False,
+#         autoflush=False,
+#     )
+#
+#     async def get_db() -> AsyncGenerator[AsyncSession, None]:
+#         async with async_session_maker() as session:
+#             try:
+#                 yield session
+#                 await session.commit()
+#             except Exception:
+#                 await session.rollback()
+#                 raise
+#             finally:
+#                 await session.close()
+#
+#     @app.post("/chat")
+#     async def chat_endpoint(
+#         user_id: str,
+#         session_id: str,
+#         input: str,
+#         db_session: AsyncSession = Depends(get_db),
+#     ):
+#         # Some setup for the agent
+#         ...
+#
+#         # Create the agent with the SQLAlchemy memory
+#         agent = ReActAgent(
+#             # ...
+#             memory=AsyncSQLAlchemyMemory(
+#                 engine_or_session=db_session,
+#                 user_id=user_id,
+#                 session_id=session_id,
+#             ),
+#         )
+#
+#         # Handle the chat with the agent
+#         async for msg, _ in stream_printing_messages(
+#             agents=[agent],
+#             coroutine_task=agent(Msg("user", input, "user")),
+#         ):
+#             # yield msg to the client
+#             ...
+#
+#
+# NoSQL Database Memory
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope also provides memory implementations based on NoSQL databases such as Redis.
+# It also supports user and session management, and connection pooling in the production environment.
+#
+# First, we can initialize the Redis memory as follows:
+
+
+async def redis_memory_example() -> None:
+    """Demonstrate storing and filtering messages with RedisMemory."""
+    # fakeredis stands in for Redis here, so no real server is required
+    redis_stub = fakeredis.aioredis.FakeRedis(decode_responses=True)
+
+    # Build the memory on top of the stub's connection pool. To connect to a
+    # real Redis server instead, specify host and port, e.g.
+    # host="localhost", port=6379.
+    redis_memory = RedisMemory(
+        connection_pool=redis_stub.connection_pool,
+        # user_id and session_id are optional
+        user_id="user_1",
+        session_id="session_1",
+    )
+
+    # Store a plain user request
+    user_request = Msg("Alice", "Generate a report about AgentScope", "user")
+    await redis_memory.add(user_request)
+
+    # Store a system hint and tag it with the mark "hint"
+    planning_hint = Msg(
+        "system",
+        "<system-hint>Create a plan first to collect information and "
+        "generate the report step by step.</system-hint>",
+        "system",
+    )
+    await redis_memory.add(planning_hint, marks="hint")
+
+    # Only messages carrying the mark "hint" are fetched
+    hinted = await redis_memory.get_memory(mark="hint")
+    print("The messages with mark 'hint':")
+    for hinted_msg in hinted:
+        print(f"- {hinted_msg}")
+
+
+asyncio.run(redis_memory_example())
+
+# %%
+# Similarly, ``RedisMemory`` can also be used with connection pooling in the production environment, e.g., with FastAPI.
+#
+# .. code-block:: python
+#    :caption: Redis Memory with Connection Pooling in FastAPI
+#
+#     from fastapi import FastAPI, HTTPException
+#     from redis.asyncio import ConnectionPool
+#     from contextlib import asynccontextmanager
+#
+#     # Global Redis connection pool
+#     redis_pool: ConnectionPool | None = None
+#
+#
+#     # Use the lifespan event to manage the Redis connection pool
+#     @asynccontextmanager
+#     async def lifespan(app: FastAPI):
+#         global redis_pool
+#         redis_pool = ConnectionPool(
+#             host="localhost",
+#             port=6379,
+#             db=0,
+#             password=None,
+#             decode_responses=True,
+#             max_connections=10,
+#             encoding="utf-8",
+#         )
+#         print("✅ Redis connection established")
+#
+#         yield
+#
+#         await redis_pool.disconnect()
+#         print("✅ Redis connection closed")
+#
+#
+#     app = FastAPI(lifespan=lifespan)
+#
+#
+#     @app.post("/chat_endpoint")
+#     async def chat_endpoint(
+#         user_id: str, session_id: str, input: str
+#     ):
+#         """A chat endpoint"""
+#         global redis_pool
+#         if redis_pool is None:
+#             raise HTTPException(
+#                 status_code=500,
+#                 detail="Redis connection pool is not initialized.",
+#             )
+#
+#         # Create the Redis memory
+#         memory = RedisMemory(
+#             connection_pool=redis_pool,
+#             user_id=user_id,
+#             session_id=session_id,
+#         )
+#
+#         ...
+#
+#         # Close the Redis client connection when done
+#         client = memory.get_client()
+#         await client.aclose()
+#
+#
+#
+# Customizing Memory
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# To customize your own memory, just inherit from ``MemoryBase`` and implement the following methods:
+#
+# .. list-table::
+#     :header-rows: 1
+#
+#     * - Method
+#       - Description
+#     * - ``add``
+#       - Add ``Msg`` objects to the memory
+#     * - ``delete``
+#       - Delete ``Msg`` objects from the memory
+#     * - ``delete_by_mark``
+#       - Delete ``Msg`` objects from the memory by their marks
+#     * - ``size``
+#       - The size of the memory
+#     * - ``clear``
+#       - Clear the memory content
+#     * - ``get_memory``
+#       - Get the memory content as a list of ``Msg`` objects
+#     * - ``update_messages_mark``
+#       - Update marks of messages in the memory
+#     * - ``state_dict``
+#       - Get the state dictionary of the memory
+#     * - ``load_state_dict``
+#       - Load the state dictionary of the memory
+#
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~
+# - :ref:`agent`
+# - :ref:`long-term-memory`
--- a/docs/tutorial/en/src/task_middleware.py
+++ b/docs/tutorial/en/src/task_middleware.py
@@ -0,0 +1,403 @@
+# -*- coding: utf-8 -*-
+"""
+.. _middleware:
+
+Middleware
+===========================
+
+AgentScope provides a flexible middleware system that allows developers to intercept and modify the execution of various operations.
+Currently, middleware support is available for **tool execution** in the ``Toolkit`` class.
+
+The middleware system follows an **onion model**, where each middleware wraps around the previous one, forming layers.
+This allows developers to:
+
+- Perform **pre-processing** before the operation
+- **Intercept and modify** responses during execution
+- Perform **post-processing** after the operation completes
+- **Skip** the operation execution entirely based on conditions
+
+.. tip:: Future versions of AgentScope will expand middleware support to other components such as agents and models.
+
+"""
+import asyncio
+from typing import AsyncGenerator, Callable
+
+from agentscope.message import TextBlock, ToolUseBlock
+from agentscope.tool import ToolResponse, Toolkit
+
+
+# %%
+# Tool Execution Middleware
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The ``Toolkit`` class supports middleware for tool execution via the ``register_middleware`` method.
+# Each middleware can intercept the tool call and modify the input or output.
+#
+# Middleware Signature
+# ------------------------------
+#
+# A middleware function should have the following signature:
+#
+# .. code-block:: python
+#
+#     async def middleware(
+#         kwargs: dict,
+#         next_handler: Callable,
+#     ) -> AsyncGenerator[ToolResponse, None]:
+#         # Access parameters from kwargs
+#         tool_call = kwargs["tool_call"]
+#
+#         # Pre-processing
+#         # ...
+#
+#         # Call the next middleware or tool function
+#         async for response in await next_handler(**kwargs):
+#             # Post-processing
+#             yield response
+#
+# .. list-table:: Middleware Parameters
+#    :header-rows: 1
+#
+#    * - Parameter
+#      - Type
+#      - Description
+#    * - ``kwargs``
+#      - ``dict``
+#      - Context parameters. Currently, includes ``tool_call`` (ToolUseBlock). May include additional parameters in future versions.
+#    * - ``next_handler``
+#      - ``Callable``
+#      - A callable that accepts the context parameters as keyword arguments (``**kwargs``) and returns a coroutine that resolves to an ``AsyncGenerator`` of ``ToolResponse`` objects
+#    * - **Returns**
+#      - ``AsyncGenerator[ToolResponse, None]``
+#      - An async generator that yields ToolResponse objects
+#
+# Basic Example
+# ------------------------------
+#
+# Here is a simple middleware that logs tool calls:
+#
+
+
+async def logging_middleware(
+    kwargs: dict,
+    next_handler: Callable,
+) -> AsyncGenerator[ToolResponse, None]:
+    """Log a tool call before, during and after its execution."""
+    call = kwargs["tool_call"]
+
+    # Before execution: report which tool is invoked and with what input
+    print(f"[Middleware] Calling tool: {call['name']}")
+    print(f"[Middleware] Input: {call['input']}")
+
+    # Delegate to the next handler (another middleware or the tool itself)
+    response_stream = await next_handler(**kwargs)
+    async for chunk in response_stream:
+        # Report the text of the first content block, then pass the chunk on
+        first_text = chunk.content[0]["text"]
+        print(f"[Middleware] Response: {first_text}")
+        yield chunk
+
+    # Reached only after the response stream is exhausted
+    print(f"[Middleware] Tool {call['name']} completed")
+
+
+# %%
+# Let's register this middleware with a toolkit and test it:
+#
+
+
+async def search_tool(query: str) -> ToolResponse:
+    """A simple search tool.
+
+    Args:
+        query (`str`):
+            The search query.
+
+    Returns:
+        `ToolResponse`:
+            The search result.
+    """
+    # The "result" is a single text block that simply echoes the query
+    result_block = TextBlock(
+        type="text",
+        text=f"Search results for '{query}'",
+    )
+    return ToolResponse(content=[result_block])
+
+
+async def example_logging_middleware() -> None:
+    """Call ``search_tool`` through a toolkit that has the logging middleware."""
+    toolkit = Toolkit()
+    # Register the tool first, then the middleware that wraps its calls
+    toolkit.register_tool_function(search_tool)
+    toolkit.register_middleware(logging_middleware)
+
+    # Describe the tool call to perform
+    tool_call = ToolUseBlock(
+        type="tool_use",
+        id="1",
+        name="search_tool",
+        input={"query": "AgentScope"},
+    )
+
+    # Awaiting the call gives an async iterable of responses
+    response_stream = await toolkit.call_tool_function(tool_call)
+    async for item in response_stream:
+        print(f"\n[Final] {item.content[0]['text']}\n")
+
+
+# Banner for the example, one piece per output line
+print("=" * 60, "Example 1: Logging Middleware", "=" * 60, sep="\n")
+asyncio.run(example_logging_middleware())
+
+# %%
+# Modifying Input and Output
+# ------------------------------
+#
+# Middleware can also modify the tool call input and the response content:
+#
+
+
+async def transform_middleware(
+    kwargs: dict,
+    next_handler: Callable,
+) -> AsyncGenerator[ToolResponse, None]:
+    """Rewrite the tool's query on the way in and its text on the way out."""
+    tool_input = kwargs["tool_call"]["input"]
+
+    # Inbound: tag the query before the next handler sees it
+    tool_input["query"] = f"[TRANSFORMED] {tool_input['query']}"
+
+    response_stream = await next_handler(**kwargs)
+    async for chunk in response_stream:
+        # Outbound: tag the text of the first content block in place
+        first_block = chunk.content[0]
+        first_block["text"] = f"{first_block['text']} [MODIFIED]"
+        yield chunk
+
+
+async def example_transform_middleware() -> None:
+    """Call ``search_tool`` through a toolkit with the transform middleware."""
+    toolkit = Toolkit()
+    toolkit.register_tool_function(search_tool)
+    toolkit.register_middleware(transform_middleware)
+
+    # Describe the tool call to perform
+    tool_call = ToolUseBlock(
+        type="tool_use",
+        id="2",
+        name="search_tool",
+        input={"query": "middleware"},
+    )
+
+    response_stream = await toolkit.call_tool_function(tool_call)
+    async for item in response_stream:
+        print(f"Result: {item.content[0]['text']}")
+
+
+# Banner for the example, one piece per output line
+print("\n" + "=" * 60, "Example 2: Transform Middleware", "=" * 60, sep="\n")
+asyncio.run(example_transform_middleware())
+
+# %%
+# Authorization Middleware
+# ------------------------------
+#
+# You can use middleware to implement authorization checks and skip tool execution if not authorized:
+#
+
+
+async def authorization_middleware(
+    kwargs: dict,
+    next_handler: Callable,
+) -> AsyncGenerator[ToolResponse, None]:
+    """Let only allow-listed tools run; answer with an error otherwise."""
+    call = kwargs["tool_call"]
+
+    # Allow-list of tool names (kept deliberately simple for the example)
+    allowed_tools = {"search_tool"}
+
+    if call["name"] in allowed_tools:
+        # Authorized: forward every response from the next handler
+        print(f"[Auth] Tool {call['name']} is authorized")
+        response_stream = await next_handler(**kwargs)
+        async for chunk in response_stream:
+            yield chunk
+        return
+
+    # Not authorized: never call next_handler, yield an error response instead
+    print(f"[Auth] Tool {call['name']} is not authorized")
+    denial_text = f"Error: Tool '{call['name']}' is not authorized"
+    yield ToolResponse(
+        content=[TextBlock(type="text", text=denial_text)],
+    )
+
+
+async def unauthorized_tool(data: str) -> ToolResponse:
+    """An unauthorized tool.
+
+    Args:
+        data (`str`):
+            Some data.
+
+    Returns:
+        `ToolResponse`:
+            The result.
+    """
+    # A single text block that echoes the given data
+    echo_block = TextBlock(type="text", text=f"Processing {data}")
+    return ToolResponse(content=[echo_block])
+
+
+async def example_authorization_middleware() -> None:
+    """Call one allowed and one disallowed tool through the auth middleware."""
+    toolkit = Toolkit()
+    for tool in (search_tool, unauthorized_tool):
+        toolkit.register_tool_function(tool)
+    toolkit.register_middleware(authorization_middleware)
+
+    # Each case pairs the heading to print with the tool call to perform
+    cases = [
+        (
+            "\nCalling authorized tool:",
+            ToolUseBlock(
+                type="tool_use",
+                id="3",
+                name="search_tool",
+                input={"query": "test"},
+            ),
+        ),
+        (
+            "\nCalling unauthorized tool:",
+            ToolUseBlock(
+                type="tool_use",
+                id="4",
+                name="unauthorized_tool",
+                input={"data": "test"},
+            ),
+        ),
+    ]
+
+    for heading, tool_call in cases:
+        print(heading)
+        response_stream = await toolkit.call_tool_function(tool_call)
+        async for item in response_stream:
+            print(f"Result: {item.content[0]['text']}")
+
+
+# Banner for the example, one piece per output line
+print(
+    "\n" + "=" * 60,
+    "Example 3: Authorization Middleware",
+    "=" * 60,
+    sep="\n",
+)
+asyncio.run(example_authorization_middleware())
+
+# %%
+# Multiple Middleware (Onion Model)
+# ------------------------------------
+#
+# When multiple middleware are registered, they form an onion-like structure.
+# The execution order follows the onion model:
+#
+# - **Pre-processing**: Executes in the order middleware are registered
+# - **Post-processing**: Executes in reverse order (inner to outer)
+#
+# This is because the actual tool response object is passed through the middleware chain,
+# and each middleware modifies it in place.
+#
+
+
+async def middleware_1(
+    kwargs: dict,
+    next_handler: Callable,
+) -> AsyncGenerator[ToolResponse, None]:
+    """Tag both the query and the response text with ``[M1]``."""
+    call = kwargs["tool_call"]
+
+    # Inbound step: announce, then append the tag to the query
+    print("[M1] Pre-processing")
+    call["input"]["query"] += " [M1]"
+
+    response_stream = await next_handler(**kwargs)
+    async for chunk in response_stream:
+        # Outbound step: append the tag to the first block's text
+        chunk.content[0]["text"] += " [M1]"
+        print("[M1] Post-processing")
+        yield chunk
+
+
+async def middleware_2(
+    kwargs: dict,
+    next_handler: Callable,
+) -> AsyncGenerator[ToolResponse, None]:
+    """Tag both the query and the response text with ``[M2]``."""
+    call = kwargs["tool_call"]
+
+    # Inbound step: announce, then append the tag to the query
+    print("[M2] Pre-processing")
+    call["input"]["query"] += " [M2]"
+
+    response_stream = await next_handler(**kwargs)
+    async for chunk in response_stream:
+        # Outbound step: append the tag to the first block's text
+        chunk.content[0]["text"] += " [M2]"
+        print("[M2] Post-processing")
+        yield chunk
+
+
+async def example_multiple_middleware() -> None:
+    """Call ``search_tool`` with ``middleware_1`` and ``middleware_2`` stacked."""
+    toolkit = Toolkit()
+    toolkit.register_tool_function(search_tool)
+
+    # Registration order matters: middleware_1 first, middleware_2 second
+    for middleware in (middleware_1, middleware_2):
+        toolkit.register_middleware(middleware)
+
+    tool_call = ToolUseBlock(
+        type="tool_use",
+        id="5",
+        name="search_tool",
+        input={"query": "test"},
+    )
+
+    response_stream = await toolkit.call_tool_function(tool_call)
+    async for item in response_stream:
+        print(f"\nFinal result: {item.content[0]['text']}")
+
+
+# Banner for the example, one piece per output line
+print(
+    "\n" + "=" * 60,
+    "Example 4: Multiple Middleware (Onion Model)",
+    "=" * 60,
+    sep="\n",
+)
+# Expected flow, followed by an empty line
+print("\nExecution flow:", "M1 Pre → M2 Pre → Tool → M2 Post → M1 Post", "", sep="\n")
+asyncio.run(example_multiple_middleware())
+
+# %%
+# Use Cases
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The middleware system is useful for various scenarios:
+#
+# - **Logging and Monitoring**: Track tool usage and performance
+# - **Authorization**: Control access to specific tools
+# - **Rate Limiting**: Limit the frequency of tool calls
+# - **Caching**: Cache tool responses for repeated calls
+# - **Error Handling**: Add retry logic or graceful degradation
+# - **Input Validation**: Validate and sanitize tool inputs
+# - **Output Transformation**: Format or filter tool outputs
+# - **Metrics Collection**: Collect statistics about tool usage
+#
+# .. note::
+#     - Middleware are applied in the order they are registered
+#     - The same ``ToolResponse`` object is passed through the middleware chain and modified in place
+#     - Middleware can completely skip tool execution by not calling ``next_handler``
+#     - All middleware must be async generator functions that yield ``ToolResponse`` objects
--- a/docs/tutorial/en/src/task_model.py
+++ b/docs/tutorial/en/src/task_model.py
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+"""
+.. _model:
+
+Model
+====================
+
+In this tutorial, we introduce the model APIs integrated in AgentScope, how to use them and how to integrate new model APIs.
+The supported model APIs and providers include:
+
+.. list-table::
+    :header-rows: 1
+
+    * - API
+      - Class
+      - Compatible
+      - Streaming
+      - Tools
+      - Vision
+      - Reasoning
+    * - OpenAI
+      - ``OpenAIChatModel``
+      - vLLM, DeepSeek
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+    * - DashScope
+      - ``DashScopeChatModel``
+      -
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+    * - Anthropic
+      - ``AnthropicChatModel``
+      -
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+    * - Gemini
+      - ``GeminiChatModel``
+      -
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+    * - Ollama
+      - ``OllamaChatModel``
+      -
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+
+.. note:: When using vLLM, you need to configure the appropriate tool calling parameters for different models during deployment, such as ``--enable-auto-tool-choice``, ``--tool-call-parser``, etc. For more details, refer to the `official vLLM documentation <https://docs.vllm.ai/en/latest/features/tool_calling.html>`_.
+
+.. note:: For OpenAI-compatible models (e.g. vLLM, Deepseek), developers can use the ``OpenAIChatModel`` class, and specify the API endpoint by the ``client_kwargs`` parameter: ``client_kwargs={"base_url": "http://your-api-endpoint"}``. For example:
+
+    .. code-block:: python
+
+        OpenAIChatModel(client_kwargs={"base_url": "http://localhost:8000/v1"})
+
+.. note:: Model behavior parameters (such as temperature, maximum length, etc.) can be preset in the constructor function via the ``generate_kwargs`` parameter. For example:
+
+    .. code-block:: python
+
+        OpenAIChatModel(generate_kwargs={"temperature": 0.3, "max_tokens": 1000})
+
+To provide unified model interfaces, the above model classes have the following common methods:
+
+- The first three arguments of the ``__call__`` method are ``messages`` , ``tools`` and ``tool_choice``, representing the input messages, JSON schema of tool functions, and tool selection mode, respectively.
+- The return type is either a ``ChatResponse`` instance or an async generator of ``ChatResponse`` instances in streaming mode.
+
+.. note:: Different model APIs differ in the input message format, refer to :ref:`prompt` for more details.
+
+The ``ChatResponse`` instance contains the generated thinking/text/tool use content, identity, created time and usage information.
+"""
+import asyncio
+import json
+import os
+
+from agentscope.message import TextBlock, ToolUseBlock, ThinkingBlock, Msg
+from agentscope.model import ChatResponse, DashScopeChatModel
+
+# Build a ``ChatResponse`` by hand whose content holds one block of each
+# kind: a thinking block, a text block and a tool use block.
+response = ChatResponse(
+    content=[
+        ThinkingBlock(
+            type="thinking",
+            thinking="I should search for AgentScope on Google.",
+        ),
+        TextBlock(type="text", text="I'll search for AgentScope on Google."),
+        ToolUseBlock(
+            type="tool_use",
+            id="642n298gjna",
+            name="google_search",
+            input={"query": "AgentScope?"},
+        ),
+    ],
+)
+
+# Print the response to inspect what it contains
+print(response)
+
+# %%
+# Taking ``DashScopeChatModel`` as an example, we can use it to create a chat model instance and call it with messages and tools:
+
+
+async def example_model_call() -> None:
+    """Call DashScopeChatModel once without streaming and print the reply."""
+    chat_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    )
+
+    prompt = [{"role": "user", "content": "Hi!"}]
+    reply = await chat_model(messages=prompt)
+
+    # The response content can be passed straight into a ``Msg`` object
+    reply_msg = Msg("Friday", reply.content, "assistant")
+
+    print("The response:", reply)
+    print("The response as Msg:", reply_msg)
+
+
+asyncio.run(example_model_call())
+
+# %%
+# Streaming
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# To enable streaming mode, set the ``stream`` parameter in the model constructor to ``True``.
+# When streaming is enabled, the ``__call__`` method will return an **async generator** that yields ``ChatResponse`` instances as they are generated by the model.
+#
+# .. note:: The streaming mode in AgentScope is designed to be **cumulative**, meaning the content in each chunk contains all the previous content plus the newly generated content.
+#
+
+
+async def example_streaming() -> None:
+    """Stream a reply from DashScopeChatModel and print every chunk."""
+    streaming_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=True,
+    )
+
+    counting_request = {
+        "role": "user",
+        "content": "Count from 1 to 20, and just report the number without any other information.",
+    }
+    generator = await streaming_model(messages=[counting_request])
+    print("The type of the response:", type(generator))
+
+    # Number the chunks by hand while consuming the async generator
+    chunk_index = 0
+    async for chunk in generator:
+        print(f"Chunk {chunk_index}")
+        print(f"\ttype: {type(chunk.content)}")
+        print(f"\t{chunk}\n")
+        chunk_index += 1
+
+
+asyncio.run(example_streaming())
+
+# %%
+# Reasoning
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope supports reasoning models by providing the ``ThinkingBlock``.
+#
+
+
+async def example_reasoning() -> None:
+    """Query a model with thinking enabled and print the final chunk.
+
+    The response is consumed with ``async for`` and only the last chunk
+    received is printed.
+    """
+    model = DashScopeChatModel(
+        model_name="qwen-turbo",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        enable_thinking=True,
+        # Requested explicitly because the response below is iterated as an
+        # async generator
+        stream=True,
+    )
+
+    res = await model(
+        messages=[
+            {"role": "user", "content": "Who am I?"},
+        ],
+    )
+
+    # Keep only the most recent chunk; it stays ``None`` if nothing arrives
+    last_chunk = None
+    async for chunk in res:
+        last_chunk = chunk
+    print("The final response:")
+    print(last_chunk)
+
+
+asyncio.run(example_reasoning())
+
+# %%
+# Tools API
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Different model providers differ in their tools APIs, e.g. the tools JSON schema, the tool call/response format.
+# To provide a unified interface, AgentScope solves the problem by:
+#
+# - Providing unified tool call block :ref:`ToolUseBlock <tool-block>` and tool response block :ref:`ToolResultBlock <tool-block>`, respectively.
+# - Providing a unified tools interface in the ``__call__`` method of the model classes, that accepts a list of tools JSON schemas as follows:
+#
+
+# A list holding one tool schema: a function named "google_search" that
+# takes a single required string argument, "query".
+json_schemas = [
+    {
+        "type": "function",
+        "function": {
+            "name": "google_search",
+            "description": "Search for a query on Google.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "query": {
+                        "type": "string",
+                        "description": "The search query.",
+                    },
+                },
+                "required": ["query"],
+            },
+        },
+    },
+]
+
+# %%
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# - :ref:`message`
+# - :ref:`prompt`
+#
--- a/docs/tutorial/en/src/task_pipeline.py
+++ b/docs/tutorial/en/src/task_pipeline.py
@@ -0,0 +1,288 @@
+# -*- coding: utf-8 -*-
+"""
+.. _pipeline:
+
+Pipeline
+========================
+
+For multi-agent orchestration, AgentScope provides the ``agentscope.pipeline`` module
+as syntax sugar for chaining agents together, including
+
+- **MsgHub**: a message hub for broadcasting messages among multiple agents
+- **sequential_pipeline** and **SequentialPipeline**: a functional and class-based implementation that chains agents in a sequential manner
+- **fanout_pipeline** and **FanoutPipeline**: a functional and class-based implementation that distributes the same input to multiple agents
+- **stream_printing_messages**: a utility function that converts the printing messages from agent(s) into an async generator
+
+"""
+
+import os, asyncio
+
+from agentscope.formatter import DashScopeMultiAgentFormatter
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.agent import ReActAgent
+from agentscope.pipeline import MsgHub, stream_printing_messages
+
+
+# %%
+# Broadcasting with MsgHub
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The ``MsgHub`` class is an **async context manager**, receiving a list of agents as its participants.
+# When one participant generates a replying message, all other participants will receive this message by calling their ``observe`` method.
+#
+# That means within a ``MsgHub`` context, developers don't need to manually send a replying message from one agent to another.
+# The broadcasting is automatically handled.
+#
+# Here we create four agents: Alice, Bob, Charlie and David.
+# Then we start a meeting with Alice, Bob and Charlie by introducing themselves.
+# Note David is not included in this meeting.
+
+
+def create_agent(name: str, age: int, career: str) -> ReActAgent:
+    """Build a ReActAgent whose system prompt states its name, age and career."""
+    persona = f"You're {name}, a {age}-year-old {career}"
+    chat_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+    )
+    return ReActAgent(
+        name=name,
+        sys_prompt=persona,
+        model=chat_model,
+        formatter=DashScopeMultiAgentFormatter(),
+    )
+
+
+# The four agents used throughout the examples below
+alice = create_agent(name="Alice", age=50, career="teacher")
+bob = create_agent(name="Bob", age=35, career="engineer")
+charlie = create_agent(name="Charlie", age=28, career="designer")
+david = create_agent(name="David", age=30, career="developer")
+
+# %%
+# Then we start a meeting and let them introduce themselves without manual message passing:
+#
+# .. hint:: The message in ``announcement`` will be broadcasted to all participants when entering the ``MsgHub`` context.
+#
+
+
+async def example_broadcast_message() -> None:
+    """Let Alice, Bob and Charlie reply in turn inside a ``MsgHub`` context.
+
+    The hub is opened with the three agents as participants and an
+    announcement message asking each of them to introduce themselves; the
+    agents are then called one after another without passing any message
+    between them by hand.
+    """
+
+    # Create a message hub; the hub object itself is not needed here, so it
+    # is not bound to a name
+    async with MsgHub(
+        participants=[alice, bob, charlie],
+        announcement=Msg(
+            "user",
+            "Now introduce yourself in one sentence, including your name, age and career.",
+            "user",
+        ),
+    ):
+        # Group chat without manual message passing
+        await alice()
+        await bob()
+        await charlie()
+
+
+asyncio.run(example_broadcast_message())
+
+# %%
+# Now let's check if Bob, Charlie and David received Alice's message.
+#
+
+
+async def check_broadcast_message():
+    """Ask Bob, Charlie and David, in that order, what they know about Alice."""
+    question = Msg(
+        "user",
+        "Do you know who's Alice, and what she does? Answer me briefly.",
+        "user",
+    )
+
+    # The same question goes to each agent, one after another
+    for agent in (bob, charlie, david):
+        await agent(question)
+
+
+asyncio.run(check_broadcast_message())
+
+# %%
+# Now we observe that Bob and Charlie know Alice and her profession, while David has no idea
+# about Alice since he is not included in the ``MsgHub`` context.
+#
+#
+# Dynamic Participant Management
+# ---------------------------------------
+# Additionally, ``MsgHub`` supports dynamically managing participants through the following methods:
+#
+# - ``add``: add one or multiple agents as new participants
+# - ``delete``: remove one or multiple agents from participants, and they will no longer receive broadcasted messages
+# - ``broadcast``: broadcast a message to all current participants
+#
+# .. note:: The newly added participants will not receive the previous messages.
+#
+# .. code-block:: python
+#
+#       async with MsgHub(participants=[alice]) as hub:
+#           # Add new participants
+#           hub.add(david)
+#
+#           # Remove participants
+#           hub.delete(alice)
+#
+#           # Broadcast to all current participants
+#           await hub.broadcast(
+#               Msg("system", "Now we begin to ...", "system"),
+#           )
+#
+#
+# Pipeline
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Pipeline serves as a syntax sugar for multi-agent orchestration.
+#
+# Currently, AgentScope provides three main pipeline implementations:
+#
+# 1. **Sequential Pipeline**: Execute agents one by one in a predefined order
+# 2. **Fanout Pipeline**: Distribute the same input to multiple agents and collect their responses
+# 3. **Stream Printing Messages**: Convert the printing messages from an agent into an async generator
+#
+# Sequential Pipeline
+# ------------------------
+# The sequential pipeline executes agents one by one, where the output of the previous agent
+# becomes the input of the next agent.
+#
+# For example, the two following code snippets are equivalent:
+#
+#
+# .. code-block:: python
+#     :caption: Code snippet 1: Manually call agents one by one
+#
+#     msg = None
+#     msg = await alice(msg)
+#     msg = await bob(msg)
+#     msg = await charlie(msg)
+#     msg = await david(msg)
+#
+#
+# .. code-block:: python
+#     :caption: Code snippet 2: Use sequential pipeline
+#
+#     from agentscope.pipeline import sequential_pipeline
+#     msg = await sequential_pipeline(
+#         # List of agents to be executed in order
+#         agents=[alice, bob, charlie, david],
+#         # The first input message, can be None
+#         msg=None
+#     )
+#
+
+# %%
+# Fanout Pipeline
+# ------------------------
+# The fanout pipeline distributes the same input message to multiple agents simultaneously and collects all their responses. This is useful when you want to gather different perspectives or expertise on the same topic.
+#
+# For example, the two following code snippets are equivalent:
+#
+#
+# .. code-block:: python
+#     :caption: Code snippet 3: Manually call agents one by one
+#
+#     from copy import deepcopy
+#
+#     msgs = []
+#     msg = None
+#     for agent in [alice, bob, charlie, david]:
+#         msgs.append(await agent(deepcopy(msg)))
+#
+#
+# .. code-block:: python
+#     :caption: Code snippet 4: Use fanout pipeline
+#
+#     from agentscope.pipeline import fanout_pipeline
+#     msgs = await fanout_pipeline(
+#         # List of agents that all receive the same input message
+#         agents=[alice, bob, charlie, david],
+#         # The input message distributed to every agent, can be None
+#         msg=None,
+#         enable_gather=False,
+#     )
+#
+# .. note::
+#     The ``enable_gather`` parameter controls the execution mode of the fanout pipeline:
+#
+#     - ``enable_gather=True`` (default): Executes all agents **concurrently** using ``asyncio.gather()``. This provides better performance for I/O-bound operations like API calls, as agents run in parallel.
+#     - ``enable_gather=False``: Executes agents **sequentially** one by one. This is useful when you need deterministic execution order or want to avoid overwhelming external services with concurrent requests.
+#
+#     Choose concurrent execution for better performance, or sequential execution for predictable ordering and resource control.
+#
+# .. tip::
+#     By combining ``MsgHub`` and ``sequential_pipeline`` or ``fanout_pipeline``, you can create more complex workflows very easily.
+#
+#
+# Stream Printing Messages
+# -------------------------------------
+# The ``stream_printing_messages`` function converts the printing messages from agent(s) into an async generator.
+# It will help you to obtain the intermediate messages from the agent(s) in a streaming way.
+#
+# It accepts a list of agents and a coroutine task, then returns an async generator that yields tuples of ``(Msg, bool)``,
+# containing the printing message during execution of the coroutine task.
+#
+# Note the messages with the same ``id`` are considered as the same message, and the ``last`` flag indicates whether it's the last chunk of this message.
+#
+# Taking the following code snippet as an example:
+
+
+async def run_example_pipeline() -> None:
+    """Stream the printing messages of a single agent reply and echo them."""
+    alice = create_agent("Alice", 20, "student")
+    # Turn off the agent's own console output to keep the printed stream tidy
+    alice.set_console_output_enabled(False)
+
+    # Build the reply coroutine first, then hand it to the streaming helper
+    reply_task = alice(Msg("user", "Hello, who are you?", "user"))
+    chunks = stream_printing_messages(
+        agents=[alice],
+        coroutine_task=reply_task,
+    )
+    async for chunk, is_last in chunks:
+        print(chunk, is_last)
+        if is_last:
+            print()  # blank line once a message has received its last chunk
+
+
+asyncio.run(run_example_pipeline())
+
+
+# %%
+# Advanced Pipeline Features
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# Additionally, for reusability, we also provide a class-based implementation:
+#
+# .. code-block:: python
+#     :caption: Using SequentialPipeline class
+#
+#     from agentscope.pipeline import SequentialPipeline
+#
+#     # Create a pipeline object
+#     pipeline = SequentialPipeline(agents=[alice, bob, charlie, david])
+#
+#     # Call the pipeline
+#     msg = await pipeline(msg=None)
+#
+#     # Reuse the pipeline with different input
+#     msg = await pipeline(msg=Msg("user", "Hello!", "user"))
+#
+#
+# .. code-block:: python
+#     :caption: Using FanoutPipeline class
+#
+#     from agentscope.pipeline import FanoutPipeline
+#
+#     # Create a pipeline object
+#     pipeline = FanoutPipeline(agents=[alice, bob, charlie, david])
+#
+#     # Call the pipeline
+#     msgs = await pipeline(msg=None)
+#
+#     # Reuse the pipeline with different input
+#     msgs = await pipeline(msg=Msg("user", "Hello!", "user"))
+#
--- a/docs/tutorial/en/src/task_plan.py
+++ b/docs/tutorial/en/src/task_plan.py
@@ -0,0 +1,293 @@
+# -*- coding: utf-8 -*-
+"""
+.. _plan:
+
+Plan
+=========================
+
+The Plan Module enables agents to formally break down complex tasks into manageable sub-tasks and execute them systematically. Key features include:
+
+- Support **manual plan specification**
+- Comprehensive plan management capabilities:
+   - **Creating, modifying, abandoning, and restoring** plans
+   - **Switching** between multiple plans
+   - **Gracefully handling interruptions** by temporarily suspending plans to address user queries or urgent tasks
+- **Real-time visualization and monitoring** of plan execution
+
+.. note:: The current plan module has the following limitations, and we are working on improving them:
+
+ - The subtasks in a plan must be executed sequentially
+
+Specifically, the plan module works by
+
+- providing tool functions for plan management
+- inserting hint messages to guide the ReAct agent to complete the plan
+
+The following figure illustrates how the plan module works with the ReAct agent:
+
+.. figure:: ../../_static/images/plan.png
+    :width: 90%
+    :alt: Plan module
+    :class: bordered-image
+    :align: center
+
+    How the plan module works with the ReAct agent
+
+"""
+import asyncio
+import os
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.model import DashScopeChatModel
+from agentscope.plan import PlanNotebook, Plan, SubTask
+
+# %%
+# PlanNotebook
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The `PlanNotebook` class is the core of the plan module, responsible for providing
+#
+# - plan-related tool functions
+# - hint messages to guide the agent to finish the plan
+#
+# The `PlanNotebook` class can be instantiated with the following parameters:
+#
+# .. list-table:: Parameters of the `PlanNotebook` constructor
+#   :header-rows: 1
+#
+#   * - Name
+#     - Type
+#     - Description
+#   * - ``max_subtasks``
+#     - ``int | None``
+#     - The maximum number of subtasks allowed in a plan, infinite if None
+#   * - ``plan_to_hint``
+#     - ``Callable[[Plan | None], str | None] | None``
+#     - The function to generate hint message based on the current plan. If not provided, a default `DefaultPlanToHint` object will be used.
+#   * - ``storage``
+#     - ``PlanStorageBase | None``
+#     - The plan storage. If not provided, a default in-memory storage will be used.
+#
+# The ``plan_to_hint`` callable object is the most important part of the
+# `PlanNotebook` class, and also serves as the interface for prompt engineering.
+# We have built a default `DefaultPlanToHint` class that can be used directly.
+# Developers are encouraged to provide their own ``plan_to_hint`` function
+# for better performance.
+#
+# The ``storage`` stores historical plans, allowing the agent to
+# retrieve and restore historical plans. Developers are encouraged to
+# implement their own plan storage by inheriting the ``PlanStorageBase`` class.
+# If not provided, a default in-memory storage will be used.
+#
+# .. tip:: The ``PlanStorageBase`` class inherits from the ``StateModule``
+#  class, so that the plan storage will also be saved and loaded by the
+#  session management.
+#
+# The core attributes and methods of the `PlanNotebook` class are summarized
+# as follows:
+#
+# .. list-table:: Core attributes and methods of the `PlanNotebook` class
+#    :header-rows: 1
+#
+#    * - Type
+#      - Name
+#      - Description
+#    * - attribute
+#      - ``current_plan``
+#      - The current plan that the agent is executing
+#    * -
+#      - ``storage``
+#      - The storage for historical plans, used for retrieving and restoring historical plans
+#    * -
+#      - ``plan_to_hint``
+#      - A callable object that takes the current plan as input and generates a hint message to guide the agent to finish the plan
+#    * - method
+#      - ``list_tools``
+#      - List all the tool functions provided by the `PlanNotebook` class
+#    * -
+#      - ``get_current_hint``
+#      - Get the hint message for the current plan, which will call the ``plan_to_hint`` function
+#    * -
+#      - | ``create_plan``,
+#        | ``view_subtasks``,
+#        | ``revise_current_plan``,
+#        | ``update_subtask_state``,
+#        | ``finish_subtask``,
+#        | ``finish_plan``,
+#        | ``view_historical_plans``,
+#        | ``recover_historical_plan``
+#      - The tool functions that allow the agent to manage the plan and subtasks
+#    * -
+#      - ``register_plan_change_hook``
+#      - Register a hook function that will be called when the plan is changed, used for plan visualization and monitoring
+#    * -
+#      - ``remove_plan_change_hook``
+#      - Remove a registered plan change hook function
+#
+# The ``list_tools`` method is a quick way to obtain all tool functions, so that you can register them to the agent's toolkit.
+
+plan_notebook = PlanNotebook()
+
+
+async def list_tools() -> None:
+    """Print the name of each tool returned by ``plan_notebook.list_tools()``."""
+    print("The tools provided by PlanNotebook:")
+    for name in (fn.__name__ for fn in plan_notebook.list_tools()):
+        print(name)
+
+
+asyncio.run(list_tools())
+
+
+# %%
+# Working with ReActAgent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# The `ReActAgent` in AgentScope has integrated the plan module by a ``plan_notebook`` parameter in its constructor.
+# Once provided, the agent will
+#
+# - be equipped with the plan management tool functions, and
+# - be inserted with the hint messages at the beginning of each reasoning step
+#
+# There are two ways to use the plan module with the `ReActAgent`:
+#
+# - Manual plan specification: Users can manually create a plan by calling the ``create_plan`` tool function, and initialize the `ReActAgent` with the plan notebook.
+# - Agent-managed plan execution: The agent will create and manage the plan by itself, by calling the plan management tool functions.
+#
+# Manual Plan Specification
+# ---------------------------------
+# Manually creating a plan is straightforward by calling the ``create_plan`` tool function.
+# The following is an example of manually creating a plan to conduct a comprehensive research on the LLM-empowered agent.
+#
+async def manual_plan_specification() -> None:
+    """Create a research plan by hand, then print the notebook's hint.
+
+    The plan is created on the module-level ``plan_notebook``.
+    """
+    # Final deliverable, shared by the plan and by its last subtask
+    report_outcome = "A Markdown format report answer three questions: 1. What's agent? 2. What's the current state of the art of agent? 3. What's the future trend of agent?"
+    await plan_notebook.create_plan(
+        name="Research on Agent",
+        description="Conduct a comprehensive research on the LLM-empowered agent.",
+        expected_outcome=report_outcome,
+        subtasks=[
+            SubTask(
+                name="Search agent-related survey papers",
+                description=(
+                    "Search for survey papers on multiple sources, including "
+                    "Google Scholar, arXiv, and Semantic Scholar. Must be "
+                    "published after 2021 and have more than 50 citations."
+                ),
+                expected_outcome="A paper list in Markdown format",
+            ),
+            SubTask(
+                name="Read and summarize the papers",
+                description=(
+                    "Read the papers found in the previous step, and "
+                    "summarize the key points, including the definition, "
+                    "taxonomy, challenges, and key directions."
+                ),
+                expected_outcome="A summary of the key points in Markdown format",
+            ),
+            SubTask(
+                name="Research on recent advances of large company",
+                description=(
+                    "Research on the recent advances of large companies, "
+                    "including Google, Microsoft, OpenAI, Anthropic, Alibaba "
+                    "and Meta. Find the official blogs or news articles."
+                ),
+                expected_outcome="A summary of the recent advances of large companies",
+            ),
+            SubTask(
+                name="Write a report",
+                description=(
+                    "Write a report based on the previous steps, and answer "
+                    "the three questions in the expected outcome."
+                ),
+                expected_outcome=report_outcome,
+            ),
+        ],
+    )
+    print("The current hint message:\n")
+    msg = await plan_notebook.get_current_hint()
+    print(f"{msg.name}: {msg.content}")
+
+
+asyncio.run(manual_plan_specification())
+
+# %%
+# After creating the plan, you can initialize the `ReActAgent` with the
+# plan notebook as follows:
+
+agent = ReActAgent(
+    name="Friday",
+    sys_prompt="You are a helpful assistant.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],  # raises KeyError when unset
+    ),
+    formatter=DashScopeChatFormatter(),
+    plan_notebook=plan_notebook,  # the notebook that holds the manual plan
+)
+
+# %%
+# Agent-Managed Plan Execution
+# ---------------------------------
+# The agent can also create and manage the plan by itself by calling the plan management tool functions.
+# We just need to initialize the `ReActAgent` with the plan notebook as follows:
+#
+
+agent = ReActAgent(  # rebinds ``agent``, replacing the previous instance
+    name="Friday",
+    sys_prompt="You are a helpful assistant.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],  # raises KeyError when unset
+    ),
+    formatter=DashScopeChatFormatter(),
+    plan_notebook=PlanNotebook(),  # fresh notebook, no plan created beforehand
+)
+
+# %%
+# After that, we can build a loop to interact with the agent as follows.
+# When the task is complex, the agent will create a plan by itself and
+# execute the plan step by step.
+#
+# .. code-block:: python
+#     :caption: Build conversation with the plan agent
+#
+#     async def interact_with_agent() -> None:
+#         """Interact with the plan agent."""
+#         user = UserAgent(name="user")
+#
+#         msg = None
+#         while True:
+#             msg = await user(msg)
+#             if msg.get_text_content() == "exit":
+#                 break
+#             msg = await agent(msg)
+#
+#     asyncio.run(interact_with_agent())
+#
+#
+# Plan Visualization and Monitoring
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# AgentScope supports real-time visualization and monitoring of the plan
+# execution through plan change hook functions.
+#
+# The hooks are triggered when the plan is changed by calling the tool
+# functions. A template of the plan change hook function is as follows:
+#
+
+
+def plan_change_hook_template(self: PlanNotebook, plan: Plan) -> None:
+    """A template of the plan change hook function; the body is left empty.
+
+    Args:
+        self (`PlanNotebook`):
+            The PlanNotebook instance.
+        plan (`Plan`):
+            The current plan instance (after the change).
+    """
+    # Template only: forward ``plan`` to the frontend (or process it) here
--- a/docs/tutorial/en/src/task_prompt.py
+++ b/docs/tutorial/en/src/task_prompt.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+"""
+.. _prompt:
+
+Prompt Formatter
+=========================
+
+The formatter module in AgentScope is responsible for
+
+- converting messages into the expected format for different LLM APIs,
+- (optional) truncating messages to fit within token limits,
+- (optional) prompt engineering, e.g. summarizing long conversations.
+
+The last two are optional and can also be handled by developers within the memory or at the agent level.
+
+In AgentScope, there are two types of formatters, "ChatFormatter" and "MultiAgentFormatter", distinguished by the agent identities in their input messages.
+
+- **ChatFormatter**: Designed for the standard user-assistant scenario (chatbot), using the ``role`` field to identify the user and assistant.
+- **MultiAgentFormatter**: Designed for the multi-agent scenario, using the ``name`` field to identify different agents and combining the conversation history into a single user message dictionary.
+
+The built-in formatters are listed below
+
+.. list-table:: The built-in formatters in AgentScope
+    :header-rows: 1
+
+    * - API Provider
+      - User-assistant Scenario
+      - Multi-Agent Scenario
+    * - OpenAI
+      - ``OpenAIChatFormatter``
+      - ``OpenAIMultiAgentFormatter``
+    * - Anthropic
+      - ``AnthropicChatFormatter``
+      - ``AnthropicMultiAgentFormatter``
+    * - DashScope
+      - ``DashScopeChatFormatter``
+      - ``DashScopeMultiAgentFormatter``
+    * - Gemini
+      - ``GeminiChatFormatter``
+      - ``GeminiMultiAgentFormatter``
+    * - Ollama
+      - ``OllamaChatFormatter``
+      - ``OllamaMultiAgentFormatter``
+    * - DeepSeek
+      - ``DeepSeekChatFormatter``
+      - ``DeepSeekMultiAgentFormatter``
+    * - vLLM
+      - ``OpenAIChatFormatter``
+      - ``OpenAIMultiAgentFormatter``
+
+.. tip:: The OpenAI API supports the `name` field, so `OpenAIChatFormatter` can also be used in the multi-agent scenario. You can also use the `OpenAIMultiAgentFormatter` instead, which combines the conversation history into a single user message.
+
+Besides, the built-in formatters support converting different message blocks into the expected format for the target API, as listed below:
+
+.. list-table:: The supported message blocks in the built-in formatters
+    :header-rows: 1
+
+    * - Formatter
+      - tool_use/result
+      - image
+      - audio
+      - video
+      - thinking
+    * - ``OpenAIChatFormatter``
+      - ✅
+      - ✅
+      - ✅
+      - ❌
+      -
+    * - ``DashScopeChatFormatter``
+      - ✅
+      - ✅
+      - ✅
+      - ❌
+      -
+    * - ``DashScopeMultiAgentFormatter``
+      - ✅
+      - ✅
+      - ✅
+      - ❌
+      -
+    * - ``AnthropicChatFormatter``
+      - ✅
+      - ✅
+      - ❌
+      - ❌
+      - ✅
+    * - ``AnthropicMultiAgentFormatter``
+      - ✅
+      - ✅
+      - ❌
+      - ❌
+      - ✅
+    * - ``GeminiChatFormatter``
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+      -
+    * - ``GeminiMultiAgentFormatter``
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+      -
+    * - ``OllamaChatFormatter``
+      - ✅
+      - ✅
+      - ❌
+      - ❌
+      -
+    * - ``OllamaMultiAgentFormatter``
+      - ✅
+      - ✅
+      - ❌
+      - ❌
+      -
+    * - ``DeepSeekChatFormatter``
+      - ✅
+      - ❌
+      - ❌
+      - ❌
+      -
+    * - ``DeepSeekMultiAgentFormatter``
+      - ✅
+      - ❌
+      - ❌
+      - ❌
+      -
+
+.. note:: As stated in the `official documentation <https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#preserving-thinking-blocks>`_, only Anthropic suggests to preserve the thinking blocks in prompt formatting. For the others, we just ignore the thinking blocks in the input messages.
+
+ReAct-Oriented Formatting
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The built-in formatters are all designed to support ReAct-style agents, where the input messages **consist of alternating conversation history and tool call sequences**.
+
+In user-assistant scenario, the conversation history includes the user and assistant messages, we just convert them into the expected format directly.
+However, in multi-agent scenario, the conversation history is a list of messages from different agents as follows:
+
+.. figure:: ../../_static/images/multiagent_msgs.png
+    :alt: example of multiagent messages
+    :width: 85%
+    :align: center
+
+    *Example of multi-agent messages*
+
+
+Therefore, we have to merge the conversation history into a single user message with tags "<history>" and "</history>".
+Taking DashScope as an example, the formatted message will look like this:
+"""
+
+from agentscope.token import HuggingFaceTokenCounter
+from agentscope.formatter import DashScopeMultiAgentFormatter
+from agentscope.message import Msg, ToolResultBlock, ToolUseBlock, TextBlock
+import asyncio, json
+
+
+input_msgs = [
+    # System prompt of the agent named Friday
+    Msg("system", "You're a helpful assistant named Friday", "system"),
+    # First conversation history segment: messages from Bob, Alice, Charlie
+    Msg("Bob", "Hi, Alice, do you know the nearest library?", "assistant"),
+    Msg(
+        "Alice",
+        "Sorry, I don't know. Do you have any idea, Charlie?",
+        "assistant",
+    ),
+    Msg(
+        "Charlie",
+        "No, let's ask Friday. Friday, get me the nearest library.",
+        "assistant",
+    ),
+    # Tool sequence: two tool_use / tool_result pairs, matched by ``id``
+    Msg(
+        "Friday",
+        [
+            ToolUseBlock(
+                type="tool_use",
+                name="get_current_location",
+                id="1",
+                input={},
+            ),
+        ],
+        "assistant",
+    ),
+    Msg(
+        "system",
+        [
+            ToolResultBlock(
+                type="tool_result",
+                name="get_current_location",
+                id="1",
+                output=[TextBlock(type="text", text="104.48, 36.30")],
+            ),
+        ],
+        "system",
+    ),
+    Msg(
+        "Friday",
+        [
+            ToolUseBlock(
+                type="tool_use",
+                name="search_around",
+                id="2",
+                input={"location": [104.48, 36.30], "keyword": "library"},
+            ),
+        ],
+        "assistant",
+    ),
+    Msg(
+        "system",
+        [
+            ToolResultBlock(
+                type="tool_result",
+                name="search_around",
+                id="2",
+                output=[TextBlock(type="text", text="[...]")],
+            ),
+        ],
+        "system",
+    ),
+    # Second conversation history segment, following the tool sequence
+    Msg("Friday", "The nearest library is ...", "assistant"),
+    Msg("Bob", "Thanks, Friday!", "assistant"),
+    Msg("Alice", "Let's go together.", "assistant"),
+]
+
+
+async def run_formatter_example() -> list[dict]:
+    """Format ``input_msgs`` for DashScope, print the result and return it."""
+    formatted = await DashScopeMultiAgentFormatter().format(input_msgs)
+    # Dump as indented JSON so the formatted messages are readable
+    print("The formatted message:")
+    print(json.dumps(formatted, indent=4))
+    return formatted
+
+
+formatted_message = asyncio.run(run_formatter_example())
+
+# %%
+# Specifically, the conversation histories are formatted into:
+#
+print("The first conversation history:")
+print(formatted_message[1]["content"])
+
+print("\nThe second conversation history:")
+print(formatted_message[-1]["content"])
+
+# %%
+# Truncation-based Formatting
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# With the token module in AgentScope, the built-in formatters support truncating the input messages by **deleting the oldest messages** (except the system prompt message) when the number of tokens exceeds the limit.
+#
+# Taking ``DashScopeMultiAgentFormatter`` as an example, we first calculate the total number of tokens of the input messages.
+#
+
+
+async def run_token_counter() -> int:
+    """Return the token count of the module-level ``formatted_message``."""
+    # A HuggingFace token counter is used for DashScope models.
+    counter = HuggingFaceTokenCounter(
+        pretrained_model_name_or_path="Qwen/Qwen2.5-VL-3B-Instruct",
+        use_mirror=False,
+    )
+    n_counted = await counter.count(formatted_message)
+    return n_counted
+
+
+# %%
+# Then we set the maximum token limit to 20 tokens less than the total number of tokens and run the formatter.
+#
+
+
+async def run_truncated_formatter() -> None:
+    """Format ``input_msgs`` with a limit 20 tokens below the full count."""
+    token_counter = HuggingFaceTokenCounter(
+        pretrained_model_name_or_path="Qwen/Qwen2.5-VL-3B-Instruct",
+        use_mirror=False,
+    )
+    n_tokens = await token_counter.count(formatted_message)  # untruncated
+    formatter = DashScopeMultiAgentFormatter(
+        token_counter=token_counter,
+        max_tokens=n_tokens - 20,
+    )
+    truncated_formatted_message = await formatter.format(input_msgs)
+    n_truncated_tokens = await token_counter.count(truncated_formatted_message)
+    print("The tokens after truncation: ", n_truncated_tokens)
+    print("\nThe conversation history after truncation:")
+    print(truncated_formatted_message[1]["content"])
+
+
+# %%
+# We can see the first two messages from Bob and Alice are removed to fit within the context length limits.
+#
+#
+# Customizing Formatter
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope provides two base classes ``FormatterBase`` and its child class ``TruncatedFormatterBase``.
+# The ``TruncatedFormatterBase`` class provides the FIFO truncation strategy, and all the built-in formatters are inherited from it.
+#
+# .. list-table:: The base classes of formatters in AgentScope
+#   :header-rows: 1
+#
+#   * - Class
+#     - Abstract Method
+#     - Description
+#   * - ``FormatterBase``
+#     - ``format``
+#     - Format the input ``Msg`` objects into the expected format for the target API
+#   * - ``TruncatedFormatterBase``
+#     - ``_format_agent_message``
+#     - Format the agent messages, which may contain multiple identities in multi-agent scenario
+#   * -
+#     - ``_format_tool_sequence``
+#     - Format the tool use and result sequence into the expected format
+#   * -
+#     - ``_format`` (optional)
+#     - Format the input ``Msg`` objects into the expected format for the target API
+#
+# .. tip:: - The ``_format`` in ``TruncatedFormatterBase`` groups input messages into agent messages and tool sequences, and then format them by calling ``_format_agent_message`` and ``_format_tool_sequence`` respectively. You can override it to implement your own formatting strategy.
+#  - Optionally, you can override the ``_truncate`` method in ``TruncatedFormatterBase`` to implement your own truncation strategy.
+#
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# - :ref:`token`
+# - :ref:`model`
+#
--- a/docs/tutorial/en/src/task_rag.py
+++ b/docs/tutorial/en/src/task_rag.py
@@ -0,0 +1,422 @@
+# -*- coding: utf-8 -*-
+"""
+.. _rag:
+
+RAG
+===========================
+
+AgentScope provides built-in support for Retrieval-Augmented Generation (RAG)
+tasks. This tutorial demonstrates
+
+- how to use the RAG module in AgentScope,
+- how to use **multimodal** RAG,
+- how to integrate the RAG module with the ``ReActAgent`` in
+    - **agentic manner** and
+    - **generic manner**:
+
+.. list-table:: RAG module integration methods
+    :header-rows: 1
+
+    * - Integration Manner
+      - Description
+      - Advantages
+      - Disadvantages
+    * - Agentic Manner
+      - The RAG module is integrated with the agent as a tool, and the agent can decide when to retrieve knowledge and the queries to be retrieved.
+      - - The query rewriting and knowledge retrieval are integrated into the ReAct process, which is more flexible,
+        - the agent can rewrite the query based on all the available information,
+        - only retrieve knowledge when necessary.
+      - High requirements for the LLM's reasoning and tool-use capabilities.
+    * - Generic Manner
+      - Retrieve knowledge at the beginning of each reply, and attach the retrieved knowledge to the prompt in a user message.
+      - - Simple, easy to implement,
+        - does not require high reasoning and tool-use capabilities from the LLM.
+      - - Still retrieves knowledge even when not necessary, and
+        - if the retrieval is imperceptible to the user, the waiting time may be longer.
+
+.. note:: As an open-source project, AgentScope doesn't insist that developers
+ use the built-in RAG module. Our goal is to make development easier and
+ more enjoyable, so integrating other RAG implementations, frameworks, or
+ services is welcome and encouraged!
+
+"""
+import asyncio
+import json
+import os
+
+from matplotlib import pyplot as plt
+
+import agentscope
+from agentscope.agent import ReActAgent
+from agentscope.embedding import (
+    DashScopeTextEmbedding,
+    DashScopeMultiModalEmbedding,
+)
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.rag import (
+    TextReader,
+    SimpleKnowledge,
+    QdrantStore,
+    Document,
+    ImageReader,
+)
+from agentscope.tool import Toolkit
+
+# %%
+# Using RAG Module
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# The RAG module in AgentScope is composed of two core components:
+#
+# - **Reader**: responsible for reading and chunking the input documents.
+# - **Knowledge**: responsible for algorithm implementation of knowledge retrieval and updating.
+#
+# .. note:: We're integrating more vector databases and readers into AgentScope. Contributions are welcome!
+#
+# The currently built-in readers include:
+#
+
+for _ in agentscope.rag.__all__:
+    if _.endswith("Reader"):
+        print(f"- {_}")
+
+# %%
+# These readers are responsible for reading data and chunking it into ``Document`` objects.
+# The ``Document`` class has the following fields:
+#
+# - ``metadata``: the metadata of the document, including the content, doc_id, chunk_id, and total_chunks.
+# - ``embedding``: the embedding vector of the document, which will be filled when the document is added to or retrieved from the knowledge base.
+# - ``score``: the relevance score of the document, which will be filled when the document is retrieved from the knowledge base.
+#
+# Taking the ``TextReader`` as an example, it can read and chunk documents from text strings.
+#
+
+
+async def example_text_reader(print_docs: bool) -> list[Document]:
+    """Chunk a small fake profile with ``TextReader`` and return the chunks.
+
+    Args:
+        print_docs (`bool`):
+            Whether to print the number of chunks together with the score
+            and metadata of every chunk.
+
+    Returns:
+        `list[Document]`:
+            The documents produced by the reader.
+    """
+    # Fake personal profile for demonstration
+    profile = (
+        "I'm John Doe, 28 years old.\n"
+        "I live in San Francisco. I work at OpenAI as a "
+        "software engineer. I love hiking and photography.\n"
+        "My father is Michael Doe, a doctor. I'm very proud of him. "
+        "My mother is Sarah Doe, a teacher. She is very kind and "
+        "always helps me with my studies.\n"
+        "I'm now a PhD student at Stanford University, majoring in "
+        "Computer Science. My advisor is Prof. Jane Williams, who is "
+        "a leading expert in artificial intelligence. I have published "
+        "several papers in top conferences, such as NeurIPS and ICML.\n"
+        "My best friend is James Smith.\n"
+    )
+
+    # Text reader with a chunk size of 512 that splits the text by paragraph
+    text_reader = TextReader(chunk_size=512, split_by="paragraph")
+
+    # Read documents from the text string
+    chunks = await text_reader(text=profile)
+
+    # Nothing to show: hand the chunks back right away
+    if not print_docs:
+        return chunks
+
+    print("The length of the documents:", len(chunks))
+    for position, chunk in enumerate(chunks):
+        print("Document #", position)
+        print("\tScore: ", chunk.score)
+        print("\tMetadata: ", json.dumps(chunk.metadata, indent=2), "\n")
+
+    return chunks
+
+
+docs = asyncio.run(example_text_reader(print_docs=True))
+
+# %%
+# Note that there is no universally best chunk size or splitting method, especially for PDF files, so we highly
+# encourage developers to implement or contribute their own readers according to their specific scenarios.
+# To create a custom reader, you only need to inherit the ``ReaderBase`` class and implement the ``__call__`` method.
+#
+# After chunking the documents, we can create a knowledge base to store the documents and perform retrieval.
+# Such a knowledge base is initialized by providing **an embedding model** and **an embedding store** (also known as a vector database).
+# AgentScope provides built-in support for `Qdrant <https://qdrant.tech/>`_ as the embedding store and a simple knowledge base implementation ``SimpleKnowledge``.
+# They can be used as follows:
+#
+# .. note::
+#
+#  - We're integrating more vector databases into AgentScope. Contributions are welcome!
+#  - The Qdrant store supports various storage backends by the ``location`` parameter, including in-memory, local file, and remote server. Refer to the `Qdrant documentation <https://qdrant.tech/>`_ for more details.
+#
+
+
+async def build_knowledge_base() -> SimpleKnowledge:
+    """Create an in-memory knowledge base, fill it and run one sample query.
+
+    Returns:
+        `SimpleKnowledge`:
+            The knowledge base holding the sample documents.
+    """
+    # Chunk the sample text with the text reader (without printing)
+    chunks = await example_text_reader(print_docs=False)
+
+    # The embedding model converts text to embedding vectors
+    text_embedding = DashScopeTextEmbedding(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="text-embedding-v4",
+        dimensions=1024,
+    )
+
+    # Qdrant serves as the embedding store
+    vector_store = QdrantStore(
+        location=":memory:",  # Use in-memory storage for demonstration
+        collection_name="test_collection",
+        dimensions=1024,  # The dimension of the embedding vectors
+    )
+
+    # Combine both into a knowledge base instance
+    kb = SimpleKnowledge(
+        embedding_model=text_embedding,
+        embedding_store=vector_store,
+    )
+
+    # Insert the chunks into the knowledge base
+    await kb.add_documents(chunks)
+
+    # Retrieve the documents relevant to a sample query
+    hits = await kb.retrieve(
+        query="Who is John Doe's father?",
+        limit=3,
+        score_threshold=0.5,
+    )
+
+    print("Retrieved Documents:")
+    for hit in hits:
+        print(hit, "\n")
+
+    return kb
+
+
+knowledge = asyncio.run(build_knowledge_base())
+
+# %%
+# The knowledge base class provides two main methods: ``add_documents`` and
+# ``retrieve``, which are used to add documents to the knowledge base and
+# retrieve relevant documents based on a given query, respectively.
+#
+# In addition, the knowledge base class also provides a convenient method
+# ``retrieve_knowledge``, which wraps the ``retrieve`` method into a tool
+# function that can be directly registered in the toolkit of an agent.
+#
+#
+# Customizing RAG Components
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope supports and encourages developers to customize their own RAG components, including readers, knowledge bases and embedding stores.
+# Specifically, we provide the following base classes for customization:
+#
+# .. list-table:: RAG Base Classes
+#     :header-rows: 1
+#
+#     * - Base Class
+#       - Description
+#       - Abstract Methods
+#     * - ``ReaderBase``
+#       - The base class for all readers.
+#       - ``__call__``
+#     * - ``VDBStoreBase``
+#       - The base class for embedding stores (vector databases).
+#       - | ``add``
+#         | ``search``
+#         | ``get_client`` (optional)
+#         | ``delete`` (optional)
+#     * - ``KnowledgeBase``
+#       - The base class for knowledge bases.
+#       - | ``retrieve``
+#         | ``add_documents``
+#
+#
+# The ``get_client`` method in the ``VDBStoreBase`` allows developers to access the full functionality of the underlying vector database,
+# so that they can implement more advanced features based on the vector database, e.g. index management, advanced search, etc.
+#
+# Integrating with ReActAgent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Next we demonstrate how to integrate the RAG module with the ``ReActAgent``
+# class in AgentScope in agentic and generic manners.
+#
+# Agentic Manner
+# --------------------------------
+# In agentic manner, the ReAct agent is empowered with the ability to decide when to retrieve knowledge and the queries to be retrieved.
+# It's very easy to integrate the RAG module with the ``ReActAgent`` class in AgentScope, just by registering the ``retrieve_knowledge`` method of the knowledge base as a tool,
+# and providing a proper description for the tool.
+
+
+async def example_agentic_manner() -> None:
+    """Let a ReAct agent query the knowledge base through a registered tool."""
+    # Start with an empty toolkit; the retrieval tool is registered later
+    tools = Toolkit()
+
+    # DashScope chat model used by the ReAct agent
+    chat_model = DashScopeChatModel(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="qwen-max",
+    )
+
+    friday = ReActAgent(
+        name="Friday",
+        sys_prompt="You are a helpful assistant named Friday.",
+        model=chat_model,
+        formatter=DashScopeChatFormatter(),
+        toolkit=tools,
+    )
+
+    print("The first response: ")
+    # Mention John Doe first, before any retrieval tool is available
+    await friday(Msg("user", "John Doe is my best friend.", "user"))
+
+    # Register the retrieve_knowledge method as a tool function in the toolkit
+    tools.register_tool_function(
+        knowledge.retrieve_knowledge,
+        func_description=(  # Provide a clear description for the tool
+            "The tool used to retrieve documents relevant to the given query. "
+            "Use this tool when you need to find some information about John Doe."
+        ),
+    )
+
+    print("\n\nThe second response: ")
+    # We hope the agent can rewrite the query to be more specific, e.g.
+    # "Who is John Doe's father?" or "John Doe's father"
+    await friday(Msg("user", "Do you know who his father is?", "user"))
+
+
+asyncio.run(example_agentic_manner())
+
+# %%
+# In the above example, our question is "Do you know who his father is?".
+# We hope the agent can rewrite the query with the historical information, and
+# rewrite it to be more specific, e.g. "Who is John Doe's father?" or "John Doe's father".
+#
+#
+# Generic Manner
+# --------------------------------
+# The ``ReActAgent`` also integrates the RAG module in a generic manner, which
+# retrieves knowledge at the beginning of each reply, and attaches the
+# retrieved knowledge to the prompt in a user message.
+#
+# Just set the ``knowledge`` parameter of the ``ReActAgent``, and the agent
+# will automatically retrieve knowledge at the beginning of each reply.
+#
+
+
+async def example_generic_manner() -> None:
+    """Attach the knowledge base to a ReAct agent via ``knowledge``."""
+    # DashScope chat model used by the ReAct agent
+    chat_model = DashScopeChatModel(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="qwen-max",
+    )
+
+    friday = ReActAgent(
+        name="Friday",
+        sys_prompt="You are a helpful assistant named Friday.",
+        model=chat_model,
+        formatter=DashScopeChatFormatter(),
+        # Hand the knowledge base built earlier to the agent
+        knowledge=knowledge,
+    )
+
+    question = Msg("user", "Do you know who John Doe's father is?", "user")
+    await friday(question)
+
+    print("Take a look at the agent's memory:")
+    history = await friday.memory.get_memory()
+    # Dump the content of the second message kept in memory
+    print(json.dumps(history[1].content, indent=2, ensure_ascii=False))
+
+
+asyncio.run(example_generic_manner())
+
+
+# %%
+# Multimodal RAG
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# The RAG module in AgentScope supports multimodal RAG natively, as
+#
+# - AgentScope supports multimodal embedding API, e.g. ``DashScopeMultiModalEmbedding``.
+# - The ``Document`` class supports text, image, and other modalities in its ``metadata`` field.
+#
+# Thus, we can directly use the multimodal reader and embedding model to build
+# a multimodal knowledge base as follows.
+#
+# First we prepare an image with some text about my name.
+
+# Prepare an image with the text "My name is Tony Stank"
+path_image = "./example.jpg"
+plt.figure(figsize=(8, 3))
+plt.text(
+    0.5,
+    0.5,
+    "My name is Tony Stank",
+    ha="center",
+    va="center",
+    fontsize=30,
+)
+plt.axis("off")
+plt.savefig(path_image, bbox_inches="tight", pad_inches=0.1)
+plt.close()
+
+# %%
+# Then we can build a multimodal knowledge base with the image document.
+# The example is the same as before, just using the ``ImageReader`` and
+# ``DashScopeMultiModalEmbedding`` instead of the text counterparts.
+#
+
+
+async def example_multimodal_rag() -> None:
+    """Ask an agent a question backed by an image knowledge base."""
+    # Turn the prepared image into documents with the ImageReader
+    image_reader = ImageReader()
+    image_docs = await image_reader(image_url=path_image)
+
+    # Multimodal embedding model for the image documents
+    image_embedding = DashScopeMultiModalEmbedding(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="multimodal-embedding-v1",
+        dimensions=1024,
+    )
+
+    # In-memory Qdrant store, as in the text example
+    vector_store = QdrantStore(
+        location=":memory:",
+        collection_name="test_collection",
+        dimensions=1024,
+    )
+
+    # Knowledge base holding the new image document
+    image_kb = SimpleKnowledge(
+        embedding_model=image_embedding,
+        embedding_store=vector_store,
+    )
+    await image_kb.add_documents(image_docs)
+
+    vision_model = DashScopeChatModel(
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        model_name="qwen-vl-max",
+    )
+
+    friday = ReActAgent(
+        name="Friday",
+        sys_prompt="You are a helpful assistant named Friday.",
+        model=vision_model,
+        formatter=DashScopeChatFormatter(),
+        knowledge=image_kb,
+    )
+
+    await friday(Msg("user", "What's my name?", "user"))
+
+    # Show the second message kept in the agent's memory
+    print("\nThe image is attached in the agent's memory:")
+    history = await friday.memory.get_memory()
+    print(history[1])
+
+
+asyncio.run(example_multimodal_rag())
+
+# %%
+# We can see that the agent can answer the question based on the retrieved
+# image.
--- a/docs/tutorial/en/src/task_realtime.py
+++ b/docs/tutorial/en/src/task_realtime.py
@@ -0,0 +1,496 @@
+# -*- coding: utf-8 -*-
+"""
+.. _realtime:
+
+Realtime Agent
+====================
+
+The **realtime** agent is designed to handle real-time interactions, such as
+voice conversations or live chat sessions.
+The realtime agent in AgentScope features:
+
+- Integration with OpenAI, DashScope, Gemini, and other realtime model APIs
+- Unified event interface to simplify interactions with different realtime models
+- Support for tool calling capabilities
+- Support for multi-agent interactions
+
+.. note:: The realtime agent is currently under active development. We welcome
+    community contributions, discussions, and feedback! If you're interested in
+    realtime agents, please join our discussion and development.
+
+"""
+
+import asyncio
+import os
+from agentscope.agent import RealtimeAgent
+from agentscope.realtime import (
+    DashScopeRealtimeModel,
+    OpenAIRealtimeModel,
+    GeminiRealtimeModel,
+)
+
+# %%
+# Creating Realtime Models
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# AgentScope currently supports the following realtime model APIs:
+#
+# .. list-table::
+#    :header-rows: 1
+#    :widths: 15 25 25 15 20
+#
+#    * - Provider
+#      - Class
+#      - Supported Models
+#      - Input Modalities
+#      - Tool Support
+#    * - DashScope
+#      - ``DashScopeRealtimeModel``
+#      - ``qwen3-omni-flash-realtime``
+#      - Text, Audio, Image
+#      - No
+#    * - OpenAI
+#      - ``OpenAIRealtimeModel``
+#      - ``gpt-4o-realtime-preview``
+#      - Text, Audio
+#      - Yes
+#    * - Gemini
+#      - ``GeminiRealtimeModel``
+#      - ``gemini-2.5-flash-native-audio-preview-09-2025``
+#      - Text, Audio, Image
+#      - Yes
+#
+#
+# Here are examples of initializing different realtime models:
+#
+# .. code-block:: python
+#     :caption: Example of initializing different realtime models
+#
+#     # DashScope realtime model
+#     dashscope_model = DashScopeRealtimeModel(
+#         model_name="qwen3-omni-flash-realtime",
+#         api_key=os.getenv("DASHSCOPE_API_KEY"),
+#         voice="Cherry",  # Options: "Cherry", "Serena", "Ethan", "Chelsie"
+#         enable_input_audio_transcription=True,
+#     )
+#
+#     # OpenAI realtime model
+#     openai_model = OpenAIRealtimeModel(
+#         model_name="gpt-4o-realtime-preview",
+#         api_key=os.getenv("OPENAI_API_KEY"),
+#         voice="alloy",  # Options: "alloy", "echo", "marin", "cedar"
+#         enable_input_audio_transcription=True,
+#     )
+#
+#     # Gemini realtime model
+#     gemini_model = GeminiRealtimeModel(
+#         model_name="gemini-2.5-flash-native-audio-preview-09-2025",
+#         api_key=os.getenv("GEMINI_API_KEY"),
+#         voice="Puck",  # Options: "Puck", "Charon", "Kore", "Fenrir"
+#         enable_input_audio_transcription=True,
+#     )
+#
+# The realtime model provides the following key methods:
+#
+# .. list-table::
+#    :header-rows: 1
+#    :widths: 30 70
+#
+#    * - Method
+#      - Description
+#    * - ``connect(outgoing_queue, instructions, tools)``
+#      - Establish WebSocket connection to the realtime model API
+#    * - ``disconnect()``
+#      - Close the WebSocket connection
+#    * - ``send(data)``
+#      - Send audio/text/image data to the realtime model for processing
+#
+# The ``outgoing_queue`` parameter in ``connect()`` is an asyncio queue used to
+# forward events from the realtime model to the outside (e.g., the agent or frontend).
+#
+#
+# Model Events Interface
+# -----------------------
+#
+# AgentScope provides a unified ``agentscope.realtime.ModelEvents`` interface to simplify
+# interactions with different realtime models. The following events are
+# supported:
+#
+# .. note:: The "session" in ModelEvents refers to the WebSocket connection
+#     session between the realtime model and the model API, not the session
+#     between the frontend and backend.
+#
+# .. list-table::
+#    :header-rows: 1
+#    :widths: 40 60
+#
+#    * - Event
+#      - Description
+#    * - ``ModelEvents.ModelSessionCreatedEvent``
+#      - Session is successfully created
+#    * - ``ModelEvents.ModelSessionEndedEvent``
+#      - Session has ended
+#    * - ``ModelEvents.ModelResponseCreatedEvent``
+#      - Model begins generating a response
+#    * - ``ModelEvents.ModelResponseDoneEvent``
+#      - Model finished generating a response
+#    * - ``ModelEvents.ModelResponseAudioDeltaEvent``
+#      - Streaming audio data chunk from the model
+#    * - ``ModelEvents.ModelResponseAudioDoneEvent``
+#      - Audio response is complete
+#    * - ``ModelEvents.ModelResponseAudioTranscriptDeltaEvent``
+#      - Streaming transcription chunk of audio response
+#    * - ``ModelEvents.ModelResponseAudioTranscriptDoneEvent``
+#      - Audio transcription is complete
+#    * - ``ModelEvents.ModelResponseToolUseDeltaEvent``
+#      - Streaming tool call parameters
+#    * - ``ModelEvents.ModelResponseToolUseDoneEvent``
+#      - Tool call parameters are complete
+#    * - ``ModelEvents.ModelInputTranscriptionDeltaEvent``
+#      - Streaming transcription chunk of user input
+#    * - ``ModelEvents.ModelInputTranscriptionDoneEvent``
+#      - User input transcription is complete
+#    * - ``ModelEvents.ModelInputStartedEvent``
+#      - Detected start of user audio input (VAD)
+#    * - ``ModelEvents.ModelInputDoneEvent``
+#      - Detected end of user audio input (VAD)
+#    * - ``ModelEvents.ModelErrorEvent``
+#      - An error occurred
+#
+#
+#
+# Creating a Realtime Agent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The ``RealtimeAgent`` serves as a bridge layer that:
+#
+# - Converts ``ModelEvents`` from realtime models into ``ServerEvents`` for
+#   frontend and other agents
+# - Receives ``ClientEvents`` from frontend or other agents and forwards them
+#   to the realtime model API
+# - Manages the agent's lifecycle and event queues
+#
+# Server and Client Events
+# -------------------------
+#
+# AgentScope provides unified ``ServerEvents`` and ``ClientEvents`` for
+# communication between backend and frontend:
+#
+# **ServerEvents** (Backend → Frontend):
+#
+# .. list-table::
+#    :header-rows: 1
+#    :widths: 40 60
+#
+#    * - Event
+#      - Description
+#    * - ``ServerEvents.ServerSessionCreatedEvent``
+#      - Session created in backend
+#    * - ``ServerEvents.ServerSessionUpdatedEvent``
+#      - Session updated in backend
+#    * - ``ServerEvents.ServerSessionEndedEvent``
+#      - Session ended in backend
+#    * - ``ServerEvents.AgentReadyEvent``
+#      - Agent is ready to receive inputs
+#    * - ``ServerEvents.AgentEndedEvent``
+#      - Agent has ended
+#    * - ``ServerEvents.AgentResponseCreatedEvent``
+#      - Agent starts generating response
+#    * - ``ServerEvents.AgentResponseDoneEvent``
+#      - Agent finished generating response
+#    * - ``ServerEvents.AgentResponseAudioDeltaEvent``
+#      - Streaming audio chunk from agent
+#    * - ``ServerEvents.AgentResponseAudioDoneEvent``
+#      - Audio response complete
+#    * - ``ServerEvents.AgentResponseAudioTranscriptDeltaEvent``
+#      - Streaming transcription of agent response
+#    * - ``ServerEvents.AgentResponseAudioTranscriptDoneEvent``
+#      - Transcription complete
+#    * - ``ServerEvents.AgentResponseToolUseDeltaEvent``
+#      - Streaming tool call data
+#    * - ``ServerEvents.AgentResponseToolUseDoneEvent``
+#      - Tool call complete
+#    * - ``ServerEvents.AgentResponseToolResultEvent``
+#      - Tool execution result
+#    * - ``ServerEvents.AgentInputTranscriptionDeltaEvent``
+#      - Streaming transcription of user input
+#    * - ``ServerEvents.AgentInputTranscriptionDoneEvent``
+#      - Input transcription complete
+#    * - ``ServerEvents.AgentInputStartedEvent``
+#      - User audio input started
+#    * - ``ServerEvents.AgentInputDoneEvent``
+#      - User audio input ended
+#    * - ``ServerEvents.AgentErrorEvent``
+#      - An error occurred
+#
+# **ClientEvents** (Frontend → Backend):
+#
+# .. list-table::
+#    :header-rows: 1
+#    :widths: 40 60
+#
+#    * - Event
+#      - Description
+#    * - ``ClientEvents.ClientSessionCreateEvent``
+#      - Create a new session with specified configuration
+#    * - ``ClientEvents.ClientSessionEndEvent``
+#      - End current session
+#    * - ``ClientEvents.ClientResponseCreateEvent``
+#      - Request agent to generate response immediately
+#    * - ``ClientEvents.ClientResponseCancelEvent``
+#      - Interrupt agent's current response
+#    * - ``ClientEvents.ClientTextAppendEvent``
+#      - Append text input
+#    * - ``ClientEvents.ClientAudioAppendEvent``
+#      - Append audio input
+#    * - ``ClientEvents.ClientAudioCommitEvent``
+#      - Commit audio input (signal end of input)
+#    * - ``ClientEvents.ClientImageAppendEvent``
+#      - Append image input
+#    * - ``ClientEvents.ClientToolResultEvent``
+#      - Send tool execution result
+#
+# Initializing a Realtime Agent
+# ------------------------------
+#
+# Here's how to create and use a realtime agent:
+
+
+async def example_realtime_agent() -> None:
+    """Example of creating and using a realtime agent.
+
+    The agent pushes its events into an asyncio queue. A background task
+    drains that queue and prints the type of every event. The task is
+    cancelled once the agent has been stopped, so no pending task is left
+    behind.
+    """
+    agent = RealtimeAgent(
+        name="Friday",
+        sys_prompt="You are a helpful assistant named Friday.",
+        model=DashScopeRealtimeModel(
+            model_name="qwen3-omni-flash-realtime",
+            api_key=os.getenv("DASHSCOPE_API_KEY"),
+        ),
+    )
+
+    # Create a queue to receive messages from the agent
+    outgoing_queue = asyncio.Queue()
+
+    # Handle outgoing messages in a separate task
+    async def handle_agent_messages() -> None:
+        while True:
+            event = await outgoing_queue.get()
+            # Process the event (e.g., send to frontend via WebSocket)
+            print(f"Agent event: {event.type}")
+
+    # Start the message handling task. Keep a reference to it: the event
+    # loop only holds weak references to tasks, so an unreferenced task may
+    # be garbage collected before it is done.
+    handler_task = asyncio.create_task(handle_agent_messages())
+
+    try:
+        # Start the agent (establishes connection)
+        await agent.start(outgoing_queue)
+
+        # The agent is now ready to handle inputs
+
+        # Stop the agent when done
+        await agent.stop()
+    finally:
+        # The handler loops forever, so it has to be cancelled explicitly
+        handler_task.cancel()
+        try:
+            await handler_task
+        except asyncio.CancelledError:
+            pass
+
+
+# %%
+# Starting Realtime Conversation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Now we can set up a realtime conversation between a user and a realtime agent.
+#
+# Here we take FastAPI as an example backend framework to demonstrate how to set up
+# a realtime conversation.
+#
+# **Backend Setup (Server-side):**
+#
+# The backend needs to:
+#
+# 1. Create a WebSocket endpoint to accept frontend connections
+# 2. Create a ``RealtimeAgent`` when the session starts
+# 3. Forward ``ClientEvents`` from frontend to the agent
+# 4. Forward ``ServerEvents`` from agent to the frontend
+#
+# .. code-block:: python
+#
+#     from fastapi import FastAPI, WebSocket
+#     from agentscope.agent import RealtimeAgent
+#     from agentscope.realtime import (
+#         DashScopeRealtimeModel,
+#         ClientEvents,
+#         ServerEvents,
+#     )
+#
+#     app = FastAPI()
+#
+#     @app.websocket("/ws/{user_id}/{session_id}")
+#     async def websocket_endpoint(
+#         websocket: WebSocket,
+#         user_id: str,
+#         session_id: str,
+#     ):
+#         await websocket.accept()
+#
+#         # Create queue for agent messages
+#         frontend_queue = asyncio.Queue()
+#
+#         # Create agent
+#         agent = RealtimeAgent(
+#             name="Assistant",
+#             sys_prompt="You are a helpful assistant.",
+#             model=DashScopeRealtimeModel(
+#                 model_name="qwen3-omni-flash-realtime",
+#                 api_key=os.getenv("DASHSCOPE_API_KEY"),
+#             ),
+#         )
+#
+#         # Start agent
+#         await agent.start(frontend_queue)
+#
+#         # Forward messages from agent to frontend
+#         async def send_to_frontend():
+#             while True:
+#                 msg = await frontend_queue.get()
+#                 await websocket.send_json(msg.model_dump())
+#
+#         asyncio.create_task(send_to_frontend())
+#
+#         # Receive messages from frontend and forward to agent
+#         while True:
+#             data = await websocket.receive_json()
+#             client_event = ClientEvents.from_json(data)
+#             await agent.handle_input(client_event)
+#
+# **Frontend Setup (Client-side):**
+#
+# The frontend needs to:
+#
+# 1. Establish WebSocket connection to the backend
+# 2. Send ``CLIENT_SESSION_CREATE`` event to initialize the session
+# 3. Capture audio from microphone and send via ``CLIENT_AUDIO_APPEND`` events
+# 4. Receive and handle ``ServerEvents`` (e.g., play audio, display transcripts)
+#
+# .. code-block:: javascript
+#
+#     // Connect to WebSocket
+#     const ws = new WebSocket('ws://localhost:8000/ws/user1/session1');
+#
+#     ws.onopen = () => {
+#         // Create session
+#         ws.send(JSON.stringify({
+#             type: 'client_session_create',
+#             config: {
+#                 instructions: 'You are a helpful assistant.',
+#                 user_name: 'User1'
+#             }
+#         }));
+#     };
+#
+#     // Handle messages from backend
+#     ws.onmessage = (event) => {
+#         const data = JSON.parse(event.data);
+#         if (data.type === 'response_audio_delta') {
+#             // Play audio chunk
+#             playAudio(data.delta);
+#         }
+#     };
+#
+#     // Send audio data
+#     function sendAudioChunk(audioData) {
+#         ws.send(JSON.stringify({
+#             type: 'client_audio_append',
+#             session_id: 'session1',
+#             audio: audioData,  // base64 encoded
+#             format: { encoding: 'pcm16', sample_rate: 16000 }
+#         }));
+#     }
+#
+# For a complete working example, see
+# ``examples/agent/realtime_voice_agent/`` in the AgentScope repository.
+
+# %%
+# Multi-Agent Realtime Conversation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# AgentScope supports multi-agent realtime interactions through the ``ChatRoom``
+# class.
+#
+# Note that most realtime model APIs currently only support single-user interactions,
+# but AgentScope's architecture is designed to support multiple agents and users
+# when API capabilities expand.
+#
+# The Realtime ChatRoom
+# ----------------------------
+#
+# AgentScope introduces the ``ChatRoom`` class to manage multiple realtime
+# agents in a shared conversation space. The ChatRoom provides:
+#
+# - Centralized management of multiple ``RealtimeAgent`` instances
+# - Automatic message broadcasting between agents
+# - Unified message queue for frontend communication
+# - Lifecycle management for all agents in the room
+#
+# Using ChatRoom
+# --------------
+#
+# The usage of ``ChatRoom`` is similar to ``RealtimeAgent``:
+#
+
+
+async def example_chat_room() -> None:
+    """Run two realtime agents inside one shared ``ChatRoom``."""
+    from agentscope.pipeline import ChatRoom
+    from agentscope.agent import RealtimeAgent
+    from agentscope.realtime import ClientEvents, DashScopeRealtimeModel
+
+    # Name and system prompt of every agent in the room
+    profiles = (
+        ("Agent1", "You are Agent1, a helpful assistant."),
+        ("Agent2", "You are Agent2, a helpful assistant."),
+    )
+
+    # Create multiple agents, each with its own realtime model instance
+    members = [
+        RealtimeAgent(
+            name=agent_name,
+            sys_prompt=agent_prompt,
+            model=DashScopeRealtimeModel(
+                model_name="qwen3-omni-flash-realtime",
+                api_key=os.getenv("DASHSCOPE_API_KEY"),
+            ),
+        )
+        for agent_name, agent_prompt in profiles
+    ]
+
+    # Create a chat room with multiple agents
+    room = ChatRoom(agents=members)
+
+    # Queue that receives the messages from all agents
+    room_events = asyncio.Queue()
+
+    # Start the chat room
+    await room.start(room_events)
+
+    # Handle input from frontend
+    # The chat room will broadcast to all agents
+    greeting = ClientEvents.ClientTextAppendEvent(
+        session_id="session1",
+        text="Hello everyone!",
+    )
+    await room.handle_input(greeting)
+
+    # Stop the chat room when done
+    await room.stop()
+
+
+# %%
+# Roadmap
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The realtime agent feature is currently experimental and under active
+# development. The future plans include:
+#
+# - Support for more realtime model APIs
+# - Enhanced memory management for conversation history
+# - Comprehensive tool calling support across all providers
+# - Multi-user voice interaction support
+# - Improved VAD (Voice Activity Detection) configuration
+# - Better error handling and recovery mechanisms
+#
+# We welcome contributions and feedback from the community to help shape the
+# future of realtime agents in AgentScope!
--- a/docs/tutorial/en/src/task_state.py
+++ b/docs/tutorial/en/src/task_state.py
@@ -0,0 +1,216 @@
+# -*- coding: utf-8 -*-
+"""
+.. _state:
+
+State/Session Management
+=================================
+
+In AgentScope, the **"state"** refers to the agent status in the running application, including its current system prompt, memory, context, equipped tools, and other information that **changes over time**.
+
+To manage the state of an application, AgentScope designs an automatic state registration system and session-level state management, which features:
+
+- Support **automatic state registration** for all variables inherited from ``StateModule``
+- Support **manual state registration** with custom serialization/deserialization methods
+- Support **session/application-level management**
+"""
+import asyncio
+import json
+import os
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.module import StateModule
+from agentscope.session import JSONSession
+from agentscope.tool import Toolkit
+
+# %%
+# State Module
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# The ``StateModule`` class is the foundation for state management in AgentScope and provides three basic functions:
+#
+# .. list-table:: Methods of ``StateModule``
+#     :header-rows: 1
+#
+#     * - Method
+#       - Arguments
+#       - Description
+#     * - ``register_state``
+#       - | ``attr_name``,
+#         | ``custom_to_json`` (optional),
+#         | ``custom_from_json`` (optional)
+#       - Register an attribute as its state, with optional serialization/deserialization function.
+#     * - ``state_dict``
+#       - \-
+#       - Get the state dictionary of current object
+#     * - ``load_state_dict``
+#       - | ``state_dict``,
+#         | ``strict`` (optional)
+#       - Load the state dictionary to current object
+#
+# Within an object of ``StateModule``, all the following attributes will be treated as parts of its state:
+#
+# - the **attributes** that inherit from ``StateModule``
+# - the **attributes** registered by the ``register_state`` method
+#
+# Note the ``StateModule`` supports **NESTED** serialization and deserialization:
+#
+
+
+class ClassA(StateModule):
+    def __init__(self) -> None:
+        super().__init__()
+        self.cnt = 123
+        # register cnt attribute as state
+        self.register_state("cnt")
+
+
+class ClassB(StateModule):
+    def __init__(self) -> None:
+        super().__init__()
+
+        # attribute "a" inherits from StateModule
+        self.a = ClassA()
+
+        # register attribute "c" as state manually
+        self.c = "Hello, world!"
+        self.register_state("c")
+
+
+obj_b = ClassB()
+
+print("State of obj_b.a:")
+print(obj_b.a.state_dict())
+
+print("\nState of obj_b:")
+print(json.dumps(obj_b.state_dict(), indent=4))
+
+# %%
+# We can observe the state of ``obj_b`` contains the state of its attribute ``a`` automatically.
+#
+# In AgentScope, the ``AgentBase``, ``MemoryBase``, ``LongTermMemoryBase`` and ``Toolkit`` classes all inherit from ``StateModule``, thus supporting automatic and nested state management.
+#
+
+# Creating an agent
+agent = ReActAgent(
+    name="Friday",
+    sys_prompt="You're a assistant named Friday.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+    ),
+    formatter=DashScopeChatFormatter(),
+    memory=InMemoryMemory(),
+    toolkit=Toolkit(),
+)
+
+initial_state = agent.state_dict()
+
+print("Initial state of the agent:")
+print(json.dumps(initial_state, indent=4))
+
+# %%
+# Then we change its state by generating a reply message:
+#
+
+
+async def example_agent_state() -> None:
+    """Example of agent state management"""
+    await agent(Msg("user", "Hello, agent!", "user"))
+
+    print("State of the agent after generating a reply:")
+    print(json.dumps(agent.state_dict(), indent=4))
+
+
+asyncio.run(example_agent_state())
+
+# %%
+# Now we recover the state of the agent to its initial state:
+#
+
+agent.load_state_dict(initial_state)
+
+print("State after loading the initial state:")
+print(json.dumps(agent.state_dict(), indent=4))
+
+# %%
+# Session Management
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# In AgentScope, a session refers to a collection of ``StateModule`` in an application, e.g. multiple agents.
+#
+# AgentScope provides a ``SessionBase`` class with two abstract methods for session
+# management: ``save_session_state`` and ``load_session_state``.
+# Developers can implement these methods with their own storage solution.
+#
+# In AgentScope, we provide a JSON based session class ``JSONSession`` that
+# stores/loads the session state in/from a JSON file named with the session ID.
+#
+# Here we show how to use the JSON based session management in AgentScope.
+#
+# Saving Session State
+# -----------------------------------------
+#
+
+# change the agent state by generating a reply message
+asyncio.run(example_agent_state())
+
+print("\nState of agent:")
+print(json.dumps(agent.state_dict(), indent=4))
+
+# %%
+# Then we save it to a session file:
+
+
+session = JSONSession(
+    save_dir="./",  # The dir used to save the session files
+)
+
+
+async def example_session() -> None:
+    """Example of session management."""
+    await session.save_session_state(
+        session_id="user_1",  # Use the name as the session id
+        agent=agent,
+    )
+
+    print("The saved state:")
+    with open("./user_1.json", "r", encoding="utf-8") as f:
+        print(json.dumps(json.load(f), indent=4))
+
+
+asyncio.run(example_session())
+
+# %%
+# Loading Session State
+# -----------------------------------------
+# Now we load the session state from the saved file:
+
+
+async def example_load_session() -> None:
+    """Example of loading session state."""
+
+    # we first clear the memory of the agent
+    await agent.memory.clear()
+
+    print("Current state of the agent:")
+    print(json.dumps(agent.state_dict(), indent=4))
+
+    # then we load the session state
+    await session.load_session_state(
+        session_id="user_1",
+        # The keyword argument must be the same as the one used in `save_session_state`
+        agent=agent,
+    )
+    print("After loading the session state:")
+    print(json.dumps(agent.state_dict(), indent=4))
+
+
+asyncio.run(example_load_session())
+
+# %%
+# Now we can see the agent state is restored to the saved state.
+#
--- a/docs/tutorial/en/src/task_studio.py
+++ b/docs/tutorial/en/src/task_studio.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+"""
+.. _studio:
+
+AgentScope Studio
+=========================
+
+AgentScope Studio is a local-deployed web application that
+
+- provides **project management** for the development of agent applications
+- provides native **visualization** for running applications and tracing
+- provides a **built-in agent** named "Friday" that supports secondary development
+
+.. note:: The Studio is under fast development, more features are coming soon!
+
+.. figure:: ../../_static/images/studio_home.webp
+    :width: 100%
+    :alt: AgentScope Studio Home Page
+    :class: bordered-image
+    :align: center
+
+    AgentScope Studio Home Page
+
+Quick Start
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+AgentScope Studio is installed via ``npm``:
+
+.. code-block:: bash
+
+    npm install -g @agentscope/studio
+
+
+Start the Studio with the following command:
+
+.. code-block:: bash
+
+    as_studio
+
+To connect your application to the Studio, use the ``agentscope.init`` function with the ``studio_url`` parameter:
+
+.. code-block:: python
+
+    import agentscope
+
+    agentscope.init(studio_url="http://localhost:3000")
+
+    # your application code
+    ...
+
+Then, you can see your application in the Studio as follows:
+
+.. figure:: ../../_static/images/studio_project.webp
+    :width: 100%
+    :alt: Project management
+    :class: bordered-image
+    :align: center
+
+    Project management in AgentScope Studio
+
+The details about your running application, e.g. token usage, model invocations, and tracing information, can all be viewed in the Studio.
+
+.. figure:: ../../_static/images/studio_run.webp
+    :width: 100%
+    :alt: AgentScope Studio run Page
+    :class: bordered-image
+    :align: center
+
+    Application visualization in AgentScope Studio
+
+
+Friday Agent
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Friday is an experimental, locally deployed agent built by AgentScope that aims at
+
+- answering questions about the AgentScope,
+- providing a quick secondary development environment for developers,
+- integrating all available features in AgentScope to build a more powerful agent, and
+- testing and integrating the advanced features in AgentScope.
+
+.. note:: We warmly welcome contributions from the community to improve Friday! Feel free to open issues or pull requests on our `GitHub repository <https://github.com/agentscope-ai/agentscope>`_.
+
+We keep improving Friday, and it currently integrates the following features in AgentScope:
+
+.. list-table::
+    :header-rows: 1
+
+    * - Feature
+      - Status
+      - Further Reading
+      - Description
+    * - Meta tool
+      - ✅
+      - :ref:`tool`
+      - Group-wise tool management, and allow agent to change equipped tools by itself.
+    * - Agent Hook
+      - ✅
+      - :ref:`hook`
+      - Use hook to forward the printing messages to the frontend.
+    * - Agent Interruption
+      - ✅
+      - :ref:`agent`
+      - Allow users to interrupt the agent's reply process with post-processing.
+    * - Truncated Prompt
+      - ✅
+      - :ref:`prompt`
+      - Support to truncate the prompt with the preset max token limit.
+    * - State & Session Management
+      - ✅
+      - :ref:`state`
+      - Auto state management and session management for agents, maintaining the state between different runs.
+    * - Long-term Memory
+      - 🚧
+      - :ref:`memory`
+      - Support long-term memory management.
+
+
+"""
--- a/docs/tutorial/en/src/task_token.py
+++ b/docs/tutorial/en/src/task_token.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+"""
+.. _token:
+
+Token
+=========================
+
+AgentScope provides a token counter module under ``agentscope.token`` to
+calculate the number of tokens in the given messages, allowing developers
+to estimate the number of tokens in a prompt before sending it to an API.
+
+Specifically, the following token counters are available:
+
+.. list-table::
+    :header-rows: 1
+
+    * - Provider
+      - Class
+      - Support Image Data
+      - Support Tools
+    * - Anthropic
+      - ``AnthropicTokenCounter``
+      - ✅
+      - ✅
+    * - OpenAI
+      - ``OpenAITokenCounter``
+      - ✅
+      - ✅
+    * - Gemini
+      - ``GeminiTokenCounter``
+      - ✅
+      - ✅
+    * - HuggingFace
+      - ``HuggingFaceTokenCounter``
+      - Depends on the model
+      - Depends on the model
+
+.. tip:: The formatter module has integrated the token counters to support prompt truncation. Refer to the :ref:`prompt` section for more details.
+
+.. note:: For DashScope models, the dashscope library doesn't provide a token counting API. So we recommend using the HuggingFace token counter instead.
+
+We show an example of using the OpenAI token counter to count the number of tokens:
+"""
+
+import asyncio
+from agentscope.token import OpenAITokenCounter
+
+
+async def example_token_counting() -> None:
+    """Count the tokens of a short conversation with the OpenAI counter."""
+    messages = [
+        {"role": "user", "content": "Hello!"},
+        {"role": "assistant", "content": "Hi, how can I help you?"},
+    ]
+
+    # Build the counter for the target model and count the prompt tokens
+    openai_counter = OpenAITokenCounter(model_name="gpt-4.1")
+    n_tokens = await openai_counter.count(messages)
+
+    print(f"Number of tokens: {n_tokens}")
+
+
+asyncio.run(example_token_counting())
+
+
+# %%
+# Further Reading
+# ------------------------------
+#
+# - :ref:`prompt`
+#
--- a/docs/tutorial/en/src/task_tool.py
+++ b/docs/tutorial/en/src/task_tool.py
@@ -0,0 +1,453 @@
+# -*- coding: utf-8 -*-
+"""
+.. _tool:
+
+Tool
+=========================
+
+To ensure accurate and reliable tool parsing, AgentScope fully embraces the use of tools API with the following features:
+
+- Support **automatic** tool parsing from Python functions with their docstrings
+- Support both **synchronous and asynchronous** tool functions
+- Support **streaming** tool responses (either synchronous or asynchronous generators)
+- Support **dynamic extension** to the tool JSON Schema
+- Support **interrupting** the tool execution with proper signal handling
+- Support **autonomous tool management** by agents
+
+All above features are implemented by the ``Toolkit`` class in AgentScope, which is responsible for managing tool functions and their execution.
+
+.. tip:: For the support of MCP (Model Context Protocol), refer to the :ref:`mcp` section.
+"""
+import asyncio
+import inspect
+import json
+from typing import Any, AsyncGenerator
+
+from pydantic import BaseModel, Field
+
+import agentscope
+from agentscope.message import TextBlock, ToolUseBlock
+from agentscope.tool import ToolResponse, Toolkit, execute_python_code
+
+
+# %%
+# Tool Function
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# In AgentScope, a tool function is a Python function that
+#
+# - returns a ``ToolResponse`` object or a generator that yields ``ToolResponse`` objects
+# - has a docstring that describes the tool's functionality and parameters
+#
+# A template of a tool function is as follows:
+
+
+def tool_function(a: int, b: str) -> ToolResponse:
+    """{function description}
+
+    Args:
+        a (int):
+            {description of the first parameter}
+        b (str):
+            {description of the second parameter}
+    """
+
+
+# %%
+# .. tip:: Instance method and class method can also be used as tool functions, and the ``self`` and ``cls`` parameters will be ignored.
+#
+# AgentScope provides several built-in tool functions under the ``agentscope.tool`` module, such as ``execute_python_code``, ``execute_shell_command`` and text file write/read functions.
+#
+
+print("Built-in Tool Functions:")
+for _ in agentscope.tool.__all__:
+    if _ not in ["Toolkit", "ToolResponse"]:
+        print(_)
+
+# %%
+# Toolkit
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# The ``Toolkit`` class is designed to manage tool functions, extracting their JSON Schema from docstrings and providing a unified interface for tool execution.
+#
+# Basic Usage
+# ------------------------------
+# The basic functionality of the ``Toolkit`` class is to register tool functions and execute them.
+#
+
+
+# Prepare a custom tool function
+async def my_search(query: str, api_key: str) -> ToolResponse:
+    """A simple example tool function.
+
+    Args:
+        query (str):
+            The search query.
+        api_key (str):
+            The API key for authentication.
+    """
+    return ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text=f"Searching for '{query}' with API key '{api_key}'",
+            ),
+        ],
+    )
+
+
+# Register the tool function in a toolkit
+toolkit = Toolkit()
+toolkit.register_tool_function(my_search)
+
+# %%
+# After registering a tool function, you can get its JSON Schema by calling the ``get_json_schemas`` method.
+#
+
+print("Tool JSON Schemas:")
+print(json.dumps(toolkit.get_json_schemas(), indent=4, ensure_ascii=False))
+
+# %%
+# ``Toolkit`` also allows developers to preset the arguments for tool functions, especially useful for API keys or other sensitive information.
+#
+
+# Clear the toolkit first
+toolkit.clear()
+
+# Register tool function with preset keyword arguments
+toolkit.register_tool_function(my_search, preset_kwargs={"api_key": "xxx"})
+
+print("Tool JSON Schemas with Preset Arguments:")
+print(json.dumps(toolkit.get_json_schemas(), indent=4, ensure_ascii=False))
+
+# %%
+# In ``Toolkit``, the ``call_tool_function`` method takes a tool use block as input and executes the corresponding tool function, returning **a unified asynchronous generator** that yields ``ToolResponse`` objects.
+#
+
+
+async def example_tool_execution() -> None:
+    """Example of executing a tool call."""
+    res = await toolkit.call_tool_function(
+        ToolUseBlock(
+            type="tool_use",
+            id="123",
+            name="my_search",
+            input={"query": "AgentScope"},
+        ),
+    )
+
+    # Only one tool response is expected in this case
+    print("Tool Response:")
+    async for tool_response in res:
+        print(tool_response)
+
+
+asyncio.run(example_tool_execution())
+
+# %%
+# Extending JSON Schema Dynamically
+# --------------------------------------
+#
+# ``Toolkit`` allows extending the JSON schemas of tool functions dynamically by calling the ``set_extended_model`` method.
+# This feature makes it possible to add more parameters to a tool function without modifying its original definition.
+#
+# .. tip:: Related scenarios include dynamic :ref:`structured-output` and CoT (Chain of Thought) reasoning
+#
+# .. note:: The function to be extended should accept variable keyword arguments (``**kwargs``), so that the additional fields can be passed to it.
+#
+# Taking the CoT reasoning as an example, we can extend all tool functions with a ``thinking`` field, allowing the agent to summarize the current state and then decide what to do next.
+#
+
+
+# Example tool function
+def tool_function(**kwargs: Any) -> ToolResponse:
+    """A tool function"""
+    return ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text=f"Received parameters: {kwargs}",
+            ),
+        ],
+    )
+
+
+# Add a thinking field so that the agent could think before giving the other parameters.
+class ThinkingModel(BaseModel):
+    """A Pydantic model for additional fields."""
+
+    thinking: str = Field(
+        description="Summarize the current state and decide what to do next.",
+    )
+
+
+# Extend the JSON schema of ``my_search`` with the additional ``thinking`` field
+toolkit.set_extended_model("my_search", ThinkingModel)
+
+print("The extended JSON Schema:")
+print(json.dumps(toolkit.get_json_schemas(), indent=4, ensure_ascii=False))
+
+# %%
+# Interrupting Tool Execution
+# ------------------------------
+# The ``Toolkit`` class supports **execution interruption** of **async tool functions** and provides a comprehensive **agent-oriented post-processing mechanism**.
+# Such interruption is implemented based on the asyncio cancellation mechanism, and the post-processing varies depending on the return type of tool function.
+#
+# .. note:: For synchronous tool functions, their execution cannot be interrupted by asyncio cancellation. So the interruption is handled within the agent rather than the toolkit.
+#  Refer to the :ref:`agent` section for more information.
+#
+# Specifically, if the tool function returns a ``ToolResponse`` object, a predefined ``ToolResponse`` object with an interrupted message will be yielded.
+# So that the agent can observe the interruption and handle it accordingly.
+# Besides, a flag ``is_interrupted`` will be set to ``True`` in the response, and the external caller can decide whether to throw the ``CancelledError`` exception to the outer layer.
+#
+# An example of async tool function that can be interrupted is as follows:
+#
+
+
+async def non_streaming_function() -> ToolResponse:
+    """A non-streaming tool function that can be interrupted."""
+    await asyncio.sleep(1)  # Simulate a long-running task
+
+    # Fake interruption for demonstration
+    raise asyncio.CancelledError()
+
+    # The following code won't be executed due to the cancellation
+    return ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text="Run successfully!",
+            ),
+        ],
+    )
+
+
+async def example_tool_interruption() -> None:
+    """Example of tool interruption."""
+    toolkit = Toolkit()
+    toolkit.register_tool_function(non_streaming_function)
+    res = await toolkit.call_tool_function(
+        ToolUseBlock(
+            type="tool_use",
+            id="123",
+            name="non_streaming_function",
+            input={},
+        ),
+    )
+
+    async for tool_response in res:
+        print("Tool Response:")
+        print(tool_response)
+        print("The interrupted flag:")
+        print(tool_response.is_interrupted)
+
+
+asyncio.run(example_tool_interruption())
+
+# %%
+# For streaming tool functions, which return an asynchronous generator, the ``Toolkit`` will attach the interrupted message to the previous chunk of the response.
+# In this way, the agent can observe what the tool has returned before the interruption.
+#
+# The example of interrupting a streaming tool function is as follows:
+#
+
+
+async def streaming_function() -> AsyncGenerator[ToolResponse, None]:
+    """A streaming tool function that can be interrupted."""
+    # Simulate a chunk of response
+    yield ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text="1234",
+            ),
+        ],
+        stream=True,
+    )
+
+    # Simulate interruption
+    raise asyncio.CancelledError()
+
+    # The following code won't be executed due to the cancellation
+    yield ToolResponse(
+        content=[
+            TextBlock(
+                type="text",
+                text="123456789",
+            ),
+        ],
+    )
+
+
+async def example_streaming_tool_interruption() -> None:
+    """Example of streaming tool interruption."""
+    toolkit = Toolkit()
+    toolkit.register_tool_function(streaming_function)
+
+    res = await toolkit.call_tool_function(
+        ToolUseBlock(
+            type="tool_use",
+            id="xxx",
+            name="streaming_function",
+            input={},
+        ),
+    )
+
+    i = 0
+    async for tool_response in res:
+        print(f"Chunk {i}:")
+        print(tool_response)
+        print("The interrupted flag: ", tool_response.is_interrupted, "\n")
+        i += 1
+
+
+asyncio.run(example_streaming_tool_interruption())
+
+# %%
+# Automatic Tool Management
+# -------------------------------------
+# .. image:: https://img.alicdn.com/imgextra/i3/O1CN013cvRpO27MfesMsTeh_!!6000000007783-2-tps-840-521.png
+#     :width: 100%
+#     :align: center
+#     :alt: Automatic Tool Management
+#
+#
+# The ``Toolkit`` class supports **automatic tool management** by introducing the concept of **tool group**, as well as a **meta tool function** named ``reset_equipped_tools``.
+#
+# The tool group is a set of related tool functions, e.g. browser-use tools, map services tools, etc., which will be managed together.
+# Only the tools in the activated groups will be visible to agents, i.e. accessible by the ``toolkit.get_json_schemas()`` method.
+#
+# Note there is a special group called ``basic``, which is always activated and the tools registered without specifying the group name will be added to this group by default.
+#
+# .. tip:: The ``basic`` group ensures that the basic usage of tools won't be affected by the group features if you don't need them.
+#
+# Now we try to create a tool group named ``browser_use``, which contains some web browsing tools.
+#
+
+
+def navigate(url: str) -> ToolResponse:
+    """Navigate to a web page.
+
+    Args:
+        url (str):
+            The URL of the web page to navigate to.
+    """
+    pass
+
+
+def click_element(element_id: str) -> ToolResponse:
+    """Click an element on the web page.
+
+    Args:
+        element_id (str):
+            The ID of the element to click.
+    """
+    pass
+
+
+toolkit = Toolkit()
+
+# Create a tool group named browser_use
+toolkit.create_tool_group(
+    group_name="browser_use",
+    description="The tool functions for web browsing.",
+    active=False,
+    # The notes when using these tools
+    notes="""1. Use ``navigate`` to open a web page.
+2. When requiring user authentication, ask the user for the credentials
+3. ...""",
+)
+
+toolkit.register_tool_function(navigate, group_name="browser_use")
+toolkit.register_tool_function(click_element, group_name="browser_use")
+
+# We can also register some basic tools
+toolkit.register_tool_function(execute_python_code)
+
+# %%
+# If we check the tools JSON schema, we can only see the ``execute_python_code`` tool, because the ``browser_use`` group is not activated yet:
+
+print("Tool JSON Schemas with Group:")
+print(json.dumps(toolkit.get_json_schemas(), indent=4, ensure_ascii=False))
+
+# %%
+# Use the ``update_tool_groups`` method to activate or deactivate tool groups:
+
+toolkit.update_tool_groups(group_names=["browser_use"], active=True)
+
+print("Tool JSON Schemas with Group:")
+print(json.dumps(toolkit.get_json_schemas(), indent=4, ensure_ascii=False))
+
+# %%
+# Additionally, ``Toolkit`` provides a meta tool function named ``reset_equipped_tools``, taking the current group names as the argument to indicate which groups to activate:
+#
+# .. note:: In ``ReActAgent`` class, you can enable the meta tool function by setting ``enable_meta_tool=True`` in the constructor.
+#
+
+# Register the meta tool function
+toolkit.register_tool_function(toolkit.reset_equipped_tools)
+
+reset_equipped = next(
+    tool
+    for tool in toolkit.get_json_schemas()
+    if tool["function"]["name"] == "reset_equipped_tools"
+)
+print("JSON schema of the ``reset_equipped_tools`` function:")
+print(
+    json.dumps(
+        reset_equipped,
+        indent=4,
+        ensure_ascii=False,
+    ),
+)
+
+# %%
+# When agent calls the ``reset_equipped_tools`` function, the corresponding tool groups will be activated, and the tool response will
+# contain the notes of the activated tool groups.
+#
+
+
+async def mock_agent_reset_tools() -> None:
+    """Mock an agent call to the meta tool that activates ``browser_use``."""
+    # Call the meta tool function; the key is the tool-group name to activate
+    res = await toolkit.call_tool_function(
+        ToolUseBlock(
+            type="tool_use",
+            id="154",
+            name="reset_equipped_tools",
+            input={
+                "browser_use": True,
+            },
+        ),
+    )
+
+    async for tool_response in res:
+        print("Text content in tool Response:")
+        print(tool_response)
+
+
+asyncio.run(mock_agent_reset_tools())
+
+# %%
+# The toolkit also provides a method to gather the notes of the activated tool groups, and you can assemble it into your agent's system prompt.
+#
+# .. tip:: The automatic tool management feature is already implemented in the ``ReActAgent`` class, refer to the :ref:`agent` section for more details.
+#
+
+# Create one more tool group
+toolkit.create_tool_group(
+    group_name="map_service",
+    description="The google map service tools.",
+    active=True,
+    notes="""1. Use ``get_location`` to get the location of a place.
+2. ...""",
+)
+
+print("The gathered notes of the activated tool groups:")
+print(toolkit.get_activated_notes())
+
+# %%
+# Further Reading
+# ---------------------
+# - :ref:`agent`
+# - :ref:`state`
+# - :ref:`mcp`
+#
--- a/docs/tutorial/en/src/task_tracing.py
+++ b/docs/tutorial/en/src/task_tracing.py
@@ -0,0 +1,221 @@
+# -*- coding: utf-8 -*-
+"""
+.. _tracing:
+
+Tracing
+==============================
+
+AgentScope implements OpenTelemetry-based tracing to monitor and debug the
+execution of agent applications, which features
+
+- Provide built-in tracing for LLM, tool, agent, formatter, etc.
+- Support error and exception tracking
+- Provide native tracing **visualization** in AgentScope Studio
+- Support connecting to **third-party platforms** like Alibaba Cloud CloudMonitor, `Arize-Phoenix <https://github.com/Arize-ai/phoenix>`_, `Langfuse <https://langfuse.com/>`_, etc.
+
+Setting Up
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. note:: Connecting to the :ref:`studio` or third-party tracing endpoint should be done at the beginning of your application by the ``agentscope.init`` function.
+
+AgentScope Studio
+---------------------------------------
+
+.. figure:: ../../_static/images/studio_tracing.webp
+    :width: 100%
+    :alt: AgentScope Studio tracing Page
+    :class: bordered-image
+    :align: center
+
+    *Tracing in AgentScope Studio*
+
+When connecting to AgentScope Studio, just provide ``studio_url`` parameter in ``agentscope.init`` function.
+
+.. code-block:: python
+
+    import agentscope
+
+    agentscope.init(studio_url="http://xxx:port")
+
+
+Third-party Platforms
+---------------------------------------
+
+To connect to third-party tracing platforms, set the ``tracing_url`` parameter in the ``agentscope.init`` function.
+The ``tracing_url`` is the URL of your OpenTelemetry collector or any compatible backend that supports OTLP (OpenTelemetry Protocol).
+
+.. code-block:: python
+
+    import agentscope
+
+    # Connect to OpenTelemetry-compatible backends
+    agentscope.init(tracing_url="https://your-tracing-backend:port/traces")
+
+Taking Alibaba Cloud CloudMonitor, Arize-Phoenix, and Langfuse as examples:
+
+**Alibaba Cloud CloudMonitor**: A fully-managed observability platform.
+
+.. code-block:: python
+    :caption: Connect to Alibaba Cloud CloudMonitor
+
+    agentscope.init(tracing_url="https://tracing-cn-hangzhou.arms.aliyuncs.com/adapt_xxx/api/otlp/traces")
+
+.. tip::
+    **Get your Endpoint:** In the `ARMS Console <https://arms.console.aliyun.com/>`_ under **Access Center** > **OpenTelemetry**,
+    select the **Public Endpoint** matching your deployment region. Customize your app name via the ``OTEL_SERVICE_NAME`` environment variable.
+    Alibaba Cloud CloudMonitor provides zero-code instrumentation through `LoongSuite <https://github.com/alibaba/loongsuite-python-agent>`_ agent.
+    Learn more in the `CloudMonitor Documentation <https://www.alibabacloud.com/help/en/cms/cloudmonitor-2-0/user-guide/model-application>`_.
+
+**Arize-Phoenix**: You need to set the ``PHOENIX_API_KEY`` in your environment variables.
+
+.. code-block:: python
+    :caption: Connect to Arize Phoenix
+
+    # Arize Phoenix Integration
+    import os
+
+    PHOENIX_API_KEY = os.environ.get("PHOENIX_API_KEY")
+    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"
+
+    agentscope.init(tracing_url="https://app.phoenix.arize.com/v1/traces")
+
+**LangFuse**: You need to set the ``LANGFUSE_PUBLIC_KEY`` and
+``LANGFUSE_SECRET_KEY`` in your environment variables. The authorization
+header is constructed using these keys.
+
+.. code-block:: python
+    :caption: Connect to LangFuse
+
+    import os, base64
+
+    LANGFUSE_PUBLIC_KEY = os.environ["LANGFUSE_PUBLIC_KEY"]
+    LANGFUSE_SECRET_KEY = os.environ["LANGFUSE_SECRET_KEY"]
+    LANGFUSE_AUTH_STRING = f"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}"
+
+    LANGFUSE_AUTH = base64.b64encode(LANGFUSE_AUTH_STRING.encode("utf-8")).decode("ascii")
+    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"
+
+    # EU data region
+    agentscope.init(tracing_url="https://cloud.langfuse.com/api/public/otel/v1/traces")
+    # US data region
+    # agentscope.init(tracing_url="https://us.cloud.langfuse.com/api/public/otel/v1/traces")
+
+
+Customizing Tracing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As stated above, the tracing in AgentScope is implemented based on OpenTelemetry.
+That means your own tracing code implemented with the OpenTelemetry SDK is natively
+compatible with AgentScope.
+
+Besides, AgentScope provides the following built-in decorators to trace the corresponding modules:
+
+- ``@trace_llm``: Trace the ``__call__`` function of classes that inherit from ``ChatModelBase``
+- ``@trace_reply``: Trace the ``reply`` function of classes that inherit from ``AgentBase``
+- ``@trace_format``: Trace the ``format`` function of classes that inherit from ``FormatterBase``
+- ``@trace``: Trace general functions
+
+
+Tracing LLMs
+----------------------------------------
+
+
+The ``@trace_llm`` decorator traces the ``__call__`` function of ``ChatModelBase`` classes.
+
+.. note:: Your LLM class must inherit from ``ChatModelBase``
+
+.. code-block:: python
+    :caption: Tracing new ChatModel class
+
+    class ExampleChatModel(ChatModelBase):
+        \"\"\"An example Model\"\"\"
+
+        ...
+
+        @trace_llm
+        async def __call__(
+            self,
+            *args: Any,
+            **kwargs: Any,
+        ) -> AsyncGenerator[ChatResponse, None] | ChatResponse:
+            \"\"\"LLM call\"\"\"
+            ...
+
+
+Tracing Agent
+----------------------------------------
+
+The ``@trace_reply`` decorator is for agent implementations and traces the ``reply`` function.
+
+.. note:: Your agent class must inherit from ``AgentBase``
+
+.. code-block:: python
+    :caption: Tracing new Agent class
+
+    class ExampleAgent(AgentBase):
+        \"\"\"An example agent class\"\"\"
+
+        @trace_reply
+        async def reply(self, *args: Any, **kwargs: Any) -> Msg:
+            \"\"\"Reply to the message.\"\"\"
+            ...
+
+
+Tracing Formatter
+----------------------------------------
+The ``@trace_format`` decorator is for formatter implementations and traces the ``format`` function.
+
+.. note:: Your formatter class must inherit from ``FormatterBase``
+
+.. code-block:: python
+    :caption: Tracing new Formatter class
+
+    class ExampleFormatter(FormatterBase):
+        \"\"\"A simple example formatter class\"\"\"
+
+        @trace_format
+        async def format(self, *args: Any, **kwargs: Any) -> list[dict]:
+            \"\"\"Example formatting\"\"\"
+
+
+General Tracing
+----------------------------------------
+
+The ``@trace`` decorator is different from the above decorators, as it is a general-purpose tracing decorator that can be applied to any function.
+It requires a ``name`` parameter to identify the traced function, and can trace various types of functions, including:
+
+- synchronous functions
+- synchronous generator functions
+- asynchronous functions
+- asynchronous generator functions
+
+.. code-block:: python
+    :caption: General tracing example
+
+    # 1. Synchronous function
+    @trace(name='simple_function')
+    def simple_function(name: str, age: int) -> str:
+        \"\"\"A simple function with automatic tracing.\"\"\"
+        return f"Hello, {name}! You are {age} years old."
+
+    # 2. Synchronous generator function
+    @trace(name='number_generator')
+    def number_generator(n: int) -> Generator[int, None, None]:
+        \"\"\"Generate numbers from 0 to n-1.\"\"\"
+        for i in range(n):
+            yield i
+
+    # 3. Asynchronous function
+    @trace(name='async_function')
+    async def async_function(data: dict) -> dict:
+        \"\"\"Process data asynchronously.\"\"\"
+        return {"processed": data}
+
+    # 4. Asynchronous generator function
+    @trace(name='async_stream')
+    async def async_stream(n: int) -> AsyncGenerator[str, None]:
+        \"\"\"Generate stream of data asynchronously.\"\"\"
+        for i in range(n):
+            yield f"data_{i}"
+
+"""
--- a/docs/tutorial/en/src/task_tts.py
+++ b/docs/tutorial/en/src/task_tts.py
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+"""
+.. _tts:
+
+TTS
+====================
+
+AgentScope provides a unified interface for Text-to-Speech (TTS) models across multiple API providers.
+This tutorial demonstrates how to use TTS models in AgentScope.
+
+AgentScope supports the following TTS APIs:
+
+.. list-table:: Built-in TTS Models
+    :header-rows: 1
+
+    * - API
+      - Class
+      - Streaming Input
+      - Non-Streaming Input
+      - Streaming Output
+      - Non-Streaming Output
+    * - DashScope Realtime API
+      - ``DashScopeRealtimeTTSModel``
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+    * - DashScope CosyVoice Realtime API
+      - ``DashScopeCosyVoiceRealtimeTTSModel``
+      - ✅
+      - ✅
+      - ✅
+      - ✅
+    * - DashScope API
+      - ``DashScopeTTSModel``
+      - ❌
+      - ✅
+      - ✅
+      - ✅
+    * - DashScope CosyVoice API
+      - ``DashScopeCosyVoiceTTSModel``
+      - ❌
+      - ✅
+      - ✅
+      - ✅
+    * - OpenAI API
+      - ``OpenAITTSModel``
+      - ❌
+      - ✅
+      - ✅
+      - ✅
+    * - Gemini API
+      - ``GeminiTTSModel``
+      - ❌
+      - ✅
+      - ✅
+      - ✅
+
+.. note:: The streaming input and output in AgentScope TTS models are all accumulative.
+
+**Choosing the Right Model:**
+
+- **Use Non-Realtime TTS** when you have complete text ready (e.g., pre-written
+  responses, complete LLM outputs)
+- **Use Realtime TTS** when text is generated progressively (e.g., streaming
+  LLM responses) for lower latency
+
+"""
+
+import asyncio
+import os
+
+from agentscope.agent import ReActAgent, UserAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.tts import (
+    DashScopeRealtimeTTSModel,
+    DashScopeTTSModel,
+)
+
+# %%
+# Non-Realtime TTS
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Non-realtime TTS models process complete text inputs and are the simplest
+# to use. You can directly call their ``synthesize()`` method.
+#
+# Taking DashScope TTS model as an example:
+
+
+async def example_non_realtime_tts() -> None:
+    """Synthesize one complete message with a non-realtime TTS model."""
+    # A DashScope TTS model configured for non-streaming output
+    dashscope_tts = DashScopeTTSModel(
+        api_key=os.environ.get("DASHSCOPE_API_KEY", ""),
+        model_name="qwen3-tts-flash",
+        voice="Cherry",
+        stream=False,
+    )
+
+    # The text to be spoken, wrapped in an assistant message
+    speech_msg = Msg(
+        name="assistant",
+        content="Hello, this is DashScope TTS.",
+        role="assistant",
+    )
+
+    # No explicit connection step is needed before synthesizing
+    result = await dashscope_tts.synthesize(speech_msg)
+
+    # The response content is an audio block holding base64-encoded audio data
+    audio_data = result.content["source"]["data"]
+    print("The length of audio data:", len(audio_data))
+
+
+asyncio.run(example_non_realtime_tts())
+
+# %%
+# **Streaming Output for Lower Latency:**
+#
+# When ``stream=True``, the model returns audio chunks progressively, allowing
+# you to start playback before synthesis completes. This reduces perceived latency.
+#
+
+
+async def example_non_realtime_tts_streaming() -> None:
+    """Consume the audio of a non-realtime TTS model chunk by chunk."""
+    # A DashScope TTS model with streaming output enabled
+    streaming_tts = DashScopeTTSModel(
+        api_key=os.environ.get("DASHSCOPE_API_KEY", ""),
+        model_name="qwen3-tts-flash",
+        voice="Cherry",
+        stream=True,
+    )
+
+    speech_msg = Msg(
+        name="assistant",
+        content="Hello, this is DashScope TTS with streaming output.",
+        role="assistant",
+    )
+
+    # With ``stream=True`` the awaited call hands back an async generator
+    chunk_stream = await streaming_tts.synthesize(speech_msg)
+    async for chunk in chunk_stream:
+        # Report the size of every audio chunk as soon as it arrives
+        chunk_data = chunk.content["source"]["data"]
+        print("Received audio chunk of length:", len(chunk_data))
+
+
+asyncio.run(example_non_realtime_tts_streaming())
+
+
+# %%
+# Realtime TTS
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Realtime TTS models are designed for scenarios where text is generated
+# incrementally, such as streaming LLM responses. This enables the lowest
+# possible latency by starting audio synthesis before the complete text is ready.
+#
+# **Key Concepts:**
+#
+# - **Stateful Processing**: Realtime TTS maintains state for a single streaming
+#   session, identified by ``msg.id``. Only one streaming session can be active
+#   at a time.
+# - **Two Methods**:
+#
+#   - ``push(msg)``: Non-blocking method that submits text chunks and returns
+#     immediately. May return partial audio if available.
+#   - ``synthesize(msg)``: Blocking method that finalizes the session and returns
+#     all remaining audio. When ``stream=True``, it returns an async generator.
+#
+# .. code-block:: python
+#
+#     async def example_realtime_tts_streaming():
+#         tts_model = DashScopeRealtimeTTSModel(
+#             api_key=os.environ.get("DASHSCOPE_API_KEY", ""),
+#             model_name="qwen3-tts-flash-realtime",
+#             voice="Cherry",
+#             stream=False,
+#         )
+#
+#         # the realtime TTS model receives accumulative text chunks
+#         res = await tts_model.push(msg_chunk_1)  # non-blocking
+#         res = await tts_model.push(msg_chunk_2)  # non-blocking
+#         ...
+#         res = await tts_model.synthesize(final_msg)  # blocking, get all remaining audio
+#
+# When setting ``stream=True`` during initialization, the ``synthesize()`` method returns an async generator of ``TTSResponse`` objects, allowing you to process audio chunks as they arrive.
+#
+#
+# Integrating with ReActAgent
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# AgentScope agents can automatically synthesize their responses to speech
+# when provided with a TTS model. This works seamlessly with both realtime
+# and non-realtime TTS models.
+#
+# **How It Works:**
+#
+# 1. The agent generates a text response (potentially streamed from an LLM)
+# 2. The TTS model synthesizes the text to audio automatically
+# 3. The synthesized audio is attached to the ``speech`` field of the ``Msg`` object
+# 4. The audio is played during the agent's ``self.print()`` method
+#
+
+
+async def example_agent_with_tts() -> None:
+    """Run an interactive chat loop with a TTS-enabled ReActAgent.
+
+    The agent and the user take turns until the text content of the user's
+    message is exactly ``"exit"``.
+    """
+    # Read the key once so that the chat model and the TTS model receive the
+    # same value (an empty string when the variable is unset)
+    api_key = os.environ.get("DASHSCOPE_API_KEY", "")
+
+    # Create an agent with TTS enabled
+    agent = ReActAgent(
+        name="Assistant",
+        sys_prompt="You are a helpful assistant.",
+        model=DashScopeChatModel(
+            api_key=api_key,
+            model_name="qwen-max",
+            stream=True,
+        ),
+        formatter=DashScopeChatFormatter(),
+        # Enable TTS
+        tts_model=DashScopeRealtimeTTSModel(
+            api_key=api_key,
+            model_name="qwen3-tts-flash-realtime",
+            voice="Cherry",
+        ),
+    )
+    user = UserAgent("User")
+
+    # Build a conversation just like normal: the agent speaks first (with no
+    # input message), then the user answers
+    msg = None
+    while True:
+        msg = await agent(msg)
+        msg = await user(msg)
+        if msg.get_text_content() == "exit":
+            break
+
+
+# %%
+# Customizing TTS Model
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# You can create custom TTS implementations by inheriting from ``TTSModelBase``.
+# The base class provides a flexible interface for both realtime and non-realtime
+# TTS models.
+# We use an attribute ``supports_streaming_input`` to indicate if the TTS model is realtime or not.
+#
+# For realtime TTS models, you need to implement the ``connect``, ``close``, ``push`` and ``synthesize`` methods to handle the lifecycle and streaming input.
+#
+# For non-realtime TTS models, you only need to implement the ``synthesize`` method.
+#
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# - :ref:`agent` - Learn more about agents in AgentScope
+# - :ref:`message` - Understand message format in AgentScope
+# - API Reference: :class:`agentscope.tts.TTSModelBase`
+#
--- a/docs/tutorial/en/src/task_tuner.py
+++ b/docs/tutorial/en/src/task_tuner.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+"""
+.. _tuner:
+
+Tuner
+=================
+
+AgentScope provides the ``tuner`` module for training agent applications using reinforcement learning (RL).
+This tutorial will guide you through how to leverage the ``tuner`` module to improve agent performance on specific tasks, including:
+
+- Introducing the core components of the ``tuner`` module
+- Demonstrating the key code required for the tuning workflow
+- Showing how to configure and run the tuning process
+
+Main Components
+~~~~~~~~~~~~~~~~~~~
+The ``tuner`` module introduces three core components essential for RL-based agent training:
+
+- **Task Dataset**: A collection of tasks for training and evaluating the agent.
+- **Workflow Function**: Encapsulates the agent's logic to be tuned.
+- **Judge Function**: Evaluates the agent's performance on tasks and provides reward signals for tuning.
+
+In addition, ``tuner`` provides several configuration classes for customizing the tuning process, including:
+
+- **TunerModelConfig**: Model configurations for tuning purposes.
+- **AlgorithmConfig**: Specifies the RL algorithm (e.g., GRPO, PPO) and its parameters.
+
+Implementation
+~~~~~~~~~~~~~~~~~~~
+This section demonstrates how to use ``tuner`` to train a simple math agent.
+
+Task Dataset
+--------------------
+The task dataset contains tasks for training and evaluating your agent.
+
+Your dataset should follow the Huggingface `datasets <https://huggingface.co/docs/datasets/quickstart>`_ format, which can be loaded with ``datasets.load_dataset``. For example:
+
+.. code-block:: text
+
+    my_dataset/
+        ├── train.jsonl  # training samples
+        └── test.jsonl   # evaluation samples
+
+Suppose your `train.jsonl` contains:
+
+.. code-block:: json
+
+    {"question": "What is 2 + 2?", "answer": "4"}
+    {"question": "What is 4 + 4?", "answer": "8"}
+
+Before starting tuning, you can verify that your dataset is loaded correctly with:
+
+.. code-block:: python
+
+    from agentscope.tuner import DatasetConfig
+
+    dataset = DatasetConfig(path="my_dataset", split="train")
+    dataset.preview(n=2)
+    # Output the first two samples to verify correct loading
+    # [
+    #   {
+    #     "question": "What is 2 + 2?",
+    #     "answer": "4"
+    #   },
+    #   {
+    #     "question": "What is 4 + 4?",
+    #     "answer": "8"
+    #   }
+    # ]
+
+Workflow Function
+--------------------
+The workflow function defines how the agent interacts with the environment and makes decisions. All workflow functions should follow the input/output signature defined in ``agentscope.tuner.WorkflowType``.
+
+Below is an example workflow function using a ReAct agent to answer math questions:
+"""
+
+from typing import Dict, Optional
+from agentscope.agent import ReActAgent
+from agentscope.formatter import OpenAIChatFormatter
+from agentscope.message import Msg
+from agentscope.model import ChatModelBase
+from agentscope.tuner import WorkflowOutput
+
+
+async def example_workflow_function(
+    task: Dict,
+    model: ChatModelBase,
+    auxiliary_models: Optional[Dict[str, ChatModelBase]] = None,
+) -> WorkflowOutput:
+    """Answer the question of a task with a ReAct agent.
+
+    Args:
+        task (`Dict`): The task information; its ``"question"`` entry is
+            sent to the agent.
+        model (`ChatModelBase`): The chat model used by the agent.
+        auxiliary_models (`Optional[Dict[str, ChatModelBase]]`): Additional
+            chat models, generally used to simulate the behavior of other
+            non-training agents in multi-agent scenarios. Not used in this
+            example.
+
+    Returns:
+        `WorkflowOutput`: The output wrapping the agent's reply.
+    """
+    math_agent = ReActAgent(
+        name="react_agent",
+        sys_prompt="You are a helpful math problem solving agent.",
+        model=model,
+        formatter=OpenAIChatFormatter(),
+    )
+
+    # Extract the question from the task and wrap it in a user message
+    question_msg = Msg("user", task["question"], role="user")
+    reply = await math_agent.reply(msg=question_msg)
+
+    # Hand the reply back as the workflow response
+    return WorkflowOutput(response=reply)
+
+
+# %%
+# You can directly run this workflow function with a task dictionary and a ``DashScopeChatModel`` / ``OpenAIChatModel`` to test its correctness before formal training. For example:
+
+import asyncio
+import os
+from agentscope.model import DashScopeChatModel
+
+# A single hand-written task with the same keys as the dataset samples
+task = {"question": "What is 123 plus 456?", "answer": "579"}
+# Indexing ``os.environ`` raises ``KeyError`` when the API key is not set
+model = DashScopeChatModel(
+    model_name="qwen-max",
+    api_key=os.environ["DASHSCOPE_API_KEY"],
+)
+# Run the async workflow function to completion from synchronous code
+workflow_output = asyncio.run(example_workflow_function(task, model))
+assert isinstance(
+    workflow_output.response,
+    Msg,
+), "In this example, the response should be a Msg instance."
+print("\nWorkflow response:", workflow_output.response.get_text_content())
+
+# %%
+#
+# Judge Function
+# --------------------
+# The judge function evaluates the agent's performance on a given task and provides a reward signal for tuning.
+# All judge functions should follow the input/output signature defined in ``agentscope.tuner.JudgeType``.
+# Below is a simple judge function that compares the agent's response with the ground truth answer:
+
+from typing import Any
+from agentscope.tuner import JudgeOutput
+
+
+async def example_judge_function(
+    task: Dict,
+    response: Any,
+    auxiliary_models: Optional[Dict[str, ChatModelBase]] = None,
+) -> JudgeOutput:
+    """A very simple judge function only for demonstration.
+
+    The reward is 1.0 when the ground truth answer appears as a substring of
+    the response text, and 0.0 otherwise.
+
+    Args:
+        task (`Dict`): The task information.
+        response (`Any`): The response field from the WorkflowOutput.
+        auxiliary_models (`Optional[Dict[str, ChatModelBase]]`): Additional
+            chat models for LLM-as-a-Judge purpose.
+
+    Returns:
+        `JudgeOutput`: The reward assigned by the judge.
+    """
+    ground_truth = task["answer"]
+    # Fall back to an empty string when the response has no text content, so
+    # the substring test yields a reward instead of raising a ``TypeError``.
+    response_text = response.get_text_content() or ""
+    reward = 1.0 if ground_truth in response_text else 0.0
+    return JudgeOutput(reward=reward)
+
+
+judge_output = asyncio.run(
+    example_judge_function(
+        task,
+        workflow_output.response,
+    ),
+)
+print(f"Judge reward: {judge_output.reward}")
+
+# %%
+# The judge function can also be locally tested in the same way as shown above before formal training to ensure its logic is correct.
+#
+# .. tip::
+#    You can leverage existing `MetricBase <https://github.com/agentscope-ai/agentscope/blob/main/src/agentscope/evaluate/_metric_base.py>`_ implementations in your judge function to compute more sophisticated metrics and combine them into a composite reward.
+#
+# Configuration and Running
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Finally, you can configure and run the tuning process using the ``tuner`` module.
+# Before starting, ensure that `Trinity-RFT <https://github.com/agentscope-ai/Trinity-RFT>`_ is installed in your environment, as it is required for tuning.
+#
+# Below is an example of configuring and starting the tuning process:
+#
+# .. note::
+#    This example is for demonstration only. For a complete runnable example, see `Tune ReActAgent <https://github.com/agentscope-ai/agentscope/tree/main/examples/tuner/react_agent>`_
+#
+# .. code-block:: python
+#
+#        from agentscope.tuner import tune, AlgorithmConfig, DatasetConfig, TunerModelConfig
+#        # your workflow / judge function here...
+#
+#        if __name__ == "__main__":
+#            dataset = DatasetConfig(path="my_dataset", split="train")
+#            model = TunerModelConfig(model_path="Qwen/Qwen3-0.6B", max_model_len=16384)
+#            algorithm = AlgorithmConfig(
+#                algorithm_type="multi_step_grpo",
+#                group_size=8,
+#                batch_size=32,
+#                learning_rate=1e-6,
+#            )
+#            tune(
+#                workflow_func=example_workflow_function,
+#                judge_func=example_judge_function,
+#                model=model,
+#                train_dataset=dataset,
+#                algorithm=algorithm,
+#            )
+#
+# Here, ``DatasetConfig`` configures the training dataset, ``TunerModelConfig`` sets the parameters for the trainable model, and ``AlgorithmConfig`` specifies the reinforcement learning algorithm and its hyperparameters.
+#
+# .. tip::
+#    The ``tune`` function is based on `Trinity-RFT <https://github.com/agentscope-ai/Trinity-RFT>`_ and internally converts input parameters to a YAML configuration.
+#    Advanced users can skip the ``model``, ``train_dataset``, and ``algorithm`` arguments and instead provide a YAML config file path via the ``config_path`` argument.
+#    Using a configuration file is recommended for fine-grained control and to leverage advanced Trinity-RFT features. See the Trinity-RFT `Configuration Guide <https://agentscope-ai.github.io/Trinity-RFT/en/main/tutorial/trinity_configs.html>`_ for more options.
+#
+# Save the above code as ``main.py`` and run it with:
+#
+# .. code-block:: bash
+#
+#        ray start --head
+#        python main.py
+#
+# Checkpoints and logs are automatically saved to the ``checkpoints/AgentScope`` directory under your workspace, with each run in a timestamped sub-directory. Tensorboard logs can be found in ``monitor/tensorboard`` within the checkpoint directory.
+#
+# .. code-block:: text
+#
+#        your_workspace/
+#            └── checkpoints/
+#                └──AgentScope/
+#                    └── Experiment-20260104185355/  # each run saved in a sub-directory with timestamp
+#                        ├── monitor/
+#                        │   └── tensorboard/  # tensorboard logs
+#                        └── global_step_x/    # saved model checkpoints at step x
+#
+# .. tip::
+#    For more tuning examples, refer to the `tuner directory <https://github.com/agentscope-ai/agentscope-samples/tree/main/tuner>`_ of the AgentScope-Samples repository.
--- a/docs/tutorial/en/src/workflow_concurrent_agents.py
+++ b/docs/tutorial/en/src/workflow_concurrent_agents.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+"""
+Concurrent Agents
+===================================
+With the help of asynchronous programming, multiple agents can be executed concurrently with ``asyncio.gather`` in Python.
+
+A simple example is shown below, where two agents are created and executed concurrently.
+"""
+import asyncio
+from datetime import datetime
+from typing import Any
+
+from agentscope.agent import AgentBase
+
+
+class ExampleAgent(AgentBase):
+    """A minimal agent whose reply only simulates a slow task."""
+
+    def __init__(self, name: str) -> None:
+        """Set up the base agent and store the given name."""
+        super().__init__()
+        self.name = name
+
+    async def reply(self, *args: Any, **kwargs: Any) -> None:
+        """Print a start time, wait three seconds, then print an end time."""
+        time_format = "%H:%M:%S.%f"
+
+        # Cutting the last three digits turns microseconds into milliseconds
+        started = datetime.now().strftime(time_format)[:-3]
+        print(f"{self.name} started at {started}")
+
+        await asyncio.sleep(3)  # Simulate a long-running task
+
+        finished = datetime.now().strftime(time_format)[:-3]
+        print(f"{self.name} finished at {finished}")
+
+
+async def run_concurrent_agents() -> None:
+    """Create two example agents and await both of them together."""
+    agents = [ExampleAgent(label) for label in ("Agent 1", "Agent 2")]
+
+    # Calling each agent gives an awaitable; gather awaits them concurrently
+    await asyncio.gather(*(agent() for agent in agents))
+
+
+asyncio.run(run_concurrent_agents())
--- a/docs/tutorial/en/src/workflow_conversation.py
+++ b/docs/tutorial/en/src/workflow_conversation.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+"""
+.. _conversation:
+
+Conversation
+======================
+
+Conversation is a design pattern in which agents exchange and share
+information with each other, most commonly in game playing, chatbot, and
+multi-agent discussion scenarios.
+
+In AgentScope, the conversation is built upon the **explicit message
+exchange**. In this tutorial, we will demonstrate how to build a conversation
+
+- between a user and an agent (chatbot)
+- between multiple agents (game playing, discussion, etc.)
+
+Their main difference lies in
+
+- how the **prompt is constructed**, and
+- how the information is **propagated/shared** among agents.
+"""
+import asyncio
+import json
+import os
+
+from agentscope.agent import ReActAgent, UserAgent
+from agentscope.memory import InMemoryMemory
+from agentscope.formatter import (
+    DashScopeChatFormatter,
+    DashScopeMultiAgentFormatter,
+)
+from agentscope.model import DashScopeChatModel
+from agentscope.message import Msg
+from agentscope.pipeline import MsgHub
+from agentscope.tool import Toolkit
+
+# %%
+# User-Agent Conversation
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# User-agent conversation, also known as chatbot, is the most common usage
+# scenario of LLM-empowered agents, and the design target of most LLM APIs.
+# Such conversation features only two participants: a user and an agent.
+#
+# In AgentScope, the formatters with **"Chat"** in their names are designed for
+# user-agent conversation, such as ``DashScopeChatFormatter``,
+# ``AnthropicChatFormatter``, etc.
+# They use the ``role`` field in the message to distinguish the user and the
+# agent, and format the messages accordingly.
+#
+# Here we build a simple conversation between agent ``Friday`` and user.
+#
+# .. tip:: AgentScope provides a built-in ``UserAgent`` class for human-in-the-loop (HITL) interaction. Refer to :ref:`user-agent` for more details.
+#
+
+# Create the assistant agent; indexing ``os.environ`` raises ``KeyError``
+# when ``DASHSCOPE_API_KEY`` is not set
+friday = ReActAgent(
+    name="Friday",
+    sys_prompt="You're a helpful assistant named Friday",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+    ),
+    formatter=DashScopeChatFormatter(),  # The formatter for user-agent conversation
+    memory=InMemoryMemory(),
+    toolkit=Toolkit(),
+)
+
+# Create a user agent
+user = UserAgent(name="User")
+
+# %%
+# Now, we can program the conversation by exchanging messages between these two agents until the user types "exit" to end the conversation.
+#
+# .. code-block:: python
+#
+#     async def run_conversation() -> None:
+#         """Run a simple conversation between Friday and User."""
+#         msg = None
+#         while True:
+#             msg = await friday(msg)
+#             msg = await user(msg)
+#             if msg.get_text_content() == "exit":
+#                 break
+#
+#     asyncio.run(run_conversation())
+#
+
+# %%
+# More than Two Agents
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# As stated in the beginning, we demonstrate how to build conversation with multiple agents in terms of **prompt construction** and **information sharing**.
+#
+# Prompt Construction
+# -------------------------------
+# In AgentScope, we provide built-in formatters for multi-agent conversation, featuring **"MultiAgent"** in their names, such as ``DashScopeMultiAgentFormatter``, ``AnthropicMultiAgentFormatter``, etc.
+#
+# Specifically, they use the ``name`` field in the message to distinguish different agents, and format the conversation history into a single user message.
+# Taking ``DashScopeMultiAgentFormatter`` as an example:
+#
+# .. tip:: More details about the formatter can be found in :ref:`prompt`.
+#
+
+
+async def example_multi_agent_prompt() -> None:
+    """Format a small multi-agent history and print the resulting prompt."""
+    history = [
+        Msg("system", "You're a helpful assistant named Bob.", "system"),
+        Msg("Alice", "Hi!", "user"),
+        Msg("Bob", "Hi! Nice to meet you guys.", "assistant"),
+        Msg("Charlie", "Me too! I'm Charlie, by the way.", "assistant"),
+    ]
+
+    prompt = await DashScopeMultiAgentFormatter().format(history)
+
+    print("Formatted prompt:")
+    prompt_json = json.dumps(prompt, indent=4, ensure_ascii=False)
+    print(prompt_json)
+
+    # The content of the combined user message (the second prompt entry) is
+    # printed separately for better understanding:
+    print("-------------")
+    print("Combined message")
+    combined_content = prompt[1]["content"]
+    print(combined_content)
+
+
+asyncio.run(example_multi_agent_prompt())
+
+
+# %%
+# Message Sharing
+# -------------------------------
+# In multi-agent conversation, exchanging messages explicitly may not be efficient and convenient, especially when broadcasting messages among multiple agents.
+#
+# Therefore, AgentScope provides an async context manager named ``MsgHub`` to simplify the operation of broadcasting messages.
+# Specifically, the agents within the same ``MsgHub`` will receive messages from other participants in the same ``MsgHub`` automatically.
+#
+
+# One chat model and one multi-agent formatter instance are shared by the
+# three agents below
+model = DashScopeChatModel(
+    model_name="qwen-max",
+    api_key=os.environ["DASHSCOPE_API_KEY"],
+)
+formatter = DashScopeMultiAgentFormatter()
+
+# Each agent gets its own toolkit and its own in-memory memory
+alice = ReActAgent(
+    name="Alice",
+    sys_prompt="You're a student named Alice.",
+    model=model,
+    formatter=formatter,
+    toolkit=Toolkit(),
+    memory=InMemoryMemory(),
+)
+
+bob = ReActAgent(
+    name="Bob",
+    sys_prompt="You're a student named Bob.",
+    model=model,
+    formatter=formatter,
+    toolkit=Toolkit(),
+    memory=InMemoryMemory(),
+)
+
+charlie = ReActAgent(
+    name="Charlie",
+    sys_prompt="You're a student named Charlie.",
+    model=model,
+    formatter=formatter,
+    toolkit=Toolkit(),
+    memory=InMemoryMemory(),
+)
+
+
+async def example_msghub() -> None:
+    """Let Alice, Bob and Charlie speak in turn inside a single MsgHub."""
+    hub = MsgHub(
+        [alice, bob, charlie],
+        announcement=Msg(
+            "system",
+            "Now you meet each other with a brief self-introduction.",
+            "system",
+        ),
+    )
+    async with hub:
+        # The agents are called one after another, in a fixed order
+        for speaker in (alice, bob, charlie):
+            await speaker()
+
+
+asyncio.run(example_msghub())
+
+# %%
+# Now we print the memory of Alice to check if her memory is updated correctly.
+#
+
+
+async def example_memory() -> None:
+    """Show every message stored in Alice's memory as ``name: text``."""
+    print("Memory of Alice:")
+    remembered = await alice.memory.get_memory()
+    for entry in remembered:
+        print(f"{entry.name}: {entry.get_text_content()}")
+
+
+asyncio.run(example_memory())
+
+# %%
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# - :ref:`prompt`
+# - :ref:`pipeline`
+#
--- a/docs/tutorial/en/src/workflow_handoffs.py
+++ b/docs/tutorial/en/src/workflow_handoffs.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+"""
+Handoffs
+========================================
+
+.. figure:: ../../_static/images/handoffs.png
+   :width: 80%
+   :align: center
+   :alt: Orchestrator-Workers Workflow
+
+   *Handoffs example*
+
+It's very simple to implement the Orchestrator-Workers workflow with tool calls in AgentScope.
+First, we create a function to allow the orchestrator to create workers dynamically.
+
+"""
+
+import asyncio
+import os
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import (
+    ToolResponse,
+    Toolkit,
+    execute_python_code,
+)
+
+
+# The tool function to create a worker
+async def create_worker(
+    task_description: str,
+) -> ToolResponse:
+    """Create a worker to finish the given task. The worker is equipped with python execution tool.
+
+    Args:
+        task_description (``str``):
+            The description of the task to be finished by the worker.
+    """
+    # NOTE(review): this function is registered as a tool elsewhere in the
+    # file, so the docstring above is presumably used as the tool description
+    # shown to the model - confirm before rewording it.
+
+    # The worker's only tool is Python code execution
+    worker_toolkit = Toolkit()
+    worker_toolkit.register_tool_function(execute_python_code)
+
+    # A fresh worker agent is built for every call
+    worker_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    )
+    worker = ReActAgent(
+        name="Worker",
+        sys_prompt="You're a worker agent. Your target is to finish the given task.",
+        model=worker_model,
+        formatter=DashScopeChatFormatter(),
+        toolkit=worker_toolkit,
+    )
+
+    # Send the task as a user message and wait for the worker's final reply
+    reply = await worker(Msg("user", task_description, "user"))
+
+    # Only the text blocks of the reply are passed back as the tool result
+    text_blocks = reply.get_content_blocks("text")
+    return ToolResponse(content=text_blocks)
+
+
+async def run_handoffs() -> None:
+    """Give one task to an orchestrator agent that can create workers."""
+    # The orchestrator's only tool is the worker-creating function
+    orchestrator_toolkit = Toolkit()
+    orchestrator_toolkit.register_tool_function(create_worker)
+
+    orchestrator_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    )
+    orchestrator = ReActAgent(
+        name="Orchestrator",
+        sys_prompt="You're an orchestrator agent. Your target is to finish the given task by decomposing it into smaller tasks and creating workers to finish them.",
+        model=orchestrator_model,
+        memory=InMemoryMemory(),
+        formatter=DashScopeChatFormatter(),
+        toolkit=orchestrator_toolkit,
+    )
+
+    # The task description
+    task_description = "Execute hello world in Python"
+
+    # Send the task to the orchestrator as a user message
+    task_msg = Msg("user", task_description, "user")
+    await orchestrator(task_msg)
+
+
+asyncio.run(run_handoffs())
--- a/docs/tutorial/en/src/workflow_multiagent_debate.py
+++ b/docs/tutorial/en/src/workflow_multiagent_debate.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+.. _multiagent-debate:
+
+Multi-Agent Debate
+========================
+
+Debate workflow simulates a multi-turn discussion between different agents, mostly several solvers and an aggregator.
+Typically, the solvers generate and exchange their answers, while the aggregator collects and summarizes the answers.
+
+We implement the examples in `EMNLP 2024`_, where two debater agents will discuss a topic in a fixed order, and express their
+arguments based on the previous debate history.
+At each round a moderator agent will decide whether the correct answer can be obtained in the current iteration.
+"""
+import asyncio
+import os
+
+from pydantic import Field, BaseModel
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import (
+    DashScopeMultiAgentFormatter,
+    DashScopeChatFormatter,
+)
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.pipeline import MsgHub
+
+# Prepare a topic
+topic = (
+    "The two circles are externally tangent and there is no relative sliding. "
+    "The radius of circle A is 1/3 the radius of circle B. Circle A rolls "
+    "around circle B one trip back to its starting point. How many times will "
+    "circle A revolve in total?"
+)
+
+
+# Create two debater agents, Alice and Bob, who will discuss the topic.
+def create_solver_agent(name: str) -> ReActAgent:
+    """Build a debater agent with the given name for the module-level topic."""
+    # The system prompt introduces the debater and states the debate topic
+    debater_prompt = (
+        f"You're a debater named {name}. Hello and welcome to the "
+        "debate competition. It's unnecessary to fully agree with "
+        "each other's perspectives, as our objective is to find "
+        "the correct answer. The debate topic is stated as "
+        f"follows: {topic}."
+    )
+    debater_model = DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    )
+    return ReActAgent(
+        name=name,
+        sys_prompt=debater_prompt,
+        model=debater_model,
+        formatter=DashScopeMultiAgentFormatter(),
+    )
+
+
+alice, bob = [create_solver_agent(name) for name in ["Alice", "Bob"]]
+
+# Create a moderator agent
+moderator = ReActAgent(
+    name="Aggregator",
+    # The debate topic is interpolated into the prompt between two fence lines
+    sys_prompt=f"""You're a moderator. There will be two debaters involved in a debate competition. They will present their answer and discuss their perspectives on the topic:
+``````
+{topic}
+``````
+At the end of each round, you will evaluate both sides' answers and decide which one is correct.""",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    ),
+    # Use multiagent formatter because the moderator will receive messages from more than a user and an assistant
+    formatter=DashScopeMultiAgentFormatter(),
+)
+
+
+# A structured output model for the moderator
+class JudgeModel(BaseModel):
+    """The structured output model for the moderator."""
+
+    # NOTE(review): the docstring and the field descriptions are presumably
+    # part of the schema given to the model - confirm before rewording them.
+
+    # Required flag; the debate loop in this file stops once it is truthy
+    finished: bool = Field(
+        description="Whether the debate is finished.",
+    )
+    # Optional answer text, defaulting to None
+    correct_answer: str | None = Field(
+        description="The correct answer to the debate topic, only if the debate is finished. Otherwise, leave it as None.",
+        default=None,
+    )
+
+
+async def run_multiagent_debate() -> None:
+    """Repeat debate rounds until the moderator reports the debate as finished."""
+    while True:
+        # Replies produced inside the MsgHub are broadcast to all participants.
+        async with MsgHub(participants=[alice, bob, moderator]):
+            affirmative_prompt = Msg(
+                "user",
+                "You are affirmative side, Please express your viewpoints.",
+                "user",
+            )
+            await alice(affirmative_prompt)
+
+            negative_prompt = Msg(
+                "user",
+                "You are negative side. You disagree with the affirmative side. Provide your reason and answer.",
+                "user",
+            )
+            await bob(negative_prompt)
+
+        # Alice and Bob don't need to see the moderator's message, so the
+        # moderator is called outside the MsgHub.
+        judge_prompt = Msg(
+            "user",
+            "Now you have heard the answers from the others, have the debate finished, and can you get the correct answer?",
+            "user",
+        )
+        verdict = await moderator(judge_prompt, structured_model=JudgeModel)
+
+        # Start another round unless the structured output says "finished"
+        if not verdict.metadata.get("finished"):
+            continue
+
+        print(
+            "\nThe debate is finished, and the correct answer is: ",
+            verdict.metadata.get("correct_answer"),
+        )
+        break
+
+
+asyncio.run(run_multiagent_debate())
+
+
+# %%
+# Further Reading
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# - :ref:`pipeline`
+#
+# .. _EMNLP 2024:
+# Encouraging Divergent Thinking in Large Language Models through Multi-Agent Debate. EMNLP 2024.
+#
--- a/docs/tutorial/en/src/workflow_routing.py
+++ b/docs/tutorial/en/src/workflow_routing.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+"""
+.. _routing:
+
+Routing
+==========================
+There are two ways to implement routing in AgentScope, both simple and easy to implement:
+
+- Routing by structured output
+- Routing by tool calls
+
+.. tip:: Considering there is no unified standard/definition for agent routing, we follow the setting in `Building effective agents <https://www.anthropic.com/engineering/building-effective-agents>`_
+
+Routing by Structured Output
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In this way, the structured output of the agent directly determines which agent the message is routed to.
+
+Initialize a routing agent
+"""
+import asyncio
+import json
+import os
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+from agentscope.agent import ReActAgent
+from agentscope.formatter import DashScopeChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import DashScopeChatModel
+from agentscope.tool import Toolkit, ToolResponse
+
+router = ReActAgent(
+    name="Router",
+    sys_prompt="You're a routing agent. Your target is to route the user query to the right follow-up task.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    ),
+    formatter=DashScopeChatFormatter(),
+)
+
+
+# Structured-output schema for the router: which follow-up task to take, plus an optional description of that task
+class RoutingChoice(BaseModel):
+    your_choice: Literal[  # required field: the ``Field`` below sets no default
+        "Content Generation",
+        "Programming",
+        "Information Retrieval",
+        None,  # "no routing": the task is too simple or no listed task fits (see the description)
+    ] = Field(
+        description="Choose the right follow-up task, and choose ``None`` if the task is too simple or no suitable task",
+    )
+    task_description: str | None = Field(  # optional; ``None`` when omitted
+        description="The task description",
+        default=None,
+    )
+
+
+async def example_router_explicit() -> None:
+    """Example of explicit routing with structured output."""
+    msg_user = Msg(
+        "user",
+        "Help me to write a poem",
+        "user",
+    )
+
+    # Route the query
+    msg_res = await router(
+        msg_user,
+        structured_model=RoutingChoice,
+    )
+
+    # The structured output is stored in the metadata field
+    print("The structured output:")
+    print(json.dumps(msg_res.metadata, indent=4, ensure_ascii=False))
+
+
+asyncio.run(example_router_explicit())  # start an event loop and run the explicit-routing example to completion
+
+# %%
+# Routing by Tool Calls
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# Another way is to wrap each downstream agent in a tool function, so that the routing agent decides which tool to call based on the user query.
+#
+# We first define several tool functions:
+#
+
+
+async def generate_python(demand: str) -> ToolResponse:
+    """Generate Python code based on the demand.
+
+    Args:
+        demand (``str``):
+            The demand for the Python code.
+    """
+    # An example demand agent
+    python_agent = ReActAgent(
+        name="PythonAgent",
+        sys_prompt="You're a Python expert, your target is to generate Python code based on the demand.",
+        model=DashScopeChatModel(
+            model_name="qwen-max",
+            api_key=os.environ["DASHSCOPE_API_KEY"],
+            stream=False,
+        ),
+        memory=InMemoryMemory(),
+        formatter=DashScopeChatFormatter(),
+        toolkit=Toolkit(),
+    )
+    msg_res = await python_agent(Msg("user", demand, "user"))
+
+    return ToolResponse(
+        content=msg_res.get_content_blocks("text"),
+    )
+
+
+# Fake some other tool functions for demonstration purposes
+async def generate_poem(demand: str) -> ToolResponse:
+    """Generate a poem based on the demand.
+
+    Args:
+        demand (``str``):
+            The demand for the poem.
+    """
+    pass
+
+
+async def web_search(query: str) -> ToolResponse:
+    """Search the web for the query.
+
+    Args:
+        query (``str``):
+            The query to search.
+    """
+    pass
+
+
+# %%
+# After that, we define a routing agent and equip it with the above tool functions.
+#
+
+toolkit = Toolkit()
+toolkit.register_tool_function(generate_python)
+toolkit.register_tool_function(generate_poem)
+toolkit.register_tool_function(web_search)
+
+# Initialize the routing agent with the toolkit
+router_implicit = ReActAgent(
+    name="Router",
+    sys_prompt="You're a routing agent. Your target is to route the user query to the right follow-up task.",
+    model=DashScopeChatModel(
+        model_name="qwen-max",
+        api_key=os.environ["DASHSCOPE_API_KEY"],
+        stream=False,
+    ),
+    formatter=DashScopeChatFormatter(),
+    toolkit=toolkit,
+    memory=InMemoryMemory(),
+)
+
+
+async def example_router_implicit() -> None:
+    """Example of implicit routing with tool calls."""
+    msg_user = Msg(
+        "user",
+        "Help me to generate a quick sort function in Python",
+        "user",
+    )
+
+    # Route the query
+    await router_implicit(msg_user)
+
+
+asyncio.run(example_router_implicit())  # start an event loop and run the implicit-routing example to completion