# -*- coding: utf-8 -*-
"""
.. _model:

Model
====================

In this tutorial, we introduce the model APIs integrated in AgentScope, how to use them and how to integrate new model APIs.
The supported model APIs and providers include:

.. list-table::
    :header-rows: 1

    * - API
      - Class
      - Compatible
      - Streaming
      - Tools
      - Vision
      - Reasoning
    * - OpenAI
      - ``OpenAIChatModel``
      - vLLM, DeepSeek
      - ✅
      - ✅
      - ✅
      - ✅
    * - DashScope
      - ``DashScopeChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅
    * - Anthropic
      - ``AnthropicChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅
    * - Gemini
      - ``GeminiChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅
    * - Ollama
      - ``OllamaChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅

.. note:: When using vLLM, you need to configure the appropriate tool calling parameters for different models during deployment, such as ``--enable-auto-tool-choice``, ``--tool-call-parser``, etc. For more details, refer to the `official vLLM documentation <https://docs.vllm.ai/en/latest/features/tool_calling.html>`_.

.. note:: For OpenAI-compatible models (e.g. vLLM, DeepSeek), developers can use the ``OpenAIChatModel`` class, and specify the API endpoint by the ``client_kwargs`` parameter: ``client_kwargs={"base_url": "http://your-api-endpoint"}``. For example:

    .. code-block:: python

        OpenAIChatModel(client_kwargs={"base_url": "http://localhost:8000/v1"})

.. note:: Model behavior parameters (such as temperature, maximum length, etc.) can be preset in the constructor function via the ``generate_kwargs`` parameter. For example:

    .. code-block:: python

        OpenAIChatModel(generate_kwargs={"temperature": 0.3, "max_tokens": 1000})

To provide unified model interfaces, the above model classes have the following common methods:

- The first three arguments of the ``__call__`` method are ``messages``, ``tools`` and ``tool_choice``, representing the input messages, JSON schema of tool functions, and tool selection mode, respectively.
- The return type is either a ``ChatResponse`` instance or an async generator of ``ChatResponse`` in streaming mode.

.. note:: Different model APIs differ in the input message format; refer to :ref:`prompt` for more details.

The ``ChatResponse`` instance contains the generated thinking/text/tool use content, identity, created time and usage information.
"""
import asyncio
import json
import os

from agentscope.message import TextBlock, ToolUseBlock, ThinkingBlock, Msg
from agentscope.model import ChatResponse, DashScopeChatModel

# Build a ``ChatResponse`` by hand to show the three kinds of content blocks
# it can carry: thinking, text and tool use.
response = ChatResponse(
    content=[
        ThinkingBlock(
            type="thinking",
            thinking="I should search for AgentScope on Google.",
        ),
        TextBlock(type="text", text="I'll search for AgentScope on Google."),
        ToolUseBlock(
            type="tool_use",
            id="642n298gjna",
            name="google_search",
            input={"query": "AgentScope?"},
        ),
    ],
)

print(response)

# %%
# Taking ``DashScopeChatModel`` as an example, we can use it to create a chat model instance and call it with messages and tools:


async def example_model_call() -> None:
    """An example of using the DashScopeChatModel.

    Calls the model once in non-streaming mode, then prints the response
    itself and the same content wrapped in a ``Msg`` object.

    Raises:
        KeyError: If the ``DASHSCOPE_API_KEY`` environment variable is unset.
    """
    model = DashScopeChatModel(
        model_name="qwen-max",
        api_key=os.environ["DASHSCOPE_API_KEY"],
        stream=False,
    )

    res = await model(
        messages=[
            {"role": "user", "content": "Hi!"},
        ],
    )

    # You can directly create a ``Msg`` object with the response content
    msg_res = Msg("Friday", res.content, "assistant")

    print("The response:", res)
    print("The response as Msg:", msg_res)


asyncio.run(example_model_call())

# %%
# Streaming
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To enable streaming mode, set the ``stream`` parameter in the model constructor to ``True``.
# When streaming is enabled, the ``__call__`` method will return an **async generator** that yields ``ChatResponse`` instances as they are generated by the model.
#
# .. note:: The streaming mode in AgentScope is designed to be **cumulative**, meaning the content in each chunk contains all the previous content plus the newly generated content.
#


async def example_streaming() -> None:
    """An example of using the streaming model.

    Calls the model with ``stream=True`` and prints every chunk yielded by
    the returned async generator, together with its index and content type.

    Raises:
        KeyError: If the ``DASHSCOPE_API_KEY`` environment variable is unset.
    """
    model = DashScopeChatModel(
        model_name="qwen-max",
        api_key=os.environ["DASHSCOPE_API_KEY"],
        stream=True,
    )

    generator = await model(
        messages=[
            {
                "role": "user",
                "content": "Count from 1 to 20, and just report the number without any other information.",
            },
        ],
    )
    print("The type of the response:", type(generator))

    # ``enumerate`` does not work on async generators, so count manually.
    i = 0
    async for chunk in generator:
        print(f"Chunk {i}")
        print(f"\ttype: {type(chunk.content)}")
        print(f"\t{chunk}\n")
        i += 1


asyncio.run(example_streaming())

# %%
# Reasoning
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# AgentScope supports reasoning models by providing the ``ThinkingBlock``.
#


async def example_reasoning() -> None:
    """An example of using the reasoning model.

    Calls the model with ``enable_thinking=True``, drains the streamed
    response and prints only the last chunk.

    Raises:
        KeyError: If the ``DASHSCOPE_API_KEY`` environment variable is unset.
    """
    model = DashScopeChatModel(
        model_name="qwen-turbo",
        api_key=os.environ["DASHSCOPE_API_KEY"],
        enable_thinking=True,
        # The response is consumed with ``async for`` below, so request
        # streaming explicitly, as the other examples do.
        stream=True,
    )

    res = await model(
        messages=[
            {"role": "user", "content": "Who am I?"},
        ],
    )

    # Keep only the final chunk; it stays ``None`` if nothing is yielded.
    last_chunk = None
    async for chunk in res:
        last_chunk = chunk
    print("The final response:")
    print(last_chunk)


asyncio.run(example_reasoning())

# %%
# Tools API
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Different model providers differ in their tools APIs, e.g. the tools JSON schema, the tool call/response format.
# To provide a unified interface, AgentScope solves the problem by:
#
# - Providing unified tool call block ``ToolUseBlock`` and tool response block ``ToolResultBlock`` (see :ref:`message`), respectively.
# - Providing a unified tools interface in the ``__call__`` method of the model classes, that accepts a list of tools JSON schemas as follows:
#

json_schemas = [
    {
        "type": "function",
        "function": {
            "name": "google_search",
            "description": "Search for a query on Google.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query.",
                    },
                },
                "required": ["query"],
            },
        },
    },
]

# %%
# Further Reading
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# - :ref:`message`
# - :ref:`prompt`
#