# -*- coding: utf-8 -*-
"""
.. _model:

Model
====================

In this tutorial, we introduce the model APIs integrated in AgentScope, how to use them and how to integrate new model APIs.
The supported model APIs and providers include:

.. list-table::
    :header-rows: 1

    * - API
      - Class
      - Compatible
      - Streaming
      - Tools
      - Vision
      - Reasoning
    * - OpenAI
      - ``OpenAIChatModel``
      - vLLM, DeepSeek
      - ✅
      - ✅
      - ✅
      - ✅
    * - DashScope
      - ``DashScopeChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅
    * - Anthropic
      - ``AnthropicChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅
    * - Gemini
      - ``GeminiChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅
    * - Ollama
      - ``OllamaChatModel``
      -
      - ✅
      - ✅
      - ✅
      - ✅

.. note:: When using vLLM, you need to configure the appropriate tool calling parameters for different models during deployment, such as ``--enable-auto-tool-choice``, ``--tool-call-parser``, etc. For more details, refer to the `official vLLM documentation <https://docs.vllm.ai/en/latest/features/tool_calling.html>`_.

.. note:: For OpenAI-compatible models (e.g. vLLM, Deepseek), developers can use the ``OpenAIChatModel`` class, and specify the API endpoint by the ``client_kwargs`` parameter: ``client_kwargs={"base_url": "http://your-api-endpoint"}``. For example:

    .. code-block:: python

        OpenAIChatModel(client_kwargs={"base_url": "http://localhost:8000/v1"})

.. note:: Model behavior parameters (such as temperature, maximum length, etc.) can be preset in the constructor function via the ``generate_kwargs`` parameter. For example:

    .. code-block:: python

        OpenAIChatModel(generate_kwargs={"temperature": 0.3, "max_tokens": 1000})

To provide unified model interfaces, the above model classes has the following common methods:

- The first three arguments of the ``__call__`` method are ``messages`` , ``tools`` and ``tool_choice``, representing the input messages, JSON schema of tool functions, and tool selection mode, respectively.
- The return type are either a ``ChatResponse`` instance or an async generator of ``ChatResponse`` in streaming mode.

.. note:: Different model APIs differ in the input message format, refer to :ref:`prompt` for more details.

The ``ChatResponse`` instance contains the generated thinking/text/tool use content, identity, created time and usage information.
"""
import asyncio
import json
import os

from agentscope.message import TextBlock, ToolUseBlock, ThinkingBlock, Msg
from agentscope.model import ChatResponse, DashScopeChatModel

response = ChatResponse(
    content=[
        ThinkingBlock(
            type="thinking",
            thinking="I should search for AgentScope on Google.",
        ),
        TextBlock(type="text", text="I'll search for AgentScope on Google."),
        ToolUseBlock(
            type="tool_use",
            id="642n298gjna",
            name="google_search",
            input={"query": "AgentScope?"},
        ),
    ],
)

print(response)

# %%
# Taking ``DashScopeChatModel`` as an example, we can use it to create a chat model instance and call it with messages and tools:


async def example_model_call() -> None:
    """An example of using the DashScopeChatModel."""
    model = DashScopeChatModel(
        model_name="qwen-max",
        api_key=os.environ["DASHSCOPE_API_KEY"],
        stream=False,
    )

    res = await model(
        messages=[
            {"role": "user", "content": "Hi!"},
        ],
    )

    # You can directly create a ``Msg`` object with the response content
    msg_res = Msg("Friday", res.content, "assistant")

    print("The response:", res)
    print("The response as Msg:", msg_res)


asyncio.run(example_model_call())

# %%
# Streaming
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To enable streaming model, set the ``stream`` parameter in the model constructor to ``True``.
# When streaming is enabled, the ``__call__`` method will return an **async generator** that yields ``ChatResponse`` instances as they are generated by the model.
#
# .. note:: The streaming mode in AgentScope is designed to be **cumulative**, meaning the content in each chunk contains all the previous content plus the newly generated content.
#


async def example_streaming() -> None:
    """An example of using the streaming model."""
    model = DashScopeChatModel(
        model_name="qwen-max",
        api_key=os.environ["DASHSCOPE_API_KEY"],
        stream=True,
    )

    generator = await model(
        messages=[
            {
                "role": "user",
                "content": "Count from 1 to 20, and just report the number without any other information.",
            },
        ],
    )
    print("The type of the response:", type(generator))

    i = 0
    async for chunk in generator:
        print(f"Chunk {i}")
        print(f"\ttype: {type(chunk.content)}")
        print(f"\t{chunk}\n")
        i += 1


asyncio.run(example_streaming())

# %%
# Reasoning
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# AgentScope supports reasoning models by providing the ``ThinkingBlock``.
#


async def example_reasoning() -> None:
    """An example of using the reasoning model."""
    model = DashScopeChatModel(
        model_name="qwen-turbo",
        api_key=os.environ["DASHSCOPE_API_KEY"],
        enable_thinking=True,
    )

    res = await model(
        messages=[
            {"role": "user", "content": "Who am I?"},
        ],
    )

    last_chunk = None
    async for chunk in res:
        last_chunk = chunk
    print("The final response:")
    print(last_chunk)


asyncio.run(example_reasoning())

# %%
# Tools API
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Different model providers differ in their tools APIs, e.g. the tools JSON schema, the tool call/response format.
# To provide a unified interface, AgentScope solves the problem by:
#
# - Providing unified tool call block :ref:`ToolUseBlock <tool-block>` and tool response block :ref:`ToolResultBlock <tool-block>`, respectively.
# - Providing a unified tools interface in the ``__call__`` method of the model classes, that accepts a list of tools JSON schemas as follows:
#

json_schemas = [
    {
        "type": "function",
        "function": {
            "name": "google_search",
            "description": "Search for a query on Google.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query.",
                    },
                },
                "required": ["query"],
            },
        },
    },
]

# %%
# Further Reading
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# - :ref:`message`
# - :ref:`prompt`
#