jxnl · jxnl · Apr 8, 2024 · Apr 8, 2024 · Apr 8, 2024 · Apr 10, 2024
diff --git a/instructor/client.py b/instructor/client.py
@@ -15,8 +15,12 @@
     Union,
     Awaitable,
     AsyncGenerator,
+    Optional,
+    Literal,
+    Coroutine,
     Any,
 )
+from tenacity import Retrying, AsyncRetrying
 from typing_extensions import Self
 from pydantic import BaseModel
 from instructor.dsl.partial import Partial
@@ -58,16 +62,39 @@ def completions(self) -> Self:
     def messages(self) -> Self:
         return self
 
+    @overload
+    def create(
+        self,
+        response_model: Type[T],
+        messages: List[ChatCompletionMessageParam],
+        n: Optional[Literal[1]] = None,
+        max_retries: int | Retrying = 3,
+        validation_context: dict | None = None,
+        **kwargs,
+    ) -> T: ...  # n omitted, None, or the literal 1: typed as a single model
+
+    @overload
+    def create(
+        self,
+        response_model: Type[T],
+        messages: List[ChatCompletionMessageParam],
+        n: int,
+        max_retries: int | Retrying = 3,
+        validation_context: dict | None = None,
+        **kwargs,
+    ) -> List[T]: ...  # any other int n: typed as a list of models
+
     # TODO: we should overload a case where response_model is None
     def create(
         self,
         response_model: Type[T],
         messages: List[ChatCompletionMessageParam],
-        max_retries: int = 3,
+        n: Optional[int] = None,
+        max_retries: int | Retrying = 3,
         validation_context: dict | None = None,
         **kwargs,
-    ) -> T:
-        kwargs = self.handle_kwargs(kwargs)
+    ) -> T | List[T]:
+        kwargs = self.handle_kwargs(kwargs, n=n)  # merge client defaults, forward n
 
         return self.create_fn(
             response_model=response_model,
@@ -140,10 +167,12 @@ def create_with_completion(
         )
         return model, model._raw_response
 
-    def handle_kwargs(self, kwargs: dict):
+    def handle_kwargs(self, kwargs: dict, n: Optional[int] = None):  # fill client defaults, then forward n
         for key, value in self.kwargs.items():
             if key not in kwargs:
                 kwargs[key] = value
+        if n is not None and self.provider != Provider.ANTHROPIC:  # n is never forwarded for Anthropic
+            kwargs["n"] = n  # explicit n only: never clobber a kwargs/default "n" with None
         return kwargs
 
 
@@ -168,15 +197,38 @@ def __init__(
         self.kwargs = kwargs
         self.provider = provider
 
+    @overload
     async def create(
         self,
+        response_model: Type[T],
         messages: List[ChatCompletionMessageParam],
+        n: Optional[Literal[1]] = 1,
+        max_retries: int | AsyncRetrying = 3,
+        validation_context: dict | None = None,
+        **kwargs,
+    ) -> T: ...  # `async def` already implies Coroutine[Any, Any, T]
+
+    @overload
+    async def create(
+        self,
         response_model: Type[T],
+        messages: List[ChatCompletionMessageParam],
+        n: int,
+        max_retries: int | AsyncRetrying = 3,
         validation_context: dict | None = None,
-        max_retries: int = 3,
         **kwargs,
-    ) -> T:
-        kwargs = self.handle_kwargs(kwargs)
+    ) -> List[T]: ...  # awaited result type for a general int n
+
+    async def create(
+        self,
+        response_model: Type[T],
+        messages: List[ChatCompletionMessageParam],
+        n: Optional[int] = 1,
+        max_retries: int | AsyncRetrying = 3,
+        validation_context: dict | None = None,
+        **kwargs,
+    ) -> T | List[T]:  # annotate the awaited value, not the coroutine object
+        kwargs = self.handle_kwargs(kwargs, n=n)
         return await self.create_fn(
             response_model=response_model,
             validation_context=validation_context,
@@ -190,7 +242,6 @@ async def create_partial(
         response_model: Type[T],
         messages: List[ChatCompletionMessageParam],
         validation_context: dict | None = None,
-        max_retries: int = 3,
         **kwargs,
     ) -> AsyncGenerator[T, None]:
         assert self.provider != Provider.ANTHROPIC, "Anthropic doesn't support partial"
@@ -200,7 +251,6 @@ async def create_partial(
         async for item in await self.create_fn(
             response_model=instructor.Partial[response_model],  # type: ignore
             validation_context=validation_context,
-            max_retries=max_retries,
             messages=messages,
             **kwargs,
         ):
@@ -211,7 +261,6 @@ async def create_iterable(
         response_model: Type[T],
         messages: List[ChatCompletionMessageParam],
         validation_context: dict | None = None,
-        max_retries: int = 3,
         **kwargs,
     ) -> AsyncGenerator[T, None]:
         assert self.provider != Provider.ANTHROPIC, "Anthropic doesn't support iterable"
@@ -221,7 +270,6 @@ async def create_iterable(
         async for item in await self.create_fn(
             response_model=Iterable[response_model],
             validation_context=validation_context,
-            max_retries=max_retries,
             messages=messages,
             **kwargs,
         ):
@@ -232,7 +280,7 @@ async def create_with_completion(
         response_model: Type[T],
         messages: List[ChatCompletionMessageParam],
         validation_context: dict | None = None,
-        max_retries: int = 3,
+        max_retries: int | AsyncRetrying = 3,
         **kwargs,
     ) -> Tuple[T, dict]:
         kwargs = self.handle_kwargs(kwargs)

diff --git a/instructor/function_calls.py b/instructor/function_calls.py
@@ -3,15 +3,15 @@
 from functools import wraps
 from pydantic import BaseModel, create_model
 from openai.types.chat import ChatCompletion
-from typing import Any, Dict, Optional, Type
+from typing import Any, Dict, Optional, Type, List
 from instructor.mode import Mode
 from instructor.utils import extract_json_from_codeblock
 from instructor.exceptions import IncompleteOutputException
 from instructor.mode import Mode
 import logging
 
 
-T = TypeVar("T")
+T = TypeVar("T", bound=BaseModel)
 
 logger = logging.getLogger("instructor")
 
@@ -76,7 +76,7 @@ def from_response(
         validation_context: Optional[Dict[str, Any]] = None,
         strict: Optional[bool] = None,
         mode: Mode = Mode.TOOLS,
-    ) -> BaseModel:
+    ) -> BaseModel | List[BaseModel]:
         """Execute the function from the response of an openai chat completion
 
         Parameters:
@@ -116,9 +116,15 @@ def parse_anthropic_tools(
         validation_context: Optional[Dict[str, Any]] = None,
         strict: Optional[bool] = None,
     ) -> BaseModel:
+        from anthropic.types import Message
+
+        assert isinstance(completion, Message)
+
         tool_call = [c.input for c in completion.content if c.type == "tool_use"][0]
 
-        return cls.model_validate(tool_call, context=validation_context, strict=strict)  # type:ignore
+        return cls.model_validate(
+            tool_call, context=validation_context, strict=strict
+        )  # type:ignore
 
     @classmethod
     def parse_anthropic_json(
@@ -143,53 +149,64 @@ def parse_functions(
         completion: ChatCompletion,
         validation_context: Optional[Dict[str, Any]] = None,
         strict: Optional[bool] = None,
-    ) -> BaseModel:
-        message = completion.choices[0].message
-        assert (
-            message.function_call.name == cls.openai_schema["name"]  # type: ignore[index]
-        ), "Function name does not match"
-        return cls.model_validate_json(
-            message.function_call.arguments,  # type: ignore[attr-defined]
-            context=validation_context,
-            strict=strict,
-        )
+    ) -> BaseModel | List[BaseModel]:
+        models = []
+        for choice in completion.choices:
+            message = choice.message
+            assert (
+                message.function_call.name == cls.openai_schema["name"]  # type: ignore[index]
+            ), "Function name does not match"
+            model = cls.model_validate_json(
+                message.function_call.arguments,  # type: ignore[attr-defined]
+                context=validation_context,
+                strict=strict,
+            )
+            models.append(model)
+        return models if len(models) > 1 else models[0]
 
     @classmethod
     def parse_tools(
         cls: Type[BaseModel],
         completion: ChatCompletion,
         validation_context: Optional[Dict[str, Any]] = None,
         strict: Optional[bool] = None,
-    ) -> BaseModel:
-        message = completion.choices[0].message
-        assert (
-            len(message.tool_calls or []) == 1
-        ), "Instructor does not support multiple tool calls, use List[Model] instead."
-        tool_call = message.tool_calls[0]  # type: ignore
-        assert (
-            tool_call.function.name == cls.openai_schema["name"]  # type: ignore[index]
-        ), "Tool name does not match"
-        return cls.model_validate_json(
-            tool_call.function.arguments,
-            context=validation_context,
-            strict=strict,
-        )
+    ) -> BaseModel | List[BaseModel]:
+        """Validate the single tool call of every choice; a lone result is returned unwrapped."""
+        parsed = []
+        for choice in completion.choices:
+            tool_calls = choice.message.tool_calls
+            assert (
+                len(tool_calls or []) == 1
+            ), "Instructor does not support multiple tool calls per message, use List[Model] instead."
+            call = tool_calls[0]  # type: ignore
+            assert (
+                call.function.name == cls.openai_schema["name"]  # type: ignore[index]
+            ), "Tool name does not match"
+            parsed.append(
+                cls.model_validate_json(
+                    call.function.arguments, context=validation_context, strict=strict
+                )
+            )
+        return parsed[0] if len(parsed) <= 1 else parsed
 
     @classmethod
     def parse_json(
         cls: Type[BaseModel],
         completion: ChatCompletion,
         validation_context: Optional[Dict[str, Any]] = None,
         strict: Optional[bool] = None,
-    ) -> BaseModel:
-        message = completion.choices[0].message.content or ""
-        message = extract_json_from_codeblock(message)
-
-        return cls.model_validate_json(
-            message,
-            context=validation_context,
-            strict=strict,
-        )
+    ) -> BaseModel | List[BaseModel]:
+        """Validate the JSON content of every choice; a lone result is returned unwrapped."""
+        parsed = [
+            cls.model_validate_json(
+                extract_json_from_codeblock(choice.message.content or ""),
+                context=validation_context,
+                strict=strict,
+            )
+            for choice in completion.choices
+        ]
+        # Several choices come back as a list; otherwise the bare first model.
+        return parsed[0] if len(parsed) <= 1 else parsed
 
 
 def openai_schema(cls: Type[BaseModel]) -> OpenAISchema:

diff --git a/instructor/process_response.py b/instructor/process_response.py
@@ -153,6 +153,8 @@ def process_response(
     # ? attaching usage data and the raw response to the model we return.
     if isinstance(model, IterableBase):
         logger.debug(f"Returning takes from IterableBase")
+        for task in model.tasks:
+            task._raw_response = response
         return [task for task in model.tasks]
 
     if isinstance(response_model, ParallelBase):
@@ -163,7 +165,8 @@ def process_response(
         logger.debug(f"Returning model from AdapterBase")
         return model.content
 
-    model._raw_response = response
+    if isinstance(model, BaseModel):
+        model._raw_response = response
     return model
 
 
@@ -306,7 +309,9 @@ def handle_response_model(
                 + "\n\n".join(openai_system_messages)
             )
 
-            new_kwargs["system"] += f"""
+            new_kwargs[
+                "system"
+            ] += f"""
             You must only response in JSON format that adheres to the following schema:
 
             <JSON_SCHEMA>

diff --git a/tests/test_new_client.py b/tests/test_new_client.py
@@ -50,6 +50,18 @@ def test_client_chat_completions_create_with_response():
     assert isinstance(completion, ChatCompletion)
 
 
+def test_client_chat_completions_create_many():
+    """Requesting n=2 completions yields two parsed results."""
+    client = instructor.from_openai(openai.OpenAI(), model="gpt-3.5-turbo")
+    prompt = [{"role": "user", "content": "Jason is 10"}]
+
+    users = client.chat.completions.create(
+        response_model=User, messages=prompt, temperature=0, n=2
+    )
+
+    assert len(users) == 2
+
+
 def test_client_chat_completions_create():
     client = instructor.from_openai(openai.OpenAI(), model="gpt-3.5-turbo")