avoid timeout errors with high concurrency in api_model (#2307)

* avoid timeout errors with high concurrency in api_model * style * add timeout * add docs --------- Co-authored-by: Baber <[email protected]>
EleutherAI · Dec 3, 2024 · 9632b34 · 9632b34
1 parent f49b037
commit 9632b34
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 2 deletions.
diff --git a/docs/API_guide.md b/docs/API_guide.md
@@ -50,6 +50,10 @@ When initializing a `TemplateAPI` instance or a subclass, you can provide severa
    - Useful for APIs that support parallel processing.
    - Default is 1 (sequential processing).
 
+- `timeout` (int, optional):
+   - Timeout for API requests in seconds.
+   - Default is 300.
+
 - `tokenized_requests` (bool):
   - Determines whether the input is pre-tokenized. Defaults to `True`.
   - Requests can be sent in either tokenized form (`list[list[int]]`) or as text (`list[str]`, or `str` for batch_size=1).

diff --git a/lm_eval/models/api_models.py b/lm_eval/models/api_models.py
@@ -21,7 +21,7 @@
 
 try:
     import requests
-    from aiohttp import ClientSession, TCPConnector
+    from aiohttp import ClientSession, ClientTimeout, TCPConnector
     from tenacity import RetryError, retry, stop_after_attempt, wait_exponential
     from tqdm import tqdm
     from tqdm.asyncio import tqdm_asyncio
@@ -81,6 +81,8 @@ def __init__(
         use_fast_tokenizer: bool = True,
         verify_certificate: bool = True,
         eos_string: str = None,
+        # timeout in seconds
+        timeout: int = 300,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -126,6 +128,7 @@ def __init__(
         self.max_retries = int(max_retries)
         self.verify_certificate = verify_certificate
         self._eos_string = eos_string
+        self.timeout = int(timeout)
 
         eval_logger.info(f"Using tokenizer {self.tokenizer_backend}")
         if self.tokenizer_backend is None:
@@ -466,7 +469,9 @@ async def get_batched_requests(
     ) -> Union[List[List[str]], List[List[Tuple[float, bool]]]]:
         ctxlens = ctxlens if ctxlens else [None] * len(requests)
         conn = TCPConnector(limit=self._concurrent)
-        async with ClientSession(connector=conn) as session:
+        async with ClientSession(
+            connector=conn, timeout=ClientTimeout(total=self.timeout)
+        ) as session:
             retry_: Callable[..., Awaitable[Any]] = retry(
                 stop=stop_after_attempt(self.max_retries),
                 wait=wait_exponential(multiplier=0.5, min=1, max=10),