Skip to content

RuntimeError: Attempted to send an sync request with an AsyncClient instance - kserve mlflow pytorch model error #2276

@matiaschaud

Description

@matiaschaud

Hello, I'm trying to send a successful request to my MLflow KServe endpoint.

I really appreciate your help with solving this problem!

The pytorch MLModel has this signature:

signature:
  inputs: '[{"name": "user", "type": "tensor", "tensor-spec": {"dtype": "int64", "shape":
    [-1]}}, {"name": "movie", "type": "tensor", "tensor-spec": {"dtype": "int64",
    "shape": [-1]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1,
    1]}}]'

I'm serving the Docker image generated by MLflow:

MODEL_URI="models:/recommender_production/1"

mlflow models build-docker \
    --model-uri $MODEL_URI \
    --name $IMAGE_NAME \
    --enable-mlserver \
    --env-manager conda

I'm trying this code:

from kserve import (
    RESTConfig, InferenceRESTClient,
    Model,
    ModelServer,
    InferRequest,
    InferInput,
    InferResponse,
    model_server,
)

from mlserver.types import InferenceRequest, Parameters, RequestInput
from mlserver.codecs import PandasCodec, NumpyCodec

config = RESTConfig(protocol="v2", retries=5, timeout=30)
client = InferenceRESTClient(config)
base_url = "http://movie-recommender-gpu.kubeflow-user-example-com.svc.cluster.local"
model_name = "recommender_production"
# Define the correct V2 payload structure
data_v2 = InferenceRequest(model_name = model_name,
    inputs=[
            RequestInput(name= "user", 
                  shape= [-1],  
                  datatype= "INT64",
                  data= [123,123],
                  parameters=Parameters(content_type=NumpyCodec.ContentType),),
            RequestInput(name="movie", 
                  shape=[-1],  
                  datatype="INT64",
                  data=[321,512],
                  parameters=Parameters(content_type=NumpyCodec.ContentType),)
    ],
    parameters=Parameters(content_type=PandasCodec.ContentType)
)

result = await client.infer(base_url, data_v2, model_name=model_name)
print(result)

But I'm getting this error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[50], line 35
     18 # Define the correct V2 payload structure
     19 data_v2 = InferenceRequest(model_name = model_name,
     20     inputs=[
     21             RequestInput(name= "user", 
   (...)     32     parameters=Parameters(content_type=PandasCodec.ContentType)
     33 )
---> 35 result = await client.infer(base_url, data_v2, model_name=model_name)
     36 print(result)

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/kserve/inference_client.py:501, in InferenceRESTClient.infer(self, base_url, data, model_name, headers, response_headers, is_graph_endpoint, timeout)
    499 if isinstance(data, dict):
    500     data = orjson.dumps(data)
--> 501 response = await self._client.post(
    502     url, content=data, headers=headers, timeout=timeout
    503 )
    504 if self._config.verbose:
    505     logger.info(
    506         "response code: %s, content: %s", response.status_code, response.text
    507     )

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1905, in AsyncClient.post(self, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
   1884 async def post(
   1885     self,
   1886     url: URL | str,
   (...)   1898     extensions: RequestExtensions | None = None,
   1899 ) -> Response:
   1900     """
   1901     Send a `POST` request.
   1902 
   1903     **Parameters**: See `httpx.request`.
   1904     """
-> 1905     return await self.request(
   1906         "POST",
   1907         url,
   1908         content=content,
   1909         data=data,
   1910         files=files,
   1911         json=json,
   1912         params=params,
   1913         headers=headers,
   1914         cookies=cookies,
   1915         auth=auth,
   1916         follow_redirects=follow_redirects,
   1917         timeout=timeout,
   1918         extensions=extensions,
   1919     )

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1585, in AsyncClient.request(self, method, url, content, data, files, json, params, headers, cookies, auth, follow_redirects, timeout, extensions)
   1570     warnings.warn(message, DeprecationWarning)
   1572 request = self.build_request(
   1573     method=method,
   1574     url=url,
   (...)   1583     extensions=extensions,
   1584 )
-> 1585 return await self.send(request, auth=auth, follow_redirects=follow_redirects)

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1674, in AsyncClient.send(self, request, stream, auth, follow_redirects)
   1670 self._set_timeout(request)
   1672 auth = self._build_request_auth(request, auth)
-> 1674 response = await self._send_handling_auth(
   1675     request,
   1676     auth=auth,
   1677     follow_redirects=follow_redirects,
   1678     history=[],
   1679 )
   1680 try:
   1681     if not stream:

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1702, in AsyncClient._send_handling_auth(self, request, auth, follow_redirects, history)
   1699 request = await auth_flow.__anext__()
   1701 while True:
-> 1702     response = await self._send_handling_redirects(
   1703         request,
   1704         follow_redirects=follow_redirects,
   1705         history=history,
   1706     )
   1707     try:
   1708         try:

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1739, in AsyncClient._send_handling_redirects(self, request, follow_redirects, history)
   1736 for hook in self._event_hooks["request"]:
   1737     await hook(request)
-> 1739 response = await self._send_single_request(request)
   1740 try:
   1741     for hook in self._event_hooks["response"]:

File /opt/conda/envs/movie-recommender-env/lib/python3.11/site-packages/httpx/_client.py:1771, in AsyncClient._send_single_request(self, request)
   1768 await timer.async_start()
   1770 if not isinstance(request.stream, AsyncByteStream):
-> 1771     raise RuntimeError(
   1772         "Attempted to send an sync request with an AsyncClient instance."
   1773     )
   1775 with request_context(request=request):
   1776     response = await transport.handle_async_request(request)

RuntimeError: Attempted to send an sync request with an AsyncClient instance.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions