diff --git a/.evergreen/resync-specs.sh b/.evergreen/resync-specs.sh index d2bd89c781..4bb9c86304 100755 --- a/.evergreen/resync-specs.sh +++ b/.evergreen/resync-specs.sh @@ -94,6 +94,9 @@ do change-streams|change_streams) cpjson change-streams/tests/ change_streams/ ;; + client-backpressure|client_backpressure) + cpjson client-backpressure/tests client-backpressure + ;; client-side-encryption|csfle|fle) cpjson client-side-encryption/tests/ client-side-encryption/spec cpjson client-side-encryption/corpus/ client-side-encryption/corpus diff --git a/pymongo/asynchronous/client_bulk.py b/pymongo/asynchronous/client_bulk.py index 151942c8a8..508b8e41c3 100644 --- a/pymongo/asynchronous/client_bulk.py +++ b/pymongo/asynchronous/client_bulk.py @@ -563,9 +563,21 @@ async def _execute_command( error, ConnectionFailure ) and not isinstance(error, (NotPrimaryError, WaitQueueTimeoutError)) + retryable_label_error = ( + hasattr(error, "details") + and isinstance(error.details, dict) + and "errorLabels" in error.details + and isinstance(error.details["errorLabels"], list) + and "RetryableError" in error.details["errorLabels"] + ) + # Synthesize the full bulk result without modifying the # current one because this write operation may be retried. 
- if retryable and (retryable_top_level_error or retryable_network_error): + if retryable and ( + retryable_top_level_error + or retryable_network_error + or retryable_label_error + ): full = copy.deepcopy(full_result) _merge_command(self.ops, self.idx_offset, full, result) _throw_client_bulk_write_exception(full, self.verbose_results) diff --git a/pymongo/asynchronous/client_session.py b/pymongo/asynchronous/client_session.py index c1e5a404d2..8212d1396a 100644 --- a/pymongo/asynchronous/client_session.py +++ b/pymongo/asynchronous/client_session.py @@ -135,7 +135,9 @@ from __future__ import annotations +import asyncio import collections +import random import time import uuid from collections.abc import Mapping as _Mapping @@ -162,7 +164,9 @@ from pymongo.errors import ( ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, PyMongoError, WTimeoutError, @@ -427,6 +431,7 @@ def __init__(self, opts: Optional[TransactionOptions], client: AsyncMongoClient[ self.recovery_token = None self.attempt = 0 self.client = client + self.has_completed_command = False def active(self) -> bool: return self.state in (_TxnState.STARTING, _TxnState.IN_PROGRESS) @@ -434,6 +439,9 @@ def active(self) -> bool: def starting(self) -> bool: return self.state == _TxnState.STARTING + def set_starting(self) -> None: + self.state = _TxnState.STARTING + @property def pinned_conn(self) -> Optional[AsyncConnection]: if self.active() and self.conn_mgr: @@ -493,11 +501,24 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: # This limit is non-configurable and was chosen to be twice the 60 second # default value of MongoDB's `transactionLifetimeLimitSeconds` parameter. 
_WITH_TRANSACTION_RETRY_TIME_LIMIT = 120 +_BACKOFF_MAX = 0.500 # 500ms max backoff +_BACKOFF_INITIAL = 0.005 # 5ms initial backoff -def _within_time_limit(start_time: float) -> bool: +def _within_time_limit(start_time: float, backoff: float = 0) -> bool: """Are we within the with_transaction retry limit?""" - return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT + remaining = _csot.remaining() + if remaining is not None and remaining <= 0: + return False + return time.monotonic() + backoff - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT + + +def _make_timeout_error(error: BaseException) -> PyMongoError: + """Convert error to a NetworkTimeout or ExecutionTimeout as appropriate.""" + if _csot.remaining() is not None: + return ExecutionTimeout(str(error), 50, {"ok": 0, "errmsg": str(error), "code": 50}) + else: + return NetworkTimeout(str(error)) _T = TypeVar("_T") @@ -744,7 +765,17 @@ async def callback(session, custom_arg, custom_kwarg=None): https://github.com/mongodb/specifications/blob/master/source/transactions-convenient-api/transactions-convenient-api.md#handling-errors-inside-the-callback """ start_time = time.monotonic() + retry = 0 + last_error: Optional[BaseException] = None while True: + if retry: # Implement exponential backoff on retry. + jitter = random.random() # noqa: S311 + backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX) + if not _within_time_limit(start_time, backoff): + assert last_error is not None + raise _make_timeout_error(last_error) from last_error + await asyncio.sleep(backoff) + retry += 1 await self.start_transaction( read_concern, write_concern, read_preference, max_commit_time_ms ) @@ -752,15 +783,16 @@ async def callback(session, custom_arg, custom_kwarg=None): ret = await callback(self) # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. 
except BaseException as exc: + last_error = exc if self.in_transaction: await self.abort_transaction() - if ( - isinstance(exc, PyMongoError) - and exc.has_error_label("TransientTransactionError") - and _within_time_limit(start_time) + if isinstance(exc, PyMongoError) and exc.has_error_label( + "TransientTransactionError" ): - # Retry the entire transaction. - continue + if _within_time_limit(start_time): + # Retry the entire transaction. + continue + raise _make_timeout_error(last_error) from exc raise if not self.in_transaction: @@ -771,17 +803,16 @@ async def callback(session, custom_arg, custom_kwarg=None): try: await self.commit_transaction() except PyMongoError as exc: - if ( - exc.has_error_label("UnknownTransactionCommitResult") - and _within_time_limit(start_time) - and not _max_time_expired_error(exc) - ): + last_error = exc + if not _within_time_limit(start_time): + raise _make_timeout_error(last_error) from exc + if exc.has_error_label( + "UnknownTransactionCommitResult" + ) and not _max_time_expired_error(exc): # Retry the commit. continue - if exc.has_error_label("TransientTransactionError") and _within_time_limit( - start_time - ): + if exc.has_error_label("TransientTransactionError"): # Retry the entire transaction. 
break raise diff --git a/pymongo/asynchronous/collection.py b/pymongo/asynchronous/collection.py index 53b4992493..127136dd48 100644 --- a/pymongo/asynchronous/collection.py +++ b/pymongo/asynchronous/collection.py @@ -20,7 +20,6 @@ from typing import ( TYPE_CHECKING, Any, - AsyncContextManager, Callable, Coroutine, Generic, @@ -252,6 +251,7 @@ def __init__( unicode_decode_error_handler="replace", document_class=dict ) self._timeout = database.client.options.timeout + self._retry_policy = database.client._retry_policy if create or kwargs: if _IS_SYNC: @@ -571,11 +571,6 @@ async def watch( await change_stream._initialize_cursor() return change_stream - async def _conn_for_writes( - self, session: Optional[AsyncClientSession], operation: str - ) -> AsyncContextManager[AsyncConnection]: - return await self._database.client._conn_for_writes(session, operation) - async def _command( self, conn: AsyncConnection, @@ -652,7 +647,10 @@ async def _create_helper( if "size" in options: options["size"] = float(options["size"]) cmd.update(options) - async with await self._conn_for_writes(session, operation=_Op.CREATE) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: if qev2_required and conn.max_wire_version < 21: raise ConfigurationError( "Driver support of Queryable Encryption is incompatible with server. " @@ -669,6 +667,8 @@ async def _create_helper( session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.CREATE) + async def _create( self, options: MutableMapping[str, Any], @@ -2240,7 +2240,10 @@ async def _create_indexes( command (like maxTimeMS) can be passed as keyword arguments. 
""" names = [] - async with await self._conn_for_writes(session, operation=_Op.CREATE_INDEXES) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> list[str]: supports_quorum = conn.max_wire_version >= 9 def gen_indexes() -> Iterator[Mapping[str, Any]]: @@ -2269,7 +2272,11 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: write_concern=self._write_concern_for(session), session=session, ) - return names + return names + + return await self.database.client._retryable_write( + False, inner, session, _Op.CREATE_INDEXES + ) async def create_index( self, @@ -2422,7 +2429,6 @@ async def drop_indexes( kwargs["comment"] = comment await self._drop_index("*", session=session, **kwargs) - @_csot.apply async def drop_index( self, index_or_name: _IndexKeyHint, @@ -2490,7 +2496,10 @@ async def _drop_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - async with await self._conn_for_writes(session, operation=_Op.DROP_INDEXES) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: await self._command( conn, cmd, @@ -2500,6 +2509,8 @@ async def _drop_index( session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.DROP_INDEXES) + async def list_indexes( self, session: Optional[AsyncClientSession] = None, @@ -2763,17 +2774,22 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: cmd = {"createSearchIndexes": self.name, "indexes": list(gen_indexes())} cmd.update(kwargs) - async with await self._conn_for_writes( - session, operation=_Op.CREATE_SEARCH_INDEXES - ) as conn: + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> list[str]: resp = await self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) return [index["name"] for index in 
resp["indexesCreated"]] + return await self.database.client._retryable_write( + False, inner, session, _Op.CREATE_SEARCH_INDEXES + ) + async def drop_search_index( self, name: str, @@ -2799,15 +2815,21 @@ async def drop_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - async with await self._conn_for_writes(session, operation=_Op.DROP_SEARCH_INDEXES) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: await self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.DROP_SEARCH_INDEXES) + async def update_search_index( self, name: str, @@ -2835,15 +2857,21 @@ async def update_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - async with await self._conn_for_writes(session, operation=_Op.UPDATE_SEARCH_INDEX) as conn: + + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> None: await self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + await self.database.client._retryable_write(False, inner, session, _Op.UPDATE_SEARCH_INDEX) + async def options( self, session: Optional[AsyncClientSession] = None, @@ -2918,6 +2946,7 @@ async def _aggregate( session, retryable=not cmd._performs_write, operation=_Op.AGGREGATE, + is_aggregate_write=cmd._performs_write, ) async def aggregate( @@ -3123,17 +3152,21 @@ async def rename( if comment is not None: cmd["comment"] = comment write_concern = self._write_concern_for_cmd(cmd, session) + client = self._database.client - async with await self._conn_for_writes(session, operation=_Op.RENAME) as conn: - async with 
self._database.client._tmp_session(session) as s: - return await conn.command( - "admin", - cmd, - write_concern=write_concern, - parse_write_concern_error=True, - session=s, - client=self._database.client, - ) + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> MutableMapping[str, Any]: + return await conn.command( + "admin", + cmd, + write_concern=write_concern, + parse_write_concern_error=True, + session=session, + client=client, + ) + + return await client._retryable_write(False, inner, session, _Op.RENAME) async def distinct( self, diff --git a/pymongo/asynchronous/database.py b/pymongo/asynchronous/database.py index 5aa206ee24..2c482f0415 100644 --- a/pymongo/asynchronous/database.py +++ b/pymongo/asynchronous/database.py @@ -135,6 +135,7 @@ def __init__( self._name = name self._client: AsyncMongoClient[_DocumentType] = client self._timeout = client.options.timeout + self._retry_policy = client._retry_policy @property def client(self) -> AsyncMongoClient[_DocumentType]: @@ -931,14 +932,15 @@ async def command( if read_preference is None: read_preference = (session and session._txn_read_preference()) or ReadPreference.PRIMARY - async with await self._client._conn_for_reads( - read_preference, session, operation=command_name - ) as ( - connection, - read_preference, - ): + + async def inner( + session: Optional[AsyncClientSession], + _server: Server, + conn: AsyncConnection, + read_preference: _ServerMode, + ) -> Union[dict[str, Any], _CodecDocumentType]: return await self._command( - connection, + conn, command, value, check, @@ -949,6 +951,10 @@ async def command( **kwargs, ) + return await self._client._retryable_read( + inner, read_preference, session, command_name, None, False, is_run_command=True + ) + @_csot.apply async def cursor_command( self, @@ -1016,17 +1022,17 @@ async def cursor_command( async with self._client._tmp_session(session) as tmp_session: opts = codec_options or 
DEFAULT_CODEC_OPTIONS - if read_preference is None: read_preference = ( tmp_session and tmp_session._txn_read_preference() ) or ReadPreference.PRIMARY - async with await self._client._conn_for_reads( - read_preference, tmp_session, command_name - ) as ( - conn, - read_preference, - ): + + async def inner( + session: Optional[AsyncClientSession], + _server: Server, + conn: AsyncConnection, + read_preference: _ServerMode, + ) -> AsyncCommandCursor[_DocumentType]: response = await self._command( conn, command, @@ -1035,7 +1041,7 @@ async def cursor_command( None, read_preference, opts, - session=tmp_session, + session=session, **kwargs, ) coll = self.get_collection("$cmd", read_preference=read_preference) @@ -1045,7 +1051,7 @@ async def cursor_command( response["cursor"], conn.address, max_await_time_ms=max_await_time_ms, - session=tmp_session, + session=session, comment=comment, ) await cmd_cursor._maybe_pin_connection(conn) @@ -1053,6 +1059,10 @@ async def cursor_command( else: raise InvalidOperation("Command does not return a cursor.") + return await self.client._retryable_read( + inner, read_preference, tmp_session, command_name, None, False + ) + async def _retryable_read_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1254,9 +1264,11 @@ async def _drop_helper( if comment is not None: command["comment"] = comment - async with await self._client._conn_for_writes(session, operation=_Op.DROP) as connection: + async def inner( + session: Optional[AsyncClientSession], conn: AsyncConnection, _retryable_write: bool + ) -> dict[str, Any]: return await self._command( - connection, + conn, command, allowable_errors=["ns not found", 26], write_concern=self._write_concern_for(session), @@ -1264,6 +1276,8 @@ async def _drop_helper( session=session, ) + return await self.client._retryable_write(False, inner, session, _Op.DROP) + @_csot.apply async def drop_collection( self, diff --git a/pymongo/asynchronous/helpers.py b/pymongo/asynchronous/helpers.py index 
ccda16e28b..2c01c19b7a 100644 --- a/pymongo/asynchronous/helpers.py +++ b/pymongo/asynchronous/helpers.py @@ -17,8 +17,11 @@ import asyncio import builtins +import functools +import random import socket import sys +import time as time # noqa: PLC0414 # needed in sync version from typing import ( Any, Callable, @@ -26,10 +29,12 @@ cast, ) +from pymongo import _csot from pymongo.errors import ( OperationFailure, ) from pymongo.helpers_shared import _REAUTHENTICATION_REQUIRED_CODE +from pymongo.lock import _async_create_lock _IS_SYNC = False @@ -38,6 +43,7 @@ def _handle_reauth(func: F) -> F: + @functools.wraps(func) async def inner(*args: Any, **kwargs: Any) -> Any: no_reauth = kwargs.pop("no_reauth", False) from pymongo.asynchronous.pool import AsyncConnection @@ -70,6 +76,96 @@ async def inner(*args: Any, **kwargs: Any) -> Any: return cast(F, inner) +_MAX_RETRIES = 5 +_BACKOFF_INITIAL = 0.1 +_BACKOFF_MAX = 10 +DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 +DEFAULT_RETRY_TOKEN_RETURN = 0.1 + + +def _backoff( + attempt: int, initial_delay: float = _BACKOFF_INITIAL, max_delay: float = _BACKOFF_MAX +) -> float: + jitter = random.random() # noqa: S311 + return jitter * min(initial_delay * (2**attempt), max_delay) + + +class _TokenBucket: + """A token bucket implementation for rate limiting.""" + + def __init__( + self, + capacity: float = DEFAULT_RETRY_TOKEN_CAPACITY, + return_rate: float = DEFAULT_RETRY_TOKEN_RETURN, + ): + self.lock = _async_create_lock() + self.capacity = capacity + self.tokens = capacity + self.return_rate = return_rate + + async def consume(self) -> bool: + """Consume a token from the bucket if available.""" + async with self.lock: + if self.tokens >= 1: + self.tokens -= 1 + return True + return False + + async def deposit(self, retry: bool = False) -> None: + """Deposit a token back into the bucket.""" + retry_token = 1 if retry else 0 + async with self.lock: + self.tokens = min(self.capacity, self.tokens + retry_token + self.return_rate) + + +class 
_RetryPolicy: + """A retry limiter that performs exponential backoff with jitter. + + When adaptive retries are enabled, retry attempts are limited by a token bucket to prevent overwhelming the server during + a prolonged outage or high load. + """ + + def __init__( + self, + token_bucket: _TokenBucket, + attempts: int = _MAX_RETRIES, + backoff_initial: float = _BACKOFF_INITIAL, + backoff_max: float = _BACKOFF_MAX, + adaptive_retry: bool = False, + ): + self.token_bucket = token_bucket + self.attempts = attempts + self.backoff_initial = backoff_initial + self.backoff_max = backoff_max + self.adaptive_retry = adaptive_retry + + async def record_success(self, retry: bool) -> None: + """Record a successful operation.""" + if self.adaptive_retry: + await self.token_bucket.deposit(retry) + + def backoff(self, attempt: int) -> float: + """Return the backoff duration for the given attempt.""" + return _backoff(max(0, attempt - 1), self.backoff_initial, self.backoff_max) + + async def should_retry(self, attempt: int, delay: float) -> bool: + """Return whether we have budget to retry after backing off for the given delay.""" + if attempt > self.attempts: + return False + + # If the delay would exceed the deadline, bail early before consuming a token. + if _csot.get_timeout(): + if time.monotonic() + delay > _csot.get_deadline(): + return False + + # Check token bucket last since we only want to consume a token if we actually retry. + if self.adaptive_retry and not await self.token_bucket.consume(): + # DRIVERS-3246 Improve diagnostics when this case happens. + # We could add info to the exception and log. 
+ return False + return True + + async def _getaddrinfo( host: Any, port: Any, **kwargs: Any ) -> list[ diff --git a/pymongo/asynchronous/mongo_client.py b/pymongo/asynchronous/mongo_client.py index 95f2e3746e..8936068af6 100644 --- a/pymongo/asynchronous/mongo_client.py +++ b/pymongo/asynchronous/mongo_client.py @@ -35,6 +35,7 @@ import asyncio import contextlib import os +import time as time # noqa: PLC0414 # needed in sync version import warnings import weakref from collections import defaultdict @@ -67,6 +68,10 @@ from pymongo.asynchronous.client_bulk import _AsyncClientBulk from pymongo.asynchronous.client_session import _SESSION, _EmptyServerSession from pymongo.asynchronous.command_cursor import AsyncCommandCursor +from pymongo.asynchronous.helpers import ( + _RetryPolicy, + _TokenBucket, +) from pymongo.asynchronous.settings import TopologySettings from pymongo.asynchronous.topology import Topology, _ErrorContext from pymongo.client_options import ClientOptions @@ -610,8 +615,18 @@ def __init__( client to use Stable API. See `versioned API `_ for details. + | **Adaptive retry options:** + | (If not enabled explicitly, adaptive retries will not be enabled.) + + - `adaptive_retries`: (boolean) Whether the adaptive retry mechanism is enabled for this client. + If enabled, server overload errors will use a token-bucket based system to mitigate further overload. + Defaults to ``False``. + .. seealso:: The MongoDB documentation on `connections `_. + .. versionchanged:: 4.17 + Added the ``adaptive_retries`` URI and keyword argument. + .. versionchanged:: 4.5 Added the ``serverMonitoringMode`` keyword argument. 
@@ -879,11 +894,16 @@ def __init__( self._options.read_concern, ) + self._retry_policy = _RetryPolicy( + _TokenBucket(), adaptive_retry=self._options.adaptive_retries + ) + self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name) self._opened = False self._closed = False self._loop: Optional[asyncio.AbstractEventLoop] = None + if not is_srv: self._init_background() @@ -1991,6 +2011,8 @@ async def _retry_internal( read_pref: Optional[_ServerMode] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Internal retryable helper for all client transactions. @@ -2002,6 +2024,8 @@ async def _retry_internal( :param address: Server Address, defaults to None :param read_pref: Topology of read operation, defaults to None :param retryable: If the operation should be retried once, defaults to None + :param is_run_command: If this is a runCommand operation, defaults to False + :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. :return: Output of the calling func() """ @@ -2016,6 +2040,8 @@ async def _retry_internal( address=address, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ).run() async def _retryable_read( @@ -2027,6 +2053,8 @@ async def _retryable_read( address: Optional[_Address] = None, retryable: bool = True, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Execute an operation with consecutive retries if possible @@ -2042,6 +2070,8 @@ async def _retryable_read( :param address: Optional address when sending a message, defaults to None :param retryable: if we should attempt retries (may not always be supported even if supplied), defaults to False + :param is_run_command: If this is a runCommand operation, defaults to False. 
+ :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. """ # Ensure that the client supports retrying on reads and there is no session in @@ -2060,6 +2090,8 @@ async def _retryable_read( read_pref=read_pref, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ) async def _retryable_write( @@ -2454,15 +2486,13 @@ async def drop_database( f"name_or_database must be an instance of str or a AsyncDatabase, not {type(name)}" ) - async with await self._conn_for_writes(session, operation=_Op.DROP_DATABASE) as conn: - await self[name]._command( - conn, - {"dropDatabase": 1, "comment": comment}, - read_preference=ReadPreference.PRIMARY, - write_concern=self._write_concern_for(session), - parse_write_concern_error=True, - session=session, - ) + await self[name].command( + {"dropDatabase": 1, "comment": comment}, + read_preference=ReadPreference.PRIMARY, + write_concern=self._write_concern_for(session), + parse_write_concern_error=True, + session=session, + ) @_csot.apply async def bulk_write( @@ -2746,12 +2776,15 @@ def __init__( address: Optional[_Address] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ): self._last_error: Optional[Exception] = None self._retrying = False + self._always_retryable = False self._multiple_retries = _csot.get_timeout() is not None self._client = mongo_client - + self._retry_policy = mongo_client._retry_policy self._func = func self._bulk = bulk self._session = session @@ -2767,6 +2800,8 @@ def __init__( self._operation = operation self._operation_id = operation_id self._attempt_number = 0 + self._is_run_command = is_run_command + self._is_aggregate_write = is_aggregate_write async def run(self) -> T: """Runs the supplied func() and attempts a retry @@ -2786,7 +2821,14 @@ async def run(self) -> T: while True: self._check_last_error(check_csot=True) 
try: - return await self._read() if self._is_read else await self._write() + res = await self._read() if self._is_read else await self._write() + await self._retry_policy.record_success(self._attempt_number > 0) + # Track whether the transaction has completed a command. + # If we need to apply backpressure to the first command, + # we will need to revert back to starting state. + if self._session is not None and self._session.in_transaction: + self._session._transaction.has_completed_command = True + return res except ServerSelectionTimeoutError: # The application may think the write was never attempted # if we raise ServerSelectionTimeoutError on the retry @@ -2797,37 +2839,77 @@ async def run(self) -> T: # most likely be a waste of time. raise except PyMongoError as exc: + always_retryable = False + overloaded = False + exc_to_check = exc + + if self._is_run_command and not ( + self._client.options.retry_reads and self._client.options.retry_writes + ): + raise + if self._is_aggregate_write and not self._client.options.retry_writes: + raise + # Execute specialized catch on read if self._is_read: if isinstance(exc, (ConnectionFailure, OperationFailure)): # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) - if self._is_not_eligible_for_retry() or ( - isinstance(exc, OperationFailure) - and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + overloaded = exc.has_error_label("SystemOverloadedError") + always_retryable = exc.has_error_label("RetryableError") and overloaded + if ( + not self._client.options.retry_reads + or not always_retryable + and ( + self._is_not_eligible_for_retry() + or ( + isinstance(exc, OperationFailure) + and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + ) + ) ): raise self._retrying = True self._last_error = exc self._attempt_number += 1 + + # Revert back to starting state if we're in a transaction but haven't completed the first + # command. 
+ if ( + overloaded + and self._session is not None + and self._session.in_transaction + ): + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 else: raise # Specialized catch on write operation if not self._is_read: - if not self._retryable: + if isinstance(exc, ClientBulkWriteException) and isinstance( + exc.error, PyMongoError + ): + exc_to_check = exc.error + retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") + overloaded = exc_to_check.has_error_label("SystemOverloadedError") + always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded + + # Always retry abortTransaction and commitTransaction up to once + if self._operation not in ["abortTransaction", "commitTransaction"] and ( + not self._client.options.retry_writes + or not (self._retryable or always_retryable) + ): raise - if isinstance(exc, ClientBulkWriteException) and exc.error: - retryable_write_error_exc = isinstance( - exc.error, PyMongoError - ) and exc.error.has_error_label("RetryableWriteError") - else: - retryable_write_error_exc = exc.has_error_label("RetryableWriteError") - if retryable_write_error_exc: + if retryable_write_label or always_retryable: assert self._session await self._session._unpin() - if not retryable_write_error_exc or self._is_not_eligible_for_retry(): - if exc.has_error_label("NoWritesPerformed") and self._last_error: + if not always_retryable and ( + not retryable_write_label or self._is_not_eligible_for_retry() + ): + if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: raise self._last_error from exc else: raise @@ -2836,10 +2918,17 @@ async def run(self) -> T: self._bulk.retrying = True else: self._retrying = True - if not exc.has_error_label("NoWritesPerformed"): + if not exc_to_check.has_error_label("NoWritesPerformed"): self._last_error = exc if self._last_error is None: self._last_error = exc + # Revert back to 
starting state if we're in a transaction but haven't completed the first + # command. + if overloaded and self._session is not None and self._session.in_transaction: + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 if ( self._server is not None @@ -2848,6 +2937,16 @@ async def run(self) -> T: ): self._deprioritized_servers.append(self._server) + self._always_retryable = always_retryable + if overloaded: + delay = self._retry_policy.backoff(self._attempt_number) if overloaded else 0 + if not await self._retry_policy.should_retry(self._attempt_number, delay): + if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: + raise self._last_error from exc + else: + raise + await asyncio.sleep(delay) + def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" return not self._retryable or (self._is_retrying() and not self._multiple_retries) @@ -2909,7 +3008,7 @@ async def _write(self) -> T: and conn.supports_sessions ) is_mongos = conn.is_mongos - if not sessions_supported: + if not self._always_retryable and not sessions_supported: # A retry is not possible because this server does # not support sessions raise the last error. 
self._check_last_error() @@ -2941,7 +3040,7 @@ async def _read(self) -> T: conn, read_pref, ): - if self._retrying and not self._retryable: + if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: _debug_log( diff --git a/pymongo/asynchronous/pool.py b/pymongo/asynchronous/pool.py index 4e1e7c0638..3c1a85246e 100644 --- a/pymongo/asynchronous/pool.py +++ b/pymongo/asynchronous/pool.py @@ -19,6 +19,8 @@ import contextlib import logging import os +import socket +import ssl import sys import time import weakref @@ -52,10 +54,12 @@ DocumentTooLarge, ExecutionTimeout, InvalidOperation, + NetworkTimeout, NotPrimaryError, OperationFailure, PyMongoError, WaitQueueTimeoutError, + _CertificateError, ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _get_timeout_details, format_timeout_details @@ -250,6 +254,7 @@ async def _hello( cmd = self.hello_cmd() performing_handshake = not self.performed_handshake awaitable = False + cmd["backpressure"] = True if performing_handshake: self.performed_handshake = True cmd["client"] = self.opts.metadata @@ -752,8 +757,8 @@ def __init__( # Enforces: maxConnecting # Also used for: clearing the wait queue self._max_connecting_cond = _async_create_condition(self.lock) - self._max_connecting = self.opts.max_connecting self._pending = 0 + self._max_connecting = self.opts.max_connecting self._client_id = client_id if self.enabled_for_cmap: assert self.opts._event_listeners is not None @@ -986,6 +991,21 @@ async def remove_stale_sockets(self, reference_generation: int) -> None: self.requests -= 1 self.size_cond.notify() + def _handle_connection_error(self, error: BaseException) -> None: + # Handle system overload condition for non-sdam pools. + # Look for errors of type AutoReconnect and add error labels if appropriate. 
+ if self.is_sdam or type(error) not in (AutoReconnect, NetworkTimeout): + return + assert isinstance(error, AutoReconnect) # Appease type checker. + # If the original error was a DNS, certificate, or SSL error, ignore it. + if isinstance(error.__cause__, (_CertificateError, SSLErrors, socket.gaierror)): + # End of file errors are excluded, because the server may have disconnected + # during the handshake. + if not isinstance(error.__cause__, (ssl.SSLEOFError, ssl.SSLZeroReturnError)): + return + error._add_error_label("SystemOverloadedError") + error._add_error_label("RetryableError") + async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> AsyncConnection: """Connect to Mongo and return a new AsyncConnection. @@ -1037,10 +1057,10 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), error=ConnectionClosedReason.ERROR, ) + self._handle_connection_error(error) if isinstance(error, (IOError, OSError, *SSLErrors)): details = _get_timeout_details(self.opts) _raise_connection_failure(self.address, error, timeout_details=details) - raise conn = AsyncConnection(networking_interface, self, self.address, conn_id, self.is_sdam) # type: ignore[arg-type] @@ -1049,18 +1069,22 @@ async def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> A self.active_contexts.discard(tmp_context) if tmp_context.cancelled: conn.cancel_context.cancel() + completed_hello = False try: if not self.is_sdam: await conn.hello() + completed_hello = True self.is_writable = conn.is_writable if handler: handler.contribute_socket(conn, completed_handshake=False) await conn.authenticate() # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. 
- except BaseException: + except BaseException as e: async with self.lock: self.active_contexts.discard(conn.cancel_context) + if not completed_hello: + self._handle_connection_error(e) await conn.close_conn(ConnectionClosedReason.ERROR) raise @@ -1389,8 +1413,8 @@ async def _perished(self, conn: AsyncConnection) -> bool: :class:`~pymongo.errors.AutoReconnect` exceptions on server hiccups, etc. We only check if the socket was closed by an external error if it has been > 1 second since the socket was checked into the - pool, to keep performance reasonable - we can't avoid AutoReconnects - completely anyway. + pool to keep performance reasonable - + we can't avoid AutoReconnects completely anyway. """ idle_time_seconds = conn.idle_time_seconds() # If socket is idle, open a new one. @@ -1401,8 +1425,9 @@ async def _perished(self, conn: AsyncConnection) -> bool: await conn.close_conn(ConnectionClosedReason.IDLE) return True - if self._check_interval_seconds is not None and ( - self._check_interval_seconds == 0 or idle_time_seconds > self._check_interval_seconds + check_interval_seconds = self._check_interval_seconds + if check_interval_seconds is not None and ( + check_interval_seconds == 0 or idle_time_seconds > check_interval_seconds ): if conn.conn_closed(): await conn.close_conn(ConnectionClosedReason.ERROR) diff --git a/pymongo/asynchronous/topology.py b/pymongo/asynchronous/topology.py index c171848cac..01e346bfa8 100644 --- a/pymongo/asynchronous/topology.py +++ b/pymongo/asynchronous/topology.py @@ -913,7 +913,9 @@ async def _handle_error(self, address: _Address, err_ctx: _ErrorContext) -> None # Clear the pool. await server.reset(service_id) elif isinstance(error, ConnectionFailure): - if isinstance(error, WaitQueueTimeoutError): + if isinstance(error, WaitQueueTimeoutError) or ( + error.has_error_label("SystemOverloadedError") + ): return # "Client MUST replace the server's description with type Unknown # ... 
MUST NOT request an immediate check of the server." diff --git a/pymongo/client_options.py b/pymongo/client_options.py index 8b4eea7e65..1e488c2b8f 100644 --- a/pymongo/client_options.py +++ b/pymongo/client_options.py @@ -235,6 +235,11 @@ def __init__( self.__server_monitoring_mode = options.get( "servermonitoringmode", common.SERVER_MONITORING_MODE ) + self.__adaptive_retries = ( + options.get("adaptive_retries", common.ADAPTIVE_RETRIES) + if "adaptive_retries" in options + else options.get("adaptiveretries", common.ADAPTIVE_RETRIES) + ) @property def _options(self) -> Mapping[str, Any]: @@ -346,3 +351,11 @@ def server_monitoring_mode(self) -> str: .. versionadded:: 4.5 """ return self.__server_monitoring_mode + + @property + def adaptive_retries(self) -> bool: + """The configured adaptiveRetries option. + + .. versionadded:: 4.XX + """ + return self.__adaptive_retries diff --git a/pymongo/common.py b/pymongo/common.py index e23adac426..8b9797682f 100644 --- a/pymongo/common.py +++ b/pymongo/common.py @@ -140,6 +140,9 @@ # Default value for serverMonitoringMode SERVER_MONITORING_MODE = "auto" # poll/stream/auto +# Default value for adaptiveRetries +ADAPTIVE_RETRIES = False + # Auth mechanism properties that must raise an error instead of warning if they invalidate. 
_MECH_PROP_MUST_RAISE = ["CANONICALIZE_HOST_NAME"] @@ -738,6 +741,7 @@ def validate_server_monitoring_mode(option: str, value: str) -> str: "srvmaxhosts": validate_non_negative_integer, "timeoutms": validate_timeoutms, "servermonitoringmode": validate_server_monitoring_mode, + "adaptiveretries": validate_boolean_or_string, } # Dictionary where keys are the names of URI options specific to pymongo, @@ -771,6 +775,7 @@ def validate_server_monitoring_mode(option: str, value: str) -> str: "server_selector": validate_is_callable_or_none, "auto_encryption_opts": validate_auto_encryption_opts_or_none, "authoidcallowedhosts": validate_list, + "adaptive_retries": validate_boolean_or_string, } # Dictionary where keys are any URI option name, and values are the diff --git a/pymongo/logger.py b/pymongo/logger.py index 1b3fe43b86..ccfc45ed88 100644 --- a/pymongo/logger.py +++ b/pymongo/logger.py @@ -42,6 +42,7 @@ class _ConnectionStatusMessage(str, enum.Enum): POOL_READY = "Connection pool ready" POOL_CLOSED = "Connection pool closed" POOL_CLEARED = "Connection pool cleared" + POOL_BACKOFF = "Connection pool backoff" CONN_CREATED = "Connection created" CONN_READY = "Connection ready" @@ -88,6 +89,7 @@ class _SDAMStatusMessage(str, enum.Enum): _VERBOSE_CONNECTION_ERROR_REASONS = { ConnectionClosedReason.POOL_CLOSED: "Connection pool was closed", ConnectionCheckOutFailedReason.POOL_CLOSED: "Connection pool was closed", + ConnectionClosedReason.POOL_BACKOFF: "Connection pool is in backoff", ConnectionClosedReason.STALE: "Connection pool was stale", ConnectionClosedReason.ERROR: "An error occurred while using the connection", ConnectionCheckOutFailedReason.CONN_ERROR: "An error occurred while trying to establish a new connection", diff --git a/pymongo/monitoring.py b/pymongo/monitoring.py index 46a78aea0b..0dfbbb915a 100644 --- a/pymongo/monitoring.py +++ b/pymongo/monitoring.py @@ -934,6 +934,9 @@ class ConnectionClosedReason: POOL_CLOSED = "poolClosed" """The pool was closed, 
making the connection no longer valid.""" + POOL_BACKOFF = "poolBackoff" + """The pool is in backoff mode.""" + class ConnectionCheckOutFailedReason: """An enum that defines values for `reason` on a diff --git a/pymongo/synchronous/client_bulk.py b/pymongo/synchronous/client_bulk.py index a606d028e1..e8167bcedc 100644 --- a/pymongo/synchronous/client_bulk.py +++ b/pymongo/synchronous/client_bulk.py @@ -561,9 +561,21 @@ def _execute_command( error, ConnectionFailure ) and not isinstance(error, (NotPrimaryError, WaitQueueTimeoutError)) + retryable_label_error = ( + hasattr(error, "details") + and isinstance(error.details, dict) + and "errorLabels" in error.details + and isinstance(error.details["errorLabels"], list) + and "RetryableError" in error.details["errorLabels"] + ) + # Synthesize the full bulk result without modifying the # current one because this write operation may be retried. - if retryable and (retryable_top_level_error or retryable_network_error): + if retryable and ( + retryable_top_level_error + or retryable_network_error + or retryable_label_error + ): full = copy.deepcopy(full_result) _merge_command(self.ops, self.idx_offset, full, result) _throw_client_bulk_write_exception(full, self.verbose_results) diff --git a/pymongo/synchronous/client_session.py b/pymongo/synchronous/client_session.py index 5ef18a66bd..f8211a60b1 100644 --- a/pymongo/synchronous/client_session.py +++ b/pymongo/synchronous/client_session.py @@ -136,6 +136,7 @@ from __future__ import annotations import collections +import random import time import uuid from collections.abc import Mapping as _Mapping @@ -160,7 +161,9 @@ from pymongo.errors import ( ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, PyMongoError, WTimeoutError, @@ -426,6 +429,7 @@ def __init__(self, opts: Optional[TransactionOptions], client: MongoClient[Any]) self.recovery_token = None self.attempt = 0 self.client = client + self.has_completed_command 
= False def active(self) -> bool: return self.state in (_TxnState.STARTING, _TxnState.IN_PROGRESS) @@ -433,6 +437,9 @@ def active(self) -> bool: def starting(self) -> bool: return self.state == _TxnState.STARTING + def set_starting(self) -> None: + self.state = _TxnState.STARTING + @property def pinned_conn(self) -> Optional[Connection]: if self.active() and self.conn_mgr: @@ -492,11 +499,24 @@ def _max_time_expired_error(exc: PyMongoError) -> bool: # This limit is non-configurable and was chosen to be twice the 60 second # default value of MongoDB's `transactionLifetimeLimitSeconds` parameter. _WITH_TRANSACTION_RETRY_TIME_LIMIT = 120 +_BACKOFF_MAX = 0.500 # 500ms max backoff +_BACKOFF_INITIAL = 0.005 # 5ms initial backoff -def _within_time_limit(start_time: float) -> bool: +def _within_time_limit(start_time: float, backoff: float = 0) -> bool: """Are we within the with_transaction retry limit?""" - return time.monotonic() - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT + remaining = _csot.remaining() + if remaining is not None and remaining <= 0: + return False + return time.monotonic() + backoff - start_time < _WITH_TRANSACTION_RETRY_TIME_LIMIT + + +def _make_timeout_error(error: BaseException) -> PyMongoError: + """Convert error to a NetworkTimeout or ExecutionTimeout as appropriate.""" + if _csot.remaining() is not None: + return ExecutionTimeout(str(error), 50, {"ok": 0, "errmsg": str(error), "code": 50}) + else: + return NetworkTimeout(str(error)) _T = TypeVar("_T") @@ -743,21 +763,32 @@ def callback(session, custom_arg, custom_kwarg=None): https://github.com/mongodb/specifications/blob/master/source/transactions-convenient-api/transactions-convenient-api.md#handling-errors-inside-the-callback """ start_time = time.monotonic() + retry = 0 + last_error: Optional[BaseException] = None while True: + if retry: # Implement exponential backoff on retry. 
+ jitter = random.random() # noqa: S311 + backoff = jitter * min(_BACKOFF_INITIAL * (1.5**retry), _BACKOFF_MAX) + if not _within_time_limit(start_time, backoff): + assert last_error is not None + raise _make_timeout_error(last_error) from last_error + time.sleep(backoff) + retry += 1 self.start_transaction(read_concern, write_concern, read_preference, max_commit_time_ms) try: ret = callback(self) # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. except BaseException as exc: + last_error = exc if self.in_transaction: self.abort_transaction() - if ( - isinstance(exc, PyMongoError) - and exc.has_error_label("TransientTransactionError") - and _within_time_limit(start_time) + if isinstance(exc, PyMongoError) and exc.has_error_label( + "TransientTransactionError" ): - # Retry the entire transaction. - continue + if _within_time_limit(start_time): + # Retry the entire transaction. + continue + raise _make_timeout_error(last_error) from exc raise if not self.in_transaction: @@ -768,17 +799,16 @@ def callback(session, custom_arg, custom_kwarg=None): try: self.commit_transaction() except PyMongoError as exc: - if ( - exc.has_error_label("UnknownTransactionCommitResult") - and _within_time_limit(start_time) - and not _max_time_expired_error(exc) - ): + last_error = exc + if not _within_time_limit(start_time): + raise _make_timeout_error(last_error) from exc + if exc.has_error_label( + "UnknownTransactionCommitResult" + ) and not _max_time_expired_error(exc): # Retry the commit. continue - if exc.has_error_label("TransientTransactionError") and _within_time_limit( - start_time - ): + if exc.has_error_label("TransientTransactionError"): # Retry the entire transaction. 
break raise diff --git a/pymongo/synchronous/collection.py b/pymongo/synchronous/collection.py index edc6047330..34fd7190d1 100644 --- a/pymongo/synchronous/collection.py +++ b/pymongo/synchronous/collection.py @@ -21,7 +21,6 @@ TYPE_CHECKING, Any, Callable, - ContextManager, Generic, Iterable, Iterator, @@ -255,6 +254,7 @@ def __init__( unicode_decode_error_handler="replace", document_class=dict ) self._timeout = database.client.options.timeout + self._retry_policy = database.client._retry_policy if create or kwargs: if _IS_SYNC: @@ -572,11 +572,6 @@ def watch( change_stream._initialize_cursor() return change_stream - def _conn_for_writes( - self, session: Optional[ClientSession], operation: str - ) -> ContextManager[Connection]: - return self._database.client._conn_for_writes(session, operation) - def _command( self, conn: Connection, @@ -653,7 +648,10 @@ def _create_helper( if "size" in options: options["size"] = float(options["size"]) cmd.update(options) - with self._conn_for_writes(session, operation=_Op.CREATE) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: if qev2_required and conn.max_wire_version < 21: raise ConfigurationError( "Driver support of Queryable Encryption is incompatible with server. " @@ -670,6 +668,8 @@ def _create_helper( session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.CREATE) + def _create( self, options: MutableMapping[str, Any], @@ -2237,7 +2237,10 @@ def _create_indexes( command (like maxTimeMS) can be passed as keyword arguments. 
""" names = [] - with self._conn_for_writes(session, operation=_Op.CREATE_INDEXES) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> list[str]: supports_quorum = conn.max_wire_version >= 9 def gen_indexes() -> Iterator[Mapping[str, Any]]: @@ -2266,7 +2269,9 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: write_concern=self._write_concern_for(session), session=session, ) - return names + return names + + return self.database.client._retryable_write(False, inner, session, _Op.CREATE_INDEXES) def create_index( self, @@ -2419,7 +2424,6 @@ def drop_indexes( kwargs["comment"] = comment self._drop_index("*", session=session, **kwargs) - @_csot.apply def drop_index( self, index_or_name: _IndexKeyHint, @@ -2487,7 +2491,10 @@ def _drop_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - with self._conn_for_writes(session, operation=_Op.DROP_INDEXES) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: self._command( conn, cmd, @@ -2497,6 +2504,8 @@ def _drop_index( session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.DROP_INDEXES) + def list_indexes( self, session: Optional[ClientSession] = None, @@ -2760,15 +2769,22 @@ def gen_indexes() -> Iterator[Mapping[str, Any]]: cmd = {"createSearchIndexes": self.name, "indexes": list(gen_indexes())} cmd.update(kwargs) - with self._conn_for_writes(session, operation=_Op.CREATE_SEARCH_INDEXES) as conn: + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> list[str]: resp = self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) return [index["name"] for index in resp["indexesCreated"]] + return self.database.client._retryable_write( + False, inner, session, _Op.CREATE_SEARCH_INDEXES + ) + def drop_search_index( self, name: str, @@ -2794,15 
+2810,21 @@ def drop_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - with self._conn_for_writes(session, operation=_Op.DROP_SEARCH_INDEXES) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.DROP_SEARCH_INDEXES) + def update_search_index( self, name: str, @@ -2830,15 +2852,21 @@ def update_search_index( cmd.update(kwargs) if comment is not None: cmd["comment"] = comment - with self._conn_for_writes(session, operation=_Op.UPDATE_SEARCH_INDEX) as conn: + + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> None: self._command( conn, cmd, read_preference=ReadPreference.PRIMARY, allowable_errors=["ns not found", 26], codec_options=_UNICODE_REPLACE_CODEC_OPTIONS, + session=session, ) + self.database.client._retryable_write(False, inner, session, _Op.UPDATE_SEARCH_INDEX) + def options( self, session: Optional[ClientSession] = None, @@ -2911,6 +2939,7 @@ def _aggregate( session, retryable=not cmd._performs_write, operation=_Op.AGGREGATE, + is_aggregate_write=cmd._performs_write, ) def aggregate( @@ -3116,17 +3145,21 @@ def rename( if comment is not None: cmd["comment"] = comment write_concern = self._write_concern_for_cmd(cmd, session) + client = self._database.client - with self._conn_for_writes(session, operation=_Op.RENAME) as conn: - with self._database.client._tmp_session(session) as s: - return conn.command( - "admin", - cmd, - write_concern=write_concern, - parse_write_concern_error=True, - session=s, - client=self._database.client, - ) + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> MutableMapping[str, Any]: + return conn.command( + "admin", 
+ cmd, + write_concern=write_concern, + parse_write_concern_error=True, + session=session, + client=client, + ) + + return client._retryable_write(False, inner, session, _Op.RENAME) def distinct( self, diff --git a/pymongo/synchronous/database.py b/pymongo/synchronous/database.py index a453a94265..cc041a2e30 100644 --- a/pymongo/synchronous/database.py +++ b/pymongo/synchronous/database.py @@ -135,6 +135,7 @@ def __init__( self._name = name self._client: MongoClient[_DocumentType] = client self._timeout = client.options.timeout + self._retry_policy = client._retry_policy @property def client(self) -> MongoClient[_DocumentType]: @@ -931,12 +932,15 @@ def command( if read_preference is None: read_preference = (session and session._txn_read_preference()) or ReadPreference.PRIMARY - with self._client._conn_for_reads(read_preference, session, operation=command_name) as ( - connection, - read_preference, - ): + + def inner( + session: Optional[ClientSession], + _server: Server, + conn: Connection, + read_preference: _ServerMode, + ) -> Union[dict[str, Any], _CodecDocumentType]: return self._command( - connection, + conn, command, value, check, @@ -947,6 +951,10 @@ def command( **kwargs, ) + return self._client._retryable_read( + inner, read_preference, session, command_name, None, False, is_run_command=True + ) + @_csot.apply def cursor_command( self, @@ -1014,15 +1022,17 @@ def cursor_command( with self._client._tmp_session(session) as tmp_session: opts = codec_options or DEFAULT_CODEC_OPTIONS - if read_preference is None: read_preference = ( tmp_session and tmp_session._txn_read_preference() ) or ReadPreference.PRIMARY - with self._client._conn_for_reads(read_preference, tmp_session, command_name) as ( - conn, - read_preference, - ): + + def inner( + session: Optional[ClientSession], + _server: Server, + conn: Connection, + read_preference: _ServerMode, + ) -> CommandCursor[_DocumentType]: response = self._command( conn, command, @@ -1031,7 +1041,7 @@ def 
cursor_command( None, read_preference, opts, - session=tmp_session, + session=session, **kwargs, ) coll = self.get_collection("$cmd", read_preference=read_preference) @@ -1041,7 +1051,7 @@ def cursor_command( response["cursor"], conn.address, max_await_time_ms=max_await_time_ms, - session=tmp_session, + session=session, comment=comment, ) cmd_cursor._maybe_pin_connection(conn) @@ -1049,6 +1059,10 @@ def cursor_command( else: raise InvalidOperation("Command does not return a cursor.") + return self.client._retryable_read( + inner, read_preference, tmp_session, command_name, None, False + ) + def _retryable_read_command( self, command: Union[str, MutableMapping[str, Any]], @@ -1247,9 +1261,11 @@ def _drop_helper( if comment is not None: command["comment"] = comment - with self._client._conn_for_writes(session, operation=_Op.DROP) as connection: + def inner( + session: Optional[ClientSession], conn: Connection, _retryable_write: bool + ) -> dict[str, Any]: return self._command( - connection, + conn, command, allowable_errors=["ns not found", 26], write_concern=self._write_concern_for(session), @@ -1257,6 +1273,8 @@ def _drop_helper( session=session, ) + return self.client._retryable_write(False, inner, session, _Op.DROP) + @_csot.apply def drop_collection( self, diff --git a/pymongo/synchronous/helpers.py b/pymongo/synchronous/helpers.py index 1fff9a0f23..1a27fc11a5 100644 --- a/pymongo/synchronous/helpers.py +++ b/pymongo/synchronous/helpers.py @@ -17,8 +17,11 @@ import asyncio import builtins +import functools +import random import socket import sys +import time as time # noqa: PLC0414 # needed in sync version from typing import ( Any, Callable, @@ -26,10 +29,12 @@ cast, ) +from pymongo import _csot from pymongo.errors import ( OperationFailure, ) from pymongo.helpers_shared import _REAUTHENTICATION_REQUIRED_CODE +from pymongo.lock import _create_lock _IS_SYNC = True @@ -38,6 +43,7 @@ def _handle_reauth(func: F) -> F: + @functools.wraps(func) def inner(*args: Any, 
**kwargs: Any) -> Any: no_reauth = kwargs.pop("no_reauth", False) from pymongo.message import _BulkWriteContext @@ -70,6 +76,96 @@ def inner(*args: Any, **kwargs: Any) -> Any: return cast(F, inner) +_MAX_RETRIES = 5 +_BACKOFF_INITIAL = 0.1 +_BACKOFF_MAX = 10 +DEFAULT_RETRY_TOKEN_CAPACITY = 1000.0 +DEFAULT_RETRY_TOKEN_RETURN = 0.1 + + +def _backoff( + attempt: int, initial_delay: float = _BACKOFF_INITIAL, max_delay: float = _BACKOFF_MAX +) -> float: + jitter = random.random() # noqa: S311 + return jitter * min(initial_delay * (2**attempt), max_delay) + + +class _TokenBucket: + """A token bucket implementation for rate limiting.""" + + def __init__( + self, + capacity: float = DEFAULT_RETRY_TOKEN_CAPACITY, + return_rate: float = DEFAULT_RETRY_TOKEN_RETURN, + ): + self.lock = _create_lock() + self.capacity = capacity + self.tokens = capacity + self.return_rate = return_rate + + def consume(self) -> bool: + """Consume a token from the bucket if available.""" + with self.lock: + if self.tokens >= 1: + self.tokens -= 1 + return True + return False + + def deposit(self, retry: bool = False) -> None: + """Deposit a token back into the bucket.""" + retry_token = 1 if retry else 0 + with self.lock: + self.tokens = min(self.capacity, self.tokens + retry_token + self.return_rate) + + +class _RetryPolicy: + """A retry limiter that performs exponential backoff with jitter. + + When adaptive retries are enabled, retry attempts are limited by a token bucket to prevent overwhelming the server during + a prolonged outage or high load. 
+    """
+
+    def __init__(
+        self,
+        token_bucket: _TokenBucket,
+        attempts: int = _MAX_RETRIES,
+        backoff_initial: float = _BACKOFF_INITIAL,
+        backoff_max: float = _BACKOFF_MAX,
+        adaptive_retry: bool = False,
+    ):
+        self.token_bucket = token_bucket
+        self.attempts = attempts
+        self.backoff_initial = backoff_initial
+        self.backoff_max = backoff_max
+        self.adaptive_retry = adaptive_retry
+
+    def record_success(self, retry: bool) -> None:
+        """Record a successful operation."""
+        if self.adaptive_retry:
+            self.token_bucket.deposit(retry)
+
+    def backoff(self, attempt: int) -> float:
+        """Return the backoff duration for the given attempt."""
+        return _backoff(max(0, attempt - 1), self.backoff_initial, self.backoff_max)
+
+    def should_retry(self, attempt: int, delay: float) -> bool:
+        """Return whether we have budget to retry after backing off for the given delay."""
+        if attempt > self.attempts:
+            return False
+
+        # If the delay would exceed the deadline, bail early before consuming a token.
+        if _csot.get_timeout():
+            if time.monotonic() + delay > _csot.get_deadline():
+                return False
+
+        # Check token bucket last since we only want to consume a token if we actually retry.
+        if self.adaptive_retry and not self.token_bucket.consume():
+            # DRIVERS-3246 Improve diagnostics when this case happens.
+            # We could add info to the exception and log.
+ return False + return True + + def _getaddrinfo( host: Any, port: Any, **kwargs: Any ) -> list[ diff --git a/pymongo/synchronous/mongo_client.py b/pymongo/synchronous/mongo_client.py index 161a28d48d..ab74650780 100644 --- a/pymongo/synchronous/mongo_client.py +++ b/pymongo/synchronous/mongo_client.py @@ -35,6 +35,7 @@ import asyncio import contextlib import os +import time as time # noqa: PLC0414 # needed in sync version import warnings import weakref from collections import defaultdict @@ -110,6 +111,10 @@ from pymongo.synchronous.client_bulk import _ClientBulk from pymongo.synchronous.client_session import _SESSION, _EmptyServerSession from pymongo.synchronous.command_cursor import CommandCursor +from pymongo.synchronous.helpers import ( + _RetryPolicy, + _TokenBucket, +) from pymongo.synchronous.settings import TopologySettings from pymongo.synchronous.topology import Topology, _ErrorContext from pymongo.topology_description import TOPOLOGY_TYPE, TopologyDescription @@ -610,8 +615,18 @@ def __init__( client to use Stable API. See `versioned API `_ for details. + | **Adaptive retry options:** + | (If not enabled explicitly, adaptive retries will not be enabled.) + + - `adaptive_retries`: (boolean) Whether the adaptive retry mechanism is enabled for this client. + If enabled, server overload errors will use a token-bucket based system to mitigate further overload. + Defaults to ``False``. + .. seealso:: The MongoDB documentation on `connections `_. + .. versionchanged:: 4.17 + Added the ``adaptive_retries`` URI and keyword argument. + .. versionchanged:: 4.5 Added the ``serverMonitoringMode`` keyword argument. 
@@ -879,11 +894,16 @@ def __init__( self._options.read_concern, ) + self._retry_policy = _RetryPolicy( + _TokenBucket(), adaptive_retry=self._options.adaptive_retries + ) + self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name) self._opened = False self._closed = False self._loop: Optional[asyncio.AbstractEventLoop] = None + if not is_srv: self._init_background() @@ -1987,6 +2007,8 @@ def _retry_internal( read_pref: Optional[_ServerMode] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Internal retryable helper for all client transactions. @@ -1998,6 +2020,8 @@ def _retry_internal( :param address: Server Address, defaults to None :param read_pref: Topology of read operation, defaults to None :param retryable: If the operation should be retried once, defaults to None + :param is_run_command: If this is a runCommand operation, defaults to False + :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. :return: Output of the calling func() """ @@ -2012,6 +2036,8 @@ def _retry_internal( address=address, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ).run() def _retryable_read( @@ -2023,6 +2049,8 @@ def _retryable_read( address: Optional[_Address] = None, retryable: bool = True, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ) -> T: """Execute an operation with consecutive retries if possible @@ -2038,6 +2066,8 @@ def _retryable_read( :param address: Optional address when sending a message, defaults to None :param retryable: if we should attempt retries (may not always be supported even if supplied), defaults to False + :param is_run_command: If this is a runCommand operation, defaults to False. 
+ :param is_aggregate_write: If this is a aggregate operation with a write, defaults to False. """ # Ensure that the client supports retrying on reads and there is no session in @@ -2056,6 +2086,8 @@ def _retryable_read( read_pref=read_pref, retryable=retryable, operation_id=operation_id, + is_run_command=is_run_command, + is_aggregate_write=is_aggregate_write, ) def _retryable_write( @@ -2444,15 +2476,13 @@ def drop_database( f"name_or_database must be an instance of str or a Database, not {type(name)}" ) - with self._conn_for_writes(session, operation=_Op.DROP_DATABASE) as conn: - self[name]._command( - conn, - {"dropDatabase": 1, "comment": comment}, - read_preference=ReadPreference.PRIMARY, - write_concern=self._write_concern_for(session), - parse_write_concern_error=True, - session=session, - ) + self[name].command( + {"dropDatabase": 1, "comment": comment}, + read_preference=ReadPreference.PRIMARY, + write_concern=self._write_concern_for(session), + parse_write_concern_error=True, + session=session, + ) @_csot.apply def bulk_write( @@ -2736,12 +2766,15 @@ def __init__( address: Optional[_Address] = None, retryable: bool = False, operation_id: Optional[int] = None, + is_run_command: bool = False, + is_aggregate_write: bool = False, ): self._last_error: Optional[Exception] = None self._retrying = False + self._always_retryable = False self._multiple_retries = _csot.get_timeout() is not None self._client = mongo_client - + self._retry_policy = mongo_client._retry_policy self._func = func self._bulk = bulk self._session = session @@ -2757,6 +2790,8 @@ def __init__( self._operation = operation self._operation_id = operation_id self._attempt_number = 0 + self._is_run_command = is_run_command + self._is_aggregate_write = is_aggregate_write def run(self) -> T: """Runs the supplied func() and attempts a retry @@ -2776,7 +2811,14 @@ def run(self) -> T: while True: self._check_last_error(check_csot=True) try: - return self._read() if self._is_read else self._write() + 
res = self._read() if self._is_read else self._write() + self._retry_policy.record_success(self._attempt_number > 0) + # Track whether the transaction has completed a command. + # If we need to apply backpressure to the first command, + # we will need to revert back to starting state. + if self._session is not None and self._session.in_transaction: + self._session._transaction.has_completed_command = True + return res except ServerSelectionTimeoutError: # The application may think the write was never attempted # if we raise ServerSelectionTimeoutError on the retry @@ -2787,37 +2829,77 @@ def run(self) -> T: # most likely be a waste of time. raise except PyMongoError as exc: + always_retryable = False + overloaded = False + exc_to_check = exc + + if self._is_run_command and not ( + self._client.options.retry_reads and self._client.options.retry_writes + ): + raise + if self._is_aggregate_write and not self._client.options.retry_writes: + raise + # Execute specialized catch on read if self._is_read: if isinstance(exc, (ConnectionFailure, OperationFailure)): # ConnectionFailures do not supply a code property exc_code = getattr(exc, "code", None) - if self._is_not_eligible_for_retry() or ( - isinstance(exc, OperationFailure) - and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + overloaded = exc.has_error_label("SystemOverloadedError") + always_retryable = exc.has_error_label("RetryableError") and overloaded + if ( + not self._client.options.retry_reads + or not always_retryable + and ( + self._is_not_eligible_for_retry() + or ( + isinstance(exc, OperationFailure) + and exc_code not in helpers_shared._RETRYABLE_ERROR_CODES + ) + ) ): raise self._retrying = True self._last_error = exc self._attempt_number += 1 + + # Revert back to starting state if we're in a transaction but haven't completed the first + # command. 
+ if ( + overloaded + and self._session is not None + and self._session.in_transaction + ): + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 else: raise # Specialized catch on write operation if not self._is_read: - if not self._retryable: + if isinstance(exc, ClientBulkWriteException) and isinstance( + exc.error, PyMongoError + ): + exc_to_check = exc.error + retryable_write_label = exc_to_check.has_error_label("RetryableWriteError") + overloaded = exc_to_check.has_error_label("SystemOverloadedError") + always_retryable = exc_to_check.has_error_label("RetryableError") and overloaded + + # Always retry abortTransaction and commitTransaction up to once + if self._operation not in ["abortTransaction", "commitTransaction"] and ( + not self._client.options.retry_writes + or not (self._retryable or always_retryable) + ): raise - if isinstance(exc, ClientBulkWriteException) and exc.error: - retryable_write_error_exc = isinstance( - exc.error, PyMongoError - ) and exc.error.has_error_label("RetryableWriteError") - else: - retryable_write_error_exc = exc.has_error_label("RetryableWriteError") - if retryable_write_error_exc: + if retryable_write_label or always_retryable: assert self._session self._session._unpin() - if not retryable_write_error_exc or self._is_not_eligible_for_retry(): - if exc.has_error_label("NoWritesPerformed") and self._last_error: + if not always_retryable and ( + not retryable_write_label or self._is_not_eligible_for_retry() + ): + if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: raise self._last_error from exc else: raise @@ -2826,10 +2908,17 @@ def run(self) -> T: self._bulk.retrying = True else: self._retrying = True - if not exc.has_error_label("NoWritesPerformed"): + if not exc_to_check.has_error_label("NoWritesPerformed"): self._last_error = exc if self._last_error is None: self._last_error = exc + # Revert back to starting 
state if we're in a transaction but haven't completed the first + # command. + if overloaded and self._session is not None and self._session.in_transaction: + transaction = self._session._transaction + if not transaction.has_completed_command: + transaction.set_starting() + transaction.attempt = 0 if ( self._server is not None @@ -2838,6 +2927,16 @@ def run(self) -> T: ): self._deprioritized_servers.append(self._server) + self._always_retryable = always_retryable + if overloaded: + delay = self._retry_policy.backoff(self._attempt_number) if overloaded else 0 + if not self._retry_policy.should_retry(self._attempt_number, delay): + if exc_to_check.has_error_label("NoWritesPerformed") and self._last_error: + raise self._last_error from exc + else: + raise + time.sleep(delay) + def _is_not_eligible_for_retry(self) -> bool: """Checks if the exchange is not eligible for retry""" return not self._retryable or (self._is_retrying() and not self._multiple_retries) @@ -2899,7 +2998,7 @@ def _write(self) -> T: and conn.supports_sessions ) is_mongos = conn.is_mongos - if not sessions_supported: + if not self._always_retryable and not sessions_supported: # A retry is not possible because this server does # not support sessions raise the last error. 
self._check_last_error() @@ -2931,7 +3030,7 @@ def _read(self) -> T: conn, read_pref, ): - if self._retrying and not self._retryable: + if self._retrying and not self._retryable and not self._always_retryable: self._check_last_error() if self._retrying: _debug_log( diff --git a/pymongo/synchronous/pool.py b/pymongo/synchronous/pool.py index 89d2080bc8..d33cb59a98 100644 --- a/pymongo/synchronous/pool.py +++ b/pymongo/synchronous/pool.py @@ -19,6 +19,8 @@ import contextlib import logging import os +import socket +import ssl import sys import time import weakref @@ -49,10 +51,12 @@ DocumentTooLarge, ExecutionTimeout, InvalidOperation, + NetworkTimeout, NotPrimaryError, OperationFailure, PyMongoError, WaitQueueTimeoutError, + _CertificateError, ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _get_timeout_details, format_timeout_details @@ -250,6 +254,7 @@ def _hello( cmd = self.hello_cmd() performing_handshake = not self.performed_handshake awaitable = False + cmd["backpressure"] = True if performing_handshake: self.performed_handshake = True cmd["client"] = self.opts.metadata @@ -750,8 +755,8 @@ def __init__( # Enforces: maxConnecting # Also used for: clearing the wait queue self._max_connecting_cond = _create_condition(self.lock) - self._max_connecting = self.opts.max_connecting self._pending = 0 + self._max_connecting = self.opts.max_connecting self._client_id = client_id if self.enabled_for_cmap: assert self.opts._event_listeners is not None @@ -982,6 +987,21 @@ def remove_stale_sockets(self, reference_generation: int) -> None: self.requests -= 1 self.size_cond.notify() + def _handle_connection_error(self, error: BaseException) -> None: + # Handle system overload condition for non-sdam pools. + # Look for errors of type AutoReconnect and add error labels if appropriate. + if self.is_sdam or type(error) not in (AutoReconnect, NetworkTimeout): + return + assert isinstance(error, AutoReconnect) # Appease type checker. 
+ # If the original error was a DNS, certificate, or SSL error, ignore it. + if isinstance(error.__cause__, (_CertificateError, SSLErrors, socket.gaierror)): + # End of file errors are excluded, because the server may have disconnected + # during the handshake. + if not isinstance(error.__cause__, (ssl.SSLEOFError, ssl.SSLZeroReturnError)): + return + error._add_error_label("SystemOverloadedError") + error._add_error_label("RetryableError") + def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connection: """Connect to Mongo and return a new Connection. @@ -1033,10 +1053,10 @@ def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connect reason=_verbose_connection_error_reason(ConnectionClosedReason.ERROR), error=ConnectionClosedReason.ERROR, ) + self._handle_connection_error(error) if isinstance(error, (IOError, OSError, *SSLErrors)): details = _get_timeout_details(self.opts) _raise_connection_failure(self.address, error, timeout_details=details) - raise conn = Connection(networking_interface, self, self.address, conn_id, self.is_sdam) # type: ignore[arg-type] @@ -1045,18 +1065,22 @@ def connect(self, handler: Optional[_MongoClientErrorHandler] = None) -> Connect self.active_contexts.discard(tmp_context) if tmp_context.cancelled: conn.cancel_context.cancel() + completed_hello = False try: if not self.is_sdam: conn.hello() + completed_hello = True self.is_writable = conn.is_writable if handler: handler.contribute_socket(conn, completed_handshake=False) conn.authenticate() # Catch KeyboardInterrupt, CancelledError, etc. and cleanup. - except BaseException: + except BaseException as e: with self.lock: self.active_contexts.discard(conn.cancel_context) + if not completed_hello: + self._handle_connection_error(e) conn.close_conn(ConnectionClosedReason.ERROR) raise @@ -1385,8 +1409,8 @@ def _perished(self, conn: Connection) -> bool: :class:`~pymongo.errors.AutoReconnect` exceptions on server hiccups, etc. 
We only check if the socket was closed by an external error if it has been > 1 second since the socket was checked into the - pool, to keep performance reasonable - we can't avoid AutoReconnects - completely anyway. + pool to keep performance reasonable - + we can't avoid AutoReconnects completely anyway. """ idle_time_seconds = conn.idle_time_seconds() # If socket is idle, open a new one. @@ -1397,8 +1421,9 @@ def _perished(self, conn: Connection) -> bool: conn.close_conn(ConnectionClosedReason.IDLE) return True - if self._check_interval_seconds is not None and ( - self._check_interval_seconds == 0 or idle_time_seconds > self._check_interval_seconds + check_interval_seconds = self._check_interval_seconds + if check_interval_seconds is not None and ( + check_interval_seconds == 0 or idle_time_seconds > check_interval_seconds ): if conn.conn_closed(): conn.close_conn(ConnectionClosedReason.ERROR) diff --git a/pymongo/synchronous/topology.py b/pymongo/synchronous/topology.py index 38e916b1e7..ec1615f0c6 100644 --- a/pymongo/synchronous/topology.py +++ b/pymongo/synchronous/topology.py @@ -911,7 +911,9 @@ def _handle_error(self, address: _Address, err_ctx: _ErrorContext) -> None: # Clear the pool. server.reset(service_id) elif isinstance(error, ConnectionFailure): - if isinstance(error, WaitQueueTimeoutError): + if isinstance(error, WaitQueueTimeoutError) or ( + error.has_error_label("SystemOverloadedError") + ): return # "Client MUST replace the server's description with type Unknown # ... MUST NOT request an immediate check of the server." 
diff --git a/test/asynchronous/test_client.py b/test/asynchronous/test_client.py index 5511765bae..f3cee70e15 100644 --- a/test/asynchronous/test_client.py +++ b/test/asynchronous/test_client.py @@ -652,6 +652,21 @@ async def test_detected_environment_warning(self, mock_get_hosts): with self.assertWarns(UserWarning): self.simple_client(multi_host) + async def test_adaptive_retries(self): + # Assert that adaptive retries are disabled by default. + c = self.simple_client(connect=False) + self.assertFalse(c.options.adaptive_retries) + + # Assert that adaptive retries can be enabled through connection or client options. + c = self.simple_client(connect=False, adaptive_retries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(connect=False, adaptiveRetries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(host="mongodb://localhost/?adaptiveretries=true", connect=False) + self.assertTrue(c.options.adaptive_retries) + class TestClient(AsyncIntegrationTest): def test_multiple_uris(self): diff --git a/test/asynchronous/test_client_backpressure.py b/test/asynchronous/test_client_backpressure.py new file mode 100644 index 0000000000..d79a9114dc --- /dev/null +++ b/test/asynchronous/test_client_backpressure.py @@ -0,0 +1,361 @@ +# Copyright 2025-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Test Client Backpressure spec."""
+from __future__ import annotations
+
+import os
+import pathlib
+import sys
+from time import perf_counter
+from unittest.mock import patch
+
+sys.path[0:0] = [""]
+
+from test.asynchronous import (
+    AsyncIntegrationTest,
+    AsyncPyMongoTestCase,
+    async_client_context,
+    unittest,
+)
+from test.asynchronous.unified_format import generate_test_classes
+from test.utils_shared import EventListener, OvertCommandListener
+
+import pymongo
+from pymongo.asynchronous import helpers
+from pymongo.asynchronous.helpers import _MAX_RETRIES, _RetryPolicy, _TokenBucket
+from pymongo.errors import OperationFailure, PyMongoError
+
+_IS_SYNC = False
+
+# Mock a system overload error.
+mock_overload_error = {
+    "configureFailPoint": "failCommand",
+    "mode": {"times": 1},
+    "data": {
+        "failCommands": ["find", "insert", "update"],
+        "errorCode": 462,  # IngressRequestRateLimitExceeded
+        "errorLabels": ["RetryableError", "SystemOverloadedError"],
+    },
+}
+
+
+class TestBackpressure(AsyncIntegrationTest):
+    RUN_ON_LOAD_BALANCER = True
+
+    @async_client_context.require_failCommand_appName
+    async def test_retry_overload_error_command(self):
+        await self.db.t.insert_one({"x": 1})
+
+        # Ensure command is retried on overload error.
+        fail_many = mock_overload_error.copy()
+        fail_many["mode"] = {"times": _MAX_RETRIES}
+        async with self.fail_point(fail_many):
+            await self.db.command("find", "t")
+
+        # Ensure command stops retrying after _MAX_RETRIES.
+ fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.command("find", "t") + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_find(self): + await self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + async with self.fail_point(fail_many): + await self.db.t.find_one() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.t.find_one() + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_insert_one(self): + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + async with self.fail_point(fail_many): + await self.db.t.insert_one({"x": 1}) + + # Ensure command stops retrying after _MAX_RETRIES. 
+ fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.t.insert_one({"x": 1}) + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_update_many(self): + # Even though update_many is not a retryable write operation, it will + # still be retried via the "RetryableError" error label. + await self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + async with self.fail_point(fail_many): + await self.db.t.update_many({}, {"$set": {"x": 2}}) + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await self.db.t.update_many({}, {"$set": {"x": 2}}) + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @async_client_context.require_failCommand_appName + async def test_retry_overload_error_getMore(self): + coll = self.db.t + await coll.insert_many([{"x": 1} for _ in range(10)]) + + # Ensure command is retried on overload error. + fail_many = { + "configureFailPoint": "failCommand", + "mode": {"times": _MAX_RETRIES}, + "data": { + "failCommands": ["getMore"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + cursor = coll.find(batch_size=2) + await cursor.next() + async with self.fail_point(fail_many): + await cursor.to_list() + + # Ensure command stops retrying after _MAX_RETRIES. 
+ fail_too_many = fail_many.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + cursor = coll.find(batch_size=2) + await cursor.next() + async with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + await cursor.to_list() + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + +class TestRetryPolicy(AsyncPyMongoTestCase): + async def test_retry_policy(self): + capacity = 10 + retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity), adaptive_retry=True) + self.assertEqual(retry_policy.attempts, helpers._MAX_RETRIES) + self.assertEqual(retry_policy.backoff_initial, helpers._BACKOFF_INITIAL) + self.assertEqual(retry_policy.backoff_max, helpers._BACKOFF_MAX) + for i in range(1, helpers._MAX_RETRIES + 1): + self.assertTrue(await retry_policy.should_retry(i, 0)) + self.assertFalse(await retry_policy.should_retry(helpers._MAX_RETRIES + 1, 0)) + for i in range(capacity - helpers._MAX_RETRIES): + self.assertTrue(await retry_policy.should_retry(1, 0)) + # No tokens left, should not retry. + self.assertFalse(await retry_policy.should_retry(1, 0)) + self.assertEqual(retry_policy.token_bucket.tokens, 0) + + # record_success should generate tokens. + for _ in range(int(2 / helpers.DEFAULT_RETRY_TOKEN_RETURN)): + await retry_policy.record_success(retry=False) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, 2) + for i in range(2): + self.assertTrue(await retry_policy.should_retry(1, 0)) + self.assertFalse(await retry_policy.should_retry(1, 0)) + + # Recording a successful retry should return 1 additional token. 
+ await retry_policy.record_success(retry=True) + self.assertAlmostEqual( + retry_policy.token_bucket.tokens, 1 + helpers.DEFAULT_RETRY_TOKEN_RETURN + ) + self.assertTrue(await retry_policy.should_retry(1, 0)) + self.assertFalse(await retry_policy.should_retry(1, 0)) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, helpers.DEFAULT_RETRY_TOKEN_RETURN) + + async def test_retry_policy_csot(self): + retry_policy = _RetryPolicy(_TokenBucket()) + self.assertTrue(await retry_policy.should_retry(1, 0.5)) + with pymongo.timeout(0.5): + self.assertTrue(await retry_policy.should_retry(1, 0)) + self.assertTrue(await retry_policy.should_retry(1, 0.1)) + # Would exceed the timeout, should not retry. + self.assertFalse(await retry_policy.should_retry(1, 1.0)) + self.assertTrue(await retry_policy.should_retry(1, 1.0)) + + +# Prose tests. +class AsyncTestClientBackpressure(AsyncIntegrationTest): + listener: EventListener + + @classmethod + def setUpClass(cls) -> None: + cls.listener = OvertCommandListener() + + @async_client_context.require_connection + async def asyncSetUp(self) -> None: + await super().asyncSetUp() + self.listener.reset() + self.app_name = self.__class__.__name__.lower() + self.client = await self.async_rs_or_single_client( + event_listeners=[self.listener], appName=self.app_name + ) + + @patch("random.random") + @async_client_context.require_failCommand_appName + async def test_01_operation_retry_uses_exponential_backoff(self, random_func): + # Drivers should test that retries do not occur immediately when a SystemOverloadedError is encountered. + + # 1. let `client` be a `MongoClient` + client = self.client + + # 2. let `collection` be a collection + collection = client.test.test + + # 3. Now, run transactions without backoff: + + # a. Configure the random number generator used for jitter to always return `0` -- this effectively disables backoff. + random_func.return_value = 0 + + # b. 
Configure the following failPoint: + fail_point = dict( + mode="alwaysOn", + data=dict( + failCommands=["insert"], + errorCode=2, + errorLabels=["SystemOverloadedError", "RetryableError"], + appName=self.app_name, + ), + ) + async with self.fail_point(fail_point): + # c. Execute the following command. Expect that the command errors. Measure the duration of the command execution. + start0 = perf_counter() + with self.assertRaises(OperationFailure): + await collection.insert_one({"a": 1}) + end0 = perf_counter() + + # d. Configure the random number generator used for jitter to always return `1`. + random_func.return_value = 1 + + # e. Execute step c again. + start1 = perf_counter() + with self.assertRaises(OperationFailure): + await collection.insert_one({"a": 1}) + end1 = perf_counter() + + # f. Compare the two time between the two runs. + # The sum of 5 backoffs is 3.1 seconds. There is a 1-second window to account for potential variance between the two + # runs. + self.assertTrue(abs((end1 - start1) - (end0 - start0 + 3.1)) < 1) + + @async_client_context.require_failCommand_appName + async def test_03_overload_retries_limited(self): + # Drivers should test that without adaptive retries enabled, overload errors are retried a maximum of five times. + + # 1. Let `client` be a `MongoClient`. + client = self.client + # 2. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 3. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 4. Perform a find operation with `coll` that fails. + async with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + await coll.find_one({}) + + # 5. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. 
+ self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 6. Assert that the total number of started commands is MAX_RETRIES + 1. + self.assertEqual(len(self.listener.started_events), _MAX_RETRIES + 1) + + @async_client_context.require_failCommand_appName + async def test_04_adaptive_retries_limited_by_tokens(self): + # Drivers should test that when enabled, adaptive retries are limited by the number of tokens in the bucket. + + # 1. Let `client` be a `MongoClient` with adaptiveRetries=True. + client = await self.async_rs_or_single_client( + adaptive_retries=True, event_listeners=[self.listener] + ) + # 2. Set `client`'s retry token bucket to have 2 tokens. + client._retry_policy.token_bucket.tokens = 2 + # 3. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 4. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": {"times": 3}, + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 5. Perform a find operation with `coll` that fails. + async with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + await coll.find_one({}) + + # 6. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 7. Assert that the total number of started commands is 3: one for the initial attempt and two for the retries. + self.assertEqual(len(self.listener.started_events), 3) + + +# Location of JSON test specifications. 
+if _IS_SYNC:
+    _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent, "client-backpressure")
+else:
+    _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent.parent, "client-backpressure")
+
+globals().update(
+    generate_test_classes(
+        _TEST_PATH,
+        module=__name__,
+    )
+)
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/asynchronous/test_client_metadata.py b/test/asynchronous/test_client_metadata.py
index 45c1bd1b3b..d4f887b1fc 100644
--- a/test/asynchronous/test_client_metadata.py
+++ b/test/asynchronous/test_client_metadata.py
@@ -219,6 +219,19 @@ async def test_duplicate_driver_name_no_op(self):
         # add same metadata again
         await self.check_metadata_added(client, "Framework", None, None)
 
+    async def test_handshake_documents_include_backpressure(self):
+        # Create a `MongoClient` that is configured to record all handshake documents sent to the server as a part of
+        # connection establishment.
+        client = await self.async_rs_or_single_client("mongodb://" + self.server.address_string)
+
+        # Send a `ping` command to the server and verify that the command succeeds. This ensures that a connection is
+        # established on all topologies. Note: MockupDB only supports standalone servers.
+        await client.admin.command("ping")
+
+        # Assert that for every handshake document intercepted:
+        # the document has a field `backpressure` whose value is `true`.
+ self.assertEqual(self.handshake_req["backpressure"], True) + if __name__ == "__main__": unittest.main() diff --git a/test/asynchronous/test_discovery_and_monitoring.py b/test/asynchronous/test_discovery_and_monitoring.py index 0bbf471d87..17a90db60f 100644 --- a/test/asynchronous/test_discovery_and_monitoring.py +++ b/test/asynchronous/test_discovery_and_monitoring.py @@ -25,8 +25,10 @@ from pathlib import Path from test.asynchronous.helpers import ConcurrentRunner from test.asynchronous.utils import flaky +from test.utils_shared import delay from pymongo.asynchronous.pool import AsyncConnection +from pymongo.errors import ConnectionFailure from pymongo.operations import _Op from pymongo.server_selectors import writable_server_selector @@ -70,7 +72,12 @@ ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _check_command_response, _check_write_command_response -from pymongo.monitoring import ServerHeartbeatFailedEvent, ServerHeartbeatStartedEvent +from pymongo.monitoring import ( + ConnectionCheckOutFailedEvent, + PoolClearedEvent, + ServerHeartbeatFailedEvent, + ServerHeartbeatStartedEvent, +) from pymongo.server_description import SERVER_TYPE, ServerDescription from pymongo.topology_description import TOPOLOGY_TYPE @@ -131,6 +138,9 @@ async def got_app_error(topology, app_error): raise AssertionError except (AutoReconnect, NotPrimaryError, OperationFailure) as e: if when == "beforeHandshakeCompletes": + # The pool would have added the SystemOverloadedError in this case. 
+ if isinstance(e, AutoReconnect): + e._add_error_label("SystemOverloadedError") completed_handshake = False elif when == "afterHandshakeCompletes": completed_handshake = True @@ -439,6 +449,59 @@ async def mock_close(self, reason): AsyncConnection.close_conn = original_close +class TestPoolBackpressure(AsyncIntegrationTest): + @async_client_context.require_version_min(7, 0, 0) + async def test_connection_pool_is_not_cleared(self): + listener = CMAPListener() + + # Create a client that listens to CMAP events, with maxConnecting=100. + client = await self.async_rs_or_single_client(maxConnecting=100, event_listeners=[listener]) + + # Enable the ingress rate limiter. + await client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=True + ) + await client.admin.command("setParameter", 1, ingressConnectionEstablishmentRatePerSec=20) + await client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentBurstCapacitySecs=1 + ) + await client.admin.command("setParameter", 1, ingressConnectionEstablishmentMaxQueueDepth=1) + + # Disable the ingress rate limiter on teardown. + # Sleep for 1 second before disabling to avoid the rate limiter. + async def teardown(): + await asyncio.sleep(1) + await client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=False + ) + + self.addAsyncCleanup(teardown) + + # Make sure the collection has at least one document. + await client.test.test.delete_many({}) + await client.test.test.insert_one({}) + + # Run a slow operation to tie up the connection. + async def target(): + try: + await client.test.test.find_one({"$where": delay(0.1)}) + except ConnectionFailure: + pass + + # Run 100 parallel operations that contend for connections. 
+ tasks = [] + for _ in range(100): + tasks.append(ConcurrentRunner(target=target)) + for t in tasks: + await t.start() + for t in tasks: + await t.join() + + # Verify there were at least 10 connection checkout failed event but no pool cleared events. + self.assertGreater(len(listener.events_by_type(ConnectionCheckOutFailedEvent)), 10) + self.assertEqual(len(listener.events_by_type(PoolClearedEvent)), 0) + + class TestServerMonitoringMode(AsyncIntegrationTest): @async_client_context.require_no_load_balancer async def asyncSetUp(self): diff --git a/test/asynchronous/test_pooling.py b/test/asynchronous/test_pooling.py index 3193d9e3d5..2f0d5fc962 100644 --- a/test/asynchronous/test_pooling.py +++ b/test/asynchronous/test_pooling.py @@ -29,6 +29,7 @@ from pymongo.errors import AutoReconnect, ConnectionFailure, DuplicateKeyError from pymongo.hello import HelloCompat from pymongo.lock import _async_create_lock +from pymongo.read_preferences import ReadPreference sys.path[0:0] = [""] @@ -513,6 +514,39 @@ async def test_connection_timeout_message(self): str(error.exception), ) + @async_client_context.require_failCommand_appName + async def test_pool_backpressure_preserves_existing_connections(self): + client = await self.async_rs_or_single_client() + coll = client.pymongo_test.t + pool = await async_get_pool(client) + await coll.insert_many([{"x": 1} for _ in range(10)]) + t = SocketGetter(self.c, pool) + await t.start() + while t.state != "connection": + await asyncio.sleep(0.1) + + assert not t.sock.conn_closed() + + # Mock a session establishment overload. + mock_connection_fail = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "closeConnection": True, + }, + } + + async with self.fail_point(mock_connection_fail): + await coll.find_one({}) + + # Make sure the existing socket was not affected. 
+ assert not t.sock.conn_closed() + + # Cleanup + await t.release_conn() + await t.join() + await pool.close() + class TestPoolMaxSize(_TestPoolingBase): async def test_max_pool_size(self): diff --git a/test/asynchronous/test_retryable_writes.py b/test/asynchronous/test_retryable_writes.py index ddb1d39eb7..d1568b3ec3 100644 --- a/test/asynchronous/test_retryable_writes.py +++ b/test/asynchronous/test_retryable_writes.py @@ -43,14 +43,16 @@ from bson.int64 import Int64 from bson.raw_bson import RawBSONDocument from bson.son import SON +from pymongo import MongoClient from pymongo.errors import ( AutoReconnect, ConnectionFailure, - OperationFailure, + NotPrimaryError, ServerSelectionTimeoutError, WriteConcernError, ) from pymongo.monitoring import ( + CommandFailedEvent, CommandSucceededEvent, ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, @@ -601,5 +603,192 @@ def raise_connection_err_select_server(*args, **kwargs): self.assertEqual(sent_txn_id, final_txn_id, msg) +class TestErrorPropagationAfterEncounteringMultipleErrors(AsyncIntegrationTest): + # Only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers. + @async_client_context.require_replica_set + # Run against server versions 6.0 and above. + @async_client_context.require_version_min(6, 0) # type: ignore[untyped-decorator] + async def asyncSetUp(self) -> None: + await super().asyncSetUp() + self.setup_client = MongoClient(**async_client_context.default_client_options) + self.addCleanup(self.setup_client.close) + + # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. 
+ def configure_fail_point_sync(self, command_args, off=False) -> None: + cmd = {"configureFailPoint": "failCommand"} + cmd.update(command_args) + if off: + cmd["mode"] = "off" + cmd.pop("data", None) + self.setup_client.admin.command(cmd) + + async def test_01_drivers_return_the_correct_error_when_receiving_only_errors_without_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code 10107 (NotWritablePrimary). + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + await client.test.test.insert_one({}) + + # Assert that the error code of the server error is 10107. 
+ assert exc.exception.errors["code"] == 10107 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + async def test_02_drivers_return_the_correct_error_when_receiving_only_errors_with_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code `10107` (NotWritablePrimary) + # and a NoWritesPerformed label. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + if listener.failed_events: + return + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + await client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. 
+ assert exc.exception.errors["code"] == 91 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + async def test_03_drivers_return_the_correct_error_when_receiving_some_errors_with_NoWritesPerformed_and_some_without_NoWritesPerformed( + self + ) -> None: + # TODO: read the expected behavior and add breakpoint() to the retry loop + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and the `NoWritesPerformed`, `RetryableError` and `SystemOverloadedError` labels. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and + # `SystemOverloadedError` error labels but without the `NoWritesPerformed` error label. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorCode": 91, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = await self.async_rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. 
+ from pymongo.errors import OperationFailure + + with self.assertRaises(Exception) as exc: + await client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 + # Assert that the error does not contain the error label `NoWritesPerformed`. + assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + if __name__ == "__main__": unittest.main() diff --git a/test/asynchronous/test_transactions.py b/test/asynchronous/test_transactions.py index 4fc20fba3b..95a07a743c 100644 --- a/test/asynchronous/test_transactions.py +++ b/test/asynchronous/test_transactions.py @@ -16,9 +16,12 @@ from __future__ import annotations import asyncio +import random import sys +import time from io import BytesIO +import pymongo from gridfs.asynchronous.grid_file import AsyncGridFS, AsyncGridFSBucket from pymongo.asynchronous.pool import PoolState from pymongo.server_selectors import writable_server_selector @@ -45,7 +48,9 @@ CollectionInvalid, ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, ) from pymongo.operations import IndexModel, InsertOne @@ -434,7 +439,7 @@ async def set_fail_point(self, command_args): await self.configure_fail_point(client, command_args) @async_client_context.require_transactions - async def test_callback_raises_custom_error(self): + async def test_1_callback_raises_custom_error(self): class _MyException(Exception): pass @@ -446,7 +451,7 @@ async def raise_error(_): await s.with_transaction(raise_error) @async_client_context.require_transactions - async def test_callback_returns_value(self): + async def test_2_callback_returns_value(self): async def callback(_): return "Foo" @@ -474,7 +479,7 @@ def callback(_): self.assertEqual(await s.with_transaction(callback), "Foo") @async_client_context.require_transactions - async def 
test_callback_not_retried_after_timeout(self): + async def test_3_1_callback_not_retried_after_timeout(self): listener = OvertCommandListener() client = await self.async_rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -495,14 +500,14 @@ async def callback(session): listener.reset() async with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): await s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "abortTransaction"]) @async_client_context.require_test_commands @async_client_context.require_transactions - async def test_callback_not_retried_after_commit_timeout(self): + async def test_3_2_callback_not_retried_after_commit_timeout(self): listener = OvertCommandListener() client = await self.async_rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -529,14 +534,14 @@ async def callback(session): async with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): await s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "commitTransaction"]) @async_client_context.require_test_commands @async_client_context.require_transactions - async def test_commit_not_retried_after_timeout(self): + async def test_3_3_commit_not_retried_after_timeout(self): listener = OvertCommandListener() client = await self.async_rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -560,7 +565,7 @@ async def callback(session): async with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(ConnectionFailure): + with self.assertRaises(NetworkTimeout): await s.with_transaction(callback) # One insert for the callback and two commits (includes the automatic @@ -569,6 +574,38 @@ async def callback(session): listener.started_command_names(), 
["insert", "commitTransaction", "commitTransaction"] ) + @async_client_context.require_transactions + async def test_callback_not_retried_after_csot_timeout(self): + listener = OvertCommandListener() + client = await self.async_rs_client(event_listeners=[listener]) + coll = client[self.db.name].test + + async def callback(session): + await coll.insert_one({}, session=session) + err: dict = { + "ok": 0, + "errmsg": "Transaction 7819 has been aborted.", + "code": 251, + "codeName": "NoSuchTransaction", + "errorLabels": ["TransientTransactionError"], + } + raise OperationFailure(err["errmsg"], err["code"], err) + + # Create the collection. + await coll.insert_one({}) + listener.reset() + async with client.start_session() as s: + with pymongo.timeout(1.0): + with self.assertRaises(ExecutionTimeout): + await s.with_transaction(callback) + + # At least two attempts: the original and one or more retries. + inserts = len([x for x in listener.started_command_names() if x == "insert"]) + aborts = len([x for x in listener.started_command_names() if x == "abortTransaction"]) + + self.assertGreaterEqual(inserts, 2) + self.assertGreaterEqual(aborts, 2) + # Tested here because this supports Motor's convenient transactions API. 
@async_client_context.require_transactions async def test_in_transaction_property(self): @@ -606,6 +643,72 @@ async def callback(session): await s.with_transaction(callback) self.assertFalse(s.in_transaction) + @async_client_context.require_test_commands + @async_client_context.require_transactions + async def test_4_retry_backoff_is_enforced(self): + client = async_client_context.client + coll = client[self.db.name].test + # patch random to make it deterministic -- once to effectively have + # no backoff and the second time with "max" backoff (always waiting the longest + # possible time) + _original_random_random = random.random + + def always_one(): + return 1 + + def always_zero(): + return 0 + + random.random = always_zero + # set fail point to trigger transaction failure and trigger backoff + await self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": {"times": 13}, + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addAsyncCleanup( + self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"} + ) + + async def callback(session): + await coll.insert_one({}, session=session) + + start = time.monotonic() + async with self.client.start_session() as s: + await s.with_transaction(callback) + end = time.monotonic() + no_backoff_time = end - start + + random.random = always_one + # set fail point to trigger transaction failure and trigger backoff + await self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": { + "times": 13 + }, # sufficiently high enough such that the time effect of backoff is noticeable + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addAsyncCleanup( + self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"} + ) + start = time.monotonic() + async with self.client.start_session() as s: + await s.with_transaction(callback) + end = time.monotonic() + self.assertLess(abs(end - start - 
(no_backoff_time + 2.2)), 1) # sum of 13 backoffs is 2.2 + + random.random = _original_random_random + class TestOptionsInsideTransactionProse(AsyncTransactionsBase): @async_client_context.require_transactions diff --git a/test/client-backpressure/backpressure-connection-checkin.json b/test/client-backpressure/backpressure-connection-checkin.json new file mode 100644 index 0000000000..307fc9fb53 --- /dev/null +++ b/test/client-backpressure/backpressure-connection-checkin.json @@ -0,0 +1,141 @@ +{ + "description": "tests that connections are returned to the pool on retry attempts for overload errors", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "connectionCheckedOutEvent", + "connectionCheckedInEvent" + ] + } + }, + { + "client": { + "id": "fail_point_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "retryable-writes-tests", + "documents": [ + { + "_id": 1, + "x": 11 + } + ] + } + ], + "tests": [ + { + "description": "overload error retry attempts return connections to the pool", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + 
"isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [ + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + }, + { + "connectionCheckedOutEvent": {} + }, + { + "connectionCheckedInEvent": {} + } + ] + } + ] + } + ] +} diff --git a/test/client-backpressure/backpressure-retry-loop.json b/test/client-backpressure/backpressure-retry-loop.json new file mode 100644 index 0000000000..0e8840f523 --- /dev/null +++ b/test/client-backpressure/backpressure-retry-loop.json @@ -0,0 +1,4883 @@ +{ + "description": "tests that operations respect overload backoff retry loop", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" + ] + } + }, + { + "client": { + "id": "internal_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "internal_db", + "client": "internal_client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "retryable-writes-tests", + "database": "internal_db", + "collectionName": "coll" + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + }, + { + "client": { + "id": 
"client_retryReads_false", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" + ], + "uriOptions": { + "retryReads": false + } + } + }, + { + "database": { + "id": "database_retryReads_false", + "client": "client_retryReads_false", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection_retryReads_false", + "database": "database_retryReads_false", + "collectionName": "coll" + } + }, + { + "client": { + "id": "client_retryWrites_false", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" + ], + "uriOptions": { + "retryWrites": false + } + } + }, + { + "database": { + "id": "database_retryWrites_false", + "client": "client_retryWrites_false", + "databaseName": "backpressure-db" + } + }, + { + "collection": { + "id": "collection_retryWrites_false", + "database": "database_retryWrites_false", + "collectionName": "coll" + } + } + ], + "initialData": [ + { + "collectionName": "coll", + "databaseName": "retryable-writes-tests", + "documents": [] + } + ], + "_yamlAnchors": { + "bulWriteInsertNamespace": "retryable-writes-tests.coll" + }, + "tests": [ + { + "description": "client.listDatabases retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabases", + "object": "client", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + 
"commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandSucceededEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.listDatabases (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabases", + "object": "client_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.listDatabaseNames retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabaseNames", + "object": "client" + } + ], + "expectEvents": [ + { + "client": "client", + 
"events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandSucceededEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.listDatabaseNames (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabaseNames", + "object": "client_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.createChangeStream retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "client", + "arguments": { + "pipeline": [] + } + } + ], + 
"expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "client.createChangeStream (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "client_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "client.clientBulkWrite retries using operation loop", + "runOnRequirements": [ + { + "minServerVersion": "8.0" + } + ], + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "bulkWrite" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, 
+ { + "name": "clientBulkWrite", + "object": "client", + "arguments": { + "models": [ + { + "insertOne": { + "namespace": "retryable-writes-tests.coll", + "document": { + "_id": 8, + "x": 88 + } + } + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandSucceededEvent": { + "commandName": "bulkWrite" + } + } + ] + } + ] + }, + { + "description": "client.clientBulkWrite (write) does not retry if retryWrites=false", + "runOnRequirements": [ + { + "minServerVersion": "8.0" + } + ], + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "bulkWrite" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "clientBulkWrite", + "object": "client_retryWrites_false", + "arguments": { + "models": [ + { + "insertOne": { + "namespace": "retryable-writes-tests.coll", + "document": { + "_id": 8, + "x": 88 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + } + ] + } + ] + }, + { + "description": "database.aggregate retries using operation loop", + "operations": [ + { + 
"name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "database", + "arguments": { + "pipeline": [ + { + "$listLocalSessions": {} + }, + { + "$limit": 1 + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "database.aggregate (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "database_retryReads_false", + "arguments": { + "pipeline": [ + { + "$listLocalSessions": {} + }, + { + "$limit": 1 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + 
"commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "database.listCollections retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listCollections", + "object": "database", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandSucceededEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.listCollections (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listCollections", + "object": "database_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + 
"commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.listCollectionNames retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listCollectionNames", + "object": "database", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandSucceededEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.listCollectionNames (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listCollectionNames", + "object": "database_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": 
true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.runCommand retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "ping" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "runCommand", + "object": "database", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandSucceededEvent": { + "commandName": "ping" + } + } + ] + } + ] + }, + { + "description": "database.runCommand (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "ping" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "runCommand", + "object": "database_retryReads_false", + "arguments": { + "command": { + "ping": 1 + }, + 
"commandName": "ping" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + } + ] + } + ] + }, + { + "description": "database.runCommand (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "ping" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "runCommand", + "object": "database_retryWrites_false", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + } + ] + } + ] + }, + { + "description": "database.createChangeStream retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "database", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + 
"commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "database.createChangeStream (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "database_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } 
+ }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.countDocuments retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "countDocuments", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + 
"commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.countDocuments (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "countDocuments", + "object": "collection_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.estimatedDocumentCount retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "count" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "estimatedDocumentCount", + "object": "collection" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + 
"commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandSucceededEvent": { + "commandName": "count" + } + } + ] + } + ] + }, + { + "description": "collection.estimatedDocumentCount (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "count" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "estimatedDocumentCount", + "object": "collection_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + } + ] + } + ] + }, + { + "description": "collection.distinct retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "distinct" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "distinct", + "object": "collection", + "arguments": { + "fieldName": "x", + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + 
"commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandSucceededEvent": { + "commandName": "distinct" + } + } + ] + } + ] + }, + { + "description": "collection.distinct (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "distinct" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "distinct", + "object": "collection_retryReads_false", + "arguments": { + "fieldName": "x", + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + } + ] + } + ] + }, + { + "description": "collection.find retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + 
}, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.find (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "object": "collection_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.findOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOne", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": 
"find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.findOne (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOne", + "object": "collection_retryReads_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexes retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listIndexes", + "object": "collection" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + 
"commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexes (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listIndexes", + "object": "collection_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexNames retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listIndexNames", + "object": "collection" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": 
"listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexNames (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listIndexNames", + "object": "collection_retryReads_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createChangeStream retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "collection", + "arguments": { + "pipeline": [] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + 
"commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.createChangeStream (read) does not retry if retryReads=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "collection_retryReads_false", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryReads_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + 
"commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertOne", + "object": "collection_retryWrites_false", + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + 
"commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertMany (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertMany", + "object": "collection_retryWrites_false", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteOne", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": 
"client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandSucceededEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteOne", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteMany", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", 
+ "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandSucceededEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteMany", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "replaceOne", + "object": "collection", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + } + } + ], + "expectEvents": [ 
+ { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "replaceOne", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateOne", + "object": "collection", + "arguments": { + "filter": {}, + 
"update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateOne", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + 
"name": "updateMany", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandSucceededEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateMany", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + 
"errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndDelete", + "object": "collection", + "arguments": { + "filter": {} + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndDelete", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": 
"failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndReplace", + "object": "collection", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndReplace", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndUpdate retries using 
operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndUpdate", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandSucceededEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndUpdate (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndUpdate", + "object": "collection_retryWrites_false", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + 
"commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "bulkWrite", + "object": "collection", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandSucceededEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "bulkWrite", + "object": "collection_retryWrites_false", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + }, + "expectError": { + "isError": 
true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createIndex", + "object": "collection", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createIndex", + "object": 
"collection_retryWrites_false", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndex retries using operation loop", + "operations": [ + { + "name": "createIndex", + "object": "retryable-writes-tests", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndex", + "object": "collection", + "arguments": { + "name": "x_11" + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndex (write) does not retry if retryWrites=false", + "operations": [ + { + "name": "createIndex", + "object": "retryable-writes-tests", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + } + }, + { + "name": "failPoint", 
+ "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndex", + "object": "collection_retryWrites_false", + "arguments": { + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndexes retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndexes", + "object": "collection" + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandSucceededEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndexes (write) does not retry if retryWrites=false", + "operations": [ + { + 
"name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndexes", + "object": "collection_retryWrites_false", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries using operation loop", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection", + "arguments": { + "pipeline": [ + { + "$out": "output" + } + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandSucceededEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate (write) does not retry if 
retryWrites=false", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "internal_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection_retryWrites_false", + "arguments": { + "pipeline": [ + { + "$out": "output" + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client_retryWrites_false", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + } + ] +} diff --git a/test/client-backpressure/backpressure-retry-max-attempts.json b/test/client-backpressure/backpressure-retry-max-attempts.json new file mode 100644 index 0000000000..1e6f46f076 --- /dev/null +++ b/test/client-backpressure/backpressure-retry-max-attempts.json @@ -0,0 +1,3559 @@ +{ + "description": "tests that operations retry at most maxAttempts=5 times", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent", + "commandSucceededEvent", + "commandFailedEvent" + ], + "ignoreCommandMonitoringEvents": [ + "killCursors" + ] + } + }, + { + "client": { + "id": "fail_point_client", + "useMultipleMongoses": false + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "retryable-writes-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "coll" + } + } + ], + "_yamlAnchors": { + "bulkWriteInsertNamespace": 
"retryable-writes-tests.coll" + }, + "initialData": [ + { + "collectionName": "coll", + "databaseName": "retryable-writes-tests", + "documents": [ + { + "_id": 1, + "x": 11 + }, + { + "_id": 2, + "x": 22 + } + ] + } + ], + "tests": [ + { + "description": "client.listDatabases retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabases", + "object": "client", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.listDatabaseNames retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + 
"failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listDatabases" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listDatabaseNames", + "object": "client", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandStartedEvent": { + "commandName": "listDatabases" + } + }, + { + "commandFailedEvent": { + "commandName": "listDatabases" + } + } + ] + } + ] + }, + { + "description": "client.createChangeStream retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "client", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + 
"commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "client.clientBulkWrite retries at most maxAttempts=5 times", + "runOnRequirements": [ + { + "minServerVersion": "8.0" + } + ], + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "bulkWrite" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "clientBulkWrite", + "object": "client", + "arguments": { + "models": [ + { + "insertOne": { + "namespace": "retryable-writes-tests.coll", + "document": { + "_id": 8, + "x": 88 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + 
"commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandStartedEvent": { + "commandName": "bulkWrite" + } + }, + { + "commandFailedEvent": { + "commandName": "bulkWrite" + } + } + ] + } + ] + }, + { + "description": "database.aggregate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "database", + "arguments": { + "pipeline": [ + { + "$listLocalSessions": {} + }, + { + "$limit": 1 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } 
+ }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "database.listCollections retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listCollections", + "object": "database", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.listCollectionNames retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + 
"listCollections" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listCollectionNames", + "object": "database", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + }, + { + "commandStartedEvent": { + "commandName": "listCollections" + } + }, + { + "commandFailedEvent": { + "commandName": "listCollections" + } + } + ] + } + ] + }, + { + "description": "database.runCommand retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "ping" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "runCommand", + "object": "database", + "arguments": { + "command": { + "ping": 1 + }, + "commandName": "ping" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + 
"commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + }, + { + "commandStartedEvent": { + "commandName": "ping" + } + }, + { + "commandFailedEvent": { + "commandName": "ping" + } + } + ] + } + ] + }, + { + "description": "database.createChangeStream retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "database", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": 
{ + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.countDocuments retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + 
"configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "countDocuments", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.estimatedDocumentCount retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "count" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "estimatedDocumentCount", + "object": "collection", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + 
"commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + }, + { + "commandStartedEvent": { + "commandName": "count" + } + }, + { + "commandFailedEvent": { + "commandName": "count" + } + } + ] + } + ] + }, + { + "description": "collection.distinct retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "distinct" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "distinct", + "object": "collection", + "arguments": { + "fieldName": "x", + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } 
+ }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + }, + { + "commandStartedEvent": { + "commandName": "distinct" + } + }, + { + "commandFailedEvent": { + "commandName": "distinct" + } + } + ] + } + ] + }, + { + "description": "collection.find retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.findOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + 
"RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOne", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandFailedEvent": { + "commandName": "find" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexes retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listIndexes", + "object": "collection", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + 
{ + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.listIndexNames retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "listIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "listIndexNames", + "object": "collection", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "listIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "listIndexes" + } + }, + { + 
"commandFailedEvent": { + "commandName": "listIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.createChangeStream retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createChangeStream", + "object": "collection", + "arguments": { + "pipeline": [] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + }, + { + "description": "collection.insertOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + 
"errorCode": 2 + } + } + } + }, + { + "name": "insertOne", + "object": "collection", + "arguments": { + "document": { + "_id": 2, + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.insertMany retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 2, + "x": 22 + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + 
"commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.deleteOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteOne", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + 
"commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.deleteMany retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "delete" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "deleteMany", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + }, + { + "commandStartedEvent": { + "commandName": "delete" + } + }, + { + "commandFailedEvent": { + "commandName": "delete" + } + } + ] + } + ] + }, + { + "description": "collection.replaceOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "replaceOne", + 
"object": "collection", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateOne retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateOne", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + 
{ + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.updateMany retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "update" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "updateMany", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + "commandName": "update" + } + }, + { + "commandStartedEvent": { + "commandName": "update" + } + }, + { + "commandFailedEvent": { + 
"commandName": "update" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndDelete retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndDelete", + "object": "collection", + "arguments": { + "filter": {} + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndReplace retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + 
"SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndReplace", + "object": "collection", + "arguments": { + "filter": {}, + "replacement": { + "x": 22 + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.findOneAndUpdate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "findAndModify" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "findOneAndUpdate", + "object": "collection", + "arguments": { + "filter": {}, + "update": { + "$set": { + "x": 22 + } + } + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": 
"findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandStartedEvent": { + "commandName": "findAndModify" + } + }, + { + "commandFailedEvent": { + "commandName": "findAndModify" + } + } + ] + } + ] + }, + { + "description": "collection.bulkWrite retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "bulkWrite", + "object": "collection", + "arguments": { + "requests": [ + { + "insertOne": { + "document": { + "_id": 2, + "x": 22 + } + } + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + 
"commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandFailedEvent": { + "commandName": "insert" + } + } + ] + } + ] + }, + { + "description": "collection.createIndex retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "createIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "createIndex", + "object": "collection", + "arguments": { + "keys": { + "x": 11 + }, + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "createIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "createIndexes" + } + } + ] + } + ] + }, + { + "description": 
"collection.dropIndex retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndex", + "object": "collection", + "arguments": { + "name": "x_11" + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.dropIndexes retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "dropIndexes" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "dropIndexes", + "object": "collection", 
+ "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandStartedEvent": { + "commandName": "dropIndexes" + } + }, + { + "commandFailedEvent": { + "commandName": "dropIndexes" + } + } + ] + } + ] + }, + { + "description": "collection.aggregate retries at most maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "fail_point_client", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "aggregate" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "aggregate", + "object": "collection", + "arguments": { + "pipeline": [ + { + "$out": "output" + } + ] + }, + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client", + "events": [ + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + 
"commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + }, + { + "commandStartedEvent": { + "commandName": "aggregate" + } + }, + { + "commandFailedEvent": { + "commandName": "aggregate" + } + } + ] + } + ] + } + ] +} diff --git a/test/client-backpressure/getMore-retried.json b/test/client-backpressure/getMore-retried.json new file mode 100644 index 0000000000..70eff84612 --- /dev/null +++ b/test/client-backpressure/getMore-retried.json @@ -0,0 +1,291 @@ +{ + "description": "getMore-retries-backpressure", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4" + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "observeEvents": [ + "commandStartedEvent", + "commandFailedEvent", + "commandSucceededEvent" + ] + } + }, + { + "client": { + "id": "failPointClient" + } + }, + { + "database": { + "id": "db", + "client": "client0", + "databaseName": "default" + } + }, + { + "collection": { + "id": "coll", + "database": "db", + "collectionName": "default" + } + } + ], + "initialData": [ + { + "databaseName": "default", + "collectionName": "default", + "documents": [ + { + "a": 1 + }, + { + "a": 2 + }, + { + "a": 3 + } + ] + } + ], + "tests": [ + { + "description": "getMores are retried", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 3 + }, + "data": { + "failCommands": [ + "getMore" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "arguments": { + "batchSize": 2, + "filter": {}, + 
"sort": { + "a": 1 + } + }, + "object": "coll", + "expectResult": [ + { + "a": 1 + }, + { + "a": 2 + }, + { + "a": 3 + } + ] + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandSucceededEvent": { + "commandName": "getMore" + } + } + ] + } + ] + }, + { + "description": "getMores are retried maxAttempts=5 times", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "failPointClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "getMore" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 2 + } + } + } + }, + { + "name": "find", + "arguments": { + "batchSize": 2, + "filter": {} + }, + "object": "coll", + "expectError": { + "isError": true, + "isClientError": false + } + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandSucceededEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + 
"commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "getMore" + } + }, + { + "commandFailedEvent": { + "commandName": "getMore" + } + }, + { + "commandStartedEvent": { + "commandName": "killCursors" + } + }, + { + "commandSucceededEvent": { + "commandName": "killCursors" + } + } + ] + } + ] + } + ] +} diff --git a/test/connection_monitoring/pool-create-min-size-error.json b/test/connection_monitoring/pool-create-min-size-error.json index 5c8ad02dbd..4334ce2571 100644 --- a/test/connection_monitoring/pool-create-min-size-error.json +++ b/test/connection_monitoring/pool-create-min-size-error.json @@ -9,9 +9,7 @@ ], "failPoint": { "configureFailPoint": "failCommand", - "mode": { - "times": 50 - }, + "mode": "alwaysOn", "data": { "failCommands": [ "isMaster", diff --git a/test/discovery_and_monitoring/errors/error_handling_handshake.json b/test/discovery_and_monitoring/errors/error_handling_handshake.json index 65f8ad0a33..c60ee453dd 100644 --- a/test/discovery_and_monitoring/errors/error_handling_handshake.json +++ b/test/discovery_and_monitoring/errors/error_handling_handshake.json @@ -97,14 +97,22 @@ "outcome": { "servers": { "a:27017": { - "type": "Unknown", - "topologyVersion": null, + "type": "RSPrimary", + "setName": "rs", + "topologyVersion": { + "processId": { + "$oid": "000000000000000000000001" + }, + "counter": { + "$numberLong": "1" + } + }, "pool": { - "generation": 1 + "generation": 0 } } }, - "topologyType": "ReplicaSetNoPrimary", + "topologyType": "ReplicaSetWithPrimary", "logicalSessionTimeoutMinutes": null, "setName": "rs" } diff --git a/test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json 
b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json new file mode 100644 index 0000000000..ccaea8d135 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-replicaset.json @@ -0,0 +1,142 @@ +{ + "description": "backpressure-network-error-fail-replicaset", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-error-fail", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network connection errors during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "serverMonitoringMode": "poll", + "appname": "backpressureNetworkErrorFailTest" + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-error-fail" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "RSPrimary" + } + } + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + 
"isMaster", + "hello" + ], + "appName": "backpressureNetworkErrorFailTest", + "closeConnection": true + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-network-error-fail-single.json b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-single.json new file mode 100644 index 0000000000..c1ff67c732 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-error-fail-single.json @@ -0,0 +1,142 @@ +{ + "description": "backpressure-network-error-fail-single", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "single" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-error-fail", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network connection errors during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "serverMonitoringMode": "poll", + "appname": "backpressureNetworkErrorFailTest" + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + 
"collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-error-fail" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "Standalone" + } + } + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "appName": "backpressureNetworkErrorFailTest", + "closeConnection": true + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json new file mode 100644 index 0000000000..35b088f422 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-replicaset.json @@ -0,0 +1,145 @@ +{ + "description": "backpressure-network-timeout-error-replicaset", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "replicaset" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-timeout-error", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on 
network timeout error during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "appname": "backpressureNetworkTimeoutErrorTest", + "serverMonitoringMode": "poll", + "connectTimeoutMS": 250, + "socketTimeoutMS": 250 + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-timeout-error" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "RSPrimary" + } + } + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "blockConnection": true, + "blockTimeMS": 500, + "appName": "backpressureNetworkTimeoutErrorTest" + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-single.json b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-single.json new file mode 100644 index 0000000000..54b11d4d5b --- /dev/null +++ 
b/test/discovery_and_monitoring/unified/backpressure-network-timeout-fail-single.json @@ -0,0 +1,145 @@ +{ + "description": "backpressure-network-timeout-error-single", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "single" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "initialData": [ + { + "collectionName": "backpressure-network-timeout-error", + "databaseName": "sdam-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ], + "tests": [ + { + "description": "apply backpressure on network timeout error during connection establishment", + "operations": [ + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "useMultipleMongoses": false, + "observeEvents": [ + "serverDescriptionChangedEvent", + "poolClearedEvent" + ], + "uriOptions": { + "retryWrites": false, + "heartbeatFrequencyMS": 1000000, + "appname": "backpressureNetworkTimeoutErrorTest", + "serverMonitoringMode": "poll", + "connectTimeoutMS": 250, + "socketTimeoutMS": 250 + } + } + }, + { + "database": { + "id": "database", + "client": "client", + "databaseName": "sdam-tests" + } + }, + { + "collection": { + "id": "collection", + "database": "database", + "collectionName": "backpressure-network-timeout-error" + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": { + "newDescription": { + "type": "Standalone" + } + } + }, + "count": 1 + } + }, + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "isMaster", + "hello" + ], + "blockConnection": true, + "blockTimeMS": 500, + "appName": 
"backpressureNetworkTimeoutErrorTest" + } + } + } + }, + { + "name": "insertMany", + "object": "collection", + "arguments": { + "documents": [ + { + "_id": 3 + }, + { + "_id": 4 + } + ] + }, + "expectError": { + "isError": true, + "errorLabelsContain": [ + "SystemOverloadedError", + "RetryableError" + ] + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "cmap", + "events": [] + } + ] + } + ] +} diff --git a/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json b/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json new file mode 100644 index 0000000000..f0597124b7 --- /dev/null +++ b/test/discovery_and_monitoring/unified/backpressure-server-description-unchanged-on-min-pool-size-population-error.json @@ -0,0 +1,106 @@ +{ + "description": "backpressure-server-description-unchanged-on-min-pool-size-population-error", + "schemaVersion": "1.17", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "serverless": "forbid", + "topologies": [ + "single" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "setupClient", + "useMultipleMongoses": false + } + } + ], + "tests": [ + { + "description": "the server description is not changed on handshake error during minPoolSize population", + "operations": [ + { + "name": "failPoint", + "object": "testRunner", + "arguments": { + "client": "setupClient", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "skip": 1 + }, + "data": { + "failCommands": [ + "hello", + "isMaster" + ], + "appName": "authErrorTest", + "closeConnection": true + } + } + } + }, + { + "name": "createEntities", + "object": "testRunner", + "arguments": { + "entities": [ + { + "client": { + "id": "client", + "observeEvents": [ + "serverDescriptionChangedEvent", + "connectionClosedEvent" + ], + "uriOptions": { + "appname": "authErrorTest", + "minPoolSize": 5, + "maxConnecting": 
1, + "serverMonitoringMode": "poll", + "heartbeatFrequencyMS": 1000000 + } + } + } + ] + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "serverDescriptionChangedEvent": {} + }, + "count": 1 + } + }, + { + "name": "waitForEvent", + "object": "testRunner", + "arguments": { + "client": "client", + "event": { + "connectionClosedEvent": {} + }, + "count": 1 + } + } + ], + "expectEvents": [ + { + "client": "client", + "eventType": "sdam", + "events": [ + { + "serverDescriptionChangedEvent": {} + } + ] + } + ] + } + ] +} diff --git a/test/test_client.py b/test/test_client.py index 737b3afe60..9cfa36b2cd 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -645,6 +645,21 @@ def test_detected_environment_warning(self, mock_get_hosts): with self.assertWarns(UserWarning): self.simple_client(multi_host) + def test_adaptive_retries(self): + # Assert that adaptive retries are disabled by default. + c = self.simple_client(connect=False) + self.assertFalse(c.options.adaptive_retries) + + # Assert that adaptive retries can be enabled through connection or client options. + c = self.simple_client(connect=False, adaptive_retries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(connect=False, adaptiveRetries=True) + self.assertTrue(c.options.adaptive_retries) + + c = self.simple_client(host="mongodb://localhost/?adaptiveretries=true", connect=False) + self.assertTrue(c.options.adaptive_retries) + class TestClient(IntegrationTest): def test_multiple_uris(self): diff --git a/test/test_client_backpressure.py b/test/test_client_backpressure.py new file mode 100644 index 0000000000..b82846d35d --- /dev/null +++ b/test/test_client_backpressure.py @@ -0,0 +1,359 @@ +# Copyright 2025-present MongoDB, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test Client Backpressure spec.""" +from __future__ import annotations + +import os +import pathlib +import sys +from time import perf_counter +from unittest.mock import patch + +sys.path[0:0] = [""] + +from test import ( + IntegrationTest, + PyMongoTestCase, + client_context, + unittest, +) +from test.unified_format import generate_test_classes +from test.utils_shared import EventListener, OvertCommandListener + +import pymongo +from pymongo.errors import OperationFailure, PyMongoError +from pymongo.synchronous import helpers +from pymongo.synchronous.helpers import _MAX_RETRIES, _RetryPolicy, _TokenBucket + +_IS_SYNC = True + +# Mock an system overload error. +mock_overload_error = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["find", "insert", "update"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, +} + + +class TestBackpressure(IntegrationTest): + RUN_ON_LOAD_BALANCER = True + + @client_context.require_failCommand_appName + def test_retry_overload_error_command(self): + self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.command("find", "t") + + # Ensure command stops retrying after _MAX_RETRIES. 
+ fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.command("find", "t") + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_find(self): + self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.t.find_one() + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.t.find_one() + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_insert_one(self): + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.t.insert_one({"x": 1}) + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.t.insert_one({"x": 1}) + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_update_many(self): + # Even though update_many is not a retryable write operation, it will + # still be retried via the "RetryableError" error label. 
+ self.db.t.insert_one({"x": 1}) + + # Ensure command is retried on overload error. + fail_many = mock_overload_error.copy() + fail_many["mode"] = {"times": _MAX_RETRIES} + with self.fail_point(fail_many): + self.db.t.update_many({}, {"$set": {"x": 2}}) + + # Ensure command stops retrying after _MAX_RETRIES. + fail_too_many = mock_overload_error.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + self.db.t.update_many({}, {"$set": {"x": 2}}) + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + @client_context.require_failCommand_appName + def test_retry_overload_error_getMore(self): + coll = self.db.t + coll.insert_many([{"x": 1} for _ in range(10)]) + + # Ensure command is retried on overload error. + fail_many = { + "configureFailPoint": "failCommand", + "mode": {"times": _MAX_RETRIES}, + "data": { + "failCommands": ["getMore"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + cursor = coll.find(batch_size=2) + cursor.next() + with self.fail_point(fail_many): + cursor.to_list() + + # Ensure command stops retrying after _MAX_RETRIES. 
+ fail_too_many = fail_many.copy() + fail_too_many["mode"] = {"times": _MAX_RETRIES + 1} + cursor = coll.find(batch_size=2) + cursor.next() + with self.fail_point(fail_too_many): + with self.assertRaises(PyMongoError) as error: + cursor.to_list() + + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + +class TestRetryPolicy(PyMongoTestCase): + def test_retry_policy(self): + capacity = 10 + retry_policy = _RetryPolicy(_TokenBucket(capacity=capacity), adaptive_retry=True) + self.assertEqual(retry_policy.attempts, helpers._MAX_RETRIES) + self.assertEqual(retry_policy.backoff_initial, helpers._BACKOFF_INITIAL) + self.assertEqual(retry_policy.backoff_max, helpers._BACKOFF_MAX) + for i in range(1, helpers._MAX_RETRIES + 1): + self.assertTrue(retry_policy.should_retry(i, 0)) + self.assertFalse(retry_policy.should_retry(helpers._MAX_RETRIES + 1, 0)) + for i in range(capacity - helpers._MAX_RETRIES): + self.assertTrue(retry_policy.should_retry(1, 0)) + # No tokens left, should not retry. + self.assertFalse(retry_policy.should_retry(1, 0)) + self.assertEqual(retry_policy.token_bucket.tokens, 0) + + # record_success should generate tokens. + for _ in range(int(2 / helpers.DEFAULT_RETRY_TOKEN_RETURN)): + retry_policy.record_success(retry=False) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, 2) + for i in range(2): + self.assertTrue(retry_policy.should_retry(1, 0)) + self.assertFalse(retry_policy.should_retry(1, 0)) + + # Recording a successful retry should return 1 additional token. 
+ retry_policy.record_success(retry=True) + self.assertAlmostEqual( + retry_policy.token_bucket.tokens, 1 + helpers.DEFAULT_RETRY_TOKEN_RETURN + ) + self.assertTrue(retry_policy.should_retry(1, 0)) + self.assertFalse(retry_policy.should_retry(1, 0)) + self.assertAlmostEqual(retry_policy.token_bucket.tokens, helpers.DEFAULT_RETRY_TOKEN_RETURN) + + def test_retry_policy_csot(self): + retry_policy = _RetryPolicy(_TokenBucket()) + self.assertTrue(retry_policy.should_retry(1, 0.5)) + with pymongo.timeout(0.5): + self.assertTrue(retry_policy.should_retry(1, 0)) + self.assertTrue(retry_policy.should_retry(1, 0.1)) + # Would exceed the timeout, should not retry. + self.assertFalse(retry_policy.should_retry(1, 1.0)) + self.assertTrue(retry_policy.should_retry(1, 1.0)) + + +# Prose tests. +class TestClientBackpressure(IntegrationTest): + listener: EventListener + + @classmethod + def setUpClass(cls) -> None: + cls.listener = OvertCommandListener() + + @client_context.require_connection + def setUp(self) -> None: + super().setUp() + self.listener.reset() + self.app_name = self.__class__.__name__.lower() + self.client = self.rs_or_single_client( + event_listeners=[self.listener], appName=self.app_name + ) + + @patch("random.random") + @client_context.require_failCommand_appName + def test_01_operation_retry_uses_exponential_backoff(self, random_func): + # Drivers should test that retries do not occur immediately when a SystemOverloadedError is encountered. + + # 1. let `client` be a `MongoClient` + client = self.client + + # 2. let `collection` be a collection + collection = client.test.test + + # 3. Now, run transactions without backoff: + + # a. Configure the random number generator used for jitter to always return `0` -- this effectively disables backoff. + random_func.return_value = 0 + + # b. 
Configure the following failPoint: + fail_point = dict( + mode="alwaysOn", + data=dict( + failCommands=["insert"], + errorCode=2, + errorLabels=["SystemOverloadedError", "RetryableError"], + appName=self.app_name, + ), + ) + with self.fail_point(fail_point): + # c. Execute the following command. Expect that the command errors. Measure the duration of the command execution. + start0 = perf_counter() + with self.assertRaises(OperationFailure): + collection.insert_one({"a": 1}) + end0 = perf_counter() + + # d. Configure the random number generator used for jitter to always return `1`. + random_func.return_value = 1 + + # e. Execute step c again. + start1 = perf_counter() + with self.assertRaises(OperationFailure): + collection.insert_one({"a": 1}) + end1 = perf_counter() + + # f. Compare the two time between the two runs. + # The sum of 5 backoffs is 3.1 seconds. There is a 1-second window to account for potential variance between the two + # runs. + self.assertTrue(abs((end1 - start1) - (end0 - start0 + 3.1)) < 1) + + @client_context.require_failCommand_appName + def test_03_overload_retries_limited(self): + # Drivers should test that without adaptive retries enabled, overload errors are retried a maximum of five times. + + # 1. Let `client` be a `MongoClient`. + client = self.client + # 2. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 3. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 4. Perform a find operation with `coll` that fails. + with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + coll.find_one({}) + + # 5. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. 
+ self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 6. Assert that the total number of started commands is MAX_RETRIES + 1. + self.assertEqual(len(self.listener.started_events), _MAX_RETRIES + 1) + + @client_context.require_failCommand_appName + def test_04_adaptive_retries_limited_by_tokens(self): + # Drivers should test that when enabled, adaptive retries are limited by the number of tokens in the bucket. + + # 1. Let `client` be a `MongoClient` with adaptiveRetries=True. + client = self.rs_or_single_client(adaptive_retries=True, event_listeners=[self.listener]) + # 2. Set `client`'s retry token bucket to have 2 tokens. + client._retry_policy.token_bucket.tokens = 2 + # 3. Let `coll` be a collection. + coll = client.pymongo_test.coll + + # 4. Configure the following failpoint: + failpoint = { + "configureFailPoint": "failCommand", + "mode": {"times": 3}, + "data": { + "failCommands": ["find"], + "errorCode": 462, # IngressRequestRateLimitExceeded + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + # 5. Perform a find operation with `coll` that fails. + with self.fail_point(failpoint): + with self.assertRaises(PyMongoError) as error: + coll.find_one({}) + + # 6. Assert that the raised error contains both the `RetryableError` and `SystemOverLoadedError` error labels. + self.assertIn("RetryableError", str(error.exception)) + self.assertIn("SystemOverloadedError", str(error.exception)) + + # 7. Assert that the total number of started commands is 3: one for the initial attempt and two for the retries. + self.assertEqual(len(self.listener.started_events), 3) + + +# Location of JSON test specifications. 
+if _IS_SYNC: + _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent, "client-backpressure") +else: + _TEST_PATH = os.path.join(pathlib.Path(__file__).resolve().parent.parent, "client-backpressure") + +globals().update( + generate_test_classes( + _TEST_PATH, + module=__name__, + ) +) + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_client_metadata.py b/test/test_client_metadata.py index 5f103f739a..8cdb728ea2 100644 --- a/test/test_client_metadata.py +++ b/test/test_client_metadata.py @@ -219,6 +219,19 @@ def test_duplicate_driver_name_no_op(self): # add same metadata again self.check_metadata_added(client, "Framework", None, None) + def test_handshake_documents_include_backpressure(self): + # Create a `MongoClient` that is configured to record all handshake documents sent to the server as a part of + # connection establishment. + client = self.rs_or_single_client("mongodb://" + self.server.address_string) + + # Send a `ping` command to the server and verify that the command succeeds. This ensure that a connection is + # established on all topologies. Note: MockupDB only supports standalone servers. + client.admin.command("ping") + + # Assert that for every handshake document intercepted: + # the document has a field `backpressure` whose value is `true`. 
+ self.assertEqual(self.handshake_req["backpressure"], True) + if __name__ == "__main__": unittest.main() diff --git a/test/test_discovery_and_monitoring.py b/test/test_discovery_and_monitoring.py index 8375d63e97..7fb6f312c5 100644 --- a/test/test_discovery_and_monitoring.py +++ b/test/test_discovery_and_monitoring.py @@ -25,7 +25,9 @@ from pathlib import Path from test.helpers import ConcurrentRunner from test.utils import flaky +from test.utils_shared import delay +from pymongo.errors import ConnectionFailure from pymongo.operations import _Op from pymongo.server_selectors import writable_server_selector from pymongo.synchronous.pool import Connection @@ -67,7 +69,12 @@ ) from pymongo.hello import Hello, HelloCompat from pymongo.helpers_shared import _check_command_response, _check_write_command_response -from pymongo.monitoring import ServerHeartbeatFailedEvent, ServerHeartbeatStartedEvent +from pymongo.monitoring import ( + ConnectionCheckOutFailedEvent, + PoolClearedEvent, + ServerHeartbeatFailedEvent, + ServerHeartbeatStartedEvent, +) from pymongo.server_description import SERVER_TYPE, ServerDescription from pymongo.synchronous.settings import TopologySettings from pymongo.synchronous.topology import Topology, _ErrorContext @@ -131,6 +138,9 @@ def got_app_error(topology, app_error): raise AssertionError except (AutoReconnect, NotPrimaryError, OperationFailure) as e: if when == "beforeHandshakeCompletes": + # The pool would have added the SystemOverloadedError in this case. 
+ if isinstance(e, AutoReconnect): + e._add_error_label("SystemOverloadedError") completed_handshake = False elif when == "afterHandshakeCompletes": completed_handshake = True @@ -437,6 +447,57 @@ def mock_close(self, reason): Connection.close_conn = original_close +class TestPoolBackpressure(IntegrationTest): + @client_context.require_version_min(7, 0, 0) + def test_connection_pool_is_not_cleared(self): + listener = CMAPListener() + + # Create a client that listens to CMAP events, with maxConnecting=100. + client = self.rs_or_single_client(maxConnecting=100, event_listeners=[listener]) + + # Enable the ingress rate limiter. + client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=True + ) + client.admin.command("setParameter", 1, ingressConnectionEstablishmentRatePerSec=20) + client.admin.command("setParameter", 1, ingressConnectionEstablishmentBurstCapacitySecs=1) + client.admin.command("setParameter", 1, ingressConnectionEstablishmentMaxQueueDepth=1) + + # Disable the ingress rate limiter on teardown. + # Sleep for 1 second before disabling to avoid the rate limiter. + def teardown(): + time.sleep(1) + client.admin.command( + "setParameter", 1, ingressConnectionEstablishmentRateLimiterEnabled=False + ) + + self.addCleanup(teardown) + + # Make sure the collection has at least one document. + client.test.test.delete_many({}) + client.test.test.insert_one({}) + + # Run a slow operation to tie up the connection. + def target(): + try: + client.test.test.find_one({"$where": delay(0.1)}) + except ConnectionFailure: + pass + + # Run 100 parallel operations that contend for connections. + tasks = [] + for _ in range(100): + tasks.append(ConcurrentRunner(target=target)) + for t in tasks: + t.start() + for t in tasks: + t.join() + + # Verify there were at least 10 connection checkout failed event but no pool cleared events. 
+ self.assertGreater(len(listener.events_by_type(ConnectionCheckOutFailedEvent)), 10) + self.assertEqual(len(listener.events_by_type(PoolClearedEvent)), 0) + + class TestServerMonitoringMode(IntegrationTest): @client_context.require_no_load_balancer def setUp(self): diff --git a/test/test_pooling.py b/test/test_pooling.py index cb5b206996..0f7ef144f6 100644 --- a/test/test_pooling.py +++ b/test/test_pooling.py @@ -29,6 +29,7 @@ from pymongo.errors import AutoReconnect, ConnectionFailure, DuplicateKeyError from pymongo.hello import HelloCompat from pymongo.lock import _create_lock +from pymongo.read_preferences import ReadPreference sys.path[0:0] = [""] @@ -511,6 +512,39 @@ def test_connection_timeout_message(self): str(error.exception), ) + @client_context.require_failCommand_appName + def test_pool_backpressure_preserves_existing_connections(self): + client = self.rs_or_single_client() + coll = client.pymongo_test.t + pool = get_pool(client) + coll.insert_many([{"x": 1} for _ in range(10)]) + t = SocketGetter(self.c, pool) + t.start() + while t.state != "connection": + time.sleep(0.1) + + assert not t.sock.conn_closed() + + # Mock a session establishment overload. + mock_connection_fail = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "closeConnection": True, + }, + } + + with self.fail_point(mock_connection_fail): + coll.find_one({}) + + # Make sure the existing socket was not affected. 
+ assert not t.sock.conn_closed() + + # Cleanup + t.release_conn() + t.join() + pool.close() + class TestPoolMaxSize(_TestPoolingBase): def test_max_pool_size(self): diff --git a/test/test_retryable_writes.py b/test/test_retryable_writes.py index a74a3e8030..8763c89de0 100644 --- a/test/test_retryable_writes.py +++ b/test/test_retryable_writes.py @@ -43,14 +43,16 @@ from bson.int64 import Int64 from bson.raw_bson import RawBSONDocument from bson.son import SON +from pymongo import MongoClient from pymongo.errors import ( AutoReconnect, ConnectionFailure, - OperationFailure, + NotPrimaryError, ServerSelectionTimeoutError, WriteConcernError, ) from pymongo.monitoring import ( + CommandFailedEvent, CommandSucceededEvent, ConnectionCheckedOutEvent, ConnectionCheckOutFailedEvent, @@ -597,5 +599,192 @@ def raise_connection_err_select_server(*args, **kwargs): self.assertEqual(sent_txn_id, final_txn_id, msg) +class TestErrorPropagationAfterEncounteringMultipleErrors(IntegrationTest): + # Only run against replica sets as mongos does not propagate the NoWritesPerformed label to the drivers. + @client_context.require_replica_set + # Run against server versions 6.0 and above. + @client_context.require_version_min(6, 0) # type: ignore[untyped-decorator] + def setUp(self) -> None: + super().setUp() + self.setup_client = MongoClient(**client_context.default_client_options) + self.addCleanup(self.setup_client.close) + + # TODO: After PYTHON-4595 we can use async event handlers and remove this workaround. + def configure_fail_point_sync(self, command_args, off=False) -> None: + cmd = {"configureFailPoint": "failCommand"} + cmd.update(command_args) + if off: + cmd["mode"] = "off" + cmd.pop("data", None) + self.setup_client.admin.command(cmd) + + def test_01_drivers_return_the_correct_error_when_receiving_only_errors_without_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. 
+ listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code 10107 (NotWritablePrimary). + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + client.test.test.insert_one({}) + + # Assert that the error code of the server error is 10107. + assert exc.exception.errors["code"] == 10107 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + def test_02_drivers_return_the_correct_error_when_receiving_only_errors_with_NoWritesPerformed( + self + ) -> None: + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure a fail point with error code 91 (ShutdownInProgress) with the RetryableError and SystemOverloadedError error labels. 
+ command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Via the command monitoring CommandFailedEvent, configure a fail point with error code `10107` (NotWritablePrimary) + # and a NoWritesPerformed label. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorCode": 10107, + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + if listener.failed_events: + return + # Configure the 10107 fail point command only if the the failed event is for the 91 error configured in step 2. + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + with self.assertRaises(NotPrimaryError) as exc: + client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 # type:ignore[call-overload] + + # Disable the fail point. + self.configure_fail_point_sync({}, off=True) + + def test_03_drivers_return_the_correct_error_when_receiving_some_errors_with_NoWritesPerformed_and_some_without_NoWritesPerformed( + self + ) -> None: + # TODO: read the expected behavior and add breakpoint() to the retry loop + # Create a client with retryWrites=true. + listener = OvertCommandListener() + + # Configure the client to listen to CommandFailedEvents. 
In the attached listener, configure a fail point with error + # code `91` (NotWritablePrimary) and the `NoWritesPerformed`, `RetryableError` and `SystemOverloadedError` labels. + command_args_inner = { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": ["insert"], + "errorLabels": ["RetryableError", "SystemOverloadedError", "NoWritesPerformed"], + "errorCode": 91, + }, + } + + # Configure a fail point with error code `91` (ShutdownInProgress) with the `RetryableError` and + # `SystemOverloadedError` error labels but without the `NoWritesPerformed` error label. + command_args = { + "configureFailPoint": "failCommand", + "mode": {"times": 1}, + "data": { + "failCommands": ["insert"], + "errorCode": 91, + "errorLabels": ["RetryableError", "SystemOverloadedError"], + }, + } + + def failed(event: CommandFailedEvent) -> None: + # Configure the fail point command only if the the failed event is for the 91 error configured in step 2. + if listener.failed_events: + return + assert event.failure["code"] == 91 + self.configure_fail_point_sync(command_args_inner) + listener.failed_events.append(event) + + listener.failed = failed + + client = self.rs_client(retryWrites=True, event_listeners=[listener]) + + self.configure_fail_point_sync(command_args) + + # Attempt an insertOne operation on any record for any database and collection. + # Expect the insertOne to fail with a server error. + from pymongo.errors import OperationFailure + + with self.assertRaises(Exception) as exc: + client.test.test.insert_one({}) + + # Assert that the error code of the server error is 91. + assert exc.exception.errors["code"] == 91 + # Assert that the error does not contain the error label `NoWritesPerformed`. + assert "NoWritesPerformed" not in exc.exception.errors["errorLabels"] + + # Disable the fail point. 
+ self.configure_fail_point_sync({}, off=True) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_transactions.py b/test/test_transactions.py index 01b7ba1553..9e370294ef 100644 --- a/test/test_transactions.py +++ b/test/test_transactions.py @@ -16,9 +16,12 @@ from __future__ import annotations import asyncio +import random import sys +import time from io import BytesIO +import pymongo from gridfs.synchronous.grid_file import GridFS, GridFSBucket from pymongo.server_selectors import writable_server_selector from pymongo.synchronous.pool import PoolState @@ -40,7 +43,9 @@ CollectionInvalid, ConfigurationError, ConnectionFailure, + ExecutionTimeout, InvalidOperation, + NetworkTimeout, OperationFailure, ) from pymongo.operations import IndexModel, InsertOne @@ -426,7 +431,7 @@ def set_fail_point(self, command_args): self.configure_fail_point(client, command_args) @client_context.require_transactions - def test_callback_raises_custom_error(self): + def test_1_callback_raises_custom_error(self): class _MyException(Exception): pass @@ -438,7 +443,7 @@ def raise_error(_): s.with_transaction(raise_error) @client_context.require_transactions - def test_callback_returns_value(self): + def test_2_callback_returns_value(self): def callback(_): return "Foo" @@ -466,7 +471,7 @@ def callback(_): self.assertEqual(s.with_transaction(callback), "Foo") @client_context.require_transactions - def test_callback_not_retried_after_timeout(self): + def test_3_1_callback_not_retried_after_timeout(self): listener = OvertCommandListener() client = self.rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -487,14 +492,14 @@ def callback(session): listener.reset() with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "abortTransaction"]) 
@client_context.require_test_commands @client_context.require_transactions - def test_callback_not_retried_after_commit_timeout(self): + def test_3_2_callback_not_retried_after_commit_timeout(self): listener = OvertCommandListener() client = self.rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -519,14 +524,14 @@ def callback(session): with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(OperationFailure): + with self.assertRaises(NetworkTimeout): s.with_transaction(callback) self.assertEqual(listener.started_command_names(), ["insert", "commitTransaction"]) @client_context.require_test_commands @client_context.require_transactions - def test_commit_not_retried_after_timeout(self): + def test_3_3_commit_not_retried_after_timeout(self): listener = OvertCommandListener() client = self.rs_client(event_listeners=[listener]) coll = client[self.db.name].test @@ -548,7 +553,7 @@ def callback(session): with client.start_session() as s: with PatchSessionTimeout(0): - with self.assertRaises(ConnectionFailure): + with self.assertRaises(NetworkTimeout): s.with_transaction(callback) # One insert for the callback and two commits (includes the automatic @@ -557,6 +562,38 @@ def callback(session): listener.started_command_names(), ["insert", "commitTransaction", "commitTransaction"] ) + @client_context.require_transactions + def test_callback_not_retried_after_csot_timeout(self): + listener = OvertCommandListener() + client = self.rs_client(event_listeners=[listener]) + coll = client[self.db.name].test + + def callback(session): + coll.insert_one({}, session=session) + err: dict = { + "ok": 0, + "errmsg": "Transaction 7819 has been aborted.", + "code": 251, + "codeName": "NoSuchTransaction", + "errorLabels": ["TransientTransactionError"], + } + raise OperationFailure(err["errmsg"], err["code"], err) + + # Create the collection. 
+ coll.insert_one({}) + listener.reset() + with client.start_session() as s: + with pymongo.timeout(1.0): + with self.assertRaises(ExecutionTimeout): + s.with_transaction(callback) + + # At least two attempts: the original and one or more retries. + inserts = len([x for x in listener.started_command_names() if x == "insert"]) + aborts = len([x for x in listener.started_command_names() if x == "abortTransaction"]) + + self.assertGreaterEqual(inserts, 2) + self.assertGreaterEqual(aborts, 2) + # Tested here because this supports Motor's convenient transactions API. @client_context.require_transactions def test_in_transaction_property(self): @@ -594,6 +631,68 @@ def callback(session): s.with_transaction(callback) self.assertFalse(s.in_transaction) + @client_context.require_test_commands + @client_context.require_transactions + def test_4_retry_backoff_is_enforced(self): + client = client_context.client + coll = client[self.db.name].test + # patch random to make it deterministic -- once to effectively have + # no backoff and the second time with "max" backoff (always waiting the longest + # possible time) + _original_random_random = random.random + + def always_one(): + return 1 + + def always_zero(): + return 0 + + random.random = always_zero + # set fail point to trigger transaction failure and trigger backoff + self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": {"times": 13}, + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addCleanup(self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"}) + + def callback(session): + coll.insert_one({}, session=session) + + start = time.monotonic() + with self.client.start_session() as s: + s.with_transaction(callback) + end = time.monotonic() + no_backoff_time = end - start + + random.random = always_one + # set fail point to trigger transaction failure and trigger backoff + self.set_fail_point( + { + "configureFailPoint": "failCommand", + "mode": 
{ + "times": 13 + }, # sufficiently high enough such that the time effect of backoff is noticeable + "data": { + "failCommands": ["commitTransaction"], + "errorCode": 251, + }, + } + ) + self.addCleanup(self.set_fail_point, {"configureFailPoint": "failCommand", "mode": "off"}) + start = time.monotonic() + with self.client.start_session() as s: + s.with_transaction(callback) + end = time.monotonic() + self.assertLess(abs(end - start - (no_backoff_time + 2.2)), 1) # sum of 13 backoffs is 2.2 + + random.random = _original_random_random + class TestOptionsInsideTransactionProse(TransactionsBase): @client_context.require_transactions diff --git a/test/transactions/unified/backpressure-retryable-abort.json b/test/transactions/unified/backpressure-retryable-abort.json new file mode 100644 index 0000000000..53fc9c6f09 --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-abort.json @@ -0,0 +1,357 @@ +{ + "description": "backpressure-retryable-abort", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "abortTransaction retries if backpressure labels are added", + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + 
"failCommands": [ + "abortTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + 
"commandName": "abortTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + }, + { + "description": "abortTransaction is retried maxAttempts=5 times if backpressure labels are added", + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "abortTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": 
"abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + } + ] +} diff --git a/test/transactions/unified/backpressure-retryable-commit.json b/test/transactions/unified/backpressure-retryable-commit.json new file mode 100644 index 0000000000..ae873561a9 --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-commit.json @@ -0,0 +1,374 @@ +{ + "description": "backpressure-retryable-commit", + "schemaVersion": "1.4", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "sharded", + "replicaset", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "commitTransaction retries if backpressure labels are added", + "runOnRequirements": [ + { + "serverless": "forbid" + } + ], + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 2 + }, + "data": { + "failCommands": [ + "commitTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", 
+ "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "commitTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + 
"documents": [ + { + "_id": 1 + } + ] + } + ] + }, + { + "description": "commitTransaction is retried maxAttempts=5 times if backpressure labels are added", + "runOnRequirements": [ + { + "serverless": "forbid" + } + ], + "operations": [ + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "commitTransaction" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "session0", + "name": "commitTransaction", + "expectError": { + "isError": true + } + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "commitTransaction": 1, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + 
"commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + }, + { + "commandStartedEvent": { + "commandName": "commitTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + } + ] +} diff --git a/test/transactions/unified/backpressure-retryable-reads.json b/test/transactions/unified/backpressure-retryable-reads.json new file mode 100644 index 0000000000..731762830e --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-reads.json @@ -0,0 +1,328 @@ +{ + "description": "backpressure-retryable-reads", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": [ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "reads are retried if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + 
"data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "find", + "arguments": { + "filter": {}, + "session": "session0" + } + }, + { + "object": "session0", + "name": "commitTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "find": "test", + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "find", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "find": "test", + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "find", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + } + ] + } + ] + }, + { + "description": "reads are retried maxAttempts=5 times if backpressure labels are 
added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "find" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "find", + "arguments": { + "filter": {}, + "session": "session0" + }, + "expectError": { + "isError": true + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "find" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ] + } + ] +} diff --git a/test/transactions/unified/backpressure-retryable-writes.json b/test/transactions/unified/backpressure-retryable-writes.json new file mode 100644 index 0000000000..eea0e6b5da --- /dev/null +++ b/test/transactions/unified/backpressure-retryable-writes.json @@ -0,0 +1,454 @@ +{ + "description": "backpressure-retryable-writes", + "schemaVersion": "1.3", + "runOnRequirements": [ + { + "minServerVersion": "4.4", + "topologies": [ + "replicaset", + "sharded", + "load-balanced" + ] + } + ], + "createEntities": 
[ + { + "client": { + "id": "client0", + "useMultipleMongoses": false, + "observeEvents": [ + "commandStartedEvent" + ] + } + }, + { + "database": { + "id": "database0", + "client": "client0", + "databaseName": "transaction-tests" + } + }, + { + "collection": { + "id": "collection0", + "database": "database0", + "collectionName": "test" + } + }, + { + "session": { + "id": "session0", + "client": "client0" + } + } + ], + "initialData": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ], + "tests": [ + { + "description": "writes are retried if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 2 + } + } + }, + { + "object": "session0", + "name": "commitTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 1 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": true, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + 
"commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 2 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "insert": "test", + "documents": [ + { + "_id": 2 + } + ], + "ordered": true, + "readConcern": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "abortTransaction": { + "$$exists": false + }, + "lsid": { + "$$sessionLsid": "session0" + }, + "txnNumber": { + "$numberLong": "1" + }, + "startTransaction": { + "$$exists": false + }, + "autocommit": false, + "writeConcern": { + "$$exists": false + } + }, + "commandName": "commitTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [ + { + "_id": 1 + }, + { + "_id": 2 + } + ] + } + ] + }, + { + "description": "writes are retried maxAttempts=5 times if backpressure labels are added", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 1 + } + }, + "expectResult": { + "$$unsetOrMatches": { + "insertedId": { + "$$unsetOrMatches": 1 + } + } + } + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": "alwaysOn", + "data": { + "failCommands": [ + "insert" + ], + 
"errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 2 + } + }, + "expectError": { + "isError": true + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "insert" + } + }, + { + "commandStartedEvent": { + "commandName": "abortTransaction" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + }, + { + "description": "retry succeeds if backpressure labels are added to the first operation in a transaction", + "operations": [ + { + "object": "session0", + "name": "startTransaction" + }, + { + "object": "testRunner", + "name": "failPoint", + "arguments": { + "client": "client0", + "failPoint": { + "configureFailPoint": "failCommand", + "mode": { + "times": 1 + }, + "data": { + "failCommands": [ + "insert" + ], + "errorLabels": [ + "RetryableError", + "SystemOverloadedError" + ], + "errorCode": 112 + } + } + } + }, + { + "object": "collection0", + "name": "insertOne", + "arguments": { + "session": "session0", + "document": { + "_id": 2 + } + } + }, + { + "object": "session0", + "name": "abortTransaction" + } + ], + "expectEvents": [ + { + "client": "client0", + "events": [ + { + "commandStartedEvent": { + "command": { + "startTransaction": true + }, + "commandName": "insert", + "databaseName": 
"transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "startTransaction": true + }, + "commandName": "insert", + "databaseName": "transaction-tests" + } + }, + { + "commandStartedEvent": { + "command": { + "startTransaction": { + "$$exists": false + } + }, + "commandName": "abortTransaction", + "databaseName": "admin" + } + } + ] + } + ], + "outcome": [ + { + "collectionName": "test", + "databaseName": "transaction-tests", + "documents": [] + } + ] + } + ] +} diff --git a/test/uri_options/client-backpressure-options.json b/test/uri_options/client-backpressure-options.json new file mode 100644 index 0000000000..3fcf2c86b0 --- /dev/null +++ b/test/uri_options/client-backpressure-options.json @@ -0,0 +1,35 @@ +{ + "tests": [ + { + "description": "adaptiveRetries=true is parsed correctly", + "uri": "mongodb://example.com/?adaptiveRetries=true", + "valid": true, + "warning": false, + "hosts": null, + "auth": null, + "options": { + "adaptiveRetries": true + } + }, + { + "description": "adaptiveRetries=false is parsed correctly", + "uri": "mongodb://example.com/?adaptiveRetries=false", + "valid": true, + "warning": false, + "hosts": null, + "auth": null, + "options": { + "adaptiveRetries": false + } + }, + { + "description": "adaptiveRetries with invalid value causes a warning", + "uri": "mongodb://example.com/?adaptiveRetries=invalid", + "valid": true, + "warning": true, + "hosts": null, + "auth": null, + "options": null + } + ] +} diff --git a/tools/synchro.py b/tools/synchro.py index ee719d7429..ed794c5963 100644 --- a/tools/synchro.py +++ b/tools/synchro.py @@ -213,6 +213,7 @@ def async_only_test(f: str) -> bool: "test_bulk.py", "test_change_stream.py", "test_client.py", + "test_client_backpressure.py", "test_client_bulk_write.py", "test_client_context.py", "test_client_metadata.py", @@ -350,7 +351,7 @@ def translate_async_sleeps(lines: list[str]) -> list[str]: sleeps = [line for line in lines if "asyncio.sleep" in line] for line in sleeps: - 
res = re.search(r"asyncio.sleep\(([^()]*)\)", line) + res = re.search(r"asyncio\.sleep\(\s*(.*?)\)", line) if res: old = res[0] index = lines.index(line)