From 3da6b7df4109cb4a3aae52e97dab62c9403e6977 Mon Sep 17 00:00:00 2001 From: sdhjebngc <2224531525@qq.com> Date: Thu, 18 Dec 2025 16:52:12 +0800 Subject: [PATCH 1/5] DOC: Replace @doc decorator with inline docstring in pandas/io/html.py --- pandas/io/html.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/io/html.py b/pandas/io/html.py index d3a37ac526a41..a16c73029114b 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -24,10 +24,7 @@ AbstractMethodError, EmptyDataError, ) -from pandas.util._decorators import ( - doc, - set_module, -) +from pandas.util._decorators import set_module from pandas.util._validators import check_dtype_backend from pandas.core.dtypes.common import is_list_like @@ -36,7 +33,6 @@ from pandas.core.indexes.base import Index from pandas.core.indexes.multi import MultiIndex from pandas.core.series import Series -from pandas.core.shared_docs import _shared_docs from pandas.io.common import ( get_handle, @@ -1024,7 +1020,6 @@ def _parse( @set_module("pandas") -@doc(storage_options=_shared_docs["storage_options"]) def read_html( io: FilePath | ReadBuffer[str], *, @@ -1155,7 +1150,15 @@ def read_html( .. versionadded:: 2.0 - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + <https://pandas.pydata.org/docs/user_guide/io.html? + highlight=storage_options#reading-writing-remote-files>`_. .. 
versionadded:: 2.1.0 From dfb690436cd02a29c037d76d4e28479ffc4ce45f Mon Sep 17 00:00:00 2001 From: sdhjebngc <2224531525@qq.com> Date: Fri, 19 Dec 2025 09:49:40 +0800 Subject: [PATCH 2/5] DOC: Replace @doc decorator with inline docstring in pandas/core/algorithms.py --- pandas/core/algorithms.py | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c31a2cbb41dd3..2178a01190cbc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -7,7 +7,6 @@ import decimal import operator -from textwrap import dedent from typing import ( TYPE_CHECKING, Literal, @@ -35,10 +34,7 @@ TakeIndexer, npt, ) -from pandas.util._decorators import ( - doc, - set_module, -) +from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( @@ -654,28 +650,6 @@ def factorize_array( @set_module("pandas") -@doc( - values=dedent( - """\ - values : sequence - A 1-D sequence. Sequences that aren't pandas objects are - coerced to ndarrays before factorization. - """ - ), - sort=dedent( - """\ - sort : bool, default False - Sort `uniques` and shuffle `codes` to maintain the - relationship. - """ - ), - size_hint=dedent( - """\ - size_hint : int, optional - Hint to the hashtable sizer. - """ - ), -) def factorize( values, sort: bool = False, @@ -692,12 +666,18 @@ def factorize( Parameters ---------- - {values}{sort} + values : sequence + A 1-D sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. use_na_sentinel : bool, default True If True, the sentinel -1 will be used for NaN values. If False, NaN values will be encoded as non-negative integers and will not drop the NaN from the uniques of the values. 
- {size_hint}\ + size_hint : int, optional + Hint to the hashtable sizer. Returns ------- From ffb87a70523b1214912ff4405fb9ce8e1282c129 Mon Sep 17 00:00:00 2001 From: sdhjebngc <2224531525@qq.com> Date: Fri, 19 Dec 2025 12:29:01 +0800 Subject: [PATCH 3/5] DOC: Fix factorize docstring references in pandas/core/base.py --- pandas/core/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 200b16b4b6b1a..53ef13b3a2740 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1279,9 +1279,7 @@ def _memory_usage(self, deep: bool = False) -> int: @doc( algorithms.factorize, - values="", order="", - size_hint="", sort=textwrap.dedent( """\ sort : bool, default False From c8b7a0588515a06cf2fe3cd865955181eaa66fe5 Mon Sep 17 00:00:00 2001 From: sdhjebngc <2224531525@qq.com> Date: Fri, 19 Dec 2025 12:58:22 +0800 Subject: [PATCH 4/5] DOC: Replace @doc decorator with inline docstring in Index/Series.factorize --- pandas/core/base.py | 120 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 109 insertions(+), 11 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 53ef13b3a2740..08a9cd4ec1999 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1277,22 +1277,120 @@ def _memory_usage(self, deep: bool = False) -> int: v += lib.memory_usage_of_objects(values) return v - @doc( - algorithms.factorize, - order="", - sort=textwrap.dedent( - """\ - sort : bool, default False - Sort `uniques` and shuffle `codes` to maintain the - relationship. - """ - ), - ) def factorize( self, sort: bool = False, use_na_sentinel: bool = True, ) -> tuple[npt.NDArray[np.intp], Index]: + """ + Encode the object as an enumerated type or categorical variable. + + This method is useful for obtaining a numeric representation of an + array when all that matters is identifying distinct values. 
`factorize` + is available as both a top-level function :func:`pandas.factorize`, + and as a method :meth:`Series.factorize` and :meth:`Index.factorize`. + + Parameters + ---------- + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + use_na_sentinel : bool, default True + If True, the sentinel -1 will be used for NaN values. If False, + NaN values will be encoded as non-negative integers and will not drop the + NaN from the uniques of the values. + + Returns + ------- + codes : ndarray + An integer ndarray that's an indexer into `uniques`. + ``uniques.take(codes)`` will have the same values as `values`. + uniques : ndarray, Index, or Categorical + The unique valid values. When `values` is Categorical, `uniques` + is a Categorical. When `values` is some other pandas object, an + `Index` is returned. Otherwise, a 1-D ndarray is returned. + + .. note:: + + Even if there's a missing value in `values`, `uniques` will + *not* contain an entry for it. + + See Also + -------- + cut : Discretize continuous-valued array. + unique : Find the unique value in an array. + + Notes + ----- + Reference :ref:`the user guide <reshaping.factorize>` for more examples. + + Examples + -------- + These examples all show factorize as a top-level method like + ``pd.factorize(values)``. The results are identical for methods like + :meth:`Series.factorize`. + + >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O")) + >>> codes + array([0, 0, 1, 2, 0]) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + With ``sort=True``, the `uniques` will be sorted, and `codes` will be + shuffled so that the relationship is the maintained. + + >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"), + ... 
sort=True) + >>> codes + array([1, 1, 0, 2, 1]) + >>> uniques + array(['a', 'b', 'c'], dtype=object) + + When ``use_na_sentinel=True`` (the default), missing values are indicated in + the `codes` with the sentinel value ``-1`` and missing values are not + included in `uniques`. + + >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O")) + >>> codes + array([ 0, -1, 1, 2, 0]) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + Thus far, we've only factorized lists (which are internally coerced to + NumPy arrays). When factorizing pandas objects, the type of `uniques` + will differ. For Categoricals, a `Categorical` is returned. + + >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]) + >>> uniques + ['a', 'c'] + Categories (3, object): ['a', 'b', 'c'] + + Notice that ``'b'`` is in ``uniques.categories``, despite not being + present in ``cat.values``. + + For all other pandas objects, an Index of the appropriate type is + returned. + + >>> cat = pd.Series(['a', 'a', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]) + >>> uniques + Index(['a', 'c'], dtype='object') + + If NaN is in the values, and we want to include NaN in the uniques of the + values, it can be achieved by setting ``use_na_sentinel=False``. + + >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"), + ... 
use_na_sentinel=False) + >>> codes + array([0, 1, 2, 3, 0]) + >>> uniques + array(['b', None, 'a', 'c'], dtype=object) + """ codes, uniques = algorithms.factorize( self._values, sort=sort, use_na_sentinel=use_na_sentinel ) From 09fa6df4941129bf54b52e4668140220e4b1e407 Mon Sep 17 00:00:00 2001 From: sdhjebngc <2224531525@qq.com> Date: Fri, 19 Dec 2025 13:08:37 +0800 Subject: [PATCH 5/5] DOC: Fix line length issues in base.py docstrings --- pandas/core/base.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 08a9cd4ec1999..dfdb2221fddba 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1330,7 +1330,9 @@ def factorize( ``pd.factorize(values)``. The results are identical for methods like :meth:`Series.factorize`. - >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O")) + >>> codes, uniques = pd.factorize( + ... np.array(['b', 'b', 'a', 'c', 'b'], dtype="O") + ... ) >>> codes array([0, 0, 1, 2, 0]) >>> uniques @@ -1339,8 +1341,9 @@ def factorize( With ``sort=True``, the `uniques` will be sorted, and `codes` will be shuffled so that the relationship is the maintained. - >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"), - ... sort=True) + >>> codes, uniques = pd.factorize( + ... np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"), sort=True + ... ) >>> codes array([1, 1, 0, 2, 1]) >>> uniques @@ -1350,7 +1353,9 @@ def factorize( the `codes` with the sentinel value ``-1`` and missing values are not included in `uniques`. - >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O")) + >>> codes, uniques = pd.factorize( + ... np.array(['b', None, 'a', 'c', 'b'], dtype="O") + ... ) >>> codes array([ 0, -1, 1, 2, 0]) >>> uniques @@ -1384,8 +1389,10 @@ def factorize( If NaN is in the values, and we want to include NaN in the uniques of the values, it can be achieved by setting ``use_na_sentinel=False``. 
- >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"), - ... use_na_sentinel=False) + >>> codes, uniques = pd.factorize( + ... np.array(['b', None, 'a', 'c', 'b'], dtype="O"), + ... use_na_sentinel=False, + ... ) >>> codes array([0, 1, 2, 3, 0]) >>> uniques