pandas-dev · sdhjebngc · Dec 18, 2025 · Dec 18, 2025 · Dec 18, 2025 · Dec 18, 2025
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -7,7 +7,6 @@
 
 import decimal
 import operator
-from textwrap import dedent
 from typing import (
     TYPE_CHECKING,
     Literal,
@@ -35,10 +34,7 @@
     TakeIndexer,
     npt,
 )
-from pandas.util._decorators import (
-    doc,
-    set_module,
-)
+from pandas.util._decorators import set_module
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import (
@@ -654,28 +650,6 @@ def factorize_array(
 
 
 @set_module("pandas")
-@doc(
-    values=dedent(
-        """\
-    values : sequence
-        A 1-D sequence. Sequences that aren't pandas objects are
-        coerced to ndarrays before factorization.
-    """
-    ),
-    sort=dedent(
-        """\
-    sort : bool, default False
-        Sort `uniques` and shuffle `codes` to maintain the
-        relationship.
-    """
-    ),
-    size_hint=dedent(
-        """\
-    size_hint : int, optional
-        Hint to the hashtable sizer.
-    """
-    ),
-)
 def factorize(
     values,
     sort: bool = False,
@@ -692,12 +666,18 @@ def factorize(
 
     Parameters
     ----------
-    {values}{sort}
+    values : sequence
+        A 1-D sequence. Sequences that aren't pandas objects are
+        coerced to ndarrays before factorization.
+    sort : bool, default False
+        Sort `uniques` and shuffle `codes` to maintain the
+        relationship.
     use_na_sentinel : bool, default True
         If True, the sentinel -1 will be used for NaN values. If False,
         NaN values will be encoded as non-negative integers and will not drop the
         NaN from the uniques of the values.
-    {size_hint}\
+    size_hint : int, optional
+        Hint to the hashtable sizer.
 
     Returns
     -------
@@ -729,7 +709,9 @@ def factorize(
     ``pd.factorize(values)``. The results are identical for methods like
     :meth:`Series.factorize`.
 
-    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"))
+    >>> codes, uniques = pd.factorize(
+    ...     np.array(['b', 'b', 'a', 'c', 'b'], dtype="O")
+    ... )
     >>> codes
     array([0, 0, 1, 2, 0])
     >>> uniques
@@ -738,8 +720,9 @@ def factorize(
     With ``sort=True``, the `uniques` will be sorted, and `codes` will be
     shuffled so that the relationship is the maintained.
 
-    >>> codes, uniques = pd.factorize(np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"),
-    ...                               sort=True)
+    >>> codes, uniques = pd.factorize(
+    ...     np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"), sort=True
+    ... )
     >>> codes
     array([1, 1, 0, 2, 1])
     >>> uniques
@@ -749,7 +732,9 @@ def factorize(
     the `codes` with the sentinel value ``-1`` and missing values are not
     included in `uniques`.
 
-    >>> codes, uniques = pd.factorize(np.array(['b', None, 'a', 'c', 'b'], dtype="O"))
+    >>> codes, uniques = pd.factorize(
+    ...     np.array(['b', None, 'a', 'c', 'b'], dtype="O")
+    ... )
     >>> codes
     array([ 0, -1,  1,  2,  0])
     >>> uniques

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -1277,24 +1277,127 @@ def _memory_usage(self, deep: bool = False) -> int:
             v += lib.memory_usage_of_objects(values)
         return v
 
-    @doc(
-        algorithms.factorize,
-        values="",
-        order="",
-        size_hint="",
-        sort=textwrap.dedent(
-            """\
-            sort : bool, default False
-                Sort `uniques` and shuffle `codes` to maintain the
-                relationship.
-            """
-        ),
-    )
     def factorize(
         self,
         sort: bool = False,
         use_na_sentinel: bool = True,
     ) -> tuple[npt.NDArray[np.intp], Index]:
+        """
+        Encode the object as an enumerated type or categorical variable.
+
+        This method is useful for obtaining a numeric representation of an
+        array when all that matters is identifying distinct values. `factorize`
+        is available as both a top-level function :func:`pandas.factorize`,
+        and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.
+
+        Parameters
+        ----------
+        sort : bool, default False
+            Sort `uniques` and shuffle `codes` to maintain the
+            relationship.
+        use_na_sentinel : bool, default True
+            If True, the sentinel -1 will be used for NaN values. If False,
+            NaN values will be encoded as non-negative integers and will not drop the
+            NaN from the uniques of the values.
+
+        Returns
+        -------
+        codes : ndarray
+            An integer ndarray that's an indexer into `uniques`.
+            ``uniques.take(codes)`` will have the same values as `values`.
+        uniques : ndarray, Index, or Categorical
+            The unique valid values. When `values` is Categorical, `uniques`
+            is a Categorical. When `values` is some other pandas object, an
+            `Index` is returned. Otherwise, a 1-D ndarray is returned.
+
+            .. note::
+
+               Even if there's a missing value in `values`, `uniques` will
+               *not* contain an entry for it.
+
+        See Also
+        --------
+        cut : Discretize continuous-valued array.
+        unique : Find the unique values in an array.
+
+        Notes
+        -----
+        Reference :ref:`the user guide <reshaping.factorize>` for more examples.
+
+        Examples
+        --------
+        These examples all show factorize as a top-level function like
+        ``pd.factorize(values)``. The results are identical for methods like
+        :meth:`Series.factorize`.
+
+        >>> codes, uniques = pd.factorize(
+        ...     np.array(['b', 'b', 'a', 'c', 'b'], dtype="O")
+        ... )
+        >>> codes
+        array([0, 0, 1, 2, 0])
+        >>> uniques
+        array(['b', 'a', 'c'], dtype=object)
+
+        With ``sort=True``, the `uniques` will be sorted, and `codes` will be
+        shuffled so that the relationship is maintained.
+
+        >>> codes, uniques = pd.factorize(
+        ...     np.array(['b', 'b', 'a', 'c', 'b'], dtype="O"), sort=True
+        ... )
+        >>> codes
+        array([1, 1, 0, 2, 1])
+        >>> uniques
+        array(['a', 'b', 'c'], dtype=object)
+
+        When ``use_na_sentinel=True`` (the default), missing values are indicated in
+        the `codes` with the sentinel value ``-1`` and missing values are not
+        included in `uniques`.
+
+        >>> codes, uniques = pd.factorize(
+        ...     np.array(['b', None, 'a', 'c', 'b'], dtype="O")
+        ... )
+        >>> codes
+        array([ 0, -1,  1,  2,  0])
+        >>> uniques
+        array(['b', 'a', 'c'], dtype=object)
+
+        Thus far, we've only factorized lists (which are internally coerced to
+        NumPy arrays). When factorizing pandas objects, the type of `uniques`
+        will differ. For Categoricals, a `Categorical` is returned.
+
+        >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
+        >>> codes, uniques = pd.factorize(cat)
+        >>> codes
+        array([0, 0, 1])
+        >>> uniques
+        ['a', 'c']
+        Categories (3, object): ['a', 'b', 'c']
+
+        Notice that ``'b'`` is in ``uniques.categories``, despite not being
+        present in ``cat.values``.
+
+        For all other pandas objects, an Index of the appropriate type is
+        returned.
+
+        >>> cat = pd.Series(['a', 'a', 'c'])
+        >>> codes, uniques = pd.factorize(cat)
+        >>> codes
+        array([0, 0, 1])
+        >>> uniques
+        Index(['a', 'c'], dtype='object')
+
+        If NaN is in the values, and we want to include NaN in the uniques of the
+        values, it can be achieved by setting ``use_na_sentinel=False``.
+
+        >>> codes, uniques = pd.factorize(
+        ...     np.array(['b', None, 'a', 'c', 'b'], dtype="O"),
+        ...     use_na_sentinel=False,
+        ... )
+        >>> codes
+        array([0, 1, 2, 3, 0])
+        >>> uniques
+        array(['b', None, 'a', 'c'], dtype=object)
+        """
         codes, uniques = algorithms.factorize(
             self._values, sort=sort, use_na_sentinel=use_na_sentinel
         )