Skip to content

Commit 82a5fba

Browse files
committed
Finish up
1 parent 4debbdf commit 82a5fba

File tree

9 files changed

+58
-45
lines changed

9 files changed

+58
-45
lines changed

.github/workflows/code-checks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
5858

5959
- name: Run doctests
60-
run: cd ci && ./code_checks.sh doctests
60+
run: cd ci && PANDAS_FUTURE_PYTHON_SCALARS="1" ./code_checks.sh doctests
6161
if: ${{ steps.build.outcome == 'success' && always() }}
6262

6363
- name: Install pandas in editable mode

pandas/conftest.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,15 @@ def pytest_collection_modifyitems(items, config) -> None:
149149
# Warnings from doctests that can be ignored; place reason in comment above.
150150
# Each entry specifies (path, message) - see the ignore_doctest_warning function
151151
ignored_doctest_warnings = [
152-
("api.interchange.from_dataframe", ".*Interchange Protocol is deprecated"),
152+
("api.interchange.from_dataframe", "The DataFrame Interchange Protocol"),
153153
("is_int64_dtype", "is_int64_dtype is deprecated"),
154154
("is_interval_dtype", "is_interval_dtype is deprecated"),
155155
("is_period_dtype", "is_period_dtype is deprecated"),
156156
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
157157
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
158158
("is_sparse", "is_sparse is deprecated"),
159-
("DataFrame.__dataframe__", "Interchange Protocol is deprecated"),
159+
("CategoricalDtype._from_values_or_dtype", "Constructing a Categorical"),
160+
("DataFrame.__dataframe__", "The DataFrame Interchange Protocol"),
160161
("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
161162
("DataFrameGroupBy.corrwith", "DataFrameGroupBy.corrwith is deprecated"),
162163
("NDFrame.replace", "Series.replace without 'value'"),

pandas/core/frame.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
infer_dtype_from_scalar,
9090
invalidate_string_dtypes,
9191
maybe_downcast_to_dtype,
92+
maybe_unbox_numpy_scalar,
9293
)
9394
from pandas.core.dtypes.common import (
9495
infer_dtype_from_object,
@@ -3822,7 +3823,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series:
38223823
many repeated values.
38233824
38243825
>>> df["object"].astype("category").memory_usage(deep=True)
3825-
5136
3826+
5140
38263827
"""
38273828
result = self._constructor_sliced(
38283829
[c.memory_usage(index=False, deep=deep) for col, c in self.items()],
@@ -4392,11 +4393,11 @@ def _setitem(self, key, value) -> None:
43924393
>>> s = pd.Series([100, 200], index=["b", "d"])
43934394
>>> df["B"] = s
43944395
>>> df
4395-
A B
4396-
a 1 NaN
4397-
b 2 100
4398-
c 3 NaN
4399-
d 4 200
4396+
A B
4397+
a 1 NaN
4398+
b 2 100.0
4399+
c 3 NaN
4400+
d 4 200.0
44004401
44014402
Series index labels NOT in DataFrame, ignored:
44024403
@@ -4408,7 +4409,6 @@ def _setitem(self, key, value) -> None:
44084409
x 1 10
44094410
y 2 20
44104411
z 3 50
4411-
# Values for 'a' and 'b' are completely ignored!
44124412
"""
44134413
key = com.apply_if_callable(key, self)
44144414

@@ -5121,6 +5121,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
51215121
2 6
51225122
3 8
51235123
4 10
5124+
Name: A, dtype: int64
51245125
"""
51255126
from pandas.core.computation.eval import eval as _eval
51265127

@@ -7810,10 +7811,10 @@ def value_counts(
78107811
78117812
>>> df.value_counts(dropna=False)
78127813
first_name middle_name
7814+
John Smith 1
78137815
Anne NaN 1
7816+
John NaN 1
78147817
Beth Louise 1
7815-
John Smith 1
7816-
NaN 1
78177818
Name: count, dtype: int64
78187819
78197820
>>> df.value_counts("first_name")
@@ -9095,10 +9096,10 @@ def combine(
90959096
2 3.0 NaN NaN
90969097
90979098
>>> df2.combine(df1, take_smaller, overwrite=False)
9098-
A B C
9099-
0 0.0 NaN NaN
9100-
1 0.0 3.0 1.0
9101-
2 NaN 3.0 1.0
9099+
B C A
9100+
0 NaN NaN 0.0
9101+
1 3.0 1.0 0.0
9102+
2 3.0 1.0 NaN
91029103
"""
91039104
other_idxlen = len(other.index) # save for compare
91049105
other_columns = other.columns
@@ -10954,8 +10955,8 @@ def apply(
1095410955
``apply`` has type stability (variables in the function do not change their
1095510956
type during the execution).
1095610957
10957-
>>> import bodo
10958-
>>> df.apply(lambda x: x.A + x.B, axis=1, engine=bodo.jit)
10958+
>>> import bodo # doctest: +SKIP
10959+
>>> df.apply(lambda x: x.A + x.B, axis=1, engine=bodo.jit) # doctest: +SKIP
1095910960
1096010961
Note that JIT compilation is only recommended for functions that take a
1096110962
significant amount of time to run. Fast functions are unlikely to run faster
@@ -12131,7 +12132,7 @@ def _get_data() -> DataFrame:
1213112132
df = df.astype(dtype)
1213212133
arr = concat_compat(list(df._iter_column_arrays()))
1213312134
return arr._reduce(name, skipna=skipna, keepdims=False, **kwds)
12134-
return func(df.values)
12135+
return maybe_unbox_numpy_scalar(func(df.values))
1213512136
elif axis == 1:
1213612137
if len(df.index) == 0:
1213712138
# Taking a transpose would result in no columns, losing the dtype.
@@ -13283,8 +13284,8 @@ def kurt(
1328313284
1328413285
With axis=None
1328513286
13286-
>>> df.kurt(axis=None).round(6)
13287-
-0.988693
13287+
>>> df.kurt(axis=None)
13288+
-0.9886927196984727
1328813289
1328913290
Using axis=1
1329013291
@@ -13465,7 +13466,7 @@ def idxmin(
1346513466
Pork consumption
1346613467
Wheat Products co2_emissions
1346713468
Beef consumption
13468-
dtype: object
13469+
dtype: str
1346913470
"""
1347013471
axis = self._get_axis_number(axis)
1347113472

pandas/core/generic.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13116,8 +13116,8 @@ def make_doc(name: str, ndim: int) -> str:
1311613116
1311713117
With axis=None
1311813118
13119-
>>> df.kurt(axis=None).round(6)
13120-
-0.988693
13119+
>>> df.kurt(axis=None)
13120+
-0.9886927196984727
1312113121
1312213122
Using axis=1
1312313123

pandas/core/indexes/base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6792,7 +6792,7 @@ def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"
67926792
pos = self[::-1].searchsorted(
67936793
label, side="right" if side == "left" else "left"
67946794
)
6795-
return len(self) - pos
6795+
return maybe_unbox_numpy_scalar(len(self) - pos)
67966796

67976797
raise ValueError("index must be monotonic increasing or decreasing")
67986798

@@ -6979,6 +6979,8 @@ def slice_locs(
69796979
if start_slice == -1:
69806980
start_slice -= len(self)
69816981

6982+
start_slice = maybe_unbox_numpy_scalar(start_slice)
6983+
end_slice = maybe_unbox_numpy_scalar(end_slice)
69826984
return start_slice, end_slice
69836985

69846986
def delete(
@@ -7398,7 +7400,7 @@ def any(self, *args, **kwargs):
73987400
# i.e. EA, call _reduce instead of "any" to get TypeError instead
73997401
# of AttributeError
74007402
return vals._reduce("any")
7401-
return np.any(vals)
7403+
return maybe_unbox_numpy_scalar(np.any(vals))
74027404

74037405
def all(self, *args, **kwargs):
74047406
"""
@@ -7446,7 +7448,7 @@ def all(self, *args, **kwargs):
74467448
# i.e. EA, call _reduce instead of "all" to get TypeError instead
74477449
# of AttributeError
74487450
return vals._reduce("all")
7449-
return np.all(vals)
7451+
return maybe_unbox_numpy_scalar(np.all(vals))
74507452

74517453
@final
74527454
def _maybe_disable_logical_methods(self, opname: str_t) -> None:

pandas/core/indexes/interval.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
infer_dtype_from_scalar,
4141
maybe_box_datetimelike,
4242
maybe_downcast_numeric,
43+
maybe_unbox_numpy_scalar,
4344
maybe_upcast_numeric_to_64bit,
4445
)
4546
from pandas.core.dtypes.common import (
@@ -804,7 +805,7 @@ def get_loc(self, key) -> int | slice | np.ndarray:
804805
if matches == 0:
805806
raise KeyError(key)
806807
if matches == 1:
807-
return mask.argmax()
808+
return maybe_unbox_numpy_scalar(mask.argmax())
808809

809810
res = lib.maybe_booleans_to_slice(mask.view("u1"))
810811
if isinstance(res, slice) and res.stop is None:

pandas/core/indexes/multi.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,10 @@
5858
)
5959
from pandas.util._exceptions import find_stack_level
6060

61-
from pandas.core.dtypes.cast import coerce_indexer_dtype
61+
from pandas.core.dtypes.cast import (
62+
coerce_indexer_dtype,
63+
maybe_unbox_numpy_scalar,
64+
)
6265
from pandas.core.dtypes.common import (
6366
ensure_int64,
6467
ensure_platform_int,
@@ -3115,7 +3118,9 @@ def get_slice_bound(
31153118
"""
31163119
if not isinstance(label, tuple):
31173120
label = (label,)
3118-
return self._partial_tup_index(label, side=side)
3121+
result = self._partial_tup_index(label, side=side)
3122+
result = maybe_unbox_numpy_scalar(result)
3123+
return result
31193124

31203125
def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
31213126
"""
@@ -3702,7 +3707,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
37023707
if start == end:
37033708
# The label is present in self.levels[level] but unused:
37043709
raise KeyError(key)
3705-
return slice(start, end)
3710+
return slice(maybe_unbox_numpy_scalar(start), maybe_unbox_numpy_scalar(end))
37063711

37073712
def get_locs(self, seq) -> npt.NDArray[np.intp]:
37083713
"""

pandas/io/formats/info.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@
9090
<class 'pandas.DataFrame'>
9191
RangeIndex: 5 entries, 0 to 4
9292
Columns: 3 entries, int_col to float_col
93-
dtypes: float64(1), int64(1), object(1)
94-
memory usage: 248.0+ bytes
93+
dtypes: float64(1), int64(1), str(1)
94+
memory usage: 278.0 bytes
9595
9696
Pipe output of DataFrame.info to buffer instead of sys.stdout, get
9797
buffer content and writes to a text file:
@@ -120,23 +120,23 @@
120120
Data columns (total 3 columns):
121121
# Column Non-Null Count Dtype
122122
--- ------ -------------- -----
123-
0 column_1 1000000 non-null object
124-
1 column_2 1000000 non-null object
125-
2 column_3 1000000 non-null object
126-
dtypes: object(3)
127-
memory usage: 22.9+ MB
123+
0 column_1 1000000 non-null str
124+
1 column_2 1000000 non-null str
125+
2 column_3 1000000 non-null str
126+
dtypes: str(3)
127+
memory usage: 25.7 MB
128128
129129
>>> df.info(memory_usage='deep')
130130
<class 'pandas.DataFrame'>
131131
RangeIndex: 1000000 entries, 0 to 999999
132132
Data columns (total 3 columns):
133133
# Column Non-Null Count Dtype
134134
--- ------ -------------- -----
135-
0 column_1 1000000 non-null object
136-
1 column_2 1000000 non-null object
137-
2 column_3 1000000 non-null object
138-
dtypes: object(3)
139-
memory usage: 165.9 MB"""
135+
0 column_1 1000000 non-null str
136+
1 column_2 1000000 non-null str
137+
2 column_3 1000000 non-null str
138+
dtypes: str(3)
139+
memory usage: 25.7 MB"""
140140
)
141141

142142

pandas/tests/series/test_ufunc.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -433,10 +433,13 @@ def test_np_matmul():
433433

434434

435435
@pytest.mark.parametrize("box", [pd.Index, pd.Series])
436-
def test_np_matmul_1D(box):
436+
def test_np_matmul_1D(box, using_python_scalars):
437437
result = np.matmul(box([1, 2]), box([2, 3]))
438438
assert result == 8
439-
assert isinstance(result, np.int64)
439+
if using_python_scalars:
440+
assert isinstance(result, int)
441+
else:
442+
assert isinstance(result, np.int64)
440443

441444

442445
def test_array_ufuncs_for_many_arguments():

0 commit comments

Comments
 (0)