Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3148,9 +3148,13 @@ def write_multi_index(self, key: str, index: MultiIndex) -> None:
):
# write the level
if isinstance(lev.dtype, ExtensionDtype):
raise NotImplementedError(
"Saving a MultiIndex with an extension dtype is not supported."
)
# GH 63412
if isinstance(lev.dtype, StringDtype):
lev = lev.astype(object)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you replace this with pass, does _convert_index below already convert this data correctly without the astype?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it does. I'll make the modification.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mroeschke Checks passed. I used a different implementation to avoid the two `if` blocks; I think this is better style, unless you disagree.

else:
raise NotImplementedError(
"Saving a MultiIndex with an extension dtype is not supported."
)
level_key = f"{key}_level{i}"
conv_level = _convert_index(level_key, lev, self.encoding, self.errors)
self.write_array(level_key, conv_level.values)
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/io/pytables/test_put.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,6 @@ def test_column_multiindex(tmp_path, setup_path, using_infer_string):

path = tmp_path / setup_path
with HDFStore(path) as store:
if using_infer_string:
# TODO(infer_string) make this work for string dtype
msg = "Saving a MultiIndex with an extension dtype is not supported."
with pytest.raises(NotImplementedError, match=msg):
store.put("df", df)
return
store.put("df", df)
tm.assert_frame_equal(
store["df"], expected, check_index_type=True, check_column_type=True
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/io/pytables/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,6 @@ def test_read_hdf_open_store(tmp_path, setup_path, using_infer_string):
df = df.set_index(keys="E", append=True)

path = tmp_path / setup_path
if using_infer_string:
# TODO(infer_string) make this work for string dtype
msg = "Saving a MultiIndex with an extension dtype is not supported."
with pytest.raises(NotImplementedError, match=msg):
df.to_hdf(path, key="df", mode="w")
return
df.to_hdf(path, key="df", mode="w")
direct = read_hdf(path, "df")
with HDFStore(path, mode="r") as store:
Expand Down
6 changes: 0 additions & 6 deletions pandas/tests/io/pytables/test_round_trip.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,12 +434,6 @@ def test_store_hierarchical(
):
frame = multiindex_dataframe_random_data

if using_infer_string:
# TODO(infer_string) make this work for string dtype
msg = "Saving a MultiIndex with an extension dtype is not supported."
with pytest.raises(NotImplementedError, match=msg):
_check_roundtrip(frame, tm.assert_frame_equal, path=temp_file)
return
_check_roundtrip(frame, tm.assert_frame_equal, path=temp_file)
_check_roundtrip(frame.T, tm.assert_frame_equal, path=temp_file)
_check_roundtrip(frame["A"], tm.assert_series_equal, path=temp_file)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,3 +1129,13 @@ def test_select_categorical_string_columns(tmp_path, model):
result = store.select("df", "modelId == model")
expected = df[df["modelId"] == model]
tm.assert_frame_equal(result, expected)


def test_to_hdf_multiindex_string_dtype_crash(tmp_path):
    # GH#63412
    # Build a small frame whose two-level MultiIndex holds only string labels.
    tuples = [("a", "x"), ("b", "y")]
    df = DataFrame(
        {"value": [1, 2]},
        index=MultiIndex.from_tuples(tuples, names=["level1", "level2"]),
    )
    path = tmp_path / "test.h5"

    # Write the frame to HDF5 and read it straight back.
    df.to_hdf(path, key="test")
    result = read_hdf(path, key="test")

    # Column dtypes are deliberately not compared (check_dtype=False).
    tm.assert_frame_equal(df, result, check_dtype=False)
Loading