@@ -74,59 +74,59 @@ def test_transform_ord_cat_cols_to_coded_cols(
7474 tm .assert_frame_equal (out_df , expected_df )
7575
7676
@pytest.mark.parametrize(
    ("input_df_dict", "expected_df_dict"),
    [
        pytest.param(
            {
                "dup_1": Categorical(
                    ["low", "m", "h"],
                    categories=["low", "m", "h"],
                    ordered=True,
                ),
                "dup_2": [5, 6, 7],
            },
            {
                # Position 0 holds an ordered categorical, so it is expected
                # back as its codes [0, 1, 2]; position 1 is plain numbers
                # and is expected back unchanged.
                "dup_1": Series([0, 1, 2], dtype="int8"),
                "dup_2": [5, 6, 7],
            },
            id="duplicate-names-ordered-first",
        ),
        pytest.param(
            {
                "dup_1": ["a", "b", "c"],  # non-categorical
                "dup_2": Categorical(
                    ["p", "q", None],
                    categories=["p", "q"],
                    ordered=True,
                ),
                "dup_3": Categorical(
                    ["low", "m", "h"],
                    categories=["low", "m", "h"],
                    ordered=True,
                ),
            },
            {
                # Position 0 stays object; position 1 has a missing value, so
                # its codes come back as [0, 1, NaN]; position 2 comes back
                # as the codes [0, 1, 2].
                "dup_1": ["a", "b", "c"],
                "dup_2": [0.0, 1.0, np.nan],
                "dup_3": Series([0, 1, 2], dtype="int8"),
            },
            id="duplicate-names-ordered-and-non-categorical-and-none",
        ),
    ],
)
def test_transform_ord_cat_cols_to_coded_cols_duplicated_col(
    input_df_dict, expected_df_dict
) -> None:
    # GH #60306
    # The dict keys are only placeholders: a dict cannot carry duplicate
    # keys, so the frames are built first and every column is then relabelled
    # to the same name to produce the duplicated-column layout under test.
    frame = DataFrame(input_df_dict)
    frame.columns = ["dup"] * len(frame.columns)

    expected = DataFrame(expected_df_dict)
    expected.columns = ["dup"] * len(expected.columns)

    result = transform_ord_cat_cols_to_coded_cols(frame)
    tm.assert_frame_equal(result, expected)
0 commit comments