Skip to content

Commit bd7f870

Browse files
authored
Pyarrow Concat to to know merge with promote options as default (#745)
* Update the Pyarrow concat * Updated the tests python versions * Updated github actions * removed python3.12 * Black formatted * Fixed tests * Fixed more tests
1 parent 76a376b commit bd7f870

File tree

11 files changed

+52
-39
lines changed

11 files changed

+52
-39
lines changed

.github/workflows/code-quality-checks.yml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,22 @@ on:
44
branches:
55
- main
66
pull_request:
7-
branches:
8-
- main
7+
98
jobs:
109
run-unit-tests:
1110
runs-on: ubuntu-latest
1211
strategy:
1312
matrix:
14-
python-version: [3.8, 3.9, "3.10", "3.11"]
13+
python-version: [3.9, "3.10", "3.11"]
1514
steps:
1615
#----------------------------------------------
1716
# check-out repo and set-up python
1817
#----------------------------------------------
1918
- name: Check out repository
20-
uses: actions/checkout@v2
19+
uses: actions/checkout@v4
2120
- name: Set up python ${{ matrix.python-version }}
2221
id: setup-python
23-
uses: actions/setup-python@v2
22+
uses: actions/setup-python@v5
2423
with:
2524
python-version: ${{ matrix.python-version }}
2625
#----------------------------------------------
@@ -29,6 +28,7 @@ jobs:
2928
- name: Install Poetry
3029
uses: snok/install-poetry@v1
3130
with:
31+
version: "2.2.1"
3232
virtualenvs-create: true
3333
virtualenvs-in-project: true
3434
installer-parallel: true
@@ -62,16 +62,16 @@ jobs:
6262
runs-on: ubuntu-latest
6363
strategy:
6464
matrix:
65-
python-version: [3.8, 3.9, "3.10"]
65+
python-version: [3.9, "3.10", "3.11"]
6666
steps:
6767
#----------------------------------------------
6868
# check-out repo and set-up python
6969
#----------------------------------------------
7070
- name: Check out repository
71-
uses: actions/checkout@v2
71+
uses: actions/checkout@v4
7272
- name: Set up python ${{ matrix.python-version }}
7373
id: setup-python
74-
uses: actions/setup-python@v2
74+
uses: actions/setup-python@v5
7575
with:
7676
python-version: ${{ matrix.python-version }}
7777
#----------------------------------------------
@@ -80,6 +80,7 @@ jobs:
8080
- name: Install Poetry
8181
uses: snok/install-poetry@v1
8282
with:
83+
version: "2.2.1"
8384
virtualenvs-create: true
8485
virtualenvs-in-project: true
8586
installer-parallel: true
@@ -114,16 +115,16 @@ jobs:
114115
runs-on: ubuntu-latest
115116
strategy:
116117
matrix:
117-
python-version: [3.8, 3.9, "3.10"]
118+
python-version: [3.9, "3.10", "3.11"]
118119
steps:
119120
#----------------------------------------------
120121
# check-out repo and set-up python
121122
#----------------------------------------------
122123
- name: Check out repository
123-
uses: actions/checkout@v2
124+
uses: actions/checkout@v4
124125
- name: Set up python ${{ matrix.python-version }}
125126
id: setup-python
126-
uses: actions/setup-python@v2
127+
uses: actions/setup-python@v5
127128
with:
128129
python-version: ${{ matrix.python-version }}
129130
#----------------------------------------------
@@ -132,6 +133,7 @@ jobs:
132133
- name: Install Poetry
133134
uses: snok/install-poetry@v1
134135
with:
136+
version: "2.2.1"
135137
virtualenvs-create: true
136138
virtualenvs-in-project: true
137139
installer-parallel: true

.github/workflows/integration.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ jobs:
2020
# check-out repo and set-up python
2121
#----------------------------------------------
2222
- name: Check out repository
23-
uses: actions/checkout@v3
23+
uses: actions/checkout@v4
2424
- name: Set up python
2525
id: setup-python
26-
uses: actions/setup-python@v4
26+
uses: actions/setup-python@v5
2727
with:
2828
python-version: "3.10"
2929
#----------------------------------------------
@@ -32,6 +32,7 @@ jobs:
3232
- name: Install Poetry
3333
uses: snok/install-poetry@v1
3434
with:
35+
version: "2.2.1"
3536
virtualenvs-create: true
3637
virtualenvs-in-project: true
3738
installer-parallel: true

.github/workflows/publish-test.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ jobs:
99
# check-out repo and set-up python
1010
#----------------------------------------------
1111
- name: Check out repository
12-
uses: actions/checkout@v2
12+
uses: actions/checkout@v4
1313
- name: Set up python
1414
id: setup-python
15-
uses: actions/setup-python@v2
15+
uses: actions/setup-python@v5
1616
with:
1717
python-version: 3.9
1818
#----------------------------------------------
@@ -21,6 +21,7 @@ jobs:
2121
- name: Install Poetry
2222
uses: snok/install-poetry@v1
2323
with:
24+
version: "2.2.1"
2425
virtualenvs-create: true
2526
virtualenvs-in-project: true
2627
installer-parallel: true

.github/workflows/publish.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ jobs:
1111
# check-out repo and set-up python
1212
#----------------------------------------------
1313
- name: Check out repository
14-
uses: actions/checkout@v2
14+
uses: actions/checkout@v4
1515
- name: Set up python
1616
id: setup-python
17-
uses: actions/setup-python@v2
17+
uses: actions/setup-python@v5
1818
with:
1919
python-version: 3.9
2020
#----------------------------------------------
@@ -23,6 +23,7 @@ jobs:
2323
- name: Install Poetry
2424
uses: snok/install-poetry@v1
2525
with:
26+
version: "2.2.1"
2627
virtualenvs-create: true
2728
virtualenvs-in-project: true
2829
installer-parallel: true

src/databricks/sql/client.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,7 +1335,9 @@ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
13351335
):
13361336
self._fill_results_buffer()
13371337
partial_results = self.results.next_n_rows(n_remaining_rows)
1338-
results = pyarrow.concat_tables([results, partial_results])
1338+
results = pyarrow.concat_tables(
1339+
[results, partial_results], promote_options="default"
1340+
)
13391341
n_remaining_rows -= partial_results.num_rows
13401342
self._next_row_index += partial_results.num_rows
13411343

@@ -1391,7 +1393,9 @@ def fetchall_arrow(self) -> "pyarrow.Table":
13911393
while not self.has_been_closed_server_side and self.has_more_rows:
13921394
self._fill_results_buffer()
13931395
partial_results = self.results.remaining_rows()
1394-
results = pyarrow.concat_tables([results, partial_results])
1396+
results = pyarrow.concat_tables(
1397+
[results, partial_results], promote_options="default"
1398+
)
13951399
self._next_row_index += partial_results.num_rows
13961400

13971401
return results

src/databricks/sql/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,9 @@ def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
265265
# Get remaining of num_rows or the rest of the current table, whichever is smaller
266266
length = min(num_rows, self.table.num_rows - self.table_row_index)
267267
table_slice = self.table.slice(self.table_row_index, length)
268-
results = pyarrow.concat_tables([results, table_slice])
268+
results = pyarrow.concat_tables(
269+
[results, table_slice], promote_options="default"
270+
)
269271
self.table_row_index += table_slice.num_rows
270272

271273
# Replace current table with the next table if we are at the end of the current table
@@ -292,7 +294,9 @@ def remaining_rows(self) -> "pyarrow.Table":
292294
table_slice = self.table.slice(
293295
self.table_row_index, self.table.num_rows - self.table_row_index
294296
)
295-
results = pyarrow.concat_tables([results, table_slice])
297+
results = pyarrow.concat_tables(
298+
[results, table_slice], promote_options="default"
299+
)
296300
self.table_row_index += table_slice.num_rows
297301
self.table = self._create_next_table()
298302
self.table_row_index = 0

tests/e2e/common/large_queries_mixin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ def test_query_with_large_narrow_result_set(self):
8383
assert row[0] == row_id
8484

8585
def test_long_running_query(self):
86-
"""Incrementally increase query size until it takes at least 4 minutes,
86+
"""Incrementally increase query size until it takes at least 2 minutes,
8787
and asserts that the query completes successfully.
8888
"""
8989
minutes = 60
90-
min_duration = 4 * minutes
90+
min_duration = 2 * minutes
9191

9292
duration = -1
9393
scale0 = 10000
@@ -113,5 +113,5 @@ def test_long_running_query(self):
113113
duration = time.time() - start
114114
current_fraction = duration / min_duration
115115
print("Took {} s with scale factor={}".format(duration, scale_factor))
116-
# Extrapolate linearly to reach 4 min and add 50% padding to push over the limit
116+
# Extrapolate linearly to reach 2 min and add 50% padding to push over the limit
117117
scale_factor = math.ceil(1.5 * scale_factor / current_fraction)

tests/e2e/common/staging_ingestion_tests.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user):
4646
) as conn:
4747

4848
cursor = conn.cursor()
49-
query = f"PUT '{temp_path}' INTO 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv' OVERWRITE"
49+
query = f"PUT '{temp_path}' INTO 'stage://tmp/{ingestion_user}/tmp/11/13/file1.csv' OVERWRITE"
5050
cursor.execute(query)
5151

5252
# GET should succeed
@@ -57,7 +57,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user):
5757
extra_params={"staging_allowed_local_path": new_temp_path}
5858
) as conn:
5959
cursor = conn.cursor()
60-
query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'"
60+
query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/13/file1.csv' TO '{new_temp_path}'"
6161
cursor.execute(query)
6262

6363
with open(new_fh, "rb") as fp:
@@ -67,7 +67,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user):
6767

6868
# REMOVE should succeed
6969

70-
remove_query = f"REMOVE 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv'"
70+
remove_query = f"REMOVE 'stage://tmp/{ingestion_user}/tmp/11/13/file1.csv'"
7171

7272
with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn:
7373
cursor = conn.cursor()
@@ -79,7 +79,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user):
7979
Error, match="Staging operation over HTTP was unsuccessful: 404"
8080
):
8181
cursor = conn.cursor()
82-
query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'"
82+
query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/13/file1.csv' TO '{new_temp_path}'"
8383
cursor.execute(query)
8484

8585
os.remove(temp_path)

tests/e2e/test_complex_types.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def table_fixture(self, connection_details):
1414
# Create the table
1515
cursor.execute(
1616
"""
17-
CREATE TABLE IF NOT EXISTS pysql_test_complex_types_table (
17+
CREATE TABLE IF NOT EXISTS pysql_test_complex_types_table_deprecated (
1818
array_col ARRAY<STRING>,
1919
map_col MAP<STRING, INTEGER>,
2020
struct_col STRUCT<field1: STRING, field2: INTEGER>
@@ -24,7 +24,7 @@ def table_fixture(self, connection_details):
2424
# Insert a record
2525
cursor.execute(
2626
"""
27-
INSERT INTO pysql_test_complex_types_table
27+
INSERT INTO pysql_test_complex_types_table_deprecated
2828
VALUES (
2929
ARRAY('a', 'b', 'c'),
3030
MAP('a', 1, 'b', 2, 'c', 3),
@@ -34,7 +34,7 @@ def table_fixture(self, connection_details):
3434
)
3535
yield
3636
# Clean up the table after the test
37-
cursor.execute("DROP TABLE IF EXISTS pysql_test_complex_types_table")
37+
cursor.execute("DROP TABLE IF EXISTS pysql_test_complex_types_table_deprecated")
3838

3939
@pytest.mark.parametrize(
4040
"field,expected_type",
@@ -45,7 +45,7 @@ def test_read_complex_types_as_arrow(self, field, expected_type, table_fixture):
4545

4646
with self.cursor() as cursor:
4747
result = cursor.execute(
48-
"SELECT * FROM pysql_test_complex_types_table LIMIT 1"
48+
"SELECT * FROM pysql_test_complex_types_table_deprecated LIMIT 1"
4949
).fetchone()
5050

5151
assert isinstance(result[field], expected_type)
@@ -57,7 +57,7 @@ def test_read_complex_types_as_string(self, field, table_fixture):
5757
extra_params={"_use_arrow_native_complex_types": False}
5858
) as cursor:
5959
result = cursor.execute(
60-
"SELECT * FROM pysql_test_complex_types_table LIMIT 1"
60+
"SELECT * FROM pysql_test_complex_types_table_deprecated LIMIT 1"
6161
).fetchone()
6262

6363
assert isinstance(result[field], str)

tests/e2e/test_parameterized_queries.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def inline_table(self, connection_details):
123123
"""
124124

125125
query = """
126-
CREATE TABLE IF NOT EXISTS pysql_e2e_inline_param_test_table (
126+
CREATE TABLE IF NOT EXISTS pysql_e2e_inline_param_test_table_deprecated (
127127
null_col INT,
128128
int_col INT,
129129
bigint_col BIGINT,
@@ -167,9 +167,9 @@ def _inline_roundtrip(self, params: dict, paramstyle: ParamStyle):
167167
This is a no-op but is included to make the test-code easier to read.
168168
"""
169169
target_column = self._get_inline_table_column(params.get("p"))
170-
INSERT_QUERY = f"INSERT INTO pysql_e2e_inline_param_test_table (`{target_column}`) VALUES (%(p)s)"
171-
SELECT_QUERY = f"SELECT {target_column} `col` FROM pysql_e2e_inline_param_test_table LIMIT 1"
172-
DELETE_QUERY = "DELETE FROM pysql_e2e_inline_param_test_table"
170+
INSERT_QUERY = f"INSERT INTO pysql_e2e_inline_param_test_table_deprecated (`{target_column}`) VALUES (%(p)s)"
171+
SELECT_QUERY = f"SELECT {target_column} `col` FROM pysql_e2e_inline_param_test_table_deprecated LIMIT 1"
172+
DELETE_QUERY = "DELETE FROM pysql_e2e_inline_param_test_table_deprecated"
173173

174174
with self.connection(extra_params={"use_inline_params": True}) as conn:
175175
with conn.cursor() as cursor:

0 commit comments

Comments
 (0)