forked from databricks/databricks-sql-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclient.py
More file actions
789 lines (677 loc) · 29.5 KB
/
client.py
File metadata and controls
789 lines (677 loc) · 29.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
from typing import Dict, Tuple, List, Optional, Any, Union
import pandas
import pyarrow
from databricks.sql import __version__
from databricks.sql import *
from databricks.sql.exc import OperationalError
from databricks.sql.thrift_backend import ThriftBackend
from databricks.sql.utils import ExecuteResponse, ParamEscaper
from databricks.sql.types import Row
from databricks.sql.auth.auth import get_python_sql_connector_auth_provider
from databricks.sql.experimental.oauth_persistence import OAuthPersistence
# Module-level logger shared by Connection, Cursor and ResultSet.
logger = logging.getLogger(__name__)

# Default byte budget handed to the backend as ``max_bytes`` (10 MiB).
DEFAULT_RESULT_BUFFER_SIZE_BYTES = 10 * 1024 * 1024
# Default row budget handed to the backend as ``max_rows``.
DEFAULT_ARRAY_SIZE = 100_000
class Connection:
    """A session against a Databricks SQL endpoint or cluster, opened through ``ThriftBackend``."""

    def __init__(
        self,
        server_hostname: str,
        http_path: str,
        access_token: Optional[str] = None,
        http_headers: Optional[List[Tuple[str, str]]] = None,
        session_configuration: Optional[Dict[str, Any]] = None,
        catalog: Optional[str] = None,
        schema: Optional[str] = None,
        **kwargs
    ) -> None:
        """
        Connect to a Databricks SQL endpoint or a Databricks cluster.

        Parameters:
            :param server_hostname: Databricks instance host name.
            :param http_path: Http path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef)
                or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123)
            :param access_token: `str`, optional
                Http Bearer access token, e.g. Databricks Personal Access Token.
                Unless you use auth_type=`databricks-oauth` you need to pass `access_token`.
                Examples:
                    connection = sql.connect(
                        server_hostname='dbc-12345.staging.cloud.databricks.com',
                        http_path='sql/protocolv1/o/6789/12abc567',
                        access_token='dabpi12345678'
                    )
            :param http_headers: An optional list of (k, v) pairs that will be set as Http headers on every request
            :param session_configuration: An optional dictionary of Spark session parameters. Defaults to None.
                Execute the SQL command `SET -v` to get a full list of available commands.
            :param catalog: An optional initial catalog to use. Requires DBR version 9.0+
            :param schema: An optional initial schema to use. Requires DBR version 9.0+

        Other Parameters:
            auth_type: `str`, optional
                `databricks-oauth` : to use oauth with fine-grained permission scopes, set to `databricks-oauth`.
                This is currently in private preview for Databricks accounts on AWS.
                This supports User to Machine OAuth authentication for Databricks on AWS with
                any IDP configured. This is only for interactive python applications and opens a browser window.
                Note this is beta (private preview)

            experimental_oauth_persistence: configures preferred storage for persisting oauth tokens.
                This has to be a class implementing `OAuthPersistence`.
                When `auth_type` is set to `databricks-oauth` without persisting the oauth token in a persistence storage
                the oauth tokens will only be maintained in memory and if the python process restarts the end user
                will have to login again.
                Note this is beta (private preview)

                For persisting the oauth token in a prod environment you should subclass and implement OAuthPersistence

                from databricks.sql.experimental.oauth_persistence import OAuthPersistence, OAuthToken
                class MyCustomImplementation(OAuthPersistence):
                    def __init__(self, file_path):
                        self._file_path = file_path

                    def persist(self, token: OAuthToken):
                        # implement this method to persist token.refresh_token and token.access_token

                    def read(self) -> Optional[OAuthToken]:
                        # implement this method to return an instance of the persisted token

                connection = sql.connect(
                    server_hostname='dbc-12345.staging.cloud.databricks.com',
                    http_path='sql/protocolv1/o/6789/12abc567',
                    auth_type="databricks-oauth",
                    experimental_oauth_persistence=MyCustomImplementation()
                )

                For development purpose you can use the existing `DevOnlyFilePersistence` which stores the
                raw oauth token in the provided file path. Please note this is only for development and for prod you should provide your
                own implementation of OAuthPersistence.

                Examples:
                    # for development only
                    from databricks.sql.experimental.oauth_persistence import DevOnlyFilePersistence

                    connection = sql.connect(
                        server_hostname='dbc-12345.staging.cloud.databricks.com',
                        http_path='sql/protocolv1/o/6789/12abc567',
                        auth_type="databricks-oauth",
                        experimental_oauth_persistence=DevOnlyFilePersistence("~/dev-oauth.json")
                    )
        """

        # Internal arguments in **kwargs:
        # _user_agent_entry
        #   Tag to add to User-Agent header. For use by partners.
        # _username, _password
        #   Username and password Basic authentication (no official support)
        # _use_cert_as_auth
        #   Use a TLS cert instead of a token or username / password (internal use only)
        # _enable_ssl
        #   Connect over HTTP instead of HTTPS
        #   NOTE(review): presumably this applies when the flag is False - confirm in ThriftBackend
        # _port
        #   Which port to connect to
        # _skip_routing_headers:
        #   Don't set routing headers if set to True (for use when connecting directly to server)
        # _tls_verify_hostname
        #   Set to False (Boolean) to disable SSL hostname verification, but check certificate.
        # _tls_trusted_ca_file
        #   Set to the path of the file containing trusted CA certificates for server certificate
        #   verification. If not provided, uses system truststore.
        # _tls_client_cert_file, _tls_client_cert_key_file
        #   Set client SSL certificate.
        # _retry_stop_after_attempts_count
        #   The maximum number of attempts during a request retry sequence (defaults to 24)
        # _socket_timeout
        #   The timeout in seconds for socket send, recv and connect operations. Defaults to None for
        #   no timeout. Should be a positive float or integer.
        # _disable_pandas
        #   In case the deserialisation through pandas causes any issues, it can be disabled with
        #   this flag.
        # _use_arrow_native_complex_types
        #   DBR will return native Arrow types for structs, arrays and maps instead of Arrow strings
        #   (True by default)
        # _use_arrow_native_decimals
        #   Databricks runtime will return native Arrow types for decimals instead of Arrow strings
        #   (True by default)
        # _use_arrow_native_timestamps
        #   Databricks runtime will return native Arrow types for timestamps instead of Arrow strings
        #   (True by default)

        if access_token:
            # Forward the token through kwargs so both the auth provider and the backend see it.
            kwargs = {**kwargs, "access_token": access_token}

        # Set the bookkeeping attributes before any call that may raise, so that
        # __del__ / close() always find a consistent object.
        self.open = False
        self._cursors = []  # type: List[Cursor]
        self.host = server_hostname
        self.port = kwargs.get("_port", 443)
        self.disable_pandas = kwargs.get("_disable_pandas", False)

        auth_provider = get_python_sql_connector_auth_provider(
            server_hostname, **kwargs
        )

        user_agent_entry = kwargs.get("_user_agent_entry")
        if not user_agent_entry:
            useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)
        else:
            useragent_header = "{}/{} ({})".format(
                USER_AGENT_NAME, __version__, user_agent_entry
            )

        base_headers = [("User-Agent", useragent_header)]
        # list(...) lets callers pass any iterable of pairs (e.g. a tuple), not only a list.
        self.thrift_backend = ThriftBackend(
            self.host,
            self.port,
            http_path,
            list(http_headers or []) + base_headers,
            auth_provider,
            **kwargs
        )

        self._session_handle = self.thrift_backend.open_session(
            session_configuration, catalog, schema
        )
        self.open = True
        logger.info("Successfully opened session " + str(self.get_session_id()))

    def __enter__(self):
        """Return the connection itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection when the ``with`` block exits."""
        self.close()

    def __del__(self):
        """Best-effort close of a session that was never explicitly closed."""
        # getattr guards against instances whose __init__ never ran.
        if not getattr(self, "open", False):
            return

        logger.debug(
            "Closing unclosed connection for session "
            "{}".format(self.get_session_id())
        )
        try:
            # Cursors are deliberately left alone during finalisation.
            self._close(close_cursors=False)
        except OperationalError as e:
            # Close on best-effort basis.
            logger.debug("Couldn't close unclosed connection: {}".format(e.message))

    def get_session_id(self):
        """Return the backend's identifier for this connection's session handle."""
        return self.thrift_backend.handle_to_id(self._session_handle)

    def cursor(
        self,
        arraysize: int = DEFAULT_ARRAY_SIZE,
        buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
    ) -> "Cursor":
        """
        Return a new Cursor object using the connection.

        Will throw an Error if the connection has been closed.
        """
        if not self.open:
            raise Error("Cannot create cursor from closed connection")

        cursor = Cursor(
            self,
            self.thrift_backend,
            arraysize=arraysize,
            result_buffer_size_bytes=buffer_size_bytes,
        )
        self._cursors.append(cursor)
        return cursor

    def close(self) -> None:
        """Close the underlying session and mark all associated cursors as closed."""
        self._close()

    def _close(self, close_cursors=True) -> None:
        """
        Close the session, optionally closing every cursor created from it first.

        The connection is marked closed even when a close request raises, which
        mirrors how ``ResultSet.close`` records its own state; the exception still
        propagates to the caller.
        """
        try:
            if close_cursors:
                for cursor in self._cursors:
                    cursor.close()
        finally:
            try:
                self.thrift_backend.close_session(self._session_handle)
            finally:
                self.open = False

    def commit(self):
        """No-op because Databricks does not support transactions"""
        pass

    def rollback(self):
        """Always raises NotSupportedError: transactions are not supported."""
        raise NotSupportedError("Transactions are not supported on Databricks")
class Cursor:
    """Executes commands on a Connection and exposes the rows of the latest result set."""

    def __init__(
        self,
        connection: Connection,
        thrift_backend: ThriftBackend,
        result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
        arraysize: int = DEFAULT_ARRAY_SIZE,
    ) -> None:
        """
        These objects represent a database cursor, which is used to manage the context of a fetch
        operation.

        Cursors are not isolated, i.e., any changes done to the database by a cursor are immediately
        visible by other cursors or connections.
        """
        self.connection = connection
        self.rowcount = -1  # Return -1 as this is not supported
        self.buffer_size_bytes = result_buffer_size_bytes
        self.active_result_set: Union[ResultSet, None] = None
        self.arraysize = arraysize
        # Note that Cursor closed => active result set closed, but not vice versa
        self.open = True
        self.executing_command_id = None
        self.thrift_backend = thrift_backend
        self.active_op_handle = None
        self.escaper = ParamEscaper()
        self.lastrowid = None

    def __enter__(self):
        """Return the cursor itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the cursor when the ``with`` block exits."""
        self.close()

    def __iter__(self):
        """Yield the remaining rows of the active result set; raise Error if there is none."""
        if self.active_result_set:
            for row in self.active_result_set:
                yield row
        else:
            raise Error("There is no active result set")

    def _close_and_clear_active_result_set(self):
        """Close the active result set, if any, and always drop the reference to it."""
        try:
            if self.active_result_set:
                self.active_result_set.close()
        finally:
            self.active_result_set = None

    def _check_not_closed(self):
        """Raise Error if this cursor has been closed."""
        if not self.open:
            raise Error("Attempting operation on closed cursor")

    def _require_active_result_set(self) -> "ResultSet":
        """Return the active result set, raising Error if the cursor is closed or has none."""
        self._check_not_closed()
        if self.active_result_set:
            return self.active_result_set
        raise Error("There is no active result set")

    def _install_result_set(self, execute_response) -> "Cursor":
        """Wrap a backend response in a ResultSet, make it the active one and return self."""
        self.active_result_set = ResultSet(
            self.connection,
            execute_response,
            self.thrift_backend,
            self.buffer_size_bytes,
            self.arraysize,
        )
        return self

    def execute(
        self, operation: str, parameters: Optional[Dict[str, str]] = None
    ) -> "Cursor":
        """
        Execute a query and wait for execution to complete.
        Parameters should be given in extended param format style: %(...)<s|d|f>.
        For example:
            operation = "SELECT * FROM %(table_name)s"
            parameters = {"table_name": "my_table_name"}
        Will result in the query "SELECT * FROM 'my_table_name'" being sent to the server

        :returns self
        """
        # Fail fast on a closed cursor before doing any parameter formatting.
        self._check_not_closed()
        if parameters is not None:
            operation = operation % self.escaper.escape_args(parameters)

        self._close_and_clear_active_result_set()
        execute_response = self.thrift_backend.execute_command(
            operation=operation,
            session_handle=self.connection._session_handle,
            max_rows=self.arraysize,
            max_bytes=self.buffer_size_bytes,
            cursor=self,
        )
        return self._install_result_set(execute_response)

    def executemany(self, operation, seq_of_parameters):
        """
        Prepare a database operation (query or command) and then execute it against all parameter
        sequences or mappings found in the sequence ``seq_of_parameters``.

        Only the final result set is retained.

        :returns self
        """
        for parameters in seq_of_parameters:
            self.execute(operation, parameters)
        return self

    def catalogs(self) -> "Cursor":
        """
        Get all available catalogs.

        :returns self
        """
        self._check_not_closed()
        self._close_and_clear_active_result_set()
        execute_response = self.thrift_backend.get_catalogs(
            session_handle=self.connection._session_handle,
            max_rows=self.arraysize,
            max_bytes=self.buffer_size_bytes,
            cursor=self,
        )
        return self._install_result_set(execute_response)

    def schemas(
        self, catalog_name: Optional[str] = None, schema_name: Optional[str] = None
    ) -> "Cursor":
        """
        Get schemas corresponding to the catalog_name and schema_name.

        Names can contain % wildcards.

        :returns self
        """
        self._check_not_closed()
        self._close_and_clear_active_result_set()
        execute_response = self.thrift_backend.get_schemas(
            session_handle=self.connection._session_handle,
            max_rows=self.arraysize,
            max_bytes=self.buffer_size_bytes,
            cursor=self,
            catalog_name=catalog_name,
            schema_name=schema_name,
        )
        return self._install_result_set(execute_response)

    def tables(
        self,
        catalog_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
        table_types: Optional[List[str]] = None,
    ) -> "Cursor":
        """
        Get tables corresponding to the catalog_name, schema_name and table_name.

        Names can contain % wildcards.

        :returns self
        """
        self._check_not_closed()
        self._close_and_clear_active_result_set()
        execute_response = self.thrift_backend.get_tables(
            session_handle=self.connection._session_handle,
            max_rows=self.arraysize,
            max_bytes=self.buffer_size_bytes,
            cursor=self,
            catalog_name=catalog_name,
            schema_name=schema_name,
            table_name=table_name,
            table_types=table_types,
        )
        return self._install_result_set(execute_response)

    def columns(
        self,
        catalog_name: Optional[str] = None,
        schema_name: Optional[str] = None,
        table_name: Optional[str] = None,
        column_name: Optional[str] = None,
    ) -> "Cursor":
        """
        Get columns corresponding to the catalog_name, schema_name, table_name and column_name.

        Names can contain % wildcards.

        :returns self
        """
        self._check_not_closed()
        self._close_and_clear_active_result_set()
        execute_response = self.thrift_backend.get_columns(
            session_handle=self.connection._session_handle,
            max_rows=self.arraysize,
            max_bytes=self.buffer_size_bytes,
            cursor=self,
            catalog_name=catalog_name,
            schema_name=schema_name,
            table_name=table_name,
            column_name=column_name,
        )
        return self._install_result_set(execute_response)

    def fetchall(self) -> List[Row]:
        """
        Fetch all (remaining) rows of a query result, returning them as a sequence of sequences.

        A databricks.sql.Error (or subclass) exception is raised if the previous call to
        execute did not produce any result set or no call was issued yet.
        """
        return self._require_active_result_set().fetchall()

    def fetchone(self) -> Optional[Row]:
        """
        Fetch the next row of a query result set, returning a single sequence, or ``None`` when
        no more data is available.

        A databricks.sql.Error (or subclass) exception is raised if the previous call to
        execute did not produce any result set or no call was issued yet.
        """
        return self._require_active_result_set().fetchone()

    def fetchmany(self, size: Optional[int] = None) -> List[Row]:
        """
        Fetch the next set of rows of a query result, returning a sequence of sequences (e.g. a
        list of tuples).

        An empty sequence is returned when no more rows are available.

        The number of rows to fetch per call is specified by the parameter ``size``. If it is not
        given, the cursor's arraysize determines the number of rows to be fetched. The method
        should try to fetch as many rows as indicated by the size parameter. If this is not
        possible due to the specified number of rows not being available, fewer rows may be
        returned.

        A databricks.sql.Error (or subclass) exception is raised if the previous call
        to execute did not produce any result set or no call was issued yet.
        """
        result_set = self._require_active_result_set()
        n_rows = self.arraysize if size is None else size
        return result_set.fetchmany(n_rows)

    def fetchall_arrow(self) -> pyarrow.Table:
        """Fetch all (remaining) rows of the active result set as a PyArrow table."""
        return self._require_active_result_set().fetchall_arrow()

    def fetchmany_arrow(self, size: Optional[int] = None) -> pyarrow.Table:
        """
        Fetch up to ``size`` rows of the active result set as a PyArrow table.

        When ``size`` is omitted the cursor's arraysize is used.
        """
        result_set = self._require_active_result_set()
        n_rows = self.arraysize if size is None else size
        return result_set.fetchmany_arrow(n_rows)

    def cancel(self) -> None:
        """
        Cancel a running command.

        The command should be closed to free resources from the server.
        This method can be called from another thread.
        """
        if self.active_op_handle is not None:
            self.thrift_backend.cancel_command(self.active_op_handle)
        else:
            logger.warning(
                "Attempting to cancel a command, but there is no "
                "currently executing command"
            )

    def close(self) -> None:
        """Close cursor"""
        self.open = False
        # The helper is a no-op when there is no active result set.
        self._close_and_clear_active_result_set()

    @property
    def description(self) -> Optional[List[Tuple]]:
        """
        This read-only attribute is a sequence of 7-item sequences.

        Each of these sequences contains information describing one result column:

        - name
        - type_code
        - display_size (None in current implementation)
        - internal_size (None in current implementation)
        - precision (None in current implementation)
        - scale (None in current implementation)
        - null_ok (always True in current implementation)

        This attribute will be ``None`` for operations that do not return rows or if the cursor has
        not had an operation invoked via the execute method yet.

        The ``type_code`` can be interpreted by comparing it to the Type Objects.
        """
        if self.active_result_set:
            return self.active_result_set.description
        return None

    @property
    def rownumber(self):
        """This read-only attribute should provide the current 0-based index of the cursor in the
        result set.

        The index can be seen as index of the cursor in a sequence (the result set). The next fetch
        operation will fetch the row indexed by ``rownumber`` in that sequence.
        """
        return self.active_result_set.rownumber if self.active_result_set else 0

    def setinputsizes(self, sizes):
        """Does nothing by default"""
        pass

    def setoutputsize(self, size, column=None):
        """Does nothing by default"""
        pass
class ResultSet:
    """Buffers and converts the rows produced by a single executed command."""

    def __init__(
        self,
        connection: Connection,
        execute_response: ExecuteResponse,
        thrift_backend: ThriftBackend,
        result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
        arraysize: int = 10000,
    ):
        """
        A ResultSet manages the results of a single command.

        :param connection: The parent connection that was used to execute this command
        :param execute_response: A `ExecuteResponse` class returned by a command execution
        :param thrift_backend: The backend used to fetch further results and close the command
        :param result_buffer_size_bytes: The size (in bytes) of the internal buffer + max fetch
            amount
        :param arraysize: The max number of rows to fetch at a time (PEP-249)
        """
        self.connection = connection
        self.command_id = execute_response.command_handle
        self.op_state = execute_response.status
        self.has_been_closed_server_side = execute_response.has_been_closed_server_side
        self.has_more_rows = execute_response.has_more_rows
        self.buffer_size_bytes = result_buffer_size_bytes
        self.arraysize = arraysize
        self.thrift_backend = thrift_backend
        self.description = execute_response.description
        self._arrow_schema_bytes = execute_response.arrow_schema_bytes
        self._next_row_index = 0

        if execute_response.arrow_queue:
            # In this case the server has taken the fast path and returned an initial batch of
            # results
            self.results = execute_response.arrow_queue
        else:
            # In this case, there are results waiting on the server so we fetch now for simplicity
            self._fill_results_buffer()

    def __iter__(self):
        """Yield rows one at a time until ``fetchone`` reports exhaustion with ``None``."""
        while True:
            row = self.fetchone()
            # Compare against None explicitly: a row is a sequence, and an empty
            # sequence is falsy, which must not be mistaken for end-of-data.
            if row is None:
                break
            yield row

    def _fill_results_buffer(self):
        """Fetch the next batch from the backend, replacing ``results`` and ``has_more_rows``."""
        results, has_more_rows = self.thrift_backend.fetch_results(
            op_handle=self.command_id,
            max_rows=self.arraysize,
            max_bytes=self.buffer_size_bytes,
            expected_row_start_offset=self._next_row_index,
            arrow_schema_bytes=self._arrow_schema_bytes,
            description=self.description,
        )
        self.results = results
        self.has_more_rows = has_more_rows

    def _convert_arrow_table(self, table):
        """Convert a PyArrow table into a list of ``Row`` objects named after the description."""
        column_names = [c[0] for c in self.description]
        ResultRow = Row(*column_names)

        if self.connection.disable_pandas is True:
            # Pure-Arrow path: walk the columns in lockstep and convert each scalar.
            return [
                ResultRow(*[v.as_py() for v in r]) for r in zip(*table.itercolumns())
            ]

        # Need to use nullable types, as otherwise type can change when there are missing values.
        # See https://arrow.apache.org/docs/python/pandas.html#nullable-types
        # NOTE: This api is experimental https://pandas.pydata.org/pandas-docs/stable/user_guide/integer_na.html
        dtype_mapping = {
            pyarrow.int8(): pandas.Int8Dtype(),
            pyarrow.int16(): pandas.Int16Dtype(),
            pyarrow.int32(): pandas.Int32Dtype(),
            pyarrow.int64(): pandas.Int64Dtype(),
            pyarrow.uint8(): pandas.UInt8Dtype(),
            pyarrow.uint16(): pandas.UInt16Dtype(),
            pyarrow.uint32(): pandas.UInt32Dtype(),
            pyarrow.uint64(): pandas.UInt64Dtype(),
            pyarrow.bool_(): pandas.BooleanDtype(),
            pyarrow.float32(): pandas.Float32Dtype(),
            pyarrow.float64(): pandas.Float64Dtype(),
            pyarrow.string(): pandas.StringDtype(),
        }

        # Need to rename columns, as the to_pandas function cannot handle duplicate column names
        table_renamed = table.rename_columns([str(c) for c in range(table.num_columns)])
        df = table_renamed.to_pandas(
            types_mapper=dtype_mapping.get,
            date_as_object=True,
            timestamp_as_object=True,
        )

        res = df.to_numpy(na_value=None)
        return [ResultRow(*v) for v in res]

    @property
    def rownumber(self):
        """Index of the next row to be fetched (number of rows consumed so far)."""
        return self._next_row_index

    def fetchmany_arrow(self, size: int) -> pyarrow.Table:
        """
        Fetch the next set of rows of a query result, returning a PyArrow table.

        An empty sequence is returned when no more rows are available.

        :raises ValueError: if ``size`` is negative
        """
        if size < 0:
            # Interpolate explicitly; passing ``size`` as a second exception argument
            # would leave the literal "%s" in the message.
            raise ValueError(
                "size argument for fetchmany is %s but must be >= 0" % (size,)
            )

        results = self.results.next_n_rows(size)
        n_remaining_rows = size - results.num_rows
        self._next_row_index += results.num_rows

        # Keep pulling batches until the request is satisfied or the server has nothing left.
        while (
            n_remaining_rows > 0
            and not self.has_been_closed_server_side
            and self.has_more_rows
        ):
            self._fill_results_buffer()
            partial_results = self.results.next_n_rows(n_remaining_rows)
            results = pyarrow.concat_tables([results, partial_results])
            n_remaining_rows -= partial_results.num_rows
            self._next_row_index += partial_results.num_rows

        return results

    def fetchall_arrow(self) -> pyarrow.Table:
        """Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
        results = self.results.remaining_rows()
        self._next_row_index += results.num_rows

        while not self.has_been_closed_server_side and self.has_more_rows:
            self._fill_results_buffer()
            partial_results = self.results.remaining_rows()
            results = pyarrow.concat_tables([results, partial_results])
            self._next_row_index += partial_results.num_rows

        return results

    def fetchone(self) -> Optional[Row]:
        """
        Fetch the next row of a query result set, returning a single sequence,
        or None when no more data is available.
        """
        res = self._convert_arrow_table(self.fetchmany_arrow(1))

        if len(res) > 0:
            return res[0]
        return None

    def fetchall(self) -> List[Row]:
        """
        Fetch all (remaining) rows of a query result, returning them as a list of rows.
        """
        return self._convert_arrow_table(self.fetchall_arrow())

    def fetchmany(self, size: int) -> List[Row]:
        """
        Fetch the next set of rows of a query result, returning a list of rows.

        An empty sequence is returned when no more rows are available.
        """
        return self._convert_arrow_table(self.fetchmany_arrow(size))

    def close(self) -> None:
        """
        Close the result set.

        If the connection has not been closed, and the command has not already
        been closed on the server for some other reason, issue a request to the server to close it.
        The result set is marked closed even if that request raises.
        """
        try:
            if (
                self.op_state != self.thrift_backend.CLOSED_OP_STATE
                and not self.has_been_closed_server_side
                and self.connection.open
            ):
                self.thrift_backend.close_command(self.command_id)
        finally:
            self.has_been_closed_server_side = True
            self.op_state = self.thrift_backend.CLOSED_OP_STATE

    @staticmethod
    def _get_schema_description(table_schema_message):
        """
        Takes a TableSchema message and returns a description 7-tuple as specified by PEP-249
        """

        def map_col_type(type_):
            # Collapse every type name starting with "decimal" to plain "decimal".
            if type_.startswith("decimal"):
                return "decimal"
            return type_

        return [
            (column.name, map_col_type(column.datatype), None, None, None, None, None)
            for column in table_schema_message.columns
        ]