Skip to content

Commit 1bdca33

Browse files
fix(bq_driver): Performance improvements for SQLColumns
1 parent 7d6187d commit 1bdca33

1 file changed

Lines changed: 65 additions & 24 deletions

File tree

google/cloud/odbc/bq_driver/internal/odbc_sql_columns.cc

Lines changed: 65 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@ using ::google::cloud::odbc_internal::SQLStates;
3232
using ::google::cloud::odbc_internal::StatusRecord;
3333
using ::google::cloud::odbc_internal::StatusRecordOr;
3434

35-
std::string const kTableAndViewTypes =
36-
"TABLE,VIEW,MATERIALIZED VIEW,EXTERNAL,SNAPSHOT,CLONE";
37-
3835
namespace {
3936

4037
bool IsTableNotFound(StatusRecord const& status) {
@@ -417,13 +414,41 @@ StatusRecordOr<std::vector<Table>> FetchBQTablesData(
417414
SQLStates::k_HY000(),
418415
"Invalid or null BQ Client within the connection handle"};
419416
}
420-
// Get Datasets based on search pattern in the dataset argument
421-
StatusRecordOr<std::vector<std::string>> datasets_status =
422-
GetFilteredDatasetIds(*bq_client, catalog, dataset_pattern, metadata_id);
423-
if (!datasets_status) {
424-
LOG(ERROR) << "FetchBQTablesData::GetFilteredDatasetIds:: "
425-
<< datasets_status.GetStatusRecord().message;
426-
return datasets_status.GetStatusRecord();
417+
418+
bool const case_sensitive_match = metadata_id != SQL_TRUE;
419+
420+
if (case_sensitive_match && !IsSearchPatternArgument(dataset_pattern) &&
421+
!IsSearchPatternArgument(table_pattern)) {
422+
auto bq_table_status =
423+
FetchBQTableData(conn_handle, catalog, dataset_pattern, table_pattern);
424+
if (!bq_table_status) {
425+
auto const& status = bq_table_status.GetStatusRecord();
426+
// A non-existent table yields an empty result set, matching the discovery
427+
// path which simply finds no matching tables.
428+
if (IsTableNotFound(status)) {
429+
return result;
430+
}
431+
LOG(ERROR) << "FetchBQTablesData::FetchBQTableData (fast path):: "
432+
<< status.message;
433+
return status;
434+
}
435+
result.push_back(std::move(*bq_table_status));
436+
return result;
437+
}
438+
439+
std::vector<std::string> dataset_ids;
440+
if (case_sensitive_match && !IsSearchPatternArgument(dataset_pattern)) {
441+
dataset_ids.push_back(dataset_pattern);
442+
} else {
443+
StatusRecordOr<std::vector<std::string>> datasets_status =
444+
GetFilteredDatasetIds(*bq_client, catalog, dataset_pattern,
445+
metadata_id);
446+
if (!datasets_status) {
447+
LOG(ERROR) << "FetchBQTablesData::GetFilteredDatasetIds:: "
448+
<< datasets_status.GetStatusRecord().message;
449+
return datasets_status.GetStatusRecord();
450+
}
451+
dataset_ids = std::move(*datasets_status);
427452
}
428453

429454
struct TableTaskInput {
@@ -442,9 +467,9 @@ StatusRecordOr<std::vector<Table>> FetchBQTablesData(
442467
};
443468

444469
std::vector<DatasetTaskInput> dataset_tasks;
445-
dataset_tasks.reserve(datasets_status->size());
446-
for (std::size_t i = 0; i < datasets_status->size(); ++i) {
447-
dataset_tasks.push_back({i, datasets_status->at(i)});
470+
dataset_tasks.reserve(dataset_ids.size());
471+
for (std::size_t i = 0; i < dataset_ids.size(); ++i) {
472+
dataset_tasks.push_back({i, dataset_ids[i]});
448473
}
449474

450475
// Run broad SQLColumns discovery in parallel: first table listing per
@@ -456,29 +481,45 @@ StatusRecordOr<std::vector<Table>> FetchBQTablesData(
456481
}
457482
int max_threads = trace_option->max_threads;
458483

484+
std::regex const table_regex = BuildRegex(table_pattern, metadata_id);
459485
auto fetch_tables_for_dataset_task = [&](DatasetTaskInput const& dataset_task)
460486
-> StatusRecordOr<DatasetTablesBatch> {
461-
StatusRecordOr<std::vector<FilteredTableResponse>> tables_status =
462-
GetFilteredTables(stmt_handle, catalog, dataset_task.dataset,
463-
table_pattern, kTableAndViewTypes, metadata_id);
464-
if (!tables_status) {
465-
LOG(ERROR) << "FetchBQTablesData::GetFilteredTables:: "
466-
<< tables_status.GetStatusRecord().message;
467-
return tables_status.GetStatusRecord();
468-
}
487+
Options options;
488+
options.set<MaxRetriesOption>(conn_handle.GetDsn().max_retries);
489+
auto tables_status =
490+
bq_client->ListAllTables(catalog, dataset_task.dataset, options);
469491

470492
DatasetTablesBatch batch{
471493
dataset_task.dataset_index, dataset_task.dataset, {}};
472-
batch.table_names.reserve(tables_status->size());
473-
for (auto const& filtered_table : *tables_status) {
474-
batch.table_names.push_back(filtered_table.table_name);
494+
if (!tables_status) {
495+
auto const& status = tables_status.GetStatusRecord();
496+
497+
if (IsTableNotFound(status)) {
498+
LOG(WARNING)
499+
<< "FetchBQTablesData:: Skipping dataset not found or with "
500+
<< "no tables: '" << dataset_task.dataset
501+
<< "': " << status.message;
502+
return batch;
503+
}
504+
LOG(ERROR) << "FetchBQTablesData::ListAllTables:: " << status.message;
505+
return status;
506+
}
507+
508+
for (auto const& list_table : *tables_status) {
509+
// Match (and store) the table's real-cased id so the subsequent
510+
// tables.get lookup succeeds even under case-insensitive matching.
511+
std::string const& table_id = list_table.table_reference.table_id;
512+
if (std::regex_match(table_id, table_regex)) {
513+
batch.table_names.push_back(table_id);
514+
}
475515
}
476516
return batch;
477517
};
478518

479519
auto dataset_tables_results_or =
480520
ExecuteParallelTasks<DatasetTaskInput, DatasetTablesBatch>(
481521
max_threads, dataset_tasks, fetch_tables_for_dataset_task);
522+
482523
if (!dataset_tables_results_or) {
483524
LOG(ERROR)
484525
<< "FetchBQTablesData::ExecuteParallelTasks(GetFilteredTables):: "

0 commit comments

Comments
 (0)