Showing preview only (7,897K chars total). Download the full file or copy to clipboard to get everything.
Repository: GoogleCloudDataproc/spark-bigquery-connector
Branch: master
Commit: 2da0fc8b64d1
Files: 676
Total size: 20.0 MB
Directory structure:
gitextract_x5ltg6ft/
├── .github/
│ └── workflows/
│ ├── codeql-analysis.yml
│ ├── cpd.yaml
│ └── spotless.yaml
├── .gitignore
├── .mvn/
│ └── wrapper/
│ ├── MavenWrapperDownloader.java
│ └── maven-wrapper.properties
├── CHANGES.md
├── CONTRIBUTING.md
├── LICENSE
├── README-template.md
├── README.md
├── bigquery-connector-common/
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── bigquery/
│ │ └── connector/
│ │ └── common/
│ │ ├── AccessToken.java
│ │ ├── AccessTokenProvider.java
│ │ ├── AccessTokenProviderCredentials.java
│ │ ├── ArrowReaderIterator.java
│ │ ├── ArrowUtil.java
│ │ ├── BigQueryClient.java
│ │ ├── BigQueryClientFactory.java
│ │ ├── BigQueryClientFactoryConfig.java
│ │ ├── BigQueryClientModule.java
│ │ ├── BigQueryConfig.java
│ │ ├── BigQueryConfigurationUtil.java
│ │ ├── BigQueryConnectorException.java
│ │ ├── BigQueryCredentialsSupplier.java
│ │ ├── BigQueryDirectDataWriterHelper.java
│ │ ├── BigQueryErrorCode.java
│ │ ├── BigQueryJobCompletionListener.java
│ │ ├── BigQueryMetrics.java
│ │ ├── BigQueryProxyConfig.java
│ │ ├── BigQueryProxyTransporterBuilder.java
│ │ ├── BigQueryPushdownException.java
│ │ ├── BigQueryPushdownUnsupportedException.java
│ │ ├── BigQueryStorageReadRowsTracer.java
│ │ ├── BigQueryTracerFactory.java
│ │ ├── BigQueryUtil.java
│ │ ├── ComparisonResult.java
│ │ ├── DecompressReadRowsResponse.java
│ │ ├── DurationTimer.java
│ │ ├── EnvironmentContext.java
│ │ ├── GcpUtil.java
│ │ ├── HttpUtil.java
│ │ ├── IdentityTokenSupplier.java
│ │ ├── IteratorMultiplexer.java
│ │ ├── LazyInitializationSupplier.java
│ │ ├── LoggingBigQueryStorageReadRowsTracer.java
│ │ ├── LoggingBigQueryTracerFactory.java
│ │ ├── MaterializationConfiguration.java
│ │ ├── NonInterruptibleBlockingBytesChannel.java
│ │ ├── ParallelArrowReader.java
│ │ ├── ParameterMode.java
│ │ ├── QueryParameterHelper.java
│ │ ├── ReadRowsHelper.java
│ │ ├── ReadRowsResponseInputStreamEnumeration.java
│ │ ├── ReadSessionCreator.java
│ │ ├── ReadSessionCreatorConfig.java
│ │ ├── ReadSessionCreatorConfigBuilder.java
│ │ ├── ReadSessionMetrics.java
│ │ ├── ReadSessionResponse.java
│ │ ├── StreamCombiningIterator.java
│ │ ├── UserAgentProvider.java
│ │ ├── VersionProvider.java
│ │ └── WriteStreamStatistics.java
│ └── test/
│ ├── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── bigquery/
│ │ └── connector/
│ │ └── common/
│ │ ├── BigQueryClientFactoryTest.java
│ │ ├── BigQueryConfigurationUtilTest.java
│ │ ├── BigQueryConnectorExceptionTest.java
│ │ ├── BigQueryCredentialsSupplierTest.java
│ │ ├── BigQueryUtilTest.java
│ │ ├── DurationTimerTest.java
│ │ ├── HttpUtilTest.java
│ │ ├── IteratorMultiplexerTest.java
│ │ ├── LoggingBigQueryStorageReadRowsTracerTest.java
│ │ ├── MockResponsesBatch.java
│ │ ├── ParallelArrowReaderTest.java
│ │ ├── ReadRowsHelperTest.java
│ │ ├── ReadSessionCreatorTest.java
│ │ └── integration/
│ │ ├── CustomCredentialsIntegrationTest.java
│ │ └── DefaultCredentialsDelegateAccessTokenProvider.java
│ └── resources/
│ └── external-account-credentials.json
├── cloudbuild/
│ ├── Dockerfile
│ ├── cloudbuild.yaml
│ ├── gcp-settings.xml
│ ├── nightly.sh
│ ├── nightly.yaml
│ └── presubmit.sh
├── coverage/
│ └── pom.xml
├── examples/
│ ├── notebooks/
│ │ ├── Advanced ML Pipelines.ipynb
│ │ ├── Distribute_Generic_Functions.ipynb
│ │ ├── Top words in Shakespeare by work.ipynb
│ │ └── Transform_with_Python.ipynb
│ └── python/
│ ├── query_results.py
│ └── shakespeare.py
├── mvnw
├── mvnw.cmd
├── pom.xml
├── scalastyle-config.xml
├── scripts/
│ └── verify-shading.sh
├── spark-bigquery-connector-common/
│ ├── pom.xml
│ ├── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ ├── com/
│ │ │ │ │ └── google/
│ │ │ │ │ └── cloud/
│ │ │ │ │ └── spark/
│ │ │ │ │ └── bigquery/
│ │ │ │ │ ├── ArrowBinaryIterator.java
│ │ │ │ │ ├── AvroBinaryIterator.java
│ │ │ │ │ ├── BigQueryConnectorUtils.java
│ │ │ │ │ ├── BigQueryRelation.java
│ │ │ │ │ ├── BigQueryRelationProviderBase.java
│ │ │ │ │ ├── BigQueryStreamWriter.java
│ │ │ │ │ ├── BigQueryStreamingSink.java
│ │ │ │ │ ├── BigQueryUtilScala.java
│ │ │ │ │ ├── DataFrameToRDDConverter.java
│ │ │ │ │ ├── DataSourceVersion.java
│ │ │ │ │ ├── GuiceInjectorCreator.java
│ │ │ │ │ ├── InjectorBuilder.java
│ │ │ │ │ ├── InjectorFactory.java
│ │ │ │ │ ├── InternalRowIterator.java
│ │ │ │ │ ├── PartitionOverwriteMode.java
│ │ │ │ │ ├── ProtobufUtils.java
│ │ │ │ │ ├── ReadRowsResponseToInternalRowIteratorConverter.java
│ │ │ │ │ ├── SchemaConverters.java
│ │ │ │ │ ├── SchemaConvertersConfiguration.java
│ │ │ │ │ ├── SparkBigQueryConfig.java
│ │ │ │ │ ├── SparkBigQueryConnectorModule.java
│ │ │ │ │ ├── SparkBigQueryConnectorUserAgentProvider.java
│ │ │ │ │ ├── SparkBigQueryConnectorVersionProvider.java
│ │ │ │ │ ├── SparkBigQueryLineageProvider.java
│ │ │ │ │ ├── SparkBigQueryProxyAndHttpConfig.java
│ │ │ │ │ ├── SparkBigQueryUtil.java
│ │ │ │ │ ├── SparkFilterUtils.java
│ │ │ │ │ ├── SupportedCustomDataType.java
│ │ │ │ │ ├── SupportsQueryPushdown.java
│ │ │ │ │ ├── TypeConverter.java
│ │ │ │ │ ├── direct/
│ │ │ │ │ │ ├── BigQueryPartition.java
│ │ │ │ │ │ ├── BigQueryRDDContext.java
│ │ │ │ │ │ ├── BigQueryRDDFactory.java
│ │ │ │ │ │ ├── DirectBigQueryRelation.java
│ │ │ │ │ │ ├── Scala213BigQueryRDD.java
│ │ │ │ │ │ └── ScalaIterator.java
│ │ │ │ │ ├── events/
│ │ │ │ │ │ ├── BigQueryJobCompletedEvent.java
│ │ │ │ │ │ ├── LoadJobCompletedEvent.java
│ │ │ │ │ │ └── QueryJobCompletedEvent.java
│ │ │ │ │ ├── examples/
│ │ │ │ │ │ └── JavaShakespeare.java
│ │ │ │ │ ├── metrics/
│ │ │ │ │ │ ├── DataOrigin.java
│ │ │ │ │ │ ├── SparkBigQueryConnectorMetricsUtils.java
│ │ │ │ │ │ ├── SparkBigQueryReadSessionMetrics.java
│ │ │ │ │ │ └── SparkMetricsSource.java
│ │ │ │ │ ├── pushdowns/
│ │ │ │ │ │ └── SparkBigQueryPushdown.java
│ │ │ │ │ ├── util/
│ │ │ │ │ │ └── HdfsUtils.java
│ │ │ │ │ └── write/
│ │ │ │ │ ├── BigQueryDataSourceWriterInsertableRelation.java
│ │ │ │ │ ├── BigQueryDeprecatedIndirectInsertableRelation.java
│ │ │ │ │ ├── BigQueryInsertableRelationBase.java
│ │ │ │ │ ├── BigQueryWriteHelper.java
│ │ │ │ │ ├── CreatableRelationProviderHelper.java
│ │ │ │ │ ├── DataSourceWriterContextPartitionHandler.java
│ │ │ │ │ ├── IntermediateDataCleaner.java
│ │ │ │ │ └── context/
│ │ │ │ │ ├── AvroIntermediateRecordWriter.java
│ │ │ │ │ ├── BigQueryDataSourceWriterModule.java
│ │ │ │ │ ├── BigQueryDirectDataSourceWriterContext.java
│ │ │ │ │ ├── BigQueryDirectDataWriterContext.java
│ │ │ │ │ ├── BigQueryDirectDataWriterContextFactory.java
│ │ │ │ │ ├── BigQueryDirectWriterCommitMessageContext.java
│ │ │ │ │ ├── BigQueryIndirectDataSourceWriterContext.java
│ │ │ │ │ ├── BigQueryIndirectDataWriterContext.java
│ │ │ │ │ ├── BigQueryIndirectDataWriterContextFactory.java
│ │ │ │ │ ├── BigQueryIndirectWriterCommitMessageContext.java
│ │ │ │ │ ├── DataSourceWriterContext.java
│ │ │ │ │ ├── DataWriterContext.java
│ │ │ │ │ ├── DataWriterContextFactory.java
│ │ │ │ │ ├── IntermediateRecordWriter.java
│ │ │ │ │ ├── NoOpDataWriterContext.java
│ │ │ │ │ └── WriterCommitMessageContext.java
│ │ │ │ └── org/
│ │ │ │ └── apache/
│ │ │ │ └── spark/
│ │ │ │ └── sql/
│ │ │ │ ├── Scala213SparkSqlUtils.java
│ │ │ │ └── SparkSqlUtils.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ ├── io.openlineage.spark.extension.OpenLineageExtensionProvider
│ │ │ └── org.apache.spark.sql.SparkSqlUtils
│ │ └── test/
│ │ ├── java/
│ │ │ ├── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── AvroSchemaConverterTest.java
│ │ │ │ ├── BigQueryRelationProviderTestBase.java
│ │ │ │ ├── BigQueryRelationTest.java
│ │ │ │ ├── DataSourceOptions.java
│ │ │ │ ├── InjectorBuilderTest.java
│ │ │ │ ├── MockResponsesBatch.java
│ │ │ │ ├── MockSparkBigQueryPushdown.java
│ │ │ │ ├── OptimizeLoadUriListTest.java
│ │ │ │ ├── ProtobufUtilsTest.java
│ │ │ │ ├── SchemaConverterTest.java
│ │ │ │ ├── SparkBigQueryConfigTest.java
│ │ │ │ ├── SparkBigQueryProxyAndHttpConfigTest.java
│ │ │ │ ├── SparkBigQueryUtilTest.java
│ │ │ │ ├── SparkFilterUtilsTest.java
│ │ │ │ ├── SupportedCustomDataTypeTest.java
│ │ │ │ ├── TestConstants.java
│ │ │ │ ├── acceptance/
│ │ │ │ │ ├── AcceptanceTestConstants.java
│ │ │ │ │ ├── AcceptanceTestContext.java
│ │ │ │ │ ├── AcceptanceTestUtils.java
│ │ │ │ │ ├── BigNumericDataprocServerlessAcceptanceTestBase.java
│ │ │ │ │ ├── DataprocAcceptanceTestBase.java
│ │ │ │ │ ├── DataprocServerlessAcceptanceTestBase.java
│ │ │ │ │ ├── ReadSheakspeareDataprocServerlessAcceptanceTestBase.java
│ │ │ │ │ └── WriteStreamDataprocServerlessAcceptanceTestBase.java
│ │ │ │ ├── direct/
│ │ │ │ │ └── Scala213BigQueryRDDTest.java
│ │ │ │ ├── integration/
│ │ │ │ │ ├── CatalogIntegrationTestBase.java
│ │ │ │ │ ├── IntegrationTestUtils.java
│ │ │ │ │ ├── OpenLineageIntegrationTestBase.java
│ │ │ │ │ ├── ReadByFormatIntegrationTestBase.java
│ │ │ │ │ ├── ReadFromQueryIntegrationTestBase.java
│ │ │ │ │ ├── ReadIntegrationTestBase.java
│ │ │ │ │ ├── SparkBigQueryIntegrationTestBase.java
│ │ │ │ │ ├── TestConstants.java
│ │ │ │ │ ├── TestDataset.java
│ │ │ │ │ ├── WriteIntegrationTestBase.java
│ │ │ │ │ └── model/
│ │ │ │ │ ├── ColumnOrderTestClass.java
│ │ │ │ │ ├── Data.java
│ │ │ │ │ ├── Friend.java
│ │ │ │ │ ├── Link.java
│ │ │ │ │ ├── NumStruct.java
│ │ │ │ │ ├── Person.java
│ │ │ │ │ ├── RangeData.java
│ │ │ │ │ └── StringStruct.java
│ │ │ │ ├── metrics/
│ │ │ │ │ ├── SparkBigQueryReadSessionMetricsTest.java
│ │ │ │ │ └── SparkMetricsSourceTest.java
│ │ │ │ ├── util/
│ │ │ │ │ └── HdfsUtilsTest.java
│ │ │ │ └── write/
│ │ │ │ ├── DataSourceWriterContextPartitionHandlerTest.java
│ │ │ │ └── context/
│ │ │ │ └── BigQueryDirectDataSourceWriterContextTest.java
│ │ │ └── org/
│ │ │ └── apache/
│ │ │ └── spark/
│ │ │ └── sql/
│ │ │ └── Scala213SparkSqlUtilsTest.java
│ │ └── resources/
│ │ ├── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ ├── ToIteratorTest/
│ │ │ ├── file1.txt
│ │ │ └── file2.csv
│ │ ├── acceptance/
│ │ │ ├── big_numeric.py
│ │ │ ├── read_shakespeare.py
│ │ │ ├── write_stream.py
│ │ │ └── write_stream_data.json
│ │ ├── alltypes.arrow
│ │ ├── alltypes.arrowschema
│ │ ├── alltypes.avro
│ │ ├── alltypes.avroschema.json
│ │ ├── arrowDateTimeRowsInBytes
│ │ ├── arrowDateTimeSchema
│ │ ├── integration/
│ │ │ ├── shakespeare.avro
│ │ │ ├── shakespeare.csv
│ │ │ ├── shakespeare.json
│ │ │ └── shakespeare.parquet
│ │ ├── log4j.properties
│ │ └── spark-bigquery-connector.properties
│ └── third_party/
│ └── apache-spark/
│ ├── LICENSE
│ ├── NOTICE
│ └── src/
│ └── main/
│ └── java/
│ └── com/
│ └── google/
│ └── cloud/
│ └── spark/
│ └── bigquery/
│ ├── ArrowSchemaConverter.java
│ └── AvroSchemaConverter.java
├── spark-bigquery-dsv1/
│ ├── pom.xml
│ ├── spark-bigquery-dsv1-parent/
│ │ └── pom.xml
│ ├── spark-bigquery-dsv1-spark2-support/
│ │ ├── pom.xml
│ │ └── src/
│ │ └── main/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── spark2/
│ │ │ └── Spark2DataFrameToRDDConverter.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── com.google.cloud.spark.bigquery.DataFrameToRDDConverter
│ ├── spark-bigquery-dsv1-spark3-support/
│ │ ├── pom.xml
│ │ └── src/
│ │ └── main/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── spark3/
│ │ │ ├── SerializableAbstractFunction1.java
│ │ │ └── Spark3DataFrameToRDDConverter.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── com.google.cloud.spark.bigquery.DataFrameToRDDConverter
│ ├── spark-bigquery-with-dependencies-parent/
│ │ └── pom.xml
│ ├── spark-bigquery-with-dependencies_2.11/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── javadoc/
│ │ │ └── README.md
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── acceptance/
│ │ ├── Scala211DataprocImage13AcceptanceTest.java
│ │ ├── Scala211DataprocImage13DisableConscryptAcceptanceTest.java
│ │ ├── Scala211DataprocImage14AcceptanceTest.java
│ │ └── Scala211DataprocImage14DisableConscryptAcceptanceTest.java
│ ├── spark-bigquery-with-dependencies_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── javadoc/
│ │ │ └── README.md
│ │ ├── test/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── acceptance/
│ │ │ ├── Scala212DataprocImage15AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage15DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212DataprocImage20AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage20DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212DataprocImage21AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212DataprocImage22AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage22DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212Spark32WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala212Spark33BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ └── Scala212Spark33ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── test_2.12/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── acceptance/
│ │ ├── Scala212BigNumericDataprocServerlessAcceptanceTest.java
│ │ ├── Scala212ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── Scala212WriteStreamDataprocServerlessAcceptanceTest.java
│ ├── spark-bigquery-with-dependencies_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── test/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── acceptance/
│ │ │ ├── Scala213DataprocImage21AcceptanceTest.java
│ │ │ ├── Scala213DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala213Spark33BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark33ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark33WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark34BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark34ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark35BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ └── Scala213Spark35ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── test_2.13/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── acceptance/
│ │ ├── Scala213BigNumericDataprocServerlessAcceptanceTest.java
│ │ ├── Scala213ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── Scala213WriteStreamDataprocServerlessAcceptanceTest.java
│ ├── spark-bigquery_2.11/
│ │ └── pom.xml
│ ├── spark-bigquery_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── DefaultSource.java
│ │ │ │ └── Scala212BigQueryRelationProvider.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── Scala212BigQueryRelationProviderTest.java
│ ├── spark-bigquery_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── DefaultSource.java
│ │ │ │ └── Scala213BigQueryRelationProvider.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── Scala213BigQueryRelationProviderTest.java
│ └── src/
│ ├── build/
│ │ ├── javadoc/
│ │ │ └── README.md
│ │ └── resources/
│ │ └── spark-bigquery-connector.properties
│ └── test/
│ └── java/
│ └── com/
│ └── google/
│ └── cloud/
│ └── spark/
│ └── bigquery/
│ ├── SchemaConvertersTest.java
│ └── integration/
│ ├── DataSourceV1DirectWriteIntegrationTest.java
│ ├── DataSourceV1IndirectWriteIntegrationTest.java
│ ├── DataSourceV1OpenLineageIntegrationTest.java
│ ├── DataSourceV1ReadByFormatIntegrationTest.java
│ ├── DataSourceV1ReadFromQueryIntegrationTest.java
│ ├── DataSourceV1ReadIntegrationTest.java
│ └── DataSourceV1WriteIntegrationTestBase.java
├── spark-bigquery-dsv2/
│ ├── pom.xml
│ ├── spark-2.4-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── BigQueryDataSourceReader.java
│ │ │ │ ├── BigQueryDataSourceV2.java
│ │ │ │ ├── BigQueryDataSourceWriter.java
│ │ │ │ ├── Spark24DataWriter.java
│ │ │ │ ├── Spark24DataWriterFactory.java
│ │ │ │ ├── Spark24InputPartition.java
│ │ │ │ ├── Spark24InputPartitionReader.java
│ │ │ │ ├── Spark24Statistics.java
│ │ │ │ └── Spark24WriterCommitMessage.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark24DataprocImage14AcceptanceTest.java
│ │ │ ├── Spark24DataprocImage14DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark24DataprocImage15AcceptanceTest.java
│ │ │ └── Spark24DataprocImage15DisableConscryptAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark24DirectWriteIntegrationTest.java
│ │ ├── Spark24IndirectWriteIntegrationTest.java
│ │ ├── Spark24OpenLineageIntegrationTest.java
│ │ ├── Spark24ReadByFormatIntegrationTest.java
│ │ ├── Spark24ReadFromQueryIntegrationTest.java
│ │ └── Spark24ReadIntegrationTest.java
│ ├── spark-3.1-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark31BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark31DataprocImage20AcceptanceTest.java
│ │ │ ├── Spark31DataprocImage20DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark31ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark31WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark31DirectWriteIntegrationTest.java
│ │ ├── Spark31IndirectWriteIntegrationTest.java
│ │ ├── Spark31OpenLineageIntegrationTest.java
│ │ ├── Spark31ReadByFormatIntegrationTest.java
│ │ ├── Spark31ReadFromQueryIntegrationTest.java
│ │ └── Spark31ReadIntegrationTest.java
│ ├── spark-3.1-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── BigQueryBatchWrite.java
│ │ │ ├── BigQueryInputPartition.java
│ │ │ ├── BigQueryPartitionReader.java
│ │ │ ├── BigQueryPartitionReaderFactory.java
│ │ │ ├── BigQueryStreamingDataWriter.java
│ │ │ ├── BigQueryStreamingWrite.java
│ │ │ ├── BigQueryStreamingWriterFactory.java
│ │ │ ├── BigQueryTableCreator.java
│ │ │ ├── BigQueryWriteBuilder.java
│ │ │ ├── Spark31BigQueryDataWriter.java
│ │ │ ├── Spark31BigQueryDataWriterFactory.java
│ │ │ ├── Spark31BigQueryScanBuilder.java
│ │ │ ├── Spark31BigQueryTable.java
│ │ │ ├── Spark31BigQueryTableProvider.java
│ │ │ ├── Spark31BigQueryWriterCommitMessage.java
│ │ │ ├── Spark3Statistics.java
│ │ │ └── Spark3Util.java
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.2-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark32BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark32DataprocImage21AcceptanceTest.java
│ │ │ ├── Spark32DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark32ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark32WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark32DirectWriteIntegrationTest.java
│ │ ├── Spark32IndirectWriteIntegrationTest.java
│ │ ├── Spark32OpenLineageIntegrationTest.java
│ │ ├── Spark32ReadByFormatIntegrationTest.java
│ │ ├── Spark32ReadFromQueryIntegrationTest.java
│ │ └── Spark32ReadIntegrationTest.java
│ ├── spark-3.2-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── Spark32BigQueryPartitionReader.java
│ │ │ ├── Spark32BigQueryPartitionReaderFactory.java
│ │ │ ├── Spark32BigQueryScanBuilder.java
│ │ │ ├── Spark32BigQueryTable.java
│ │ │ └── Spark32BigQueryTableProvider.java
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.3-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark33BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark33DataprocImage21AcceptanceTest.java
│ │ │ ├── Spark33DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark33ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark33WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark33DirectWriteIntegrationTest.java
│ │ ├── Spark33IndirectWriteIntegrationTest.java
│ │ ├── Spark33OpenLineageIntegrationTest.java
│ │ ├── Spark33ReadByFormatIntegrationTest.java
│ │ ├── Spark33ReadFromQueryIntegrationTest.java
│ │ └── Spark33ReadIntegrationTest.java
│ ├── spark-3.3-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── Spark33BigQueryScanBuilder.java
│ │ │ ├── Spark33BigQueryTable.java
│ │ │ └── Spark33BigQueryTableProvider.java
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.4-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark34BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark34ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark34WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark34DirectWriteIntegrationTest.java
│ │ ├── Spark34IndirectWriteIntegrationTest.java
│ │ ├── Spark34OpenLineageIntegrationTest.java
│ │ ├── Spark34ReadByFormatIntegrationTest.java
│ │ ├── Spark34ReadFromQueryIntegrationTest.java
│ │ └── Spark34ReadIntegrationTest.java
│ ├── spark-3.4-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── Spark34BigQueryTable.java
│ │ │ │ ├── Spark34BigQueryTableProvider.java
│ │ │ │ └── TimestampNTZTypeConverter.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ └── TimestampNTZTypeConverterTest.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.5-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ ├── acceptance/
│ │ │ │ ├── Spark35BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ │ ├── Spark35CatalogAcceptanceTest.java
│ │ │ │ ├── Spark35DataprocImage22AcceptanceTest.java
│ │ │ │ ├── Spark35DataprocImage22DisableConscryptAcceptanceTest.java
│ │ │ │ ├── Spark35ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ │ └── Spark35WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ └── integration/
│ │ │ ├── Spark35CatalogIntegrationTest.java
│ │ │ ├── Spark35DirectWriteIntegrationTest.java
│ │ │ ├── Spark35IndirectWriteIntegrationTest.java
│ │ │ ├── Spark35OpenLineageIntegrationTest.java
│ │ │ ├── Spark35ReadByFormatIntegrationTest.java
│ │ │ ├── Spark35ReadFromQueryIntegrationTest.java
│ │ │ └── Spark35ReadIntegrationTest.java
│ │ └── resources/
│ │ └── acceptance/
│ │ ├── spark-dml-create-table-as-select-in-default-dataset.sql
│ │ ├── spark-dml-create-table-in-default-dataset.sql
│ │ ├── spark-dml-custom-dataset.sql
│ │ └── spark-dml-explicit-table.sql
│ ├── spark-3.5-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── BigQueryCatalog.java
│ │ │ │ ├── NoSuchBigQueryTableException.java
│ │ │ │ └── v2/
│ │ │ │ ├── BigQueryIdentifier.java
│ │ │ │ ├── Spark35BigQueryTable.java
│ │ │ │ ├── Spark35BigQueryTableProvider.java
│ │ │ │ └── Spark35BigQueryWriteBuilder.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-4.0-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ ├── acceptance/
│ │ │ │ ├── Spark40BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ │ ├── Spark40CatalogAcceptanceTest.java
│ │ │ │ ├── Spark40DataprocImage30AcceptanceTest.java
│ │ │ │ ├── Spark40DataprocImage30DisableConscryptAcceptanceTest.java
│ │ │ │ ├── Spark40ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ │ └── Spark40WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ └── integration/
│ │ │ ├── Spark40CatalogIntegrationTest.java
│ │ │ ├── Spark40DirectWriteIntegrationTest.java
│ │ │ ├── Spark40IndirectWriteIntegrationTest.java
│ │ │ ├── Spark40OpenLineageIntegrationTest.java
│ │ │ ├── Spark40ReadByFormatIntegrationTest.java
│ │ │ ├── Spark40ReadFromQueryIntegrationTest.java
│ │ │ └── Spark40ReadIntegrationTest.java
│ │ └── resources/
│ │ └── acceptance/
│ │ ├── spark-dml-create-table-as-select-in-default-dataset.sql
│ │ ├── spark-dml-create-table-in-default-dataset.sql
│ │ ├── spark-dml-custom-dataset.sql
│ │ └── spark-dml-explicit-table.sql
│ ├── spark-4.0-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── Spark40BigQueryTable.java
│ │ │ │ ├── Spark40BigQueryTableProvider.java
│ │ │ │ └── Spark40BigQueryWriteBuilder.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-4.1-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ ├── acceptance/
│ │ │ │ ├── Spark41BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ │ ├── Spark41CatalogAcceptanceTest.java
│ │ │ │ ├── Spark41DataprocImage30AcceptanceTest.java
│ │ │ │ ├── Spark41DataprocImage30DisableConscryptAcceptanceTest.java
│ │ │ │ ├── Spark41ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ │ └── Spark41WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ └── integration/
│ │ │ ├── Spark41CatalogIntegrationTest.java
│ │ │ ├── Spark41DirectWriteIntegrationTest.java
│ │ │ ├── Spark41IndirectWriteIntegrationTest.java
│ │ │ ├── Spark41OpenLineageIntegrationTest.java
│ │ │ ├── Spark41ReadByFormatIntegrationTest.java
│ │ │ ├── Spark41ReadFromQueryIntegrationTest.java
│ │ │ └── Spark41ReadIntegrationTest.java
│ │ └── resources/
│ │ └── acceptance/
│ │ ├── spark-dml-create-table-as-select-in-default-dataset.sql
│ │ ├── spark-dml-create-table-in-default-dataset.sql
│ │ ├── spark-dml-custom-dataset.sql
│ │ └── spark-dml-explicit-table.sql
│ ├── spark-4.1-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── Spark41BigQueryTable.java
│ │ │ │ ├── Spark41BigQueryTableProvider.java
│ │ │ │ └── Spark41BigQueryWriteBuilder.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-bigquery-dsv2-common/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── BaseBigQuerySource.java
│ │ │ └── context/
│ │ │ ├── ArrowColumnBatchPartitionReaderContext.java
│ │ │ ├── ArrowInputPartitionContext.java
│ │ │ ├── BigQueryDataSourceReaderContext.java
│ │ │ ├── BigQueryDataSourceReaderModule.java
│ │ │ ├── BigQueryInputPartitionContext.java
│ │ │ ├── BigQueryInputPartitionReaderContext.java
│ │ │ ├── EmptyProjectionInputPartitionContext.java
│ │ │ ├── EmptyProjectionInputPartitionReaderContext.java
│ │ │ ├── InputPartitionContext.java
│ │ │ ├── InputPartitionReaderContext.java
│ │ │ └── StatisticsContext.java
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── v2/
│ │ └── context/
│ │ ├── ArrowInputPartitionContextTest.java
│ │ └── BigQueryInputPartitionReaderContextTest.java
│ ├── spark-bigquery-dsv2-parent/
│ │ └── pom.xml
│ └── spark-bigquery-metrics/
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── v2/
│ │ └── customMetrics/
│ │ ├── MetricUtils.java
│ │ ├── SparkBigQueryBytesReadMetric.java
│ │ ├── SparkBigQueryCustomMetricConstants.java
│ │ ├── SparkBigQueryNumberOfReadStreamsMetric.java
│ │ ├── SparkBigQueryParseTimeMetric.java
│ │ ├── SparkBigQueryRowsReadMetric.java
│ │ ├── SparkBigQueryScanTimeMetric.java
│ │ ├── SparkBigQueryTaskMetric.java
│ │ └── SparkBigQueryTimeInSparkMetric.java
│ └── test/
│ └── java/
│ └── com/
│ └── google/
│ └── cloud/
│ └── spark/
│ └── bigquery/
│ └── v2/
│ └── customMetrics/
│ ├── SparkBigQueryBytesReadMetricTest.java
│ ├── SparkBigQueryNumberOfReadStreamsMetricTest.java
│ ├── SparkBigQueryParseTimeMetricTest.java
│ ├── SparkBigQueryRowsReadMetricTest.java
│ ├── SparkBigQueryScanTimeMetricTest.java
│ ├── SparkBigQueryTaskMetricTest.java
│ └── SparkBigQueryTimeInSparkMetricTest.java
├── spark-bigquery-parent/
│ └── pom.xml
├── spark-bigquery-pushdown/
│ ├── pom.xml
│ ├── pushdown_common_src/
│ │ ├── main/
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── AggregateQuery.scala
│ │ │ ├── BaseSparkBigQueryPushdown.scala
│ │ │ ├── BigQuerySQLQuery.scala
│ │ │ ├── BigQuerySQLStatement.scala
│ │ │ ├── BigQueryStrategy.scala
│ │ │ ├── BinaryOperationExtractor.scala
│ │ │ ├── CastExpressionExtractor.scala
│ │ │ ├── FilterQuery.scala
│ │ │ ├── JoinExtractor.scala
│ │ │ ├── JoinQuery.scala
│ │ │ ├── LeftSemiJoinQuery.scala
│ │ │ ├── ProjectQuery.scala
│ │ │ ├── SortLimitQuery.scala
│ │ │ ├── SourceQuery.scala
│ │ │ ├── SparkBigQueryPushdownUtil.scala
│ │ │ ├── SparkExpressionConverter.scala
│ │ │ ├── SparkExpressionFactory.scala
│ │ │ ├── SparkPlanFactory.scala
│ │ │ ├── UnaryOperationExtractor.scala
│ │ │ ├── UnionOperationExtractor.scala
│ │ │ ├── UnionQuery.scala
│ │ │ └── WindowQuery.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── AggregateQuerySuite.scala
│ │ ├── BigQuerySQLStatementSuite.scala
│ │ ├── BigQueryStrategySuite.scala
│ │ ├── CastExpressionExtractorSuite.scala
│ │ ├── FilterQuerySuite.scala
│ │ ├── JoinQuerySuite.scala
│ │ ├── ProjectQuerySuite.scala
│ │ ├── SortLimitQuerySuite.scala
│ │ ├── SourceQuerySuite.scala
│ │ ├── SparkBigQueryPushdownUtilSuite.scala
│ │ ├── SparkExpressionConverterSuite.scala
│ │ ├── TestConstants.scala
│ │ ├── UnaryOperationExtractorSuite.scala
│ │ ├── UnionOperationExtractorSuite.scala
│ │ ├── UnionQuerySuite.scala
│ │ └── WindowQuerySuite.scala
│ ├── spark-2.4-bigquery-pushdown_2.11/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark24BigQueryPushdown.scala
│ │ │ ├── Spark24BigQueryPushdownPlan.scala
│ │ │ ├── Spark24BigQueryStrategy.scala
│ │ │ ├── Spark24ExpressionConverter.scala
│ │ │ ├── Spark24ExpressionFactory.scala
│ │ │ └── Spark24PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ ├── Spark24BigQueryStrategySuite.scala
│ │ └── Spark24ExpressionConverterSuite.scala
│ ├── spark-2.4-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark24BigQueryPushdown.scala
│ │ │ ├── Spark24BigQueryPushdownPlan.scala
│ │ │ ├── Spark24BigQueryStrategy.scala
│ │ │ ├── Spark24ExpressionConverter.scala
│ │ │ ├── Spark24ExpressionFactory.scala
│ │ │ └── Spark24PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── Spark24BigQueryStrategySuite.scala
│ │ └── Spark24ExpressionConverterSuite.scala
│ ├── spark-3.1-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark31BigQueryPushdown.scala
│ │ │ ├── Spark31BigQueryPushdownPlan.scala
│ │ │ ├── Spark31BigQueryStrategy.scala
│ │ │ ├── Spark31ExpressionConverter.scala
│ │ │ ├── Spark31ExpressionFactory.scala
│ │ │ └── Spark31PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ ├── Spark31BigQueryStrategySuite.scala
│ │ └── Spark31ExpressionConverterSuite.scala
│ ├── spark-3.2-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark32BigQueryPushdown.scala
│ │ │ ├── Spark32BigQueryPushdownPlan.scala
│ │ │ ├── Spark32BigQueryStrategy.scala
│ │ │ ├── Spark32ExpressionConverter.scala
│ │ │ ├── Spark32ExpressionFactory.scala
│ │ │ └── Spark32PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark32ExpressionConverterSuite.scala
│ ├── spark-3.2-bigquery-pushdown_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark32BigQueryPushdown.scala
│ │ │ ├── Spark32BigQueryPushdownPlan.scala
│ │ │ ├── Spark32BigQueryStrategy.scala
│ │ │ ├── Spark32ExpressionConverter.scala
│ │ │ ├── Spark32ExpressionFactory.scala
│ │ │ └── Spark32PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark32ExpressionConverterSuite.scala
│ ├── spark-3.3-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark33BigQueryPushdown.scala
│ │ │ ├── Spark33BigQueryPushdownPlan.scala
│ │ │ ├── Spark33BigQueryStrategy.scala
│ │ │ ├── Spark33ExpressionConverter.scala
│ │ │ ├── Spark33ExpressionFactory.scala
│ │ │ └── Spark33PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark33ExpressionConverterSuite.scala
│ ├── spark-3.3-bigquery-pushdown_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark33BigQueryPushdown.scala
│ │ │ ├── Spark33BigQueryPushdownPlan.scala
│ │ │ ├── Spark33BigQueryStrategy.scala
│ │ │ ├── Spark33ExpressionConverter.scala
│ │ │ ├── Spark33ExpressionFactory.scala
│ │ │ └── Spark33PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark33ExpressionConverterSuite.scala
│ ├── spark-bigquery-pushdown-common_2.11/
│ │ └── pom.xml
│ ├── spark-bigquery-pushdown-common_2.12/
│ │ └── pom.xml
│ ├── spark-bigquery-pushdown-common_2.13/
│ │ └── pom.xml
│ └── spark-bigquery-pushdown-parent/
│ └── pom.xml
├── spark-bigquery-python-lib/
│ ├── pom.xml
│ └── src/
│ ├── assembly/
│ │ └── descriptor.xml
│ └── main/
│ └── python/
│ ├── __init__.py
│ └── google/
│ ├── __init__.py
│ └── cloud/
│ ├── __init__.py
│ └── spark/
│ ├── __init__.py
│ └── bigquery/
│ ├── __init__.py
│ └── big_query_connector_utils.py
├── spark-bigquery-scala-212-support/
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ ├── java/
│ │ │ ├── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── direct/
│ │ │ │ └── PreScala213BigQueryRDD.java
│ │ │ └── org/
│ │ │ └── apache/
│ │ │ └── spark/
│ │ │ └── sql/
│ │ │ └── PreScala213SparkSqlUtils.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.SparkSqlUtils
│ └── test/
│ └── java/
│ ├── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── direct/
│ │ └── PreScala213BigQueryRDDTest.java
│ └── org/
│ └── apache/
│ └── spark/
│ └── sql/
│ └── PreScala213SparkSqlUtilsTest.java
└── spark-bigquery-tests/
└── pom.xml
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/codeql-analysis.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"
on:
push:
branches: [ master ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
schedule:
- cron: '18 6 * * 4'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://git.io/codeql-language-support
steps:
- name: Checkout repository
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v3
# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
================================================
FILE: .github/workflows/cpd.yaml
================================================
name: Duplicate Code Detection
on:
push:
branches: [ master ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '8'
cache: 'maven'
- name: Running Duplicate Code Detection
run: ./mvnw pmd:cpd-check -Pall -Daggregate=true
================================================
FILE: .github/workflows/spotless.yaml
================================================
name: Spotless Check
on:
push:
branches: [ master ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '8'
cache: 'maven'
- name: Check coding style
run: ./mvnw spotless:check -Pall
================================================
FILE: .gitignore
================================================
*.class
*.log
# sbt/maven specific
.cache/
.history/
.lib/
.flattened-pom.xml
dist/*
target/
lib_managed/
src_managed/
project/boot/
project/plugins/project/
dependency-reduced-pom.xml
*.versionsBackup
.mvn/wrapper/maven-wrapper.jar
# Scala-IDE specific
.scala_dependencies
.worksheet
.idea
*.iml
# Eclipse IDE Specific files
.classpath
.project
.settings/
# ad-hoc tests, should not be added to git
AdAdHocITSuite.scala
# Mac
.DS_Store
# Visual Studio IDE
.vscode/
# toolchains file
toolchains.xml
================================================
FILE: .mvn/wrapper/MavenWrapperDownloader.java
================================================
/*
* Copyright 2007-present the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.net.*;
import java.io.*;
import java.nio.channels.*;
import java.util.Properties;
/**
 * Bootstraps the Maven Wrapper by downloading {@code maven-wrapper.jar} into the project when it
 * is missing. Invoked by the {@code mvnw}/{@code mvnw.cmd} scripts with the project base directory
 * as the single command-line argument.
 */
public class MavenWrapperDownloader {

    private static final String WRAPPER_VERSION = "0.5.6";

    /** Default URL to download the maven-wrapper.jar from, if no 'downloadUrl' is provided. */
    private static final String DEFAULT_DOWNLOAD_URL = "https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/"
        + WRAPPER_VERSION + "/maven-wrapper-" + WRAPPER_VERSION + ".jar";

    /**
     * Path to the maven-wrapper.properties file, which might contain a downloadUrl property to
     * use instead of the default one.
     */
    private static final String MAVEN_WRAPPER_PROPERTIES_PATH =
        ".mvn/wrapper/maven-wrapper.properties";

    /** Path where the maven-wrapper.jar will be saved to. */
    private static final String MAVEN_WRAPPER_JAR_PATH =
        ".mvn/wrapper/maven-wrapper.jar";

    /** Name of the property which should be used to override the default download url for the wrapper. */
    private static final String PROPERTY_NAME_WRAPPER_URL = "wrapperUrl";

    /**
     * Downloads the wrapper jar and exits with status 0 on success, 1 on failure.
     *
     * @param args args[0] is the project base directory
     */
    public static void main(String[] args) {
        System.out.println("- Downloader started");
        File baseDirectory = new File(args[0]);
        System.out.println("- Using base directory: " + baseDirectory.getAbsolutePath());

        // If the maven-wrapper.properties exists, read it and check if it contains a custom
        // wrapperUrl parameter; otherwise fall back to the default download URL.
        File mavenWrapperPropertyFile = new File(baseDirectory, MAVEN_WRAPPER_PROPERTIES_PATH);
        String url = DEFAULT_DOWNLOAD_URL;
        if(mavenWrapperPropertyFile.exists()) {
            // try-with-resources guarantees the stream is closed on every path
            // (the original manual try/finally leaked it if close() itself threw).
            try (FileInputStream mavenWrapperPropertyFileInputStream =
                    new FileInputStream(mavenWrapperPropertyFile)) {
                Properties mavenWrapperProperties = new Properties();
                mavenWrapperProperties.load(mavenWrapperPropertyFileInputStream);
                url = mavenWrapperProperties.getProperty(PROPERTY_NAME_WRAPPER_URL, url);
            } catch (IOException e) {
                // Best effort: on a read failure we keep the default URL rather than aborting.
                System.out.println("- ERROR loading '" + MAVEN_WRAPPER_PROPERTIES_PATH + "'");
            }
        }
        System.out.println("- Downloading from: " + url);

        File outputFile = new File(baseDirectory.getAbsolutePath(), MAVEN_WRAPPER_JAR_PATH);
        if(!outputFile.getParentFile().exists() && !outputFile.getParentFile().mkdirs()) {
            // Directory creation failed; the download below will surface the resulting error.
            System.out.println(
                "- ERROR creating output directory '" + outputFile.getParentFile().getAbsolutePath() + "'");
        }
        System.out.println("- Downloading to: " + outputFile.getAbsolutePath());
        try {
            downloadFileFromURL(url, outputFile);
            System.out.println("Done");
            System.exit(0);
        } catch (Throwable e) {
            System.out.println("- Error downloading");
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Downloads the file at {@code urlString} to {@code destination}. If both MVNW_USERNAME and
     * MVNW_PASSWORD environment variables are set, they are installed as the default
     * {@link Authenticator} for HTTP basic authentication.
     *
     * @throws Exception on any network or file error
     */
    private static void downloadFileFromURL(String urlString, File destination) throws Exception {
        if (System.getenv("MVNW_USERNAME") != null && System.getenv("MVNW_PASSWORD") != null) {
            String username = System.getenv("MVNW_USERNAME");
            char[] password = System.getenv("MVNW_PASSWORD").toCharArray();
            Authenticator.setDefault(new Authenticator() {
                @Override
                protected PasswordAuthentication getPasswordAuthentication() {
                    return new PasswordAuthentication(username, password);
                }
            });
        }
        URL website = new URL(urlString);
        // try-with-resources closes both the channel and the output stream even when the
        // transfer fails (the original leaked both on the exception path).
        try (ReadableByteChannel rbc = Channels.newChannel(website.openStream());
                FileOutputStream fos = new FileOutputStream(destination)) {
            fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE);
        }
    }
}
================================================
FILE: .mvn/wrapper/maven-wrapper.properties
================================================
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
================================================
FILE: CHANGES.md
================================================
# Release Notes
## Next
* Issue #1476: Fixed BigQuery MERGE statement ambiguity on overwrite when table columns are named `target` or `source`.
## 0.44.1 - 2026-03-25
* BigQuery API has been upgraded to version 2.60.0
* BigQuery Storage API has been upgraded to version 3.22.1
* GAX has been upgraded to version 2.75.0
* gRPC has been upgraded to version 1.79.0
* Guava has been upgraded to version 33.5.0-jre
* Netty has been upgraded to version 4.2.10.Final
* Protocol Buffers has been upgraded to version 4.34.0
* google-api-client has been upgraded to version 2.9.0
## 0.44.0 - 2026-02-11
* Added new connector, `spark-4.1-bigquery` aimed to be used in Spark 4.1. Like Spark 4.1, this connector requires at
least Java 17 runtime. It is currently in preview mode.
* `spark-4.0-bigquery` is generally available!
* PR #1445: Add streaming support for Spark DS v2 indirect write.
* PR #1452: Improved the performance of the dynamic partition overwrite for RANGE_BUCKET partitioned tables.
* PR #1434: Support per-catalog configuration for project and location for BigQueryCatalog.
## 0.43.1 - 2025-10-22
* Issue #1417: Fixed ClassCastException in AWS federated identity
* PR #1432: Fixing packaging issue with the spark-4.0-bigquery connector.
## 0.43.0 - 2025-10-17
* Added new connector, `spark-4.0-bigquery` aimed to be used in Spark 4.0. Like Spark 4.0, this connector requires at
least Java 17 runtime. It is currently in preview mode.
* PR #1367: Query Pushdown is no longer supported.
* PR #1369: Catalog enhancements
* PR #1374: Ensure TableId includes project ID if not explicitly set
* PR #1376: `materializationDataset` is now optional to read from views or queries.
* PR #1380: Fixed ImpersonatedCredentials serialization
* PR #1381: Added the option to set custom credentials scopes
* PR #1411: Added Support for [SparkSession#executeCommand](https://archive.apache.org/dist/spark/docs/3.0.0/api/java/org/apache/spark/sql/SparkSession.html#executeCommand-java.lang.String-java.lang.String-scala.collection.immutable.Map-)
* Issue #1421: Fix ArrowInputPartitionContext serialization issue. Thanks @mrjoe7 !
* BigQuery API has been upgraded to version 2.54.0
* BigQuery Storage API has been upgraded to version 3.16.1
* GAX has been upgraded to version 2.68.2
* gRPC has been upgraded to version 1.74.0
## 0.42.2 - 2025-05-16
* PR #1347: Get lineage out of query. Thanks @ddebowczyk92
* PR #1349: Add parameterized query support
* BigQuery API has been upgraded to version 2.48.1
* BigQuery Storage API has been upgraded to version 3.11.4
* GAX has been upgraded to version 2.63.1
* gRPC has been upgraded to version 1.71.0
## 0.42.1 - 2025-03-17
* CVE-2025-24970, CVE-2025-25193: Upgrading netty to version 4.1.119.Final
* PR #1284: Making BigQueryClientFactory Kryo serializable. Thanks @tom-s-powell !
* PR #1345: `materializationDataset` is no longer needed to read from views or queries.
## 0.42.0 - 2025-02-06
* PR #1333: Initial implementation of a BigQuery backed Spark Catalog
* PR #1335: Adding indirect write GCS cleanup logs
* BigQuery API has been upgraded to version 2.47.0
* BigQuery Storage API has been upgraded to version 3.11.2
* GAX has been upgraded to version 2.60.0
* Netty has been upgraded to version 4.1.117.Final
* Guava has been upgraded to version 33.4.0-jre
## 0.41.1 - 2024-12-20
* Issue #1290: Stopped using metadata for optimized count path
* Issue #1317: Improving OpenLineage 1.24.0+ compatibility
* PR #1311: Improve read session expired error message
* PR #1320: Set the `temporaryGcsBucket` to default to `fs.gs.system.bucket` if exists, negating the need to set it in Dataproc clusters.
* BigQuery API has been upgraded to version 2.45.0
* BigQuery Storage API has been upgraded to version 3.11.0
* GAX has been upgraded to version 2.59.0
* Netty has been upgraded to version 4.1.115.Final
* Arrow has been upgraded to version 17.0.0
* Avro has been upgraded to version 1.11.4
## 0.41.0 - 2024-09-05
* PR #1265 : Add additional parentheses for EqualNullSafe filter generation. Thanks @tom-s-powell !
* PR #1267 : Implement OpenLineage spark-extension-interfaces
* PR #1281 : Configure alternative BigNumeric precision and scale defaults
* Issue #1175: Add details to schema mismatch message
* BigQuery API has been upgraded to version 2.42.2
* BigQuery Storage API has been upgraded to version 3.9.0
* GAX has been upgraded to version 2.52.0
* Netty has been upgraded to version 4.1.113.Final
## 0.40.0 - 2024-08-05
* PR #1259 : Encode snapshotTimeMillis in view materialization query. Thanks @tom-s-powell !
* PR #1261 : Adding IdentityToken header in readRows call
* Issue #1043 : Fix Indirect write drops policy tags
* Issue #1244 : Set schema Field Nullables as per ALLOW_FIELD_RELAXATION
* Issue #1254 : fix getting partitioning fields for pseudo columns
* Issue #1263 : Support ClusteredFields in Direct Write
* BigQuery API has been upgraded to version 2.42.0
* BigQuery Storage API has been upgraded to version 3.8.0
* GAX has been upgraded to version 2.51.0
* gRPC has been upgraded to version 1.65.1
* Netty has been upgraded to version 4.1.112.Final
## 0.39.1 - 2024-06-24
* PR #1236: Fixing unshaded artifacts, added shading verification
* PR #1239: Allow GCS bucket to be supplied including a scheme. Thanks @tom-s-powell !
* Issue #1126: Fixing Kryo serialization issues
* Issue #1223: Fix gRPC status creates 'object not serializable' errors
## 0.39.0 - 2024-05-21
* PR #1221: Adding support for [table snapshots](https://cloud.google.com/bigquery/docs/table-snapshots-intro). Thanks @tom-s-powell !
* PR #1222: Add option to request lz4 compressed ReadRowsResponse
* PR #1225: Fixing multi-release jar shading
* PR #1227: Optimizing dynamic partition overwrite for Time partitioned tables
* BigQuery API has been upgraded to version 2.40.1
* BigQuery Storage API has been upgraded to version 3.5.1
* GAX has been upgraded to version 2.48.1
* gRPC has been upgraded to version 1.64.0
## 0.38.0 - 2024-05-01
* PR #1205: Sending Identity token in the read API header
* Issue #1195: Support map type with complex value
* Issue #1215: Support predicate pushdown for DATETIME
* BigQuery API has been upgraded to version 2.39.0
* BigQuery Storage API has been upgraded to version 3.5.0
* GAX has been upgraded to version 2.47.0
* Arrow has been upgraded to version 16.0.0
* gRPC has been upgraded to version 1.63.0
* Netty has been upgraded to version 4.1.109.Final
## 0.37.0 - 2024-03-25
* :warning: Starting version 0.38.0 of the connector, the `spark-2.4-bigquery` version won't be released as Spark 2.4 is
well-supported by the `spark-bigquery-with-dependencies` connectors.
* PR #1156: Propagate stats for BigLake Managed tables
* PR #1181: Add caching during protobuf generation
* PR #1190: Enable connection sharing for atLeastOnce writes
* Issue #1182: Fix query check logic
* BigQuery API has been upgraded to version 2.38.1
* BigQuery Storage API has been upgraded to version 3.3.1
* GAX has been upgraded to version 2.45.0
* Arrow has been upgraded to version 15.0.1
* gRPC has been upgraded to version 1.62.2
* Netty has been upgraded to version 4.1.107.Final
* Protocol Buffers has been upgraded to version 3.25.3
## 0.36.1 - 2024-01-31
* PR #1176: fix timestamp filter translation issue
## 0.36.0 - 2024-01-25
* PR #1155: allow lazy materialization of query on load
* PR #1163: Added config to set the BigQuery Job timeout
* PR #1166: Fix filters by adding surrounding parenthesis. Thanks @tom-s-powell !
* PR #1171: fix read, write issues with Timestamp
* Issue #1116: BigQuery write fails with MessageSize is too large
* BigQuery API has been upgraded to version 2.36.0
* GAX has been upgraded to version 2.40.0
* gRPC has been upgraded to version 1.61.0
* Netty has been upgraded to version 4.1.106.Final
* Protocol Buffers has been upgraded to version 3.25.2
## 0.35.1 - 2023-12-28
* PR #1153: allow writing spark string to BQ datetime
## 0.35.0 - 2023-12-19
* PR #1115: Added new connector, `spark-3.5-bigquery` aimed to be used in Spark 3.5. This connector implements new APIs and capabilities provided by the Spark Data Source V2 API.
* PR #1117: Make read session caching duration configurable
* PR #1118: Improve read session caching key
* PR #1122: Set traceId on write
* PR #1124: Added `SparkListenerEvent`s for Query and Load jobs running on BigQuery
* PR #1127: Fix job labeling for mixed case Dataproc job names
* PR #1136: Consider projections for biglake stats
* PR #1143: Enable async write for default stream
* BigQuery API has been upgraded to version 2.35.0
* BigQuery Storage API has been upgraded to version 2.47.0
* GAX has been upgraded to version 2.38.0
* gRPC has been upgraded to version 1.60.0
* Netty has been upgraded to version 4.1.101.Final
* Protocol Buffers has been upgraded to version 3.25.1
## 0.34.0 - 2023-10-31
* PR #1057: Enable async writes for greater throughput
* PR #1094: CVE-2023-5072: Upgrading the org.json:json dependency
* PR #1095: CVE-2023-4586: Upgrading the netty dependencies
* PR #1104: Fixed nested field predicate pushdown
* PR #1109: Enable read session caching by default for faster Spark planning
* PR #1111: Enable retry of failed messages
* Issue #103: Support for Dynamic partition overwrite for time and range partitioned table
* Issue #1099: Fixing the usage of ExternalAccountCredentials
* BigQuery API has been upgraded to version 2.33.2
* BigQuery Storage API has been upgraded to version 2.44.0
* GAX has been upgraded to version 2.35.0
* gRPC has been upgraded to version 1.58.0
* Protocol Buffers has been upgraded to version 3.24.4
## 0.33.0 - 2023-10-17
* Added new connector, `spark-3.4-bigquery` aimed to be used in Spark 3.4 and above. This connector implements new APIs and capabilities provided by the Spark Data Source V2 API.
* PR #1008: Adding support to expose BigQuery metrics using Spark custom metrics API.
* PR #1038: Logical plan now shows the BigQuery table of DirectBigQueryRelation. Thanks @idc101 !
* PR #1058: View names will appear in query plan instead of the materialized table
* PR #1061: Handle NPE case when reading BQ table with NUMERIC fields. Thanks @hayssams !
* PR #1069: Support TimestampNTZ datatype in spark 3.4
* Issue #453: fix comment handling in query
* Issue #144: allow writing Spark String to BQ TIME type
* Issue #867: Support writing with RangePartitioning
* Issue #1046: Add a way to disable map type support
* Issue #1062: Adding dataproc job ID and UUID labels to BigQuery jobs
## 0.32.2 - 2023-08-07
* CVE-2023-34462: Upgrading netty to version 4.1.96.Final
## 0.32.1 - 2023-08-03
* PR #1025: Handle Java 8 types for dates and timestamps when compiling filters. Thanks @tom-s-powell !
* Issue #1026: Fixing Numeric conversion
* Issue #1028: Fixing PolicyTags removal on overwrite
## 0.32.0 - 2023-07-17
* Issue #748: `_PARTITIONDATE` pseudo column is provided only for ingestion time **daily** partitioned tables
* Issue #990: Fix to support `allowFieldAddition` for columns with nested fields.
* Issue #993: Spark ML vector read and write fails
* PR #1007: Implement at-least-once option that utilizes default stream
## 0.31.1 - 2023-06-06
* Issue #988: Read statistics are logged at TRACE level. Update the log4j configuration accordingly in order to log them.
## 0.31.0 - 2023-06-01
* :warning: **Breaking Change** BigNumeric conversion has changed, and it is now converted to Spark's
Decimal data type. Notice that BigNumeric can have a wider precision than Decimal, so additional
setting may be needed. See [here](https://github.com/GoogleCloudDataproc/spark-bigquery-connector#numeric-and-bignumeric-support)
for additional details.
* Issue #945: Fixing unable to add new column even with option `allowFieldAddition`
* PR #965: Fix to reuse the same BigQueryClient for the same BigQueryConfig, rather than creating a new one
* PR #950: Added support for service account impersonation
* PR #960: Added support for basic configuration of the gRPC channel pool size in the BigQueryReadClient.
* PR #973: Added support for writing to [CMEK managed tables](https://cloud.google.com/bigquery/docs/customer-managed-encryption).
* PR #971: Fixing wrong results or schema error when Spark nested schema pruning is on for datasource v2
* PR #974: Applying DPP to Hive partitioned BigLake tables (spark-3.2-bigquery and spark-3.3-bigquery only)
* PR #986: CVE-2020-8908, CVE-2023-2976: Upgrading Guava to version 32.0-jre
* BigQuery API has been upgraded to version 2.26.0
* BigQuery Storage API has been upgraded to version 2.36.1
* GAX has been upgraded to version 2.26.0
* gRPC has been upgraded to version 1.55.1
* Netty has been upgraded to version 4.1.92.Final
* Protocol Buffers has been upgraded to version 3.23.0
* PR #957: support direct write with subset field list.
## 0.30.0 - 2023-04-11
* New connectors are out of preview and are now generally available! This includes all the new
connectors: spark-2.4-bigquery, spark-3.1-bigquery, spark-3.2-bigquery and spark-3.3-bigquery are GA and ready to be used in all workloads. Please
refer to the [compatibility matrix](https://github.com/GoogleCloudDataproc/spark-bigquery-connector#connector-to-spark-compatibility-matrix)
when using them.
* Direct write method is out of preview and is now generally available!
* `spark-bigquery-with-dependencies_2.11` is no longer published. If a recent version of the Scala
2.11 connector is needed, it can be built by checking out the code and running
`./mvnw install -Pdsv1_2.11`.
* Issue #522: Supporting Spark's Map type. Notice there are few restrictions as this is not a
BigQuery native type.
* Added support for reading BigQuery table snapshots.
* BigQuery API has been upgraded to version 2.24.4
* BigQuery Storage API has been upgraded to version 2.34.2
* GAX has been upgraded to version 2.24.0
* gRPC has been upgraded to version 1.54.0
* Netty has been upgraded to version 4.1.90.Final
* PR #944: Added support to set query job priority
* Issue #908: Making sure that `preferred_min_stream_count` must be less than or equal to `max_stream_count`
## 0.29.0 - 2023-03-03
* Added two new connectors, `spark-3.2-bigquery` and `spark-3.3-bigquery` aimed to be used in Spark 3.2 and 3.3
respectively. Those connectors implement new APIs and capabilities provided by the Spark Data Source V2 API. Both
connectors are in preview mode.
* Dynamic partition pruning is supported in preview mode by `spark-3.2-bigquery` and `spark-3.3-bigquery`.
* This is the last version of the Spark BigQuery connector for scala 2.11. The code will remain in the repository and
can be compiled into a connector if needed.
* PR #857: Fixing `autovalue` shaded classes repackaging
* BigQuery API has been upgraded to version 2.22.0
* BigQuery Storage API has been upgraded to version 2.31.0
* GAX has been upgraded to version 2.23.0
* gRPC has been upgraded to version 1.53.0
* Netty has been upgraded to version 4.1.89.Final
## 0.28.1 - 2023-02-27
PR #904: Fixing premature client closing in certain cases, which causes RejectedExecutionException to be thrown
## 0.28.0 - 2023-01-09
* Adding support for the [JSON](https://cloud.google.com/bigquery/docs/reference/standard-sql/json-data) data type.
Thanks to @abhijeet-lele and @jonathan-ostrander for their contributions!
* Issue #821: Fixing direct write of empty DataFrames
* PR #832: Fixed client closing
* Issue #838: Fixing unshaded artifacts
* PR #848: Making schema comparison on write less strict
* PR #852: fixed `enableListInference` usage when using the default intermediate format
* Jackson has been upgraded to version 2.14.1, addressing CVE-2022-42003
* BigQuery API has been upgraded to version 2.20.0
* BigQuery Storage API has been upgraded to version 2.27.0
* GAX has been upgraded to version 2.20.1
* Guice has been upgraded to version 5.1.0
* gRPC has been upgraded to version 1.51.1
* Netty has been upgraded to version 4.1.86.Final
* Protocol Buffers has been upgraded to version 3.21.12
## 0.27.1 - 2022-10-18
* PR #792: Added ability to set table labels while writing to a BigQuery table
* PR #796: Allowing custom BigQuery API endpoints
* PR #803: Removed grpc-netty-shaded from the connector jar
* Protocol Buffers has been upgraded to version 3.21.7, addressing CVE-2022-3171
* BigQuery API has been upgraded to version 2.16.1
* BigQuery Storage API has been upgraded to version 2.21.0
* gRPC has been upgraded to version 1.49.1
* Netty has been upgraded to version 4.1.82.Final
## 0.27.0 - 2022-09-20
* Added new Scala 2.13 connector, aimed at Spark versions from 3.2 and above
* PR #750: Adding support for custom access token creation. See more [here](https://github.com/GoogleCloudDataproc/spark-bigquery-connector#how-do-i-authenticate-outside-gce--dataproc).
* PR #745: Supporting load from query in spark-3.1-bigquery.
* PR #767: Adding the option createReadSessionTimeoutInSeconds, to override the timeout for CreateReadSession.
## 0.26.0 - 2022-07-18
* All connectors support the DIRECT write method, using the BigQuery Storage Write API,
without first writing the data to GCS. **DIRECT write method is in preview mode**.
* `spark-3.1-bigquery` has been released in preview mode. This is a Java only library,
implementing the Spark 3.1 DataSource v2 APIs.
* BigQuery API has been upgraded to version 2.13.8
* BigQuery Storage API has been upgraded to version 2.16.0
* gRPC has been upgraded to version 1.47.0
* Netty has been upgraded to version 4.1.79.Final
## 0.25.2 - 2022-06-22
* PR #673: Added integration tests for BigLake external tables.
* PR #674: Increasing default maxParallelism to 10K for BigLake external tables
## 0.25.1 - 2022-06-13
* Issue #651: Fixing the write back to BigQuery.
* PR #664: Add support for BigLake external tables.
* PR #667: Allowing clustering on unpartitioned tables.
* PR #668: Using spark default parallelism as default.
## 0.25.0 - 2022-05-31
* Issue #593: Allow users to disable cache when loading data via SQL query,
by setting `cacheExpirationTimeInMinutes=0`
* PR #613: Added field level schema checks. This can be disabled by setting
`enableModeCheckForSchemaFields=false`
* PR #618: Added support for the `enableListInference` option. This allows using
  parquet as an intermediate format also for arrays, without adding the
  `list` element in the resulting schema as described
  [here](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions)
* PR #641: Removed Conscrypt from the shaded artifact in order to improve
compatibility with Dataproc Serverless and with clusters where Conscrypt is
disabled.
* BigQuery API has been upgraded to version 2.10.6
* BigQuery Storage API has been upgraded to version 2.12.0
* gRPC has been upgraded to version 1.46.0
* Netty has been upgraded to version 4.1.75.Final
## 0.24.2 - 2022-04-05
* PR #580: Fixed shaded artifacts version flattening, the version appears
correctly in the released POM
* PR #583: netty-tcnative is taken from the Netty BOM
* PR #584: CVE-2020-36518 - Upgraded jackson
## 0.24.1 - 2022-03-29
* PR #576: Fixed error running on Dataproc clusters where conscrypt is disabled
  (the property `dataproc.conscrypt.provider.enable` is set to `false`)
## 0.24.0 - 2022-03-23
* Issue #530: Treating Field.mode==null as Nullable
* PR #518: Cache expiration time can be configured now.
* PR #561: Added support for adding trace ID to the BigQuery reads and writes.
  The trace ID will be of the format `Spark:ApplicateName:JobID`. The
  application name must be set by the user; the job ID defaults to the Dataproc
  job ID if it exists, otherwise it is set to `spark.app.id`.
* PR #563: Fixed a bug where using writeMethod=DIRECT and SaveMode=Append the
destination table may have been deleted in case `abort()` has been called.
* PR #568: Added support for BigQuery jobs labels
* BigQuery API has been upgraded to version 2.9.4
* BigQuery Storage API has been upgraded to version 2.11.0
* gRPC has been upgraded to version 1.44.1
* Netty has been upgraded to version 4.1.73.Final
## 0.23.2 - 2022-01-19
* PR #521: Added Arrow compression options to the
spark-bigquery-with-dependencies_2.* connectors
* PR #526: Added the option to use parent project for the metadata/jobs API as
well
* BigQuery API has been upgraded to version 2.3.3
* BigQuery Storage API has been upgraded to version 2.4.2
* gRPC has been upgraded to version 1.42.1
* Netty has been upgraded to version 4.1.70.Final
## 0.23.1 - 2021-12-08
* Issue #501: Fixed using avro as an intermediate type for writing.
## 0.23.0 - 2021-12-06
* New connector: A Java only connector implementing the Spark 2.4 APIs
* PR #469: Added support for the BigQuery Storage Write API, allowing faster
writes (Spark 2.4 connector only)
* Issue #481: Added configuration option to use compression from the READ API
for Arrow
* BigQuery API has been upgraded to version 2.1.8
* BigQuery Storage API has been upgraded to version 2.1.2
* gRPC has been upgraded to version 1.41.0
## 0.22.2 - 2021-09-22
* Issue #446: BigNumeric values are properly written to BigQuery
* Issue #452: Adding the option to clean BigQueryClient.destinationTableCache
* BigQuery API has been upgraded to version 2.1.12
* BigQuery Storage API has been upgraded to version 2.3.1
* gRPC has been upgraded to version 1.40.0
## 0.22.1 - 2021-09-08
* Issue #444: allowing unpartitioned clustered table
## 0.22.0 - 2021-06-22
* PR #404: Added support for BigNumeric
* PR #430: Added HTTP and gRPC proxy support
* Issue #273: Resolved the streaming write issue for spark 3.x
## 0.21.1 - 2021-06-22
* PR #413: Pushing all filters to BigQuery Storage API
* Issue #412: Supporting WITH queries
* Issue #409: Allowing all whitespaces after the select
* PR #419: Fix a bug where background threads > 2 cases would miss pages (DSv2)
* PR #416: Moved zstd-jni library to be provided in order to solve Spark 2.4 compatibility (DSv2)
* PR #417: Added back column projection to DSv2
## 0.21.0 - 2021-06-01
* Issue #354: users can query a view with different columns in select() and filter()
* Issue #367: Struct column order is fixed
* Issue #383: Fixed table metadata update when writing to a partitioned table
* Issue #390: Allowing additional white-space types in the query
* Issue #393: replacing avro.shaded dependency with guava
* PR #360: Removed redundant `UNNEST` when compiling `IN` condition
* BigQuery API has been upgraded to version 1.131.1
* BigQuery Storage API has been upgraded to version 1.22.0
* Guava has been upgraded to version 30.1.1-jre
* gRPC has been upgraded to version 1.37.1
* Netty has been upgraded to version 4.1.65.Final
## 0.20.0 - 2021-03-29
* PR #375: Added support for pseudo columns - time partitioned tables now support the _PARTITIONTIME and _PARTITIONDATE fields
* Issue #190: Writing data to BigQuery properly populates the field description
* Issue #265: Fixed nested conjunctions/disjunctions when using the AVRO read format
* Issue #326: Fixing netty_tcnative_windows.dll shading
* Arrow has been upgraded to version 4.0.0
## 0.19.1 - 2021-03-01
* PR #324 - Restoring version 0.18.1 dependencies due to networking issues
* BigQuery API has been upgraded to version 1.123.2
* BigQuery Storage API has been upgraded to version 1.6.0
* Guava has been upgraded to version 30.0-jre
* Netty has been upgraded to version 4.1.51.Final
## 0.19.0 - 2021-02-24
* Issue #247: Allowing to load results of any arbitrary SELECT query from BigQuery.
* Issue #310: Allowing to configure the expiration time of materialized data.
* PR #283: Implemented Datasource v2 write support.
* Improved Spark 3 compatibility.
* BigQuery API has been upgraded to version 1.127.4
* BigQuery Storage API has been upgraded to version 1.10.0
* Guava has been upgraded to version 30.1-jre
* Netty has been upgraded to version 4.1.52.Final
## 0.18.1 - 2021-01-21
* Issue #248: Reducing the size of the URI list when writing to BigQuery. This allows larger DataFrames (>10,000 partitions) to be safely written.
* Issue #296: Removed redundant packaged slf4j-api.
* PR #276: Added the option to enable the `useAvroLogicalTypes` option when writing data to BigQuery.
## 0.18.0 - 2020-11-12
* Issue #226: Adding support for HOUR, MONTH, DAY TimePartitions
* Issue #260: Increasing connection timeout to the BigQuery service, and
configuring the request retry settings.
* Issue #263: Fixed `select *` error when ColumnarBatch is used (DataSource v2)
* Issue #266: Fixed the external configuration not working regression bug
(Introduced in version 0.17.2)
* PR #262: Filters on BigQuery DATE and TIMESTAMP now use the right type.
* BigQuery API has been upgraded to version 1.123.2
* BigQuery Storage API has been upgraded to version 1.6.0
* Guava has been upgraded to version 30.0-jre
* Netty has been upgraded to version 4.1.51.Final
* netty-tcnative has been upgraded to version 4.1.34.Final
## 0.17.3 - 2020-10-06
* PR #242, #243: Fixed Spark 3 compatibility, added Spark 3 acceptance test
* Issue #249: Fixing credentials creation from key
## 0.17.2 - 2020-09-10
* PR #239: Ensuring that the BigQuery client will have the proper project id
## 0.17.1 - 2020-08-06
* Issue #216: removed redundant ALPN dependency
* Issue #219: Fixed the LessThanOrEqual filter SQL compilation in the DataSource v2 implementation
* Issue #221: Fixed ProtobufUtilsTest.java with newer BigQuery dependencies
* PR #229: Adding support for Spark ML Vector and Matrix data types
* BigQuery API has been upgraded to version 1.116.8
* BigQuery Storage API has been upgraded to version 1.3.1
## 0.17.0 - 2020-07-15
* PR #201: [Structured streaming write](http://spark.apache.org/docs/2.4.5/structured-streaming-programming-guide.html#starting-streaming-queries)
is now supported (thanks @varundhussa)
* PR #202: Users now have the option to keep the data on GCS after writing to BigQuery (thanks @leoneuwald)
* PR #211: Enabling to overwrite data of a single date partition
* PR #198: Supporting columnar batch reads from Spark in the DataSource V2 implementation. **It is not ready for production use.**
* PR #192: Supporting `MATERIALIZED_VIEW` as table type
* Issue #197: Conditions on StructType fields are now handled by Spark and not the connector
* BigQuery API has been upgraded to version 1.116.3
* BigQuery Storage API has been upgraded to version 1.0.0
* Netty has been upgraded to version 4.1.48.Final (Fixing issue #200)
## 0.16.1 - 2020-06-11
* PR #186: Fixed SparkBigQueryConnectorUserAgentProvider initialization bug
## 0.16.0 - 2020-06-09
**Please don't use this version, use 0.16.1 instead**
* PR #180: Apache Arrow is now the default read format. Based on our benchmarking, Arrow provides read
  performance faster by 40% than Avro.
* PR #163: Apache Avro was added as a write intermediate format. It shows better performance over parquet
in large (>50GB) datasets. The spark-avro package must be added in runtime in order to use this format.
* PR #176: Usage simplification: Now instead of using the mandatory `table` option, users can use the built-in
  `path` parameter of `load()` and `save()`, so that read becomes
  `df = spark.read.format("bigquery").load("source_table")` and write becomes
  `df.write.format("bigquery").save("target_table")`
* An experimental implementation of the DataSource v2 API has been added. **It is not ready for
production use.**
* BigQuery API has been upgraded to version 1.116.1
* BigQuery Storage API has been upgraded to version 0.133.2-beta
* gRPC has been upgraded to version 1.29.0
* Guava has been upgraded to version 29.0-jre
## 0.15.1-beta - 2020-04-27
* PR #158: Users can now add the `spark.datasource.bigquery` prefix to the configuration options in order to support Spark's `--conf` command line flag
* PR #160: View materialization is performed only on action, fixing a bug where view materialization was done too early
## 0.15.0-beta - 2020-04-20
* PR #150: Reading `DataFrame`s should be quicker, especially in interactive usage such in notebooks
* PR #154: Upgraded to the BigQuery Storage v1 API
* PR #146: Authentication can be done using [AccessToken](https://cloud.google.com/sdk/gcloud/reference/auth/application-default/print-access-token)
on top of Credentials file, Credentials, and the `GOOGLE_APPLICATION_CREDENTIALS` environment variable.
## 0.14.0-beta - 2020-03-31
* Issue #96: Added Arrow as a supported format for reading from BigQuery
* Issue #130 Adding the field description to the schema metadata
* Issue #124: Fixing null values in ArrayType
* Issue #143: Allowing the setting of `SchemaUpdateOption`s When writing to BigQuery
* PR #148: Add support for writing clustered tables
* Upgrade version of google-cloud-bigquery library to 1.110.0
* Upgrade version of google-cloud-bigquerystorage library to 0.126.0-beta
## 0.13.1-beta - 2020-02-14
* The BigQuery Storage API was reverted to v1beta1. The v1beta2 API has not been
fully integrated with custom IAM roles, which can cause issues to customers using
those. The v1beta1 doesn't have this problem. Once the integration is complete,
the API will be upgraded again.
## 0.13.0-beta - 2020-02-12
**Please don't use this version, use 0.13.1-beta instead**
* Moved to use BigQuery Storage API v1beta2
* Changed the `parallelism` parameter to `maxParallelism` in order to reflect the
  change in the underlying API (the old parameter has been deprecated)
* Upgrade version of google-cloud-bigquerystorage library to 0.122.0-beta.
* Issue #73: Optimized empty projection used for count() execution.
* Issue #121: Added the option to configure CreateDisposition when inserting data
to BigQuery.
## 0.12.0-beta - 2020-01-29
* Issue #72: Moved the shaded jar name from classifier to a new artifact name
* Issues #73, #87: Added better logging to help understand which columns and filters
are asked by spark, and which are passed down to BigQuery
* Issue #107: The connector will now alert when it is used with the wrong Scala version
## 0.11.0-beta - 2019-12-18
* Upgrade version of google-cloud-bigquery library to 1.102.0
* Upgrade version of google-cloud-bigquerystorage library to 0.120.0-beta
* Issue #6: Do not initialize bigquery options by default
* Added ReadRows retries on GRPC internal errors
* Issue #97: Added support for GEOGRAPHY type
## 0.10.0-beta - 2019-11-14
* Added preliminary support for reading from BigQuery views (Issue #21)
* Writing to BigQuery now white-lists the intermediate files instead
  of black-listing the _SUCCESS files (PR #75)
* Added count() tip to the README
## 0.9.2-beta - 2019-11-11
* Upgrade version of google-cloud-bigquery library to 1.99.0
* Upgrade version of google-cloud-bigquerystorage library to 0.117.0-beta
* Upgrade version of grpc-netty-shaded library to 1.24.1
* Supporting reading large rows (Issue #22, https://issuetracker.google.com/143730055)
* Made sure that all filters are pushed down (Issue #74)
* Fixing log severity
* Added Java Example
## 0.9.1-beta - 2019-10-11
* A NPE in the shutdown hook has occurred in case the delete had succeeded
in the first time. This had no impact on the actual logic, just on the log.
The method now verifies the path exists before trying to delete it, and
hides the redundant exception.
* Added support for data.write.bigquery("table") implicit import, fixed
regression caused by relying of shaded scalalogging
## 0.9.0-beta - 2019-10-08
* Added write support
* Switch requested partitions from SparkContext.defaultParallelism to one
partition per 400MB. This should work better with balanced sharding and
dynamic allocation.
* Cross built for both Scala 2.11 and 2.12
* Upgrade version of google-cloud-bigquery library to 1.96.0
* Upgrade version of google-cloud-bigquerystorage library to 0.114.0-beta
* Upgrade version of grpc-netty-shaded library to 1.23.0
## 0.8.1-beta - 2019-09-12
* Added a shaded version
## 0.8.0-beta - 2019-07-22
* Upgrade version of google-cloud-bigquery library to 1.82.0
* Upgrade version of google-cloud-bigquerystorage library to 0.98.0-beta
* Upgrade version of grpc-netty-shaded library to 1.22.1
* Use balanced sharding strategy to assign roughly same number of rows to each
read stream.
* Update filtering support to reflect full set of filters supported by storage
API (multi-clause predicates, pseudo-column variables, additional filter
clauses - IsNull, IsNotNull, In, StringStartsWith, StringEndsWith,
StringContains etc.)
## 0.7.0-beta - 2019-06-26
* Switch to using the BALANCED sharding strategy, which balances work between
streams on the server-side, leading to a more uniform distribution of rows
between partitions
## 0.6.0-beta - 2019-06-25
* Support specifying credentials through configurations
## 0.5.1-beta - 2019-04-26
* Support Numeric type
* Refactor tests to manage test datasets
## 0.5.0-beta - 2019-03-06
* Initial release
================================================
FILE: CONTRIBUTING.md
================================================
# How to Contribute
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
## Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
## Community Guidelines
This project follows [Google's Open Source Community
Guidelines](https://opensource.google.com/conduct/).
## Building and Testing the Connector
The connector is built using the Maven wrapper. The project contains several
connectors, sharing some of the code among them. The connectors are:
* spark-bigquery_2.11 - a Scala 2.11 based connector, targeting Spark 2.3 and
2.4 using Scala 2.11.
* spark-bigquery_2.12 - a Scala 2.12 based connector, targeting Spark 2.4 and
3.x using Scala 2.12.
* spark-2.4-bigquery - a Java only connector, targeting Spark 2.4 (of all
Scala versions), using the new DataSource APIs.
* spark-3.1-bigquery - a Java only connector, targeting Spark 3.1 (of all
Scala versions), using the new DataSource APIs. Still under development.
The project's artifacts are:
* `spark-bigquery-parent` - The parent POM for all artifacts. Common settings
and artifact version should be defined here.
* `bigquery-connector-common` - Utility classes for working with the BigQuery
APIs. This artifact has no dependency on Spark. This artifact can potentially
be used by non-spark connectors
* `spark-bigquery-connector-common` - Common utilities and logic shared among
all connectors (Scala and Java alike). Whenever possible, new code should be
in this artifact
* `spark-bigquery-dsv1/spark-bigquery-dsv1-parent` - Common settings for the
Scala based DataSource V1 implementation.
* `spark-bigquery-dsv1/spark-bigquery-dsv1-spark3-support` - As some of the APIs
used by the connector have changed between Spark 2.x and 3.x, they are wrapped
in a neutral interface with wrappers for the two implementations. Unlike the
other dsv1 artifacts, this project depends on Spark 3 for this reason.
* `spark-bigquery-dsv1/spark-bigquery_2.11` and
`spark-bigquery-dsv1/spark-bigquery_2.12` - The implementation of the Scala
based connectors. Both connectors share the same code via a symbolic link, so
a change in one of them will automatically affect the other.
* `spark-bigquery-dsv1/spark-bigquery-with-dependencies-parent` - Common
settings for the shaded artifacts.
* `spark-bigquery-dsv1/spark-bigquery-with-dependencies_2.11` and
`spark-bigquery-dsv1/spark-bigquery-with-dependencies_2.12` - The shaded
distributable of the connector, containing all the dependencies.
* `spark-bigquery-dsv2/spark-bigquery-dsv2-parent` - Common settings for the
Java only DataSource V2 implementations.
* `spark-bigquery-dsv2/spark-2.4-bigquery` - A Java only DataSource V2
connector implementing the Spark 2.4 APIs.
* `spark-bigquery-dsv2/spark-3.1-bigquery` - A Java only DataSource V2
connector implementing the Spark 3.1 APIs. Under development.
* `spark-bigquery-python-lib` - The python support library, adding BigQuery
types not supported by Spark.
As building and running all the connectors is a lengthy process, the project is
split into several [profiles](https://maven.apache.org/guides/introduction/introduction-to-profiles.html),
each building only a subset of the project's artifacts. The profiles are:
* `dsv1` - Running both Scala/DSv1 connectors.
* `dsv1_2.11` - Running just the Scala 2.11 connector.
* `dsv1_2.12` - Running just the Scala 2.12 connector.
* `dsv2` - Running both Java/DSv2 connectors.
* `dsv2_2.4` - Running just the Java Spark 2.4 connector.
* `dsv2_3.1` - Running just the Java Spark 3.1 connector.
* `all` - Running all the connectors.
Example: In order to compile **just** the Scala 2.12 connector run
`./mvnw install -Pdsv1_2.12`.
**Note**: Java 1.8 is required; make sure **/usr/libexec/java_home** is set to Java 1.8 before building any module.
**Important**: If no profile is selected, then only the common artifacts are run.
The integration and acceptance tests are disabled by default. In order to run it please add the
following profiles to the run:
* Integration tests - `./mvnw failsafe:integration-test -Pdsv1_2.11,integration`
* Acceptance tests - `./mvnw verify -Pdsv2_2.4,acceptance`
In order to run the integration tests make sure that your GCP user has the proper accounts for creating and deleting
datasets and tables in your test project in BigQuery. It will also need the permissions to upload files to the test
bucket in GCS as well as delete them.
Setting the following environment variables is required to run the integration tests:
* `GOOGLE_APPLICATION_CREDENTIALS` - the full path to a credentials JSON, either a service account or the result of a
`gcloud auth login` run
* `GOOGLE_CLOUD_PROJECT` - The Google cloud platform project used to test the connector
* `TEMPORARY_GCS_BUCKET` - The GCS bucket used to test writing to BigQuery during the integration tests
* `ACCEPTANCE_TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the acceptance tests
* `SERVERLESS_NETWORK_URI` - The network used by the serverless batches during the acceptance tests
* `BIGLAKE_CONNECTION_ID` - The connection ID to create a BigLake table using a Cloud Resource connection
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README-template.md
================================================
# Apache Spark SQL connector for Google BigQuery
<!--- TODO(#2): split out into more documents. -->
The connector supports reading [Google BigQuery](https://cloud.google.com/bigquery/) tables into Spark's DataFrames, and writing DataFrames back into BigQuery.
This is done by using the [Spark SQL Data Source API](https://spark.apache.org/docs/latest/sql-programming-guide.html#data-sources) to communicate with BigQuery.
## Unreleased Changes
This Readme may include documentation for changes that haven't been released yet. The latest release's documentation and source code are found here.
https://github.com/GoogleCloudDataproc/spark-bigquery-connector/blob/master/README.md
## BigQuery Storage API
The [Storage API](https://cloud.google.com/bigquery/docs/reference/storage) streams data in parallel directly from BigQuery via gRPC without using Google Cloud Storage as an intermediary.
It has a number of advantages over using the previous export-based read flow that should generally lead to better read performance:
### Direct Streaming
It does not leave any temporary files in Google Cloud Storage. Rows are read directly from BigQuery servers using the Arrow or Avro wire formats.
### Filtering
The new API allows column and predicate filtering to only read the data you are interested in.
#### Column Filtering
Since BigQuery is [backed by a columnar datastore](https://cloud.google.com/blog/big-data/2016/04/inside-capacitor-bigquerys-next-generation-columnar-storage-format), it can efficiently stream data without reading all columns.
#### Predicate Filtering
The Storage API supports arbitrary pushdown of predicate filters. Connector version 0.8.0-beta and above support pushdown of arbitrary filters to BigQuery.
There is a known issue in Spark that does not allow pushdown of filters on nested fields. For example - filters like `address.city = "Sunnyvale"` will not be pushed down to BigQuery.
### Dynamic Sharding
The API rebalances records between readers until they all complete. This means that all Map phases will finish nearly concurrently. See this blog article on [how dynamic sharding is similarly used in Google Cloud Dataflow](https://cloud.google.com/blog/products/gcp/no-shard-left-behind-dynamic-work-rebalancing-in-google-cloud-dataflow).
See [Configuring Partitioning](#configuring-partitioning) for more details.
## Requirements
### Enable the BigQuery Storage API
Follow [these instructions](https://cloud.google.com/bigquery/docs/reference/storage/#enabling_the_api).
### Create a Google Cloud Dataproc cluster (Optional)
If you do not have an Apache Spark environment you can create a Cloud Dataproc cluster with pre-configured auth. The following examples assume you are using Cloud Dataproc, but you can use `spark-submit` on any cluster.
Any Dataproc cluster using the API needs the 'bigquery' or 'cloud-platform' scopes. Dataproc clusters have the 'bigquery' scope by default, so most clusters in enabled projects should work by default e.g.
```
MY_CLUSTER=...
gcloud dataproc clusters create "$MY_CLUSTER"
```
## Downloading and Using the Connector
The latest version of the connector is publicly available in the following links:
| version | Link |
|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Spark 4.1 | `gs://spark-lib/bigquery/spark-4.1-bigquery-${next-release-tag}-preview.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-4.1-bigquery-${next-release-tag}-preview.jar)) |
| Spark 4.0 | `gs://spark-lib/bigquery/spark-4.0-bigquery-${next-release-tag}.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-4.0-bigquery-${next-release-tag}.jar)) |
| Spark 3.5 | `gs://spark-lib/bigquery/spark-3.5-bigquery-${next-release-tag}.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.5-bigquery-${next-release-tag}.jar)) |
| Spark 3.4 | `gs://spark-lib/bigquery/spark-3.4-bigquery-${next-release-tag}.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.4-bigquery-${next-release-tag}.jar)) |
| Spark 3.3 | `gs://spark-lib/bigquery/spark-3.3-bigquery-${next-release-tag}.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.3-bigquery-${next-release-tag}.jar)) |
| Spark 3.2 | `gs://spark-lib/bigquery/spark-3.2-bigquery-${next-release-tag}.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.2-bigquery-${next-release-tag}.jar)) |
| Spark 3.1 | `gs://spark-lib/bigquery/spark-3.1-bigquery-${next-release-tag}.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.1-bigquery-${next-release-tag}.jar)) |
| Spark 2.4 | `gs://spark-lib/bigquery/spark-2.4-bigquery-0.37.0.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-2.4-bigquery-0.37.0.jar)) |
| Scala 2.13 | `gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.13-${next-release-tag}.jar` ([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-bigquery-with-dependencies_2.13-${next-release-tag}.jar)) |
| Scala 2.12 | `gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${next-release-tag}.jar` ([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${next-release-tag}.jar)) |
| Scala 2.11 | `gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.11-0.29.0.jar` ([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-bigquery-with-dependencies_2.11-0.29.0.jar)) |
The first eight versions are Java based connectors targeting Spark 2.4/3.1/3.2/3.3/3.4/3.5/4.0/4.1 of all Scala versions built on the new
Data Source APIs (Data Source API v2) of Spark.
The final three connectors are Scala based connectors, please use the jar relevant to your Spark installation as outlined
below.
### Connector to Spark Compatibility Matrix
| Connector \ Spark | 2.3 | 2.4 | 3.0 | 3.1 | 3.2 | 3.3 |3.4 | 3.5 |
|---------------------------------------|---------|---------|---------|---------|---------|---------|---------|---------|
| spark-3.5-bigquery | | | | | | | | ✓ |
| spark-3.4-bigquery | | | | | | | ✓ | ✓ |
| spark-3.3-bigquery | | | | | | ✓ | ✓ | ✓ |
| spark-3.2-bigquery | | | | | ✓ | ✓ | ✓ | ✓ |
| spark-3.1-bigquery | | | | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-2.4-bigquery | | ✓ | | | | | | |
| spark-bigquery-with-dependencies_2.13 | | | | | ✓ | ✓ | ✓ | ✓ |
| spark-bigquery-with-dependencies_2.12 | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-bigquery-with-dependencies_2.11 | ✓ | ✓ | | | | | | |
### Connector to Dataproc Image Compatibility Matrix
| Connector \ Dataproc Image | 1.3 | 1.4 | 1.5 | 2.0 | 2.1 | 2.2 | Serverless<br>Image 1.0 | Serverless<br>Image 2.0 | Serverless<br>Image 2.1 | Serverless<br>Image 2.2 |
|---------------------------------------|---------|---------|---------|---------|---------|---------|-------------------------|-------------------------|-------------------------|-------------------------|
| spark-3.5-bigquery | | | | | | ✓ | | | | ✓ |
| spark-3.4-bigquery | | | | | | ✓ | | | ✓ | ✓ |
| spark-3.3-bigquery | | | | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-3.2-bigquery | | | | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-3.1-bigquery | | | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-2.4-bigquery | | ✓ | ✓ | | | | | | | |
| spark-bigquery-with-dependencies_2.13 | | | | | | | | ✓ | ✓ | ✓ |
| spark-bigquery-with-dependencies_2.12 | | | ✓ | ✓ | ✓ | ✓ | ✓ | | | |
| spark-bigquery-with-dependencies_2.11 | ✓ | ✓ | | | | | | | | |
### Maven / Ivy Package Usage
The connector is also available from the
[Maven Central](https://repo1.maven.org/maven2/com/google/cloud/spark/)
repository. It can be used using the `--packages` option or the
`spark.jars.packages` configuration property. Use the following value
| version | Connector Artifact |
|------------|------------------------------------------------------------------------------------|
| Spark 4.1 | `com.google.cloud.spark:spark-4.1-bigquery:${next-release-tag}-preview` |
| Spark 4.0 | `com.google.cloud.spark:spark-4.0-bigquery:${next-release-tag}` |
| Spark 3.5 | `com.google.cloud.spark:spark-3.5-bigquery:${next-release-tag}` |
| Spark 3.4 | `com.google.cloud.spark:spark-3.4-bigquery:${next-release-tag}` |
| Spark 3.3 | `com.google.cloud.spark:spark-3.3-bigquery:${next-release-tag}` |
| Spark 3.2 | `com.google.cloud.spark:spark-3.2-bigquery:${next-release-tag}` |
| Spark 3.1 | `com.google.cloud.spark:spark-3.1-bigquery:${next-release-tag}` |
| Spark 2.4 | `com.google.cloud.spark:spark-2.4-bigquery:0.37.0` |
| Scala 2.13 | `com.google.cloud.spark:spark-bigquery-with-dependencies_2.13:${next-release-tag}` |
| Scala 2.12 | `com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:${next-release-tag}` |
| Scala 2.11 | `com.google.cloud.spark:spark-bigquery-with-dependencies_2.11:0.29.0` |
### Specifying the Spark BigQuery connector version in a Dataproc cluster
Dataproc clusters created using image 2.1 and above, or batches using the Dataproc serverless service come with built-in Spark BigQuery connector.
Using the standard `--jars` or `--packages` (or alternatively, the `spark.jars`/`spark.jars.packages` configuration) won't help in this case as the built-in connector takes precedence.
To use another version than the built-in one, please do one of the following:
* For Dataproc clusters, using image 2.1 and above, add the following flag on cluster creation to upgrade the version `--metadata SPARK_BQ_CONNECTOR_VERSION=${next-release-tag}`, or `--metadata SPARK_BQ_CONNECTOR_URL=gs://spark-lib/bigquery/spark-3.3-bigquery-${next-release-tag}.jar` to create the cluster with a different jar. The URL can point to any valid connector JAR for the cluster's Spark version.
* For Dataproc serverless batches, add the following property on batch creation to upgrade the version: `--properties dataproc.sparkBqConnector.version=${next-release-tag}`, or `--properties dataproc.sparkBqConnector.uri=gs://spark-lib/bigquery/spark-3.3-bigquery-${next-release-tag}.jar` to create the batch with a different jar. The URL can point to any valid connector JAR for the runtime's Spark version.
## Hello World Example
You can run a simple PySpark wordcount against the API without compilation by running
**Dataproc image 1.5 and above**
```
gcloud dataproc jobs submit pyspark --cluster "$MY_CLUSTER" \
--jars gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${next-release-tag}.jar \
examples/python/shakespeare.py
```
**Dataproc image 1.4 and below**
```
gcloud dataproc jobs submit pyspark --cluster "$MY_CLUSTER" \
--jars gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.11-0.29.0.jar \
examples/python/shakespeare.py
```
## Example Codelab ##
https://codelabs.developers.google.com/codelabs/pyspark-bigquery
## Usage
The connector uses the cross language [Spark SQL Data Source API](https://spark.apache.org/docs/latest/sql-programming-guide.html#data-sources):
### Reading data from a BigQuery table
```
df = spark.read \
.format("bigquery") \
.load("bigquery-public-data.samples.shakespeare")
```
or the Scala only implicit API:
```
import com.google.cloud.spark.bigquery._
val df = spark.read.bigquery("bigquery-public-data.samples.shakespeare")
```
The connector supports reading from tables that contain spaces in their names.
**Note on ambiguous table names**: If a table name contains both spaces and a SQL keyword (e.g., "from", "where", "join"), it may be misinterpreted as a SQL query. To resolve this ambiguity, quote the table identifier with backticks \`. For example:
```
df = spark.read \
.format("bigquery") \
.load("`my_project.my_dataset.orders from 2023`")
```
For more information, see additional code samples in
[Python](examples/python/shakespeare.py),
[Scala](spark-bigquery-dsv1/src/main/scala/com/google/cloud/spark/bigquery/examples/Shakespeare.scala)
and
[Java](spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/examples/JavaShakespeare.java).
### Reading data from a BigQuery query
The connector allows you to run any
[Standard SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax)
SELECT query on BigQuery and fetch its results directly to a Spark Dataframe.
This is easily done as described in the following code sample:
```
spark.conf.set("viewsEnabled","true")
sql = """
SELECT tag, COUNT(*) c
FROM (
SELECT SPLIT(tags, '|') tags
FROM `bigquery-public-data.stackoverflow.posts_questions` a
WHERE EXTRACT(YEAR FROM creation_date)>=2014
), UNNEST(tags) tag
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10
"""
df = spark.read.format("bigquery").load(sql)
df.show()
```
Which yields the result
```
+----------+-------+
| tag| c|
+----------+-------+
|javascript|1643617|
| python|1352904|
| java|1218220|
| android| 913638|
| php| 911806|
| c#| 905331|
| html| 769499|
| jquery| 608071|
| css| 510343|
| c++| 458938|
+----------+-------+
```
A second option is to use the `query` option like this:
```
df = spark.read.format("bigquery").option("query", sql).load()
```
Notice that the execution should be faster as only the result is transmitted
over the wire. In a similar fashion the queries can include JOINs more
efficiently than running joins on Spark or use other BigQuery features such as
[subqueries](https://cloud.google.com/bigquery/docs/reference/standard-sql/subqueries),
[BigQuery user defined functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions),
[wildcard tables](https://cloud.google.com/bigquery/docs/reference/standard-sql/wildcard-table-reference),
[BigQuery ML](https://cloud.google.com/bigquery-ml/docs)
and more.
In order to use this feature the `viewsEnabled` configuration MUST be set to
`true`. This can also be done globally as shown in the example above.
**Important:** This feature is implemented by running the query on BigQuery and
saving the result into a temporary table, of which Spark will read the results
from. This may add additional costs on your BigQuery account.
### Reading From Parameterized Queries
The connector supports executing [BigQuery parameterized queries](https://cloud.google.com/bigquery/docs/parameterized-queries) using the
standard `spark.read.format('bigquery')` API.
To use parameterized queries:
1. Provide the SQL query containing parameters using the
`.option("query", "SQL_STRING")` with named (`@param`) or positional (`?`) parameters.
2. Specify the parameter values using dedicated options:
* **Named Parameters:** Use options prefixed with `NamedParameters.`. The
parameter name follows the prefix (case-insensitive).
* Format: `.option("NamedParameters.<parameter_name>", "TYPE:value")`
* Example: `.option("NamedParameters.corpus", "STRING:romeoandjuliet")`
* **Positional Parameters:** Use options prefixed with
`PositionalParameters.`. The 1-based index follows the prefix.
* Format:
`.option("PositionalParameters.<parameter_index>", "TYPE:value")`
* Example: `.option("PositionalParameters.1", "STRING:romeoandjuliet")`
The `TYPE` in the `TYPE:value` string specifies the BigQuery Standard SQL data
type. Supported types currently include: `BOOL`, `INT64`, `FLOAT64`, `NUMERIC`,
`STRING`, `DATE`, `DATETIME`, `JSON`, `TIME`, `GEOGRAPHY`, `TIMESTAMP`.
`ARRAY` and `STRUCT` types are not supported as parameters at this time.
### Reading From Views
The connector has a preliminary support for reading from
[BigQuery views](https://cloud.google.com/bigquery/docs/views-intro). Please
note there are a few caveats:
* BigQuery views are not materialized by default, which means that the connector
needs to materialize them before it can read them. This process affects the
read performance, even before running any `collect()` or `count()` action.
* The materialization process can also incur additional costs to your BigQuery
bill.
* Reading from views is **disabled** by default. In order to enable it,
either set the viewsEnabled option when reading the specific view
(`.option("viewsEnabled", "true")`) or set it globally by calling
`spark.conf.set("viewsEnabled", "true")`.
**Notice:** Before version 0.42.1 of the connector, the following configurations
are required:
* By default, the materialized views are created in the same project and
dataset. Those can be configured by the optional `materializationProject`
and `materializationDataset` options, respectively. These options can also
be globally set by calling `spark.conf.set(...)` before reading the views.
* As mentioned in the [BigQuery documentation](https://cloud.google.com/bigquery/docs/writing-results#temporary_and_permanent_tables),
the `materializationDataset` should be in the same location as the view.
Starting version 0.42.1 those configurations are **redundant** and are ignored.
It is highly recommended to upgrade to this version or a later one to enjoy
simpler configuration when using views or loading from queries.
### Writing data to BigQuery
Writing DataFrames to BigQuery can be done using two methods: Direct and Indirect.
#### Direct write using the BigQuery Storage Write API
In this method the data is written directly to BigQuery using the
[BigQuery Storage Write API](https://cloud.google.com/bigquery/docs/write-api). In order to enable this option, please
set the `writeMethod` option to `direct`, as shown below:
```
df.write \
.format("bigquery") \
.option("writeMethod", "direct") \
.option("writeAtLeastOnce", "true")
.save("dataset.table")
```
Writing to existing partitioned tables (date partitioned, ingestion time partitioned and range
partitioned) in APPEND save mode and OVERWRITE mode (only date and range partitioned) is fully supported by the connector and the BigQuery Storage Write
API. The use of `datePartition`, `partitionField`, `partitionType`, `partitionRangeStart`, `partitionRangeEnd`, `partitionRangeInterval`
described below is not supported at this moment by the direct write method.
**Important:** Please refer to the [data ingestion pricing](https://cloud.google.com/bigquery/pricing#data_ingestion_pricing)
page regarding the BigQuery Storage Write API pricing.
**Important:** Please use version 0.24.2 and above for direct writes, as previous
versions have a bug that may cause a table deletion in certain cases.
#### Indirect write
In this method the data is written first to GCS, and then it is loaded into BigQuery. A GCS bucket must be configured
to indicate the temporary data location.
```
df.write \
.format("bigquery") \
.option("temporaryGcsBucket","some-bucket") \
.save("dataset.table")
```
The data is temporarily stored using the [Apache Parquet](https://parquet.apache.org/),
[Apache ORC](https://orc.apache.org/) or [Apache Avro](https://avro.apache.org/) formats.
The GCS bucket and the format can also be set globally using Spark's RuntimeConfig like this:
```
spark.conf.set("temporaryGcsBucket","some-bucket")
df.write \
.format("bigquery") \
.save("dataset.table")
```
When streaming a DataFrame to BigQuery, each batch is written in the same manner as a non-streaming DataFrame.
Note that a HDFS compatible
[checkpoint location](http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#recovering-from-failures-with-checkpointing)
(eg: `path/to/HDFS/dir` or `gs://checkpoint-bucket/checkpointDir`) must be specified.
```
df.writeStream \
.format("bigquery") \
.option("temporaryGcsBucket","some-bucket") \
.option("checkpointLocation", "some-location") \
.option("table", "dataset.table")
```
**Important:** The connector does not configure the GCS connector, in order to avoid conflict with another GCS connector, if exists. In order to use the write capabilities of the connector, please configure the GCS connector on your cluster as explained [here](https://github.com/GoogleCloudPlatform/bigdata-interop/tree/master/gcs).
#### Schema Behavior on Overwrite
When using `SaveMode.Overwrite` (`.mode("overwrite")`), the connector **preserves the existing table's schema**.
The data is truncated, but column types, descriptions, and policy tags are retained.
```
df.write \
.format("bigquery") \
.mode("overwrite") \
.option("temporaryGcsBucket","some-bucket") \
.save("dataset.table")
```
**Important:** If your DataFrame has a different schema than the existing table (e.g., changing a column from
`INTEGER` to `DOUBLE`), the write will fail with a type mismatch error. To change the schema, either:
- Drop the table before overwriting
- Use BigQuery DDL to alter the table schema first
For some of the schema difference, the following options can work with overwrite:
Programmatic Relaxation: Set `.option("allowFieldRelaxation", "true")` for nullability changes and `.option("allowFieldAddition", "true")` for new columns.
This behavior was introduced between version 0.22.0 and 0.41.0 to prevent accidental schema drift.
**Note:** This behavior applies to both the `indirect` (default) and `direct` write methods.
### Running SQL on BigQuery
The connector supports Spark's [SparkSession#executeCommand](https://archive.apache.org/dist/spark/docs/3.0.0/api/java/org/apache/spark/sql/SparkSession.html#executeCommand-java.lang.String-java.lang.String-scala.collection.immutable.Map-)
with the Spark-X.Y-bigquery connectors. It can be used to run any arbitrary DDL/DML StandardSQL statement on BigQuery as
a query job. `SELECT` statements are not supported, as those are supported by reading from query as shown above. It can
be used as follows:
```
spark.executeCommand("bigquery", sql, options)
```
Notice the following:
* Notice that apart from the authentication options no other options are supported by this functionality.
* This API is available only in the Scala/Java API. PySpark does not provide it.
### Properties
The API Supports a number of options to configure the read
<!--- TODO(#2): Convert to markdown -->
<table id="propertytable">
<style>
table#propertytable td, table th
{
word-break:break-word
}
</style>
<tr valign="top">
<th style="min-width:240px">Property</th>
<th>Meaning</th>
<th style="min-width:80px">Usage</th>
</tr>
<tr valign="top">
<td><code>table</code>
</td>
<td>The BigQuery table in the format <code>[[project:]dataset.]table</code>.
It is recommended to use the <code>path</code> parameter of
<code>load()</code>/<code>save()</code> instead. This option has been
deprecated and will be removed in a future version.
<br/><strong>(Deprecated)</strong>
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>dataset</code>
</td>
<td>The dataset containing the table. This option should be used with
standard table and views, but not when loading query results.
<br/>(Optional unless omitted in <code>table</code>)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>project</code>
</td>
<td>The Google Cloud Project ID of the table. This option should be used with
standard table and views, but not when loading query results.
<br/>(Optional. Defaults to the project of the Service Account being used)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>billingProject</code>
</td>
<td>The Google Cloud Project ID to use for <strong>billing</strong> (API calls, query execution).
<br/>(Optional. Defaults to the project of the Service Account being used)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>parentProject</code>
</td>
<td><strong>(Deprecated)</strong> Alias for <code>billingProject</code>.
<br/>(Optional. Defaults to the project of the Service Account being used)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>location</code>
</td>
<td>The BigQuery location where the data resides (e.g. US, EU, asia-northeast1).
<br/>(Optional. Defaults to BigQuery default)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>maxParallelism</code>
</td>
<td>The maximal number of partitions to split the data into. Actual number
may be less if BigQuery deems the data small enough. If there are not
enough executors to schedule a reader per partition, some partitions may
be empty.
<br/><b>Important:</b> The old parameter (<code>parallelism</code>) is
still supported but in deprecated mode. It will be removed in
version 1.0 of the connector.
<br/>(Optional. Defaults to the larger of the preferredMinParallelism and 20,000.)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>preferredMinParallelism</code>
</td>
<td>The preferred minimal number of partitions to split the data into. Actual number
may be less if BigQuery deems the data small enough. If there are not
enough executors to schedule a reader per partition, some partitions may
be empty.
<br/>(Optional. Defaults to the smallest of 3 times the application's default parallelism
and maxParallelism.)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>viewsEnabled</code>
</td>
<td>Enables the connector to read from views and not only tables. Please read
the <a href="#reading-from-views">relevant section</a> before activating
this option.
<br/>(Optional. Defaults to <code>false</code>)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>readDataFormat</code>
</td>
<td>Data Format for reading from BigQuery. Options : <code>ARROW</code>, <code>AVRO</code>
<br/>(Optional. Defaults to <code>ARROW</code>)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>optimizedEmptyProjection</code>
</td>
<td>The connector uses an optimized empty projection (select without any
columns) logic, used for <code>count()</code> execution. This logic takes
the data directly from the table metadata or performs a much more efficient
`SELECT COUNT(*) WHERE...` in case there is a filter. You can cancel the
use of this logic by setting this option to <code>false</code>.
<br/>(Optional, defaults to <code>true</code>)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>pushAllFilters</code>
</td>
<td>If set to <code>true</code>, the connector pushes all the filters Spark can delegate
to BigQuery Storage API. This reduces amount of data that needs to be sent from
BigQuery Storage API servers to Spark clients. This option has been
deprecated and will be removed in a future version.
<br/>(Optional, defaults to <code>true</code>)
<br/><strong>(Deprecated)</strong>
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>bigQueryJobLabel</code>
</td>
<td>Can be used to add labels to the connector initiated query and load
BigQuery jobs. Multiple labels can be set.
<br/>(Optional)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>bigQueryTableLabel</code>
</td>
<td>Can be used to add labels to the table while writing to a table. Multiple
labels can be set.
<br/>(Optional)
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>traceApplicationName</code>
</td>
<td>Application name used to trace BigQuery Storage read and write sessions.
Setting the application name is required to set the trace ID on the
sessions.
<br/>(Optional)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>traceJobId</code>
</td>
<td>Job ID used to trace BigQuery Storage read and write sessions.
<br/>(Optional, defaults to the Dataproc job ID if it exists, otherwise uses
the Spark application ID)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>createDisposition</code>
</td>
<td>Specifies whether the job is allowed to create new tables. The permitted
values are:
<ul>
<li><code>CREATE_IF_NEEDED</code> - Configures the job to create the
table if it does not exist.</li>
<li><code>CREATE_NEVER</code> - Configures the job to fail if the
table does not exist.</li>
</ul>
This option takes place only in case Spark has decided to write data
to the table based on the SaveMode.
<br/>(Optional. Defaults to CREATE_IF_NEEDED.)
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>writeMethod</code>
</td>
<td>Controls the method
in which the data is written to BigQuery. Available values are <code>direct</code>
to use the BigQuery Storage Write API and <code>indirect</code> which writes the
data first to GCS and then triggers a BigQuery load operation. See more
<a href="#writing-data-to-bigquery">here</a>
<br/>(Optional, defaults to <code>indirect</code>)
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>writeAtLeastOnce</code>
</td>
<td>Guarantees that data is written to BigQuery at least once. This is a lesser
guarantee than exactly once. This is suitable for streaming scenarios
in which data is continuously being written in small batches.
<br/>(Optional. Defaults to <code>false</code>)
<br/><i>Supported only by the `DIRECT` write method and mode is <b>NOT</b> `Overwrite`.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>temporaryGcsBucket</code>
</td>
<td>The GCS bucket that temporarily holds the data before it is loaded to
BigQuery. Required unless set in the Spark configuration
(<code>spark.conf.set(...)</code>).
<br/>Defaults to the `fs.gs.system.bucket` if it exists, for example on Google Cloud Dataproc clusters, starting version 0.42.0.
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>persistentGcsBucket</code>
</td>
<td>The GCS bucket that holds the data before it is loaded to
BigQuery. If informed, the data won't be deleted after write data
into BigQuery.
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>persistentGcsPath</code>
</td>
<td>The GCS path that holds the data before it is loaded to
BigQuery. Used only with <code>persistentGcsBucket</code>.
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>intermediateFormat</code>
</td>
<td>The format of the data before it is loaded to BigQuery, values can be
either "parquet","orc" or "avro". In order to use the Avro format, the
spark-avro package must be added in runtime.
<br/>(Optional. Defaults to <code>parquet</code>). On write only. Supported only for the `INDIRECT` write method.
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>useAvroLogicalTypes</code>
</td>
<td>When loading from Avro (`.option("intermediateFormat", "avro")`), BigQuery uses the underlying Avro types instead of the logical types [by default](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types). Supplying this option converts Avro logical types to their corresponding BigQuery data types.
<br/>(Optional. Defaults to <code>false</code>). On write only.
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>datePartition</code>
</td>
<td>The date partition the data is going to be written to. Should be a date string
given in the format <code>YYYYMMDD</code>. Can be used to overwrite the data of
a single partition, like this: <code><br/>df.write.format("bigquery")
<br/> .option("datePartition", "20220331")
<br/> .mode("overwrite")
<br/> .save("table")</code>
<br/>(Optional). On write only.
<br/> Can also be used with different partition types like:
<br/> HOUR: <code>YYYYMMDDHH</code>
<br/> MONTH: <code>YYYYMM</code>
<br/> YEAR: <code>YYYY</code>
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionField</code>
</td>
<td>If this field is specified, the table is partitioned by this field.
<br/>For Time partitioning, specify together with the option `partitionType`.
<br/>For Integer-range partitioning, specify together with the 3 options: `partitionRangeStart`, `partitionRangeEnd`, `partitionRangeInterval`.
<br/>The field must be a top-level TIMESTAMP or DATE field for Time partitioning, or INT64 for Integer-range partitioning. Its mode must be <strong>NULLABLE</strong>
or <strong>REQUIRED</strong>.
If the option is not set for a Time partitioned table, then the table will be partitioned by pseudo
column, referenced via either<code>'_PARTITIONTIME' as TIMESTAMP</code> type, or
<code>'_PARTITIONDATE' as DATE</code> type.
<br/>(Optional).
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionExpirationMs</code>
</td>
<td>Number of milliseconds for which to keep the storage for partitions in the table.
The storage in a partition will have an expiration time of its partition time plus this value.
<br/>(Optional).
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionType</code>
</td>
<td>Used to specify Time partitioning.
<br/>Supported types are: <code>HOUR, DAY, MONTH, YEAR</code>
<br/> This option is <b>mandatory</b> for a target table to be Time partitioned.
<br/>(Optional. Defaults to DAY if PartitionField is specified).
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionRangeStart</code>,
<code>partitionRangeEnd</code>,
<code>partitionRangeInterval</code>
</td>
<td>Used to specify Integer-range partitioning.
<br/>These options are <b>mandatory</b> for a target table to be Integer-range partitioned.
<br/>All 3 options must be specified.
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>clusteredFields</code>
</td>
<td>A string of non-repeated, top level columns separated by comma.
<br/>(Optional).
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>allowFieldAddition</code>
</td>
<td>Adds the <a href="https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/JobInfo.SchemaUpdateOption.html#ALLOW_FIELD_ADDITION" target="_blank">ALLOW_FIELD_ADDITION</a>
SchemaUpdateOption to the BigQuery LoadJob. Allowed values are <code>true</code> and <code>false</code>.
<br/>(Optional. Defaults to <code>false</code>.)
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>allowFieldRelaxation</code>
</td>
<td>Adds the <a href="https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/JobInfo.SchemaUpdateOption.html#ALLOW_FIELD_RELAXATION" target="_blank">ALLOW_FIELD_RELAXATION</a>
SchemaUpdateOption to the BigQuery LoadJob. Allowed values are <code>true</code> and <code>false</code>.
<br/>(Optional. Defaults to <code>false</code>.)
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>proxyAddress</code>
</td>
<td> Address of the proxy server. The proxy must be a HTTP proxy and address should be in the `host:port` format.
Can be alternatively set in the Spark configuration (<code>spark.conf.set(...)</code>) or in Hadoop
Configuration (<code>fs.gs.proxy.address</code>).
<br/> (Optional. Required only if connecting to GCP via proxy.)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>proxyUsername</code>
</td>
<td> The userName used to connect to the proxy. Can be alternatively set in the Spark configuration
(<code>spark.conf.set(...)</code>) or in Hadoop Configuration (<code>fs.gs.proxy.username</code>).
<br/> (Optional. Required only if connecting to GCP via proxy with authentication.)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>proxyPassword</code>
</td>
<td> The password used to connect to the proxy. Can be alternatively set in the Spark configuration
(<code>spark.conf.set(...)</code>) or in Hadoop Configuration (<code>fs.gs.proxy.password</code>).
<br/> (Optional. Required only if connecting to GCP via proxy with authentication.)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>httpMaxRetry</code>
</td>
<td> The maximum number of retries for the low-level HTTP requests to BigQuery. Can be alternatively set in the
Spark configuration (<code>spark.conf.set("httpMaxRetry", ...)</code>) or in Hadoop Configuration
(<code>fs.gs.http.max.retry</code>).
<br/> (Optional. Default is 10)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>httpConnectTimeout</code>
</td>
<td> The timeout in milliseconds to establish a connection with BigQuery. Can be alternatively set in the
Spark configuration (<code>spark.conf.set("httpConnectTimeout", ...)</code>) or in Hadoop Configuration
(<code>fs.gs.http.connect-timeout</code>).
<br/> (Optional. Default is 60000 ms. 0 for an infinite timeout, a negative number for 20000)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>httpReadTimeout</code>
</td>
<td> The timeout in milliseconds to read data from an established connection. Can be alternatively set in the
Spark configuration (<code>spark.conf.set("httpReadTimeout", ...)</code>) or in Hadoop Configuration
(<code>fs.gs.http.read-timeout</code>).
<br/> (Optional. Default is 60000 ms. 0 for an infinite timeout, a negative number for 20000)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>arrowCompressionCodec</code>
</td>
<td> Compression codec while reading from a BigQuery table when using Arrow format. Options :
<code>ZSTD (Zstandard compression)</code>,
<code>LZ4_FRAME (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)</code>,
<code>COMPRESSION_UNSPECIFIED</code>. The recommended compression codec is <code>ZSTD</code>
while using Java.
<br/> (Optional. Defaults to <code>COMPRESSION_UNSPECIFIED</code> which means no compression will be used)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>responseCompressionCodec</code>
</td>
<td> Compression codec used to compress the ReadRowsResponse data. Options:
<code>RESPONSE_COMPRESSION_CODEC_UNSPECIFIED</code>,
<code>RESPONSE_COMPRESSION_CODEC_LZ4</code>
<br/> (Optional. Defaults to <code>RESPONSE_COMPRESSION_CODEC_UNSPECIFIED</code> which means no compression will be used)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>cacheExpirationTimeInMinutes</code>
</td>
<td> The expiration time of the in-memory cache storing query information.
<br/> To disable caching, set the value to 0.
<br/> (Optional. Defaults to 15 minutes)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>enableModeCheckForSchemaFields</code>
</td>
<td> Checks the mode of every field in destination schema to be equal to the mode in corresponding source field schema, during DIRECT write.
<br/> Default value is true i.e., the check is done by default. If set to false the mode check is ignored.
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>enableListInference</code>
</td>
<td> Indicates whether to use schema inference specifically when the mode is Parquet (https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#parquetoptions).
<br/> Defaults to false.
<br/>
</td>
<td>Write</td>
</tr>
<tr>
<td><code>bqChannelPoolSize</code></td>
<td> The (fixed) size of the gRPC channel pool created by the BigQueryReadClient.
<br/>For optimal performance, this should be set to at least the number of cores on the cluster executors.
</td>
<td>Read</td>
</tr>
<tr>
<td><code>createReadSessionTimeoutInSeconds</code>
</td>
<td> The timeout in seconds to create a ReadSession when reading a table.
<br/> For extremely large tables this value should be increased.
<br/> (Optional. Defaults to 600 seconds)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>queryJobPriority</code>
</td>
<td> Priority levels set for the job while reading data from BigQuery query. The permitted values are:
<ul>
<li><code>BATCH</code> - Query is queued and started as soon as idle resources are available, usually within a few minutes. If the query hasn't started within 3 hours, its priority is changed to <code>INTERACTIVE</code>.</li>
<li><code>INTERACTIVE</code> - Query is executed as soon as possible and counts toward the concurrent rate limit and the daily rate limit.</li>
</ul>
For WRITE, this option will be effective when DIRECT write is used with OVERWRITE mode, where the connector overwrites the destination table using MERGE statement.
<br/> (Optional. Defaults to <code>INTERACTIVE</code>)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>destinationTableKmsKeyName</code>
</td>
<td>Describes the Cloud KMS encryption key that will be used to protect destination BigQuery
table. The BigQuery Service Account associated with your project requires access to this
encryption key. for further Information about using CMEK with BigQuery see
[here](https://cloud.google.com/bigquery/docs/customer-managed-encryption#key_resource_id).
<br/><b>Notice:</b> The table will be encrypted by the key only if it was created by the
connector. A pre-existing unencrypted table won't be encrypted just by setting this option.
<br/> (Optional)
</td>
<td>Write</td>
</tr>
<tr>
<td><code>allowMapTypeConversion</code>
</td>
<td>Boolean config to disable conversion from BigQuery records to Spark MapType
when the record has two subfields with field names as <code>key</code> and <code>value</code>.
Default value is <code>true</code> which allows the conversion.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>spark.sql.sources.partitionOverwriteMode</code>
</td>
<td>Config to specify the overwrite mode on write when the table is range/time partitioned.
Currently supports two modes : <code>STATIC</code> and <code>DYNAMIC</code>. In <code>STATIC</code> mode,
the entire table is overwritten. In <code>DYNAMIC</code> mode, the data is overwritten by partitions of the existing table.
The default value is <code>STATIC</code>.
<br/> (Optional)
</td>
<td>Write</td>
</tr>
<tr>
<td><code>enableReadSessionCaching</code>
</td>
<td>Boolean config to disable read session caching. Caches BigQuery read sessions to allow for faster Spark query planning.
Default value is <code>true</code>.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>readSessionCacheDurationMins</code>
</td>
<td>Config to set the read session caching duration in minutes. Only works if <code>enableReadSessionCaching</code> is <code>true</code> (default).
Allows specifying the duration to cache read sessions for. Maximum allowed value is <code>300</code>.
Default value is <code>5</code>.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>bigQueryJobTimeoutInMinutes</code>
</td>
<td>Config to set the BigQuery job timeout in minutes.
Default value is <code>360</code> minutes.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>snapshotTimeMillis</code>
</td>
<td>A timestamp specified in milliseconds to use to read a table snapshot.
By default this is not set and the latest version of a table is read.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>bigNumericDefaultPrecision</code>
</td>
<td>An alternative default precision for BigNumeric fields, as the BigQuery default is too wide for Spark. Values can be between 1 and 38.
This default is used only when the field has an unparameterized BigNumeric type.
Please note that there might be data loss if the actual data's precision is more than what is specified.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>bigNumericDefaultScale</code>
</td>
<td>An alternative default scale for BigNumeric fields. Values can be between 0 and 38, and less than bigNumericFieldsPrecision.
This default is used only when the field has an unparameterized BigNumeric type.
Please note that there might be data loss if the actual data's scale is more than what is specified.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>credentialsScopes</code>
</td>
<td>Replaces the scopes of the Google Credentials if the credentials type supports that.
If scope replacement is not supported then it does nothing.
<br/>The value should be a comma separated list of valid scopes.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
</table>
Options can also be set outside of the code, using the `--conf` parameter of `spark-submit` or `--properties` parameter
of the `gcloud dataproc submit spark`. In order to use this, prepend the prefix `spark.datasource.bigquery.` to any of
the options, for example `spark.conf.set("temporaryGcsBucket", "some-bucket")` can also be set as
`--conf spark.datasource.bigquery.temporaryGcsBucket=some-bucket`.
### Data types
With the exception of `DATETIME` and `TIME` all BigQuery data types map directly to the corresponding Spark SQL data type. Here are all of the mappings:
<!--- TODO(#2): Convert to markdown -->
<table>
<tr valign="top">
<td><strong>BigQuery Standard SQL Data Type </strong>
</td>
<td><strong>Spark SQL</strong>
<p>
<strong>Data Type</strong>
</td>
<td><strong>Notes</strong>
</td>
</tr>
<tr valign="top">
<td><strong><code>BOOL</code></strong>
</td>
<td><strong><code>BooleanType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>INT64</code></strong>
</td>
<td><strong><code>LongType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>FLOAT64</code></strong>
</td>
<td><strong><code>DoubleType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>NUMERIC</code></strong>
</td>
<td><strong><code>DecimalType</code></strong>
</td>
<td>
Please refer to <a href="#numeric-and-bignumeric-support">Numeric and BigNumeric support</a>
</td>
</tr>
<tr valign="top">
<td><strong><code>BIGNUMERIC</code></strong>
</td>
<td><strong><code>DecimalType</code></strong>
</td>
<td>
Please refer to <a href="#numeric-and-bignumeric-support">Numeric and BigNumeric support</a>
</td>
</tr>
<tr valign="top">
<td><strong><code>STRING</code></strong>
</td>
<td><strong><code>StringType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>BYTES</code></strong>
</td>
<td><strong><code>BinaryType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>STRUCT</code></strong>
</td>
<td><strong><code>StructType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>ARRAY</code></strong>
</td>
<td><strong><code>ArrayType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>TIMESTAMP</code></strong>
</td>
<td><strong><code>TimestampType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>DATE</code></strong>
</td>
<td><strong><code>DateType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>DATETIME</code></strong>
</td>
<td><strong><code>StringType</code>, </strong><strong><code>TimestampNTZType</code>*</strong>
</td>
<td>Spark has no DATETIME type.
<p>
Spark string can be written to an existing BQ DATETIME column provided it is in the <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#canonical_format_for_datetime_literals">format for BQ DATETIME literals</a>.
<p>
* For Spark 3.4+, BQ DATETIME is read as Spark's TimestampNTZ type i.e. java LocalDateTime
</td>
</tr>
<tr valign="top">
<td><strong><code>TIME</code></strong>
</td>
<td><strong><code>LongType</code>, <strong><code>StringType</code>*</strong>
</td>
<td>Spark has no TIME type. The generated longs, which indicate <a href="https://avro.apache.org/docs/1.8.0/spec.html#Time+%2528microsecond+precision%2529">microseconds since midnight</a> can be safely cast to TimestampType, but this causes the date to be inferred as the current day. Thus times are left as longs and user can cast if they like.
<p>
When casting to Timestamp TIME have the same TimeZone issues as DATETIME
<p>
* Spark string can be written to an existing BQ TIME column provided it is in the <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#canonical_format_for_time_literals">format for BQ TIME literals</a>.
</td>
</tr>
<tr valign="top">
<td><strong><code>JSON</code></strong>
</td>
<td><strong><code>StringType</code></strong>
</td>
<td>Spark has no JSON type. The values are read as String. In order to write JSON back to BigQuery, the following conditions are <b>REQUIRED</b>:
<ul>
<li>Use the <code>INDIRECT</code> write method</li>
<li>Use the <code>AVRO</code> intermediate format</li>
<li>The DataFrame field <b>MUST</b> be of type <code>String</code> and has an entry of sqlType=JSON in its metadata</li>
</ul>
</td>
</tr>
<tr valign="top" id="datatype:map">
<td><strong><code>ARRAY<STRUCT<key,value>></code></strong>
</td>
<td><strong><code>MapType</code></strong>
</td>
<td>BigQuery has no MAP type, therefore similar to other conversions like Apache Avro and BigQuery Load jobs, the connector converts a Spark Map to a REPEATED STRUCT<key,value>.
This means that while writing and reading of maps is available, running a SQL on BigQuery that uses map semantics is not supported.
To refer to the map's values using BigQuery SQL, please check the <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays">BigQuery documentation</a>.
Due to these incompatibilities, a few restrictions apply:
<ul>
<li>Keys can be Strings only</li>
<li>Values can be simple types (not structs)</li>
<li>For INDIRECT write, use the <code>AVRO</code> intermediate format. DIRECT write is supported as well</li>
</ul>
</td>
</tr>
</table>
#### Spark ML Data Types Support
The Spark ML [Vector](https://spark.apache.org/docs/2.4.5/api/python/pyspark.ml.html#pyspark.ml.linalg.Vector) and
[Matrix](https://spark.apache.org/docs/2.4.5/api/python/pyspark.ml.html#pyspark.ml.linalg.Matrix) are supported,
including their dense and sparse versions. The data is saved as a BigQuery RECORD. Notice that a suffix is added to
the field's description which includes the spark type of the field.
In order to write those types to BigQuery, use the ORC or Avro intermediate format, and have them as column of the
Row (i.e. not a field in a struct).
#### Numeric and BigNumeric support
BigQuery's BigNumeric has a precision of 76.76 (the 77th digit is partial) and scale of 38. Since
this precision and scale is beyond spark's DecimalType (38 scale and 38 precision) support, it means
that BigNumeric fields with precision larger than 38 cannot be used. Once this Spark limitation
is updated, the connector will be updated accordingly.
The Spark Decimal/BigQuery Numeric conversion tries to preserve the parameterization of the type, i.e
`NUMERIC(10,2)` will be converted to `Decimal(10,2)` and vice versa. Notice however that there are
cases where [the parameters are lost](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#parameterized_data_types).
This means that the parameters will be reverted to the defaults - NUMERIC (38,9) and BIGNUMERIC(76,38).
This means that at the moment, BigNumeric read is supported only from a standard table, but not from
BigQuery view or when [reading data from a BigQuery query](#reading-data-from-a-bigquery-query).
### Filtering
The connector automatically computes column pruning and filter pushdown from the DataFrame's `SELECT` statement e.g.
```
spark.read.bigquery("bigquery-public-data:samples.shakespeare")
.select("word")
.where("word = 'Hamlet' or word = 'Claudius'")
.collect()
```
filters to the column `word` and pushes down the predicate filter `word = 'Hamlet' or word = 'Claudius'`.
If you do not wish to make multiple read requests to BigQuery, you can cache the DataFrame before filtering e.g.:
```
val cachedDF = spark.read.bigquery("bigquery-public-data:samples.shakespeare").cache()
val rows = cachedDF.select("word")
.where("word = 'Hamlet'")
.collect()
// All of the table was cached and this doesn't require an API call
val otherRows = cachedDF.select("word_count")
.where("word = 'Romeo'")
.collect()
```
You can also manually specify the `filter` option, which will override automatic pushdown and Spark will do the rest of the filtering in the client.
### Partitioned Tables
The pseudo columns \_PARTITIONDATE and \_PARTITIONTIME are not part of the table schema. Therefore in order to query by the partitions of [partitioned tables](https://cloud.google.com/bigquery/docs/partitioned-tables) do not use the where() method shown above. Instead, add a filter option in the following manner:
```
val df = spark.read.format("bigquery")
.option("filter", "_PARTITIONDATE > '2019-01-01'")
...
.load(TABLE)
```
### Configuring Partitioning
By default, the connector calculates the requested `maxParallelism` as the larger of `preferredMinParallelism` (which defaults to 3 times the application's default parallelism) and 20,000. BigQuery may limit the number of partitions based on server constraints.
Both <code>[maxParallelism](#properties)</code> and <code>[preferredMinParallelism](#properties)</code> can be configured explicitly to control the number of partitions.
## Tagging BigQuery Resources
In order to support tracking the usage of BigQuery resources the connectors
offers the following options to tag BigQuery resources:
### Adding BigQuery Jobs Labels
The connector can launch BigQuery load and query jobs. Adding labels to the jobs
is done in the following manner:
```
spark.conf.set("bigQueryJobLabel.cost_center", "analytics")
spark.conf.set("bigQueryJobLabel.usage", "nightly_etl")
```
This will create labels `cost_center`=`analytics` and `usage`=`nightly_etl`.
### Adding BigQuery Storage Trace ID
Used to annotate the read and write sessions. The trace ID is of the format
`Spark:ApplicationName:JobID`. This is an opt-in option, and to use it the user
need to set the `traceApplicationName` property. JobID is auto generated by the
Dataproc job ID, with a fallback to the Spark application ID (such as
`application_1648082975639_0001`). The Job ID can be overridden by setting the
`traceJobId` option. Notice that the total length of the trace ID cannot be over
256 characters.
## Using in Jupyter Notebooks
The connector can be used in [Jupyter notebooks](https://jupyter.org/) even if
it is not installed on the Spark cluster. It can be added as an external jar
using the following code:
**Python:**
```python
from pyspark.sql import SparkSession
spark = SparkSession.builder \
.config("spark.jars.packages", "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:${next-release-tag}") \
.getOrCreate()
df = spark.read.format("bigquery") \
.load("dataset.table")
```
**Scala:**
```scala
val spark = SparkSession.builder
.config("spark.jars.packages", "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:${next-release-tag}")
.getOrCreate()
val df = spark.read.format("bigquery")
.load("dataset.table")
```
In case the Spark cluster is using Scala 2.12 (it's optional for Spark 2.4.x,
mandatory in 3.0.x), then the relevant package is
com.google.cloud.spark:spark-bigquery-with-dependencies_**2.12**:${next-release-tag}. In
order to know which Scala version is used, please run the following code:
**Python:**
```python
spark.sparkContext._jvm.scala.util.Properties.versionString()
```
**Scala:**
```scala
scala.util.Properties.versionString
```
## Compiling against the connector
Unless you wish to use the implicit Scala API `spark.read.bigquery("TABLE_ID")`, there is no need to compile against the connector.
To include the connector in your project:
### Maven
```xml
<dependency>
<groupId>com.google.cloud.spark</groupId>
<artifactId>spark-bigquery-with-dependencies_${scala.version}</artifactId>
<version>${next-release-tag}</version>
</dependency>
```
### SBT
```sbt
libraryDependencies += "com.google.cloud.spark" %% "spark-bigquery-with-dependencies" % "${next-release-tag}"
```
### Connector metrics and how to view them
Spark populates a lot of metrics which can be found by the end user in the Spark history page. But all these metrics are Spark related and are implicitly collected without any change from the connector.
But there are a few metrics which are populated from BigQuery and currently are visible in the application logs which can be read in the driver/executor logs.
From Spark 3.2 onwards, spark has provided the API to expose custom metrics in the spark UI page https://spark.apache.org/docs/3.2.0/api/java/org/apache/spark/sql/connector/metric/CustomMetric.html
Currently, using this API, connector exposes the following bigquery metrics during read
<table id="metricstable">
<style>
table#metricstable td, table th
{
word-break:break-word
}
</style>
<tr valign="top">
<th style="min-width:240px">Metric Name</th>
<th style="min-width:240px">Description</th>
</tr>
<tr valign="top">
<td><code>bytes read</code></td>
<td>number of BigQuery bytes read</td>
</tr>
<tr valign="top">
<td><code>rows read</code></td>
<td>number of BigQuery rows read</td>
</tr>
<tr valign="top">
<td><code>scan time</code></td>
<td>the amount of time spent between read rows response requested to obtained across all the executors, in milliseconds.</td>
</tr>
<tr valign="top">
<td><code>parse time</code></td>
<td>the amount of time spent for parsing the rows read across all the executors, in milliseconds.</td>
</tr>
<tr valign="top">
<td><code>spark time</code></td>
<td>the amount of time spent in spark to process the queries (i.e., apart from scanning and parsing), across all the executors, in milliseconds.</td>
</tr>
</table>
**Note:** To use the metrics in the Spark UI page, you need to make sure the `spark-bigquery-metrics-${next-release-tag}.jar` is in the class path before starting the history-server and the connector version is `spark-3.2` or above.
## FAQ
### What is the Pricing for the Storage API?
See the [BigQuery pricing documentation](https://cloud.google.com/bigquery/pricing#storage-api).
### I have very few partitions
You can manually set the number of partitions with the `maxParallelism` property. BigQuery may provide fewer partitions than you ask for. See [Configuring Partitioning](#configuring-partitioning).
You can also always repartition after reading in Spark.
### I get quota exceeded errors while writing
If there are too many partitions the CreateWriteStream or Throughput [quotas](https://cloud.google.com/bigquery/quotas#write-api-limits)
may be exceeded. This occurs because while the data within each partition is processed serially, independent
partitions may be processed in parallel on different nodes within the spark cluster. Generally, to ensure maximum
sustained throughput you should file a quota increase request. However, you can also manually reduce the number of
partitions being written by calling <code>coalesce</code> on the DataFrame to mitigate this problem.
```
desiredPartitionCount = 5
dfNew = df.coalesce(desiredPartitionCount)
dfNew.write
```
A rule of thumb is to have a single partition handle at least 1GB of data.
Also note that a job running with the `writeAtLeastOnce` property turned on will not encounter CreateWriteStream
quota errors.
### How do I authenticate outside GCE / Dataproc?
The connector needs an instance of a GoogleCredentials in order to connect to the BigQuery APIs. There are multiple
options to provide it:
* The default is to load the JSON key from the `GOOGLE_APPLICATION_CREDENTIALS` environment variable, as described
[here](https://cloud.google.com/docs/authentication/getting-started).
* In case the environment variable cannot be changed, the credentials file can be configured
as a spark option. The file should reside on the same path on all the nodes of the cluster.
```
// Globally
spark.conf.set("credentialsFile", "</path/to/key/file>")
// Per read/Write
spark.read.format("bigquery").option("credentialsFile", "</path/to/key/file>")
```
* Credentials can also be provided explicitly, either as a parameter or from Spark runtime configuration.
They should be passed in as a base64-encoded string directly.
```
// Globally
spark.conf.set("credentials", "<SERVICE_ACCOUNT_JSON_IN_BASE64>")
// Per read/Write
spark.read.format("bigquery").option("credentials", "<SERVICE_ACCOUNT_JSON_IN_BASE64>")
```
* In cases where the user has an internal service providing the Google AccessToken, a custom implementation
can be done, creating only the AccessToken and providing its TTL. Token refresh will re-generate a new token. In order
to use this, implement the
[com.google.cloud.bigquery.connector.common.AccessTokenProvider](https://github.com/GoogleCloudDataproc/spark-bigquery-connector/tree/master/bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessTokenProvider.java)
interface. The fully qualified class name of the implementation should be provided in the `gcpAccessTokenProvider`
option. `AccessTokenProvider` must be implemented in Java or other JVM language such as Scala or Kotlin. It must
either have a no-arg constructor or a constructor accepting a single `java.lang.String` argument. This configuration
parameter can be supplied using the `gcpAccessTokenProviderConfig` option. If this is not provided then the no-arg
constructor will be called. The jar containing the implementation should be on the cluster's classpath.
```
// Globally
spark.conf.set("gcpAccessTokenProvider", "com.example.ExampleAccessTokenProvider")
// Per read/Write
spark.read.format("bigquery").option("gcpAccessTokenProvider", "com.example.ExampleAccessTokenProvider")
```
* Service account impersonation can be configured for a specific username and a group name, or
for all users by default using below properties:
- `gcpImpersonationServiceAccountForUser_<USER_NAME>` (not set by default)
The service account impersonation for a specific user.
- `gcpImpersonationServiceAccountForGroup_<GROUP_NAME>` (not set by default)
The service account impersonation for a specific group.
- `gcpImpersonationServiceAccount` (not set by default)
Default service account impersonation for all users.
If any of the above properties are set then the service account specified will be impersonated by
generating a short-lived credentials when accessing BigQuery.
If more than one property is set then the service account associated with the username will take
precedence over the service account associated with the group name for a matching user and group,
which in turn will take precedence over default service account impersonation.
* For a simpler application, where access token refresh is not required, another alternative is to pass the access token
as the `gcpAccessToken` configuration option. You can get the access token by running
`gcloud auth application-default print-access-token`.
```
// Globally
spark.conf.set("gcpAccessToken", "<access-token>")
// Per read/Write
spark.read.format("bigquery").option("gcpAccessToken", "<access-token>")
```
**Important:** The `CredentialsProvider` and `AccessTokenProvider` need to be implemented in Java or
other JVM language such as Scala or Kotlin. The jar containing the implementation should be on the cluster's classpath.
**Notice:** Only one of the above options should be provided.
### How do I connect to GCP/BigQuery via Proxy?
To connect to a forward proxy and to authenticate the user credentials, configure the following options.
`proxyAddress`: Address of the proxy server. The proxy must be an HTTP proxy and address should be in the `host:port`
format.
`proxyUsername`: The userName used to connect to the proxy.
`proxyPassword`: The password used to connect to the proxy.
```
val df = spark.read.format("bigquery")
.option("proxyAddress", "http://my-proxy:1234")
.option("proxyUsername", "my-username")
.option("proxyPassword", "my-password")
.load("some-table")
```
The same proxy parameters can also be set globally using Spark's RuntimeConfig like this:
```
spark.conf.set("proxyAddress", "http://my-proxy:1234")
spark.conf.set("proxyUsername", "my-username")
spark.conf.set("proxyPassword", "my-password")
val df = spark.read.format("bigquery")
.load("some-table")
```
You can set the following in the hadoop configuration as well.
`fs.gs.proxy.address`(similar to "proxyAddress"), `fs.gs.proxy.username`(similar to "proxyUsername") and
`fs.gs.proxy.password`(similar to "proxyPassword").
If the same parameter is set at multiple places the order of priority is as follows:
option("key", "value") > spark.conf > hadoop configuration
================================================
FILE: README.md
================================================
# Apache Spark SQL connector for Google BigQuery
<!--- TODO(#2): split out into more documents. -->
The connector supports reading [Google BigQuery](https://cloud.google.com/bigquery/) tables into Spark's DataFrames, and writing DataFrames back into BigQuery.
This is done by using the [Spark SQL Data Source API](https://spark.apache.org/docs/latest/sql-programming-guide.html#data-sources) to communicate with BigQuery.
## BigQuery Storage API
The [Storage API](https://cloud.google.com/bigquery/docs/reference/storage) streams data in parallel directly from BigQuery via gRPC without using Google Cloud Storage as an intermediary.
It has a number of advantages over using the previous export-based read flow that should generally lead to better read performance:
### Direct Streaming
It does not leave any temporary files in Google Cloud Storage. Rows are read directly from BigQuery servers using the Arrow or Avro wire formats.
### Filtering
The new API allows column and predicate filtering to only read the data you are interested in.
#### Column Filtering
Since BigQuery is [backed by a columnar datastore](https://cloud.google.com/blog/big-data/2016/04/inside-capacitor-bigquerys-next-generation-columnar-storage-format), it can efficiently stream data without reading all columns.
#### Predicate Filtering
The Storage API supports arbitrary pushdown of predicate filters. Connector version 0.8.0-beta and above support pushdown of arbitrary filters to Bigquery.
There is a known issue in Spark that does not allow pushdown of filters on nested fields. For example - filters like `address.city = "Sunnyvale"` will not get pushdown to Bigquery.
### Dynamic Sharding
The API rebalances records between readers until they all complete. This means that all Map phases will finish nearly concurrently. See this blog article on [how dynamic sharding is similarly used in Google Cloud Dataflow](https://cloud.google.com/blog/products/gcp/no-shard-left-behind-dynamic-work-rebalancing-in-google-cloud-dataflow).
See [Configuring Partitioning](#configuring-partitioning) for more details.
## Requirements
### Enable the BigQuery Storage API
Follow [these instructions](https://cloud.google.com/bigquery/docs/reference/storage/#enabling_the_api).
### Create a Google Cloud Dataproc cluster (Optional)
If you do not have an Apache Spark environment you can create a Cloud Dataproc cluster with pre-configured auth. The following examples assume you are using Cloud Dataproc, but you can use `spark-submit` on any cluster.
Any Dataproc cluster using the API needs the 'bigquery' or 'cloud-platform' scopes. Dataproc clusters have the 'bigquery' scope by default, so most clusters in enabled projects should work by default e.g.
```
MY_CLUSTER=...
gcloud dataproc clusters create "$MY_CLUSTER"
```
## Downloading and Using the Connector
The latest version of the connector is publicly available in the following links:
| version | Link |
|------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Spark 4.1 | `gs://spark-lib/bigquery/spark-4.1-bigquery-0.44.1-preview.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-4.1-bigquery-0.44.1-preview.jar)) |
| Spark 4.0 | `gs://spark-lib/bigquery/spark-4.0-bigquery-0.44.1.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-4.0-bigquery-0.44.1.jar)) |
| Spark 3.5 | `gs://spark-lib/bigquery/spark-3.5-bigquery-0.44.1.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.5-bigquery-0.44.1.jar)) |
| Spark 3.4 | `gs://spark-lib/bigquery/spark-3.4-bigquery-0.44.1.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.4-bigquery-0.44.1.jar)) |
| Spark 3.3 | `gs://spark-lib/bigquery/spark-3.3-bigquery-0.44.1.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.3-bigquery-0.44.1.jar)) |
| Spark 3.2 | `gs://spark-lib/bigquery/spark-3.2-bigquery-0.44.1.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.2-bigquery-0.44.1.jar)) |
| Spark 3.1 | `gs://spark-lib/bigquery/spark-3.1-bigquery-0.44.1.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-3.1-bigquery-0.44.1.jar)) |
| Spark 2.4 | `gs://spark-lib/bigquery/spark-2.4-bigquery-0.37.0.jar`([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-2.4-bigquery-0.37.0.jar)) |
| Scala 2.13 | `gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.13-0.44.1.jar` ([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-bigquery-with-dependencies_2.13-0.44.1.jar)) |
| Scala 2.12 | `gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.44.1.jar` ([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.44.1.jar)) |
| Scala 2.11 | `gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.11-0.29.0.jar` ([HTTP link](https://storage.googleapis.com/spark-lib/bigquery/spark-bigquery-with-dependencies_2.11-0.29.0.jar)) |
The first eight versions are Java based connectors targeting Spark 2.4/3.1/3.2/3.3/3.4/3.5/4.0/4.1 of all Scala versions built on the new
Data Source APIs (Data Source API v2) of Spark.
The final three connectors are Scala based connectors, please use the jar relevant to your Spark installation as outlined
below.
### Connector to Spark Compatibility Matrix
| Connector \ Spark | 2.3 | 2.4 | 3.0 | 3.1 | 3.2 | 3.3 |3.4 | 3.5 |
|---------------------------------------|---------|---------|---------|---------|---------|---------|---------|---------|
| spark-3.5-bigquery | | | | | | | | ✓ |
| spark-3.4-bigquery | | | | | | | ✓ | ✓ |
| spark-3.3-bigquery | | | | | | ✓ | ✓ | ✓ |
| spark-3.2-bigquery | | | | | ✓ | ✓ | ✓ | ✓ |
| spark-3.1-bigquery | | | | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-2.4-bigquery | | ✓ | | | | | | |
| spark-bigquery-with-dependencies_2.13 | | | | | ✓ | ✓ | ✓ | ✓ |
| spark-bigquery-with-dependencies_2.12 | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-bigquery-with-dependencies_2.11 | ✓ | ✓ | | | | | | |
### Connector to Dataproc Image Compatibility Matrix
| Connector \ Dataproc Image | 1.3 | 1.4 | 1.5 | 2.0 | 2.1 | 2.2 | Serverless<br>Image 1.0 | Serverless<br>Image 2.0 | Serverless<br>Image 2.1 | Serverless<br>Image 2.2 |
|---------------------------------------|---------|---------|---------|---------|---------|---------|-------------------------|-------------------------|-------------------------|-------------------------|
| spark-3.5-bigquery | | | | | | ✓ | | | | ✓ |
| spark-3.4-bigquery | | | | | | ✓ | | | ✓ | ✓ |
| spark-3.3-bigquery | | | | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-3.2-bigquery | | | | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-3.1-bigquery | | | | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| spark-2.4-bigquery | | ✓ | ✓ | | | | | | | |
| spark-bigquery-with-dependencies_2.13 | | | | | | | | ✓ | ✓ | ✓ |
| spark-bigquery-with-dependencies_2.12 | | | ✓ | ✓ | ✓ | ✓ | ✓ | | | |
| spark-bigquery-with-dependencies_2.11 | ✓ | ✓ | | | | | | | | |
### Maven / Ivy Package Usage
The connector is also available from the
[Maven Central](https://repo1.maven.org/maven2/com/google/cloud/spark/)
repository. It can be used using the `--packages` option or the
`spark.jars.packages` configuration property. Use the following value
| version | Connector Artifact |
|------------|------------------------------------------------------------------------------------|
| Spark 4.1 | `com.google.cloud.spark:spark-4.1-bigquery:0.44.1-preview` |
| Spark 4.0 | `com.google.cloud.spark:spark-4.0-bigquery:0.44.1` |
| Spark 3.5 | `com.google.cloud.spark:spark-3.5-bigquery:0.44.1` |
| Spark 3.4 | `com.google.cloud.spark:spark-3.4-bigquery:0.44.1` |
| Spark 3.3 | `com.google.cloud.spark:spark-3.3-bigquery:0.44.1` |
| Spark 3.2 | `com.google.cloud.spark:spark-3.2-bigquery:0.44.1` |
| Spark 3.1 | `com.google.cloud.spark:spark-3.1-bigquery:0.44.1` |
| Spark 2.4 | `com.google.cloud.spark:spark-2.4-bigquery:0.37.0` |
| Scala 2.13 | `com.google.cloud.spark:spark-bigquery-with-dependencies_2.13:0.44.1` |
| Scala 2.12 | `com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.44.1` |
| Scala 2.11 | `com.google.cloud.spark:spark-bigquery-with-dependencies_2.11:0.29.0` |
### Specifying the Spark BigQuery connector version in a Dataproc cluster
Dataproc clusters created using image 2.1 and above, or batches using the Dataproc serverless service come with built-in Spark BigQuery connector.
Using the standard `--jars` or `--packages` (or alternatively, the `spark.jars`/`spark.jars.packages` configuration) won't help in this case as the built-in connector takes precedence.
To use another version than the built-in one, please do one of the following:
* For Dataproc clusters, using image 2.1 and above, add the following flag on cluster creation to upgrade the version `--metadata SPARK_BQ_CONNECTOR_VERSION=0.44.1`, or `--metadata SPARK_BQ_CONNECTOR_URL=gs://spark-lib/bigquery/spark-3.3-bigquery-0.44.1.jar` to create the cluster with a different jar. The URL can point to any valid connector JAR for the cluster's Spark version.
* For Dataproc serverless batches, add the following property on batch creation to upgrade the version: `--properties dataproc.sparkBqConnector.version=0.44.1`, or `--properties dataproc.sparkBqConnector.uri=gs://spark-lib/bigquery/spark-3.3-bigquery-0.44.1.jar` to create the batch with a different jar. The URL can point to any valid connector JAR for the runtime's Spark version.
## Hello World Example
You can run a simple PySpark wordcount against the API without compilation by running
**Dataproc image 1.5 and above**
```
gcloud dataproc jobs submit pyspark --cluster "$MY_CLUSTER" \
--jars gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.44.1.jar \
examples/python/shakespeare.py
```
**Dataproc image 1.4 and below**
```
gcloud dataproc jobs submit pyspark --cluster "$MY_CLUSTER" \
--jars gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.11-0.29.0.jar \
examples/python/shakespeare.py
```
## Example Codelab ##
https://codelabs.developers.google.com/codelabs/pyspark-bigquery
## Usage
The connector uses the cross language [Spark SQL Data Source API](https://spark.apache.org/docs/latest/sql-programming-guide.html#data-sources):
### Reading data from a BigQuery table
```
df = spark.read \
.format("bigquery") \
.load("bigquery-public-data.samples.shakespeare")
```
or the Scala only implicit API:
```
import com.google.cloud.spark.bigquery._
val df = spark.read.bigquery("bigquery-public-data.samples.shakespeare")
```
The connector supports reading from tables that contain spaces in their names.
**Note on ambiguous table names**: If a table name contains both spaces and a SQL keyword (e.g., "from", "where", "join"), it may be misinterpreted as a SQL query. To resolve this ambiguity, quote the table identifier with backticks \`. For example:
```
df = spark.read \
.format("bigquery") \
.load("`my_project.my_dataset.orders from 2023`")
```
For more information, see additional code samples in
[Python](examples/python/shakespeare.py),
[Scala](spark-bigquery-dsv1/src/main/scala/com/google/cloud/spark/bigquery/examples/Shakespeare.scala)
and
[Java](spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/examples/JavaShakespeare.java).
### Reading data from a BigQuery query
The connector allows you to run any
[Standard SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax)
SELECT query on BigQuery and fetch its results directly to a Spark Dataframe.
This is easily done as described in the following code sample:
```
spark.conf.set("viewsEnabled","true")
sql = """
SELECT tag, COUNT(*) c
FROM (
SELECT SPLIT(tags, '|') tags
FROM `bigquery-public-data.stackoverflow.posts_questions` a
WHERE EXTRACT(YEAR FROM creation_date)>=2014
), UNNEST(tags) tag
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10
"""
df = spark.read.format("bigquery").load(sql)
df.show()
```
Which yields the result
```
+----------+-------+
| tag| c|
+----------+-------+
|javascript|1643617|
| python|1352904|
| java|1218220|
| android| 913638|
| php| 911806|
| c#| 905331|
| html| 769499|
| jquery| 608071|
| css| 510343|
| c++| 458938|
+----------+-------+
```
A second option is to use the `query` option like this:
```
df = spark.read.format("bigquery").option("query", sql).load()
```
Notice that the execution should be faster as only the result is transmitted
over the wire. In a similar fashion the queries can include JOINs more
efficiently than running joins on Spark or use other BigQuery features such as
[subqueries](https://cloud.google.com/bigquery/docs/reference/standard-sql/subqueries),
[BigQuery user defined functions](https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions),
[wildcard tables](https://cloud.google.com/bigquery/docs/reference/standard-sql/wildcard-table-reference),
[BigQuery ML](https://cloud.google.com/bigquery-ml/docs)
and more.
In order to use this feature the `viewsEnabled` configurations MUST be set to
`true`. This can also be done globally as shown in the example above.
**Important:** This feature is implemented by running the query on BigQuery and
saving the result into a temporary table, of which Spark will read the results
from. This may add additional costs on your BigQuery account.
### Reading From Parameterized Queries
The connector supports executing [BigQuery parameterized queries](https://cloud.google.com/bigquery/docs/parameterized-queries) using the
standard `spark.read.format('bigquery')` API.
To use parameterized queries:
1. Provide the SQL query containing parameters using the
`.option("query", "SQL_STRING")` with named (`@param`) or positional (`?`) parameters.
2. Specify the parameter values using dedicated options:
* **Named Parameters:** Use options prefixed with `NamedParameters.`. The
parameter name follows the prefix (case-insensitive).
* Format: `.option("NamedParameters.<parameter_name>", "TYPE:value")`
* Example: `.option("NamedParameters.corpus", "STRING:romeoandjuliet")`
* **Positional Parameters:** Use options prefixed with
`PositionalParameters.`. The 1-based index follows the prefix.
* Format:
`.option("PositionalParameters.<parameter_index>", "TYPE:value")`
* Example: `.option("PositionalParameters.1", "STRING:romeoandjuliet")`
The `TYPE` in the `TYPE:value` string specifies the BigQuery Standard SQL data
type. Supported types currently include: `BOOL`, `INT64`, `FLOAT64`, `NUMERIC`,
`STRING`, `DATE`, `DATETIME`, `JSON`, `TIME`, `GEOGRAPHY`, `TIMESTAMP`.
`ARRAY` and `STRUCT` types are not supported as parameters at this time.
### Reading From Views
The connector has a preliminary support for reading from
[BigQuery views](https://cloud.google.com/bigquery/docs/views-intro). Please
note there are a few caveats:
* BigQuery views are not materialized by default, which means that the connector
needs to materialize them before it can read them. This process affects the
read performance, even before running any `collect()` or `count()` action.
* The materialization process can also incur additional costs to your BigQuery
bill.
* Reading from views is **disabled** by default. In order to enable it,
either set the viewsEnabled option when reading the specific view
(`.option("viewsEnabled", "true")`) or set it globally by calling
`spark.conf.set("viewsEnabled", "true")`.
**Notice:** Before version 0.42.1 of the connector, the following configurations
are required:
* By default, the materialized views are created in the same project and
dataset. Those can be configured by the optional `materializationProject`
and `materializationDataset` options, respectively. These options can also
be globally set by calling `spark.conf.set(...)` before reading the views.
* As mentioned in the [BigQuery documentation](https://cloud.google.com/bigquery/docs/writing-results#temporary_and_permanent_tables),
the `materializationDataset` should be in same location as the view.
Starting version 0.42.1 those configurations are **redundant** and are ignored.
It is highly recommended to upgrade to this version or a later one to enjoy
simpler configuration when using views or loading from queries.
### Writing data to BigQuery
Writing DataFrames to BigQuery can be done using two methods: Direct and Indirect.
#### Direct write using the BigQuery Storage Write API
In this method the data is written directly to BigQuery using the
[BigQuery Storage Write API](https://cloud.google.com/bigquery/docs/write-api). In order to enable this option, please
set the `writeMethod` option to `direct`, as shown below:
```
df.write \
.format("bigquery") \
.option("writeMethod", "direct") \
.option("writeAtLeastOnce", "true")
.save("dataset.table")
```
Writing to existing partitioned tables (date partitioned, ingestion time partitioned and range
partitioned) in APPEND save mode and OVERWRITE mode (only date and range partitioned) is fully supported by the connector and the BigQuery Storage Write
API. The use of `datePartition`, `partitionField`, `partitionType`, `partitionRangeStart`, `partitionRangeEnd`, `partitionRangeInterval`
described below is not supported at this moment by the direct write method.
**Important:** Please refer to the [data ingestion pricing](https://cloud.google.com/bigquery/pricing#data_ingestion_pricing)
page regarding the BigQuery Storage Write API pricing.
**Important:** Please use version 0.24.2 and above for direct writes, as previous
versions have a bug that may cause a table deletion in certain cases.
#### Indirect write
In this method the data is written first to GCS, and then it is loaded into BigQuery. A GCS bucket must be configured
to indicate the temporary data location.
```
df.write \
.format("bigquery") \
.option("temporaryGcsBucket","some-bucket") \
.save("dataset.table")
```
The data is temporarily stored using the [Apache Parquet](https://parquet.apache.org/),
[Apache ORC](https://orc.apache.org/) or [Apache Avro](https://avro.apache.org/) formats.
The GCS bucket and the format can also be set globally using Spark's RuntimeConfig like this:
```
spark.conf.set("temporaryGcsBucket","some-bucket")
df.write \
.format("bigquery") \
.save("dataset.table")
```
When streaming a DataFrame to BigQuery, each batch is written in the same manner as a non-streaming DataFrame.
Note that a HDFS compatible
[checkpoint location](http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#recovering-from-failures-with-checkpointing)
(eg: `path/to/HDFS/dir` or `gs://checkpoint-bucket/checkpointDir`) must be specified.
```
df.writeStream \
.format("bigquery") \
.option("temporaryGcsBucket","some-bucket") \
.option("checkpointLocation", "some-location") \
.option("table", "dataset.table")
```
**Important:** The connector does not configure the GCS connector, in order to avoid conflict with another GCS connector, if exists. In order to use the write capabilities of the connector, please configure the GCS connector on your cluster as explained [here](https://github.com/GoogleCloudPlatform/bigdata-interop/tree/master/gcs).
#### Schema Behavior on Overwrite
When using `SaveMode.Overwrite` (`.mode("overwrite")`), the connector **preserves the existing table's schema**.
The data is truncated, but column types, descriptions, and policy tags are retained.
```
df.write \
.format("bigquery") \
.mode("overwrite") \
.option("temporaryGcsBucket","some-bucket") \
.save("dataset.table")
```
**Important:** If your DataFrame has a different schema than the existing table (e.g., changing a column from
`INTEGER` to `DOUBLE`), the write will fail with a type mismatch error. To change the schema, either:
- Drop the table before overwriting
- Use BigQuery DDL to alter the table schema first
For some of the schema difference, the following options can work with overwrite:
Programmatic Relaxation: Set `.option("allowFieldRelaxation", "true")` for nullability changes and `.option("allowFieldAddition", "true")` for new columns.
This behavior was introduced between version 0.22.0 and 0.41.0 to prevent accidental schema drift.
**Note:** This behavior applies to both the `indirect` (default) and `direct` write methods.
### Running SQL on BigQuery
The connector supports Spark's [SparkSession#executeCommand](https://archive.apache.org/dist/spark/docs/3.0.0/api/java/org/apache/spark/sql/SparkSession.html#executeCommand-java.lang.String-java.lang.String-scala.collection.immutable.Map-)
with the Spark-X.Y-bigquery connectors. It can be used to run any arbitrary DDL/DML StandardSQL statement on BigQuery as
a query job. `SELECT` statements are not supported, as those are supported by reading from query as shown above. It can
be used as follows:
```
spark.executeCommand("bigquery", sql, options)
```
Notice the following:
* Notice that apart from the authentication options no other options are supported by this functionality.
* This API is available only in the Scala/Java API. PySpark does not provide it.
### Properties
The API Supports a number of options to configure the read
<!--- TODO(#2): Convert to markdown -->
<table id="propertytable">
<style>
table#propertytable td, table th
{
word-break:break-word
}
</style>
<tr valign="top">
<th style="min-width:240px">Property</th>
<th>Meaning</th>
<th style="min-width:80px">Usage</th>
</tr>
<tr valign="top">
<td><code>table</code>
</td>
<td>The BigQuery table in the format <code>[[project:]dataset.]table</code>.
It is recommended to use the <code>path</code> parameter of
<code>load()</code>/<code>save()</code> instead. This option has been
deprecated and will be removed in a future version.
<br/><strong>(Deprecated)</strong>
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>dataset</code>
</td>
<td>The dataset containing the table. This option should be used with
standard table and views, but not when loading query results.
<br/>(Optional unless omitted in <code>table</code>)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>project</code>
</td>
<td>The Google Cloud Project ID of the table. This option should be used with
standard table and views, but not when loading query results.
<br/>(Optional. Defaults to the project of the Service Account being used)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>billingProject</code>
</td>
<td>The Google Cloud Project ID to use for <strong>billing</strong> (API calls, query execution).
<br/>(Optional. Defaults to the project of the Service Account being used)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>parentProject</code>
</td>
<td><strong>(Deprecated)</strong> Alias for <code>billingProject</code>.
<br/>(Optional. Defaults to the project of the Service Account being used)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>location</code>
</td>
<td>The BigQuery location where the data resides (e.g. US, EU, asia-northeast1).
<br/>(Optional. Defaults to BigQuery default)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>maxParallelism</code>
</td>
<td>The maximal number of partitions to split the data into. Actual number
may be less if BigQuery deems the data small enough. If there are not
enough executors to schedule a reader per partition, some partitions may
be empty.
<br/><b>Important:</b> The old parameter (<code>parallelism</code>) is
still supported but in deprecated mode. It will be removed in
version 1.0 of the connector.
<br/>(Optional. Defaults to the larger of the preferredMinParallelism and 20,000.)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>preferredMinParallelism</code>
</td>
<td>The preferred minimal number of partitions to split the data into. Actual number
may be less if BigQuery deems the data small enough. If there are not
enough executors to schedule a reader per partition, some partitions may
be empty.
<br/>(Optional. Defaults to the smallest of 3 times the application's default parallelism
and maxParallelism.)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>viewsEnabled</code>
</td>
<td>Enables the connector to read from views and not only tables. Please read
the <a href="#reading-from-views">relevant section</a> before activating
this option.
<br/>(Optional. Defaults to <code>false</code>)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>readDataFormat</code>
</td>
<td>Data Format for reading from BigQuery. Options : <code>ARROW</code>, <code>AVRO</code>
<br/>(Optional. Defaults to <code>ARROW</code>)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>optimizedEmptyProjection</code>
</td>
<td>The connector uses an optimized empty projection (select without any
columns) logic, used for <code>count()</code> execution. This logic takes
the data directly from the table metadata or performs a much more efficient
`SELECT COUNT(*) WHERE...` in case there is a filter. You can cancel the
use of this logic by setting this option to <code>false</code>.
<br/>(Optional, defaults to <code>true</code>)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>pushAllFilters</code>
</td>
<td>If set to <code>true</code>, the connector pushes all the filters Spark can delegate
to BigQuery Storage API. This reduces amount of data that needs to be sent from
BigQuery Storage API servers to Spark clients. This option has been
deprecated and will be removed in a future version.
<br/>(Optional, defaults to <code>true</code>)
<br/><strong>(Deprecated)</strong>
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>bigQueryJobLabel</code>
</td>
<td>Can be used to add labels to the connector initiated query and load
BigQuery jobs. Multiple labels can be set.
<br/>(Optional)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>bigQueryTableLabel</code>
</td>
<td>Can be used to add labels to the table while writing to a table. Multiple
labels can be set.
<br/>(Optional)
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>traceApplicationName</code>
</td>
<td>Application name used to trace BigQuery Storage read and write sessions.
Setting the application name is required to set the trace ID on the
sessions.
<br/>(Optional)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>traceJobId</code>
</td>
<td>Job ID used to trace BigQuery Storage read and write sessions.
      <br/>(Optional, defaults to the Dataproc job ID if it exists, otherwise uses
the Spark application ID)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>createDisposition</code>
</td>
<td>Specifies whether the job is allowed to create new tables. The permitted
values are:
<ul>
<li><code>CREATE_IF_NEEDED</code> - Configures the job to create the
table if it does not exist.</li>
<li><code>CREATE_NEVER</code> - Configures the job to fail if the
table does not exist.</li>
</ul>
       This option takes effect only in case Spark has decided to write data
to the table based on the SaveMode.
<br/>(Optional. Default to CREATE_IF_NEEDED).
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>writeMethod</code>
</td>
<td>Controls the method
in which the data is written to BigQuery. Available values are <code>direct</code>
to use the BigQuery Storage Write API and <code>indirect</code> which writes the
data first to GCS and then triggers a BigQuery load operation. See more
<a href="#writing-data-to-bigquery">here</a>
<br/>(Optional, defaults to <code>indirect</code>)
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>writeAtLeastOnce</code>
</td>
<td>Guarantees that data is written to BigQuery at least once. This is a lesser
guarantee than exactly once. This is suitable for streaming scenarios
in which data is continuously being written in small batches.
<br/>(Optional. Defaults to <code>false</code>)
<br/><i>Supported only by the `DIRECT` write method and mode is <b>NOT</b> `Overwrite`.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>temporaryGcsBucket</code>
</td>
<td>The GCS bucket that temporarily holds the data before it is loaded to
BigQuery. Required unless set in the Spark configuration
(<code>spark.conf.set(...)</code>).
<br/>Defaults to the `fs.gs.system.bucket` if exists, for example on Google Cloud Dataproc clusters, starting version 0.42.0.
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>persistentGcsBucket</code>
</td>
<td>The GCS bucket that holds the data before it is loaded to
BigQuery. If informed, the data won't be deleted after write data
into BigQuery.
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>persistentGcsPath</code>
</td>
<td>The GCS path that holds the data before it is loaded to
BigQuery. Used only with <code>persistentGcsBucket</code>.
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>intermediateFormat</code>
</td>
<td>The format of the data before it is loaded to BigQuery, values can be
either "parquet","orc" or "avro". In order to use the Avro format, the
spark-avro package must be added in runtime.
<br/>(Optional. Defaults to <code>parquet</code>). On write only. Supported only for the `INDIRECT` write method.
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>useAvroLogicalTypes</code>
</td>
<td>When loading from Avro (`.option("intermediateFormat", "avro")`), BigQuery uses the underlying Avro types instead of the logical types [by default](https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types). Supplying this option converts Avro logical types to their corresponding BigQuery data types.
<br/>(Optional. Defaults to <code>false</code>). On write only.
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>datePartition</code>
</td>
<td>The date partition the data is going to be written to. Should be a date string
given in the format <code>YYYYMMDD</code>. Can be used to overwrite the data of
a single partition, like this: <code><br/>df.write.format("bigquery")
<br/> .option("datePartition", "20220331")
<br/> .mode("overwrite")
<br/> .save("table")</code>
<br/>(Optional). On write only.
<br/> Can also be used with different partition types like:
<br/> HOUR: <code>YYYYMMDDHH</code>
<br/> MONTH: <code>YYYYMM</code>
<br/> YEAR: <code>YYYY</code>
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionField</code>
</td>
<td>If this field is specified, the table is partitioned by this field.
<br/>For Time partitioning, specify together with the option `partitionType`.
      <br/>For Integer-range partitioning, specify together with the 3 options: `partitionRangeStart`, `partitionRangeEnd`, `partitionRangeInterval`.
<br/>The field must be a top-level TIMESTAMP or DATE field for Time partitioning, or INT64 for Integer-range partitioning. Its mode must be <strong>NULLABLE</strong>
or <strong>REQUIRED</strong>.
If the option is not set for a Time partitioned table, then the table will be partitioned by pseudo
column, referenced via either<code>'_PARTITIONTIME' as TIMESTAMP</code> type, or
<code>'_PARTITIONDATE' as DATE</code> type.
<br/>(Optional).
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionExpirationMs</code>
</td>
<td>Number of milliseconds for which to keep the storage for partitions in the table.
The storage in a partition will have an expiration time of its partition time plus this value.
<br/>(Optional).
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionType</code>
</td>
<td>Used to specify Time partitioning.
<br/>Supported types are: <code>HOUR, DAY, MONTH, YEAR</code>
<br/> This option is <b>mandatory</b> for a target table to be Time partitioned.
<br/>(Optional. Defaults to DAY if PartitionField is specified).
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>partitionRangeStart</code>,
<code>partitionRangeEnd</code>,
<code>partitionRangeInterval</code>
</td>
<td>Used to specify Integer-range partitioning.
<br/>These options are <b>mandatory</b> for a target table to be Integer-range partitioned.
<br/>All 3 options must be specified.
<br/><i>Not supported by the `DIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>clusteredFields</code>
</td>
   <td>A string of non-repeated, top level columns separated by comma.
<br/>(Optional).
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>allowFieldAddition</code>
</td>
<td>Adds the <a href="https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/JobInfo.SchemaUpdateOption.html#ALLOW_FIELD_ADDITION" target="_blank">ALLOW_FIELD_ADDITION</a>
SchemaUpdateOption to the BigQuery LoadJob. Allowed values are <code>true</code> and <code>false</code>.
<br/>(Optional. Default to <code>false</code>).
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>allowFieldRelaxation</code>
</td>
<td>Adds the <a href="https://googleapis.dev/java/google-cloud-clients/latest/com/google/cloud/bigquery/JobInfo.SchemaUpdateOption.html#ALLOW_FIELD_RELAXATION" target="_blank">ALLOW_FIELD_RELAXATION</a>
SchemaUpdateOption to the BigQuery LoadJob. Allowed values are <code>true</code> and <code>false</code>.
<br/>(Optional. Default to <code>false</code>).
<br/><i>Supported only by the `INDIRECT` write method.</i>
</td>
<td>Write</td>
</tr>
<tr valign="top">
<td><code>proxyAddress</code>
</td>
<td> Address of the proxy server. The proxy must be a HTTP proxy and address should be in the `host:port` format.
Can be alternatively set in the Spark configuration (<code>spark.conf.set(...)</code>) or in Hadoop
Configuration (<code>fs.gs.proxy.address</code>).
<br/> (Optional. Required only if connecting to GCP via proxy.)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>proxyUsername</code>
</td>
<td> The userName used to connect to the proxy. Can be alternatively set in the Spark configuration
(<code>spark.conf.set(...)</code>) or in Hadoop Configuration (<code>fs.gs.proxy.username</code>).
<br/> (Optional. Required only if connecting to GCP via proxy with authentication.)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>proxyPassword</code>
</td>
<td> The password used to connect to the proxy. Can be alternatively set in the Spark configuration
(<code>spark.conf.set(...)</code>) or in Hadoop Configuration (<code>fs.gs.proxy.password</code>).
<br/> (Optional. Required only if connecting to GCP via proxy with authentication.)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>httpMaxRetry</code>
</td>
<td> The maximum number of retries for the low-level HTTP requests to BigQuery. Can be alternatively set in the
Spark configuration (<code>spark.conf.set("httpMaxRetry", ...)</code>) or in Hadoop Configuration
(<code>fs.gs.http.max.retry</code>).
<br/> (Optional. Default is 10)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>httpConnectTimeout</code>
</td>
<td> The timeout in milliseconds to establish a connection with BigQuery. Can be alternatively set in the
Spark configuration (<code>spark.conf.set("httpConnectTimeout", ...)</code>) or in Hadoop Configuration
(<code>fs.gs.http.connect-timeout</code>).
<br/> (Optional. Default is 60000 ms. 0 for an infinite timeout, a negative number for 20000)
</td>
<td>Read/Write</td>
</tr>
<tr valign="top">
<td><code>httpReadTimeout</code>
</td>
<td> The timeout in milliseconds to read data from an established connection. Can be alternatively set in the
Spark configuration (<code>spark.conf.set("httpReadTimeout", ...)</code>) or in Hadoop Configuration
(<code>fs.gs.http.read-timeout</code>).
<br/> (Optional. Default is 60000 ms. 0 for an infinite timeout, a negative number for 20000)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>arrowCompressionCodec</code>
</td>
<td> Compression codec while reading from a BigQuery table when using Arrow format. Options :
<code>ZSTD (Zstandard compression)</code>,
<code>LZ4_FRAME (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)</code>,
<code>COMPRESSION_UNSPECIFIED</code>. The recommended compression codec is <code>ZSTD</code>
while using Java.
<br/> (Optional. Defaults to <code>COMPRESSION_UNSPECIFIED</code> which means no compression will be used)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>responseCompressionCodec</code>
</td>
<td> Compression codec used to compress the ReadRowsResponse data. Options:
<code>RESPONSE_COMPRESSION_CODEC_UNSPECIFIED</code>,
<code>RESPONSE_COMPRESSION_CODEC_LZ4</code>
<br/> (Optional. Defaults to <code>RESPONSE_COMPRESSION_CODEC_UNSPECIFIED</code> which means no compression will be used)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>cacheExpirationTimeInMinutes</code>
</td>
<td> The expiration time of the in-memory cache storing query information.
<br/> To disable caching, set the value to 0.
<br/> (Optional. Defaults to 15 minutes)
</td>
<td>Read</td>
</tr>
<tr valign="top">
<td><code>enableModeCheckForSchemaFields</code>
</td>
<td> Checks the mode of every field in destination schema to be equal to the mode in corresponding source field schema, during DIRECT write.
<br/> Default value is true i.e., the check is done by default. If set to false the mode check is ignored.
</td>
<td>Write</td>
</tr>
 <tr valign="top">
   <td><code>enableListInference</code>
</td>
<td> Indicates whether to use schema inference specifically when the mode is Parquet (https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#parquetoptions).
<br/> Defaults to false.
<br/>
</td>
<td>Write</td>
</tr>
<tr>
<td><code>bqChannelPoolSize</code></td>
<td> The (fixed) size of the gRPC channel pool created by the BigQueryReadClient.
<br/>For optimal performance, this should be set to at least the number of cores on the cluster executors.
</td>
<td>Read</td>
</tr>
<tr>
<td><code>createReadSessionTimeoutInSeconds</code>
</td>
<td> The timeout in seconds to create a ReadSession when reading a table.
       <br/> For extremely large tables this value should be increased.
<br/> (Optional. Defaults to 600 seconds)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>queryJobPriority</code>
</td>
<td> Priority levels set for the job while reading data from BigQuery query. The permitted values are:
<ul>
<li><code>BATCH</code> - Query is queued and started as soon as idle resources are available, usually within a few minutes. If the query hasn't started within 3 hours, its priority is changed to <code>INTERACTIVE</code>.</li>
         <li><code>INTERACTIVE</code> - Query is executed as soon as possible and counts towards the concurrent rate limit and the daily rate limit.</li>
</ul>
For WRITE, this option will be effective when DIRECT write is used with OVERWRITE mode, where the connector overwrites the destination table using MERGE statement.
<br/> (Optional. Defaults to <code>INTERACTIVE</code>)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>destinationTableKmsKeyName</code>
</td>
<td>Describes the Cloud KMS encryption key that will be used to protect destination BigQuery
table. The BigQuery Service Account associated with your project requires access to this
encryption key. for further Information about using CMEK with BigQuery see
[here](https://cloud.google.com/bigquery/docs/customer-managed-encryption#key_resource_id).
       <br/><b>Notice:</b> The table will be encrypted by the key only if it is created by the
connector. A pre-existing unencrypted table won't be encrypted just by setting this option.
<br/> (Optional)
</td>
<td>Write</td>
</tr>
<tr>
<td><code>allowMapTypeConversion</code>
</td>
<td>Boolean config to disable conversion from BigQuery records to Spark MapType
when the record has two subfields with field names as <code>key</code> and <code>value</code>.
Default value is <code>true</code> which allows the conversion.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>spark.sql.sources.partitionOverwriteMode</code>
</td>
<td>Config to specify the overwrite mode on write when the table is range/time partitioned.
     Currently supports two modes : <code>STATIC</code> and <code>DYNAMIC</code>. In <code>STATIC</code> mode,
the entire table is overwritten. In <code>DYNAMIC</code> mode, the data is overwritten by partitions of the existing table.
The default value is <code>STATIC</code>.
<br/> (Optional)
</td>
<td>Write</td>
</tr>
<tr>
<td><code>enableReadSessionCaching</code>
</td>
<td>Boolean config to disable read session caching. Caches BigQuery read sessions to allow for faster Spark query planning.
Default value is <code>true</code>.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>readSessionCacheDurationMins</code>
</td>
<td>Config to set the read session caching duration in minutes. Only works if <code>enableReadSessionCaching</code> is <code>true</code> (default).
Allows specifying the duration to cache read sessions for. Maximum allowed value is <code>300</code>.
Default value is <code>5</code>.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>bigQueryJobTimeoutInMinutes</code>
</td>
<td>Config to set the BigQuery job timeout in minutes.
Default value is <code>360</code> minutes.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>snapshotTimeMillis</code>
</td>
<td>A timestamp specified in milliseconds to use to read a table snapshot.
By default this is not set and the latest version of a table is read.
<br/> (Optional)
</td>
<td>Read</td>
</tr>
<tr>
<td><code>bigNumericDefaultPrecision</code>
</td>
<td>An alternative default precision for BigNumeric fields, as the BigQuery default is too wide for Spark. Values can be between 1 and 38.
This default is used only when the field has an unparameterized BigNumeric type.
Please note that there might be data loss if the actual data's precision is more than what is specified.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>bigNumericDefaultScale</code>
</td>
<td>An alternative default scale for BigNumeric fields. Values can be between 0 and 38, and less than bigNumericFieldsPrecision.
This default is used only when the field has an unparameterized BigNumeric type.
Please note that there might be data loss if the actual data's scale is more than what is specified.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
<tr>
<td><code>credentialsScopes</code>
</td>
<td>Replaces the scopes of the Google Credentials if the credentials type supports that.
If scope replacement is not supported then it does nothing.
<br/>The value should be a comma separated list of valid scopes.
<br/> (Optional)
</td>
<td>Read/Write</td>
</tr>
</table>
Options can also be set outside of the code, using the `--conf` parameter of `spark-submit` or `--properties` parameter
of the `gcloud dataproc submit spark`. In order to use this, prepend the prefix `spark.datasource.bigquery.` to any of
the options, for example `spark.conf.set("temporaryGcsBucket", "some-bucket")` can also be set as
`--conf spark.datasource.bigquery.temporaryGcsBucket=some-bucket`.
### Data types
With the exception of `DATETIME` and `TIME` all BigQuery data types map directly into the corresponding Spark SQL data type. Here are all of the mappings:
<!--- TODO(#2): Convert to markdown -->
<table>
<tr valign="top">
<td><strong>BigQuery Standard SQL Data Type </strong>
</td>
<td><strong>Spark SQL</strong>
<p>
<strong>Data Type</strong>
</td>
<td><strong>Notes</strong>
</td>
</tr>
<tr valign="top">
<td><strong><code>BOOL</code></strong>
</td>
<td><strong><code>BooleanType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>INT64</code></strong>
</td>
<td><strong><code>LongType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>FLOAT64</code></strong>
</td>
<td><strong><code>DoubleType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>NUMERIC</code></strong>
</td>
<td><strong><code>DecimalType</code></strong>
</td>
<td>
Please refer to <a href="#numeric-and-bignumeric-support">Numeric and BigNumeric support</a>
</td>
</tr>
<tr valign="top">
<td><strong><code>BIGNUMERIC</code></strong>
</td>
<td><strong><code>DecimalType</code></strong>
</td>
<td>
Please refer to <a href="#numeric-and-bignumeric-support">Numeric and BigNumeric support</a>
</td>
</tr>
<tr valign="top">
<td><strong><code>STRING</code></strong>
</td>
<td><strong><code>StringType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>BYTES</code></strong>
</td>
<td><strong><code>BinaryType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>STRUCT</code></strong>
</td>
<td><strong><code>StructType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>ARRAY</code></strong>
</td>
<td><strong><code>ArrayType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>TIMESTAMP</code></strong>
</td>
<td><strong><code>TimestampType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>DATE</code></strong>
</td>
<td><strong><code>DateType</code></strong>
</td>
<td>
</td>
</tr>
<tr valign="top">
<td><strong><code>DATETIME</code></strong>
</td>
<td><strong><code>StringType</code>, </strong><strong><code>TimestampNTZType</code>*</strong>
</td>
<td>Spark has no DATETIME type.
<p>
Spark string can be written to an existing BQ DATETIME column provided it is in the <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#canonical_format_for_datetime_literals">format for BQ DATETIME literals</a>.
<p>
* For Spark 3.4+, BQ DATETIME is read as Spark's TimestampNTZ type i.e. java LocalDateTime
</td>
</tr>
<tr valign="top">
<td><strong><code>TIME</code></strong>
</td>
<td><strong><code>LongType</code>, <strong><code>StringType</code>*</strong>
</td>
<td>Spark has no TIME type. The generated longs, which indicate <a href="https://avro.apache.org/docs/1.8.0/spec.html#Time+%2528microsecond+precision%2529">microseconds since midnight</a> can be safely cast to TimestampType, but this causes the date to be inferred as the current day. Thus times are left as longs and user can cast if they like.
<p>
When casting to Timestamp TIME have the same TimeZone issues as DATETIME
<p>
* Spark string can be written to an existing BQ TIME column provided it is in the <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#canonical_format_for_time_literals">format for BQ TIME literals</a>.
</td>
</tr>
<tr valign="top">
<td><strong><code>JSON</code></strong>
</td>
<td><strong><code>StringType</code></strong>
</td>
<td>Spark has no JSON type. The values are read as String. In order to write JSON back to BigQuery, the following conditions are <b>REQUIRED</b>:
<ul>
<li>Use the <code>INDIRECT</code> write method</li>
<li>Use the <code>AVRO</code> intermediate format</li>
<li>The DataFrame field <b>MUST</b> be of type <code>String</code> and has an entry of sqlType=JSON in its metadata</li>
</ul>
</td>
</tr>
<tr valign="top" id="datatype:map">
<td><strong><code>ARRAY<STRUCT<key,value>></code></strong>
</td>
<td><strong><code>MapType</code></strong>
</td>
<td>BigQuery has no MAP type, therefore similar to other conversions like Apache Avro and BigQuery Load jobs, the connector converts a Spark Map to a REPEATED STRUCT<key,value>.
This means that while writing and reading of maps is available, running a SQL on BigQuery that uses map semantics is not supported.
To refer to the map's values using BigQuery SQL, please check the <a href="https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays">BigQuery documentation</a>.
Due to these incompatibilities, a few restrictions apply:
<ul>
<li>Keys can be Strings only</li>
<li>Values can be simple types (not structs)</li>
<li>For INDIRECT write, use the <code>AVRO</code> intermediate format. DIRECT write is supported as well</li>
</ul>
</td>
</tr>
</table>
#### Spark ML Data Types Support
The Spark ML [Vector](https://spark.apache.org/docs/2.4.5/api/python/pyspark.ml.html#pyspark.ml.linalg.Vector) and
[Matrix](https://spark.apache.org/docs/2.4.5/api/python/pyspark.ml.html#pyspark.ml.linalg.Matrix) are supported,
including their dense and sparse versions. The data is saved as a BigQuery RECORD. Notice that a suffix is added to
the field's description which includes the spark type of the field.
In order to write those types to BigQuery, use the ORC or Avro intermediate format, and have them as column of the
Row (i.e. not a field in a struct).
#### Numeric and BigNumeric support
BigQuery's BigNumeric has a precision of 76.76 (the 77th digit is partial) and scale of 38. Since
this precision and scale is beyond spark's DecimalType (38 scale and 38 precision) support, it means
that BigNumeric fields with precision larger than 38 cannot be used. Once this Spark limitation
is removed, the connector will be updated accordingly.
The Spark Decimal/BigQuery Numeric conversion tries to preserve the parameterization of the type, i.e
`NUMERIC(10,2)` will be converted to `Decimal(10,2)` and vice versa. Notice however that there are
cases where [the parameters are lost](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#parameterized_data_types).
This means that the parameters will be reverted to the defaults - NUMERIC (38,9) and BIGNUMERIC(76,38).
This means that at the moment, BigNumeric read is supported only from a standard table, but not from
BigQuery view or when [reading data from a BigQuery query](#reading-data-from-a-bigquery-query).
### Filtering
The connector automatically computes column pruning and filter pushdown from the DataFrame's `SELECT` statement e.g.
```
spark.read.bigquery("bigquery-public-data:samples.shakespeare")
.select("word")
.where("word = 'Hamlet' or word = 'Claudius'")
.collect()
```
prunes the columns to `word` and pushes down the predicate filter `word = 'Hamlet' or word = 'Claudius'`.
If you do not wish to make multiple read requests to BigQuery, you can cache the DataFrame before filtering e.g.:
```
val cachedDF = spark.read.bigquery("bigquery-public-data:samples.shakespeare").cache()
val rows = cachedDF.select("word")
.where("word = 'Hamlet'")
.collect()
// All of the table was cached and this doesn't require an API call
val otherRows = cachedDF.select("word_count")
.where("word = 'Romeo'")
.collect()
```
You can also manually specify the `filter` option, which will override automatic pushdown and Spark will do the rest of the filtering in the client.
### Partitioned Tables
The pseudo columns \_PARTITIONDATE and \_PARTITIONTIME are not part of the table schema. Therefore in order to query by the partitions of [partitioned tables](https://cloud.google.com/bigquery/docs/partitioned-tables) do not use the where() method shown above. Instead, add a filter option in the following manner:
```
val df = spark.read.format("bigquery")
.option("filter", "_PARTITIONDATE > '2019-01-01'")
...
.load(TABLE)
```
### Configuring Partitioning
By default, the connector calculates the requested `maxParallelism` as the larger of `preferredMinParallelism` (which defaults to 3 times the application's default parallelism) and 20,000. BigQuery may limit the number of partitions based on server constraints.
Both <code>[maxParallelism](#properties)</code> and <code>[preferredMinParallelism](#properties)</code> can be configured explicitly to control the number of partitions.
## Tagging BigQuery Resources
In order to support tracking the usage of BigQuery resources the connectors
offers the following options to tag BigQuery resources:
### Adding BigQuery Jobs Labels
The connector can launch BigQuery load and query jobs. Adding labels to the jobs
is done in the following manner:
```
spark.conf.set("bigQueryJobLabel.cost_center", "analytics")
spark.conf.set("bigQueryJobLabel.usage", "nightly_etl")
```
This will create labels `cost_center`=`analytics` and `usage`=`nightly_etl`.
### Adding BigQuery Storage Trace ID
Used to annotate the read and write sessions. The trace ID is of the format
`Spark:ApplicationName:JobID`. This is an opt-in option, and to use it the user
needs to set the `traceApplicationName` property. JobID is auto generated by the
Dataproc job ID, with a fallback to the Spark application ID (such as
`application_1648082975639_0001`). The Job ID can be overridden by setting the
`traceJobId` option. Notice that the total length of the trace ID cannot be over
256 characters.
## Using in Jupyter Notebooks
The connector can be used in [Jupyter notebooks](https://jupyter.org/) even if
it is not installed on the Spark cluster. It can be added as an external jar in
using the following code:
**Python:**
```python
from pyspark.sql import SparkSession
spark = SparkSession.builder \
.config("spark.jars.packages", "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.44.1") \
.getOrCreate()
df = spark.read.format("bigquery") \
.load("dataset.table")
```
**Scala:**
```scala
val spark = SparkSession.builder
.config("spark.jars.packages", "com.google.cloud.spark:spark-bigquery-with-dependencies_2.12:0.44.1")
.getOrCreate()
val df = spark.read.format("bigquery")
.load("dataset.table")
```
In case Spark cluster is using Scala 2.12 (it's optional for Spark 2.4.x,
mandatory in 3.0.x), then the relevant package is
com.google.cloud.spark:spark-bigquery-with-dependencies_**2.12**:0.44.1. In
order to know which Scala version is used, please run the following code:
**Python:**
```python
spark.sparkContext._jvm.scala.util.Properties.versionString()
```
**Scala:**
```python
scala.util.Properties.versionString
```
## Compiling against the connector
Unless you wish to use the implicit Scala API `spark.read.bigquery("TABLE_ID")`, there is no need to compile against the connector.
To include the connector in your project:
### Maven
```xml
<dependency>
<groupId>com.google.cloud.spark</groupId>
<artifactId>spark-bigquery-with-dependencies_${scala.version}</artifactId>
<version>0.44.1</version>
</dependency>
```
### SBT
```sbt
libraryDependencies += "com.google.cloud.spark" %% "spark-bigquery-with-dependencies" % "0.44.1"
```
### Connector metrics and how to view them
Spark populates a lot of metrics which can be found by the end user in the spark history page. But all these metrics are spark related which are implicitly collected without any change from the connector.
But there are a few metrics which are populated from BigQuery and are currently visible in the application logs, which can be read in the driver/executor logs.
From Spark 3.2 onwards, spark has provided the API to expose custom metrics in the spark UI page https://spark.apache.org/docs/3.2.0/api/java/org/apache/spark/sql/connector/metric/CustomMetric.html
Currently, using this API, connector exposes the following bigquery metrics during read
<table id="metricstable">
<style>
table#metricstable td, table th
{
word-break:break-word
}
</style>
<tr valign="top">
<th style="min-width:240px">Metric Name</th>
<th style="min-width:240px">Description</th>
</tr>
<tr valign="top">
<td><code>bytes read</code></td>
<td>number of BigQuery bytes read</td>
</tr>
<tr valign="top">
<td><code>rows read</code></td>
<td>number of BigQuery rows read</td>
</tr>
<tr valign="top">
<td><code>scan time</code></td>
<td>the amount of time spent between read rows response requested to obtained across all the executors, in milliseconds.</td>
</tr>
<tr valign="top">
<td><code>parse time</code></td>
<td>the amount of time spent for parsing the rows read across all the executors, in milliseconds.</td>
</tr>
<tr valign="top">
<td><code>spark time</code></td>
<td>the amount of time spent in spark to process the queries (i.e., apart from scanning and parsing), across all the executors, in milliseconds.</td>
</tr>
</table>
**Note:** To use the metrics in the Spark UI page, you need to make sure the `spark-bigquery-metrics-0.44.1.jar` is in the class path before starting the history-server and the connector version is `spark-3.2` or above.
## FAQ
### What is the Pricing for the Storage API?
See the [BigQuery pricing documentation](https://cloud.google.com/bigquery/pricing#storage-api).
### I have very few partitions
You can manually set the number of partitions with the `maxParallelism` property. BigQuery may provide fewer partitions than you ask for. See [Configuring Partitioning](#configuring-partitioning).
You can also always repartition after reading in Spark.
### I get quota exceeded errors while writing
If there are too many partitions the CreateWriteStream or Throughput [quotas](https://cloud.google.com/bigquery/quotas#write-api-limits)
may be exceeded. This occurs because while the data within each partition is processed serially, independent
partitions may be processed in parallel on different nodes within the spark cluster. Generally, to ensure maximum
sustained throughput you should file a quota increase request. However, you can also manually reduce the number of
partitions being written by calling <code>coalesce</code> on the DataFrame to mitigate this problem.
```
desiredPartitionCount = 5
dfNew = df.coalesce(desiredPartitionCount)
dfNew.write
```
A rule of thumb is to have a single partition handle at least 1GB of data.
Also note that a job running with the `writeAtLeastOnce` property turned on will not encounter CreateWriteStream
quota errors.
### How do I authenticate outside GCE / Dataproc?
The connector needs an instance of a GoogleCredentials in order to connect to the BigQuery APIs. There are multiple
options to provide it:
* The default is to load the JSON key from the `GOOGLE_APPLICATION_CREDENTIALS` environment variable, as described
[here](https://cloud.google.com/docs/authentication/getting-started).
* In case the environment variable cannot be changed, the credentials file can be configured
  as a spark option. The file should reside on the same path on all the nodes of the cluster.
```
// Globally
spark.conf.set("credentialsFile", "</path/to/key/file>")
// Per read/Write
spark.read.format("bigquery").option("credentialsFile", "</path/to/key/file>")
```
* Credentials can also be provided explicitly, either as a parameter or from Spark runtime configuration.
They should be passed in as a base64-encoded string directly.
```
// Globally
spark.conf.set("credentials", "<SERVICE_ACCOUNT_JSON_IN_BASE64>")
// Per read/Write
spark.read.format("bigquery").option("credentials", "<SERVICE_ACCOUNT_JSON_IN_BASE64>")
```
* In cases where the user has an internal service providing the Google AccessToken, a custom implementation
can be done, creating only the AccessToken and providing its TTL. Token refresh will re-generate a new token. In order
to use this, implement the
[com.google.cloud.bigquery.connector.common.AccessTokenProvider](https://github.com/GoogleCloudDataproc/spark-bigquery-connector/tree/master/bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessTokenProvider.java)
interface. The fully qualified class name of the implementation should be provided in the `gcpAccessTokenProvider`
option. `AccessTokenProvider` must be implemented in Java or other JVM language such as Scala or Kotlin. It must
either have a no-arg constructor or a constructor accepting a single `java.lang.String` argument. This configuration
parameter can be supplied using the `gcpAccessTokenProviderConfig` option. If this is not provided then the no-arg
constructor will be called. The jar containing the implementation should be on the cluster's classpath.
```
// Globally
spark.conf.set("gcpAccessTokenProvider", "com.example.ExampleAccessTokenProvider")
// Per read/Write
spark.read.format("bigquery").option("gcpAccessTokenProvider", "com.example.ExampleAccessTokenProvider")
```
* Service account impersonation can be configured for a specific username and a group name, or
for all users by default using below properties:
- `gcpImpersonationServiceAccountForUser_<USER_NAME>` (not set by default)
The service account impersonation for a specific user.
- `gcpImpersonationServiceAccountForGroup_<GROUP_NAME>` (not set by default)
The service account impersonation for a specific group.
- `gcpImpersonationServiceAccount` (not set by default)
Default service account impersonation for all users.
If any of the above properties are set then the service account specified will be impersonated by
generating short-lived credentials when accessing BigQuery.
If more than one property is set then the service account associated with the username will take
precedence over the service account associated with the group name for a matching user and group,
which in turn will take precedence over default service account impersonation.
* For a simpler application, where access token refresh is not required, another alternative is to pass the access token
as the `gcpAccessToken` configuration option. You can get the access token by running
`gcloud auth application-default print-access-token`.
```
// Globally
spark.conf.set("gcpAccessToken", "<access-token>")
// Per read/Write
spark.read.format("bigquery").option("gcpAccessToken", "<access-token>")
```
**Important:** The `CredentialsProvider` and `AccessTokenProvider` need to be implemented in Java or
other JVM language such as Scala or Kotlin. The jar containing the implementation should be on the cluster's classpath.
**Notice:** Only one of the above options should be provided.
### How do I connect to GCP/BigQuery via Proxy?
To connect to a forward proxy and to authenticate the user credentials, configure the following options.
`proxyAddress`: Address of the proxy server. The proxy must be an HTTP proxy and address should be in the `host:port`
format.
`proxyUsername`: The username used to connect to the proxy.
`proxyPassword`: The password used to connect to the proxy.
```
val df = spark.read.format("bigquery")
gitextract_x5ltg6ft/
├── .github/
│ └── workflows/
│ ├── codeql-analysis.yml
│ ├── cpd.yaml
│ └── spotless.yaml
├── .gitignore
├── .mvn/
│ └── wrapper/
│ ├── MavenWrapperDownloader.java
│ └── maven-wrapper.properties
├── CHANGES.md
├── CONTRIBUTING.md
├── LICENSE
├── README-template.md
├── README.md
├── bigquery-connector-common/
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── bigquery/
│ │ └── connector/
│ │ └── common/
│ │ ├── AccessToken.java
│ │ ├── AccessTokenProvider.java
│ │ ├── AccessTokenProviderCredentials.java
│ │ ├── ArrowReaderIterator.java
│ │ ├── ArrowUtil.java
│ │ ├── BigQueryClient.java
│ │ ├── BigQueryClientFactory.java
│ │ ├── BigQueryClientFactoryConfig.java
│ │ ├── BigQueryClientModule.java
│ │ ├── BigQueryConfig.java
│ │ ├── BigQueryConfigurationUtil.java
│ │ ├── BigQueryConnectorException.java
│ │ ├── BigQueryCredentialsSupplier.java
│ │ ├── BigQueryDirectDataWriterHelper.java
│ │ ├── BigQueryErrorCode.java
│ │ ├── BigQueryJobCompletionListener.java
│ │ ├── BigQueryMetrics.java
│ │ ├── BigQueryProxyConfig.java
│ │ ├── BigQueryProxyTransporterBuilder.java
│ │ ├── BigQueryPushdownException.java
│ │ ├── BigQueryPushdownUnsupportedException.java
│ │ ├── BigQueryStorageReadRowsTracer.java
│ │ ├── BigQueryTracerFactory.java
│ │ ├── BigQueryUtil.java
│ │ ├── ComparisonResult.java
│ │ ├── DecompressReadRowsResponse.java
│ │ ├── DurationTimer.java
│ │ ├── EnvironmentContext.java
│ │ ├── GcpUtil.java
│ │ ├── HttpUtil.java
│ │ ├── IdentityTokenSupplier.java
│ │ ├── IteratorMultiplexer.java
│ │ ├── LazyInitializationSupplier.java
│ │ ├── LoggingBigQueryStorageReadRowsTracer.java
│ │ ├── LoggingBigQueryTracerFactory.java
│ │ ├── MaterializationConfiguration.java
│ │ ├── NonInterruptibleBlockingBytesChannel.java
│ │ ├── ParallelArrowReader.java
│ │ ├── ParameterMode.java
│ │ ├── QueryParameterHelper.java
│ │ ├── ReadRowsHelper.java
│ │ ├── ReadRowsResponseInputStreamEnumeration.java
│ │ ├── ReadSessionCreator.java
│ │ ├── ReadSessionCreatorConfig.java
│ │ ├── ReadSessionCreatorConfigBuilder.java
│ │ ├── ReadSessionMetrics.java
│ │ ├── ReadSessionResponse.java
│ │ ├── StreamCombiningIterator.java
│ │ ├── UserAgentProvider.java
│ │ ├── VersionProvider.java
│ │ └── WriteStreamStatistics.java
│ └── test/
│ ├── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── bigquery/
│ │ └── connector/
│ │ └── common/
│ │ ├── BigQueryClientFactoryTest.java
│ │ ├── BigQueryConfigurationUtilTest.java
│ │ ├── BigQueryConnectorExceptionTest.java
│ │ ├── BigQueryCredentialsSupplierTest.java
│ │ ├── BigQueryUtilTest.java
│ │ ├── DurationTimerTest.java
│ │ ├── HttpUtilTest.java
│ │ ├── IteratorMultiplexerTest.java
│ │ ├── LoggingBigQueryStorageReadRowsTracerTest.java
│ │ ├── MockResponsesBatch.java
│ │ ├── ParallelArrowReaderTest.java
│ │ ├── ReadRowsHelperTest.java
│ │ ├── ReadSessionCreatorTest.java
│ │ └── integration/
│ │ ├── CustomCredentialsIntegrationTest.java
│ │ └── DefaultCredentialsDelegateAccessTokenProvider.java
│ └── resources/
│ └── external-account-credentials.json
├── cloudbuild/
│ ├── Dockerfile
│ ├── cloudbuild.yaml
│ ├── gcp-settings.xml
│ ├── nightly.sh
│ ├── nightly.yaml
│ └── presubmit.sh
├── coverage/
│ └── pom.xml
├── examples/
│ ├── notebooks/
│ │ ├── Advanced ML Pipelines.ipynb
│ │ ├── Distribute_Generic_Functions.ipynb
│ │ ├── Top words in Shakespeare by work.ipynb
│ │ └── Transform_with_Python.ipynb
│ └── python/
│ ├── query_results.py
│ └── shakespeare.py
├── mvnw
├── mvnw.cmd
├── pom.xml
├── scalastyle-config.xml
├── scripts/
│ └── verify-shading.sh
├── spark-bigquery-connector-common/
│ ├── pom.xml
│ ├── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ ├── com/
│ │ │ │ │ └── google/
│ │ │ │ │ └── cloud/
│ │ │ │ │ └── spark/
│ │ │ │ │ └── bigquery/
│ │ │ │ │ ├── ArrowBinaryIterator.java
│ │ │ │ │ ├── AvroBinaryIterator.java
│ │ │ │ │ ├── BigQueryConnectorUtils.java
│ │ │ │ │ ├── BigQueryRelation.java
│ │ │ │ │ ├── BigQueryRelationProviderBase.java
│ │ │ │ │ ├── BigQueryStreamWriter.java
│ │ │ │ │ ├── BigQueryStreamingSink.java
│ │ │ │ │ ├── BigQueryUtilScala.java
│ │ │ │ │ ├── DataFrameToRDDConverter.java
│ │ │ │ │ ├── DataSourceVersion.java
│ │ │ │ │ ├── GuiceInjectorCreator.java
│ │ │ │ │ ├── InjectorBuilder.java
│ │ │ │ │ ├── InjectorFactory.java
│ │ │ │ │ ├── InternalRowIterator.java
│ │ │ │ │ ├── PartitionOverwriteMode.java
│ │ │ │ │ ├── ProtobufUtils.java
│ │ │ │ │ ├── ReadRowsResponseToInternalRowIteratorConverter.java
│ │ │ │ │ ├── SchemaConverters.java
│ │ │ │ │ ├── SchemaConvertersConfiguration.java
│ │ │ │ │ ├── SparkBigQueryConfig.java
│ │ │ │ │ ├── SparkBigQueryConnectorModule.java
│ │ │ │ │ ├── SparkBigQueryConnectorUserAgentProvider.java
│ │ │ │ │ ├── SparkBigQueryConnectorVersionProvider.java
│ │ │ │ │ ├── SparkBigQueryLineageProvider.java
│ │ │ │ │ ├── SparkBigQueryProxyAndHttpConfig.java
│ │ │ │ │ ├── SparkBigQueryUtil.java
│ │ │ │ │ ├── SparkFilterUtils.java
│ │ │ │ │ ├── SupportedCustomDataType.java
│ │ │ │ │ ├── SupportsQueryPushdown.java
│ │ │ │ │ ├── TypeConverter.java
│ │ │ │ │ ├── direct/
│ │ │ │ │ │ ├── BigQueryPartition.java
│ │ │ │ │ │ ├── BigQueryRDDContext.java
│ │ │ │ │ │ ├── BigQueryRDDFactory.java
│ │ │ │ │ │ ├── DirectBigQueryRelation.java
│ │ │ │ │ │ ├── Scala213BigQueryRDD.java
│ │ │ │ │ │ └── ScalaIterator.java
│ │ │ │ │ ├── events/
│ │ │ │ │ │ ├── BigQueryJobCompletedEvent.java
│ │ │ │ │ │ ├── LoadJobCompletedEvent.java
│ │ │ │ │ │ └── QueryJobCompletedEvent.java
│ │ │ │ │ ├── examples/
│ │ │ │ │ │ └── JavaShakespeare.java
│ │ │ │ │ ├── metrics/
│ │ │ │ │ │ ├── DataOrigin.java
│ │ │ │ │ │ ├── SparkBigQueryConnectorMetricsUtils.java
│ │ │ │ │ │ ├── SparkBigQueryReadSessionMetrics.java
│ │ │ │ │ │ └── SparkMetricsSource.java
│ │ │ │ │ ├── pushdowns/
│ │ │ │ │ │ └── SparkBigQueryPushdown.java
│ │ │ │ │ ├── util/
│ │ │ │ │ │ └── HdfsUtils.java
│ │ │ │ │ └── write/
│ │ │ │ │ ├── BigQueryDataSourceWriterInsertableRelation.java
│ │ │ │ │ ├── BigQueryDeprecatedIndirectInsertableRelation.java
│ │ │ │ │ ├── BigQueryInsertableRelationBase.java
│ │ │ │ │ ├── BigQueryWriteHelper.java
│ │ │ │ │ ├── CreatableRelationProviderHelper.java
│ │ │ │ │ ├── DataSourceWriterContextPartitionHandler.java
│ │ │ │ │ ├── IntermediateDataCleaner.java
│ │ │ │ │ └── context/
│ │ │ │ │ ├── AvroIntermediateRecordWriter.java
│ │ │ │ │ ├── BigQueryDataSourceWriterModule.java
│ │ │ │ │ ├── BigQueryDirectDataSourceWriterContext.java
│ │ │ │ │ ├── BigQueryDirectDataWriterContext.java
│ │ │ │ │ ├── BigQueryDirectDataWriterContextFactory.java
│ │ │ │ │ ├── BigQueryDirectWriterCommitMessageContext.java
│ │ │ │ │ ├── BigQueryIndirectDataSourceWriterContext.java
│ │ │ │ │ ├── BigQueryIndirectDataWriterContext.java
│ │ │ │ │ ├── BigQueryIndirectDataWriterContextFactory.java
│ │ │ │ │ ├── BigQueryIndirectWriterCommitMessageContext.java
│ │ │ │ │ ├── DataSourceWriterContext.java
│ │ │ │ │ ├── DataWriterContext.java
│ │ │ │ │ ├── DataWriterContextFactory.java
│ │ │ │ │ ├── IntermediateRecordWriter.java
│ │ │ │ │ ├── NoOpDataWriterContext.java
│ │ │ │ │ └── WriterCommitMessageContext.java
│ │ │ │ └── org/
│ │ │ │ └── apache/
│ │ │ │ └── spark/
│ │ │ │ └── sql/
│ │ │ │ ├── Scala213SparkSqlUtils.java
│ │ │ │ └── SparkSqlUtils.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ ├── io.openlineage.spark.extension.OpenLineageExtensionProvider
│ │ │ └── org.apache.spark.sql.SparkSqlUtils
│ │ └── test/
│ │ ├── java/
│ │ │ ├── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── AvroSchemaConverterTest.java
│ │ │ │ ├── BigQueryRelationProviderTestBase.java
│ │ │ │ ├── BigQueryRelationTest.java
│ │ │ │ ├── DataSourceOptions.java
│ │ │ │ ├── InjectorBuilderTest.java
│ │ │ │ ├── MockResponsesBatch.java
│ │ │ │ ├── MockSparkBigQueryPushdown.java
│ │ │ │ ├── OptimizeLoadUriListTest.java
│ │ │ │ ├── ProtobufUtilsTest.java
│ │ │ │ ├── SchemaConverterTest.java
│ │ │ │ ├── SparkBigQueryConfigTest.java
│ │ │ │ ├── SparkBigQueryProxyAndHttpConfigTest.java
│ │ │ │ ├── SparkBigQueryUtilTest.java
│ │ │ │ ├── SparkFilterUtilsTest.java
│ │ │ │ ├── SupportedCustomDataTypeTest.java
│ │ │ │ ├── TestConstants.java
│ │ │ │ ├── acceptance/
│ │ │ │ │ ├── AcceptanceTestConstants.java
│ │ │ │ │ ├── AcceptanceTestContext.java
│ │ │ │ │ ├── AcceptanceTestUtils.java
│ │ │ │ │ ├── BigNumericDataprocServerlessAcceptanceTestBase.java
│ │ │ │ │ ├── DataprocAcceptanceTestBase.java
│ │ │ │ │ ├── DataprocServerlessAcceptanceTestBase.java
│ │ │ │ │ ├── ReadSheakspeareDataprocServerlessAcceptanceTestBase.java
│ │ │ │ │ └── WriteStreamDataprocServerlessAcceptanceTestBase.java
│ │ │ │ ├── direct/
│ │ │ │ │ └── Scala213BigQueryRDDTest.java
│ │ │ │ ├── integration/
│ │ │ │ │ ├── CatalogIntegrationTestBase.java
│ │ │ │ │ ├── IntegrationTestUtils.java
│ │ │ │ │ ├── OpenLineageIntegrationTestBase.java
│ │ │ │ │ ├── ReadByFormatIntegrationTestBase.java
│ │ │ │ │ ├── ReadFromQueryIntegrationTestBase.java
│ │ │ │ │ ├── ReadIntegrationTestBase.java
│ │ │ │ │ ├── SparkBigQueryIntegrationTestBase.java
│ │ │ │ │ ├── TestConstants.java
│ │ │ │ │ ├── TestDataset.java
│ │ │ │ │ ├── WriteIntegrationTestBase.java
│ │ │ │ │ └── model/
│ │ │ │ │ ├── ColumnOrderTestClass.java
│ │ │ │ │ ├── Data.java
│ │ │ │ │ ├── Friend.java
│ │ │ │ │ ├── Link.java
│ │ │ │ │ ├── NumStruct.java
│ │ │ │ │ ├── Person.java
│ │ │ │ │ ├── RangeData.java
│ │ │ │ │ └── StringStruct.java
│ │ │ │ ├── metrics/
│ │ │ │ │ ├── SparkBigQueryReadSessionMetricsTest.java
│ │ │ │ │ └── SparkMetricsSourceTest.java
│ │ │ │ ├── util/
│ │ │ │ │ └── HdfsUtilsTest.java
│ │ │ │ └── write/
│ │ │ │ ├── DataSourceWriterContextPartitionHandlerTest.java
│ │ │ │ └── context/
│ │ │ │ └── BigQueryDirectDataSourceWriterContextTest.java
│ │ │ └── org/
│ │ │ └── apache/
│ │ │ └── spark/
│ │ │ └── sql/
│ │ │ └── Scala213SparkSqlUtilsTest.java
│ │ └── resources/
│ │ ├── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ ├── ToIteratorTest/
│ │ │ ├── file1.txt
│ │ │ └── file2.csv
│ │ ├── acceptance/
│ │ │ ├── big_numeric.py
│ │ │ ├── read_shakespeare.py
│ │ │ ├── write_stream.py
│ │ │ └── write_stream_data.json
│ │ ├── alltypes.arrow
│ │ ├── alltypes.arrowschema
│ │ ├── alltypes.avro
│ │ ├── alltypes.avroschema.json
│ │ ├── arrowDateTimeRowsInBytes
│ │ ├── arrowDateTimeSchema
│ │ ├── integration/
│ │ │ ├── shakespeare.avro
│ │ │ ├── shakespeare.csv
│ │ │ ├── shakespeare.json
│ │ │ └── shakespeare.parquet
│ │ ├── log4j.properties
│ │ └── spark-bigquery-connector.properties
│ └── third_party/
│ └── apache-spark/
│ ├── LICENSE
│ ├── NOTICE
│ └── src/
│ └── main/
│ └── java/
│ └── com/
│ └── google/
│ └── cloud/
│ └── spark/
│ └── bigquery/
│ ├── ArrowSchemaConverter.java
│ └── AvroSchemaConverter.java
├── spark-bigquery-dsv1/
│ ├── pom.xml
│ ├── spark-bigquery-dsv1-parent/
│ │ └── pom.xml
│ ├── spark-bigquery-dsv1-spark2-support/
│ │ ├── pom.xml
│ │ └── src/
│ │ └── main/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── spark2/
│ │ │ └── Spark2DataFrameToRDDConverter.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── com.google.cloud.spark.bigquery.DataFrameToRDDConverter
│ ├── spark-bigquery-dsv1-spark3-support/
│ │ ├── pom.xml
│ │ └── src/
│ │ └── main/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── spark3/
│ │ │ ├── SerializableAbstractFunction1.java
│ │ │ └── Spark3DataFrameToRDDConverter.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── com.google.cloud.spark.bigquery.DataFrameToRDDConverter
│ ├── spark-bigquery-with-dependencies-parent/
│ │ └── pom.xml
│ ├── spark-bigquery-with-dependencies_2.11/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── javadoc/
│ │ │ └── README.md
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── acceptance/
│ │ ├── Scala211DataprocImage13AcceptanceTest.java
│ │ ├── Scala211DataprocImage13DisableConscryptAcceptanceTest.java
│ │ ├── Scala211DataprocImage14AcceptanceTest.java
│ │ └── Scala211DataprocImage14DisableConscryptAcceptanceTest.java
│ ├── spark-bigquery-with-dependencies_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── javadoc/
│ │ │ └── README.md
│ │ ├── test/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── acceptance/
│ │ │ ├── Scala212DataprocImage15AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage15DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212DataprocImage20AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage20DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212DataprocImage21AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212DataprocImage22AcceptanceTest.java
│ │ │ ├── Scala212DataprocImage22DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala212Spark32WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala212Spark33BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ └── Scala212Spark33ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── test_2.12/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── acceptance/
│ │ ├── Scala212BigNumericDataprocServerlessAcceptanceTest.java
│ │ ├── Scala212ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── Scala212WriteStreamDataprocServerlessAcceptanceTest.java
│ ├── spark-bigquery-with-dependencies_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── test/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── acceptance/
│ │ │ ├── Scala213DataprocImage21AcceptanceTest.java
│ │ │ ├── Scala213DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Scala213Spark33BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark33ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark33WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark34BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark34ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ ├── Scala213Spark35BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ └── Scala213Spark35ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── test_2.13/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── acceptance/
│ │ ├── Scala213BigNumericDataprocServerlessAcceptanceTest.java
│ │ ├── Scala213ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ └── Scala213WriteStreamDataprocServerlessAcceptanceTest.java
│ ├── spark-bigquery_2.11/
│ │ └── pom.xml
│ ├── spark-bigquery_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── DefaultSource.java
│ │ │ │ └── Scala212BigQueryRelationProvider.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── Scala212BigQueryRelationProviderTest.java
│ ├── spark-bigquery_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── DefaultSource.java
│ │ │ │ └── Scala213BigQueryRelationProvider.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── Scala213BigQueryRelationProviderTest.java
│ └── src/
│ ├── build/
│ │ ├── javadoc/
│ │ │ └── README.md
│ │ └── resources/
│ │ └── spark-bigquery-connector.properties
│ └── test/
│ └── java/
│ └── com/
│ └── google/
│ └── cloud/
│ └── spark/
│ └── bigquery/
│ ├── SchemaConvertersTest.java
│ └── integration/
│ ├── DataSourceV1DirectWriteIntegrationTest.java
│ ├── DataSourceV1IndirectWriteIntegrationTest.java
│ ├── DataSourceV1OpenLineageIntegrationTest.java
│ ├── DataSourceV1ReadByFormatIntegrationTest.java
│ ├── DataSourceV1ReadFromQueryIntegrationTest.java
│ ├── DataSourceV1ReadIntegrationTest.java
│ └── DataSourceV1WriteIntegrationTestBase.java
├── spark-bigquery-dsv2/
│ ├── pom.xml
│ ├── spark-2.4-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── BigQueryDataSourceReader.java
│ │ │ │ ├── BigQueryDataSourceV2.java
│ │ │ │ ├── BigQueryDataSourceWriter.java
│ │ │ │ ├── Spark24DataWriter.java
│ │ │ │ ├── Spark24DataWriterFactory.java
│ │ │ │ ├── Spark24InputPartition.java
│ │ │ │ ├── Spark24InputPartitionReader.java
│ │ │ │ ├── Spark24Statistics.java
│ │ │ │ └── Spark24WriterCommitMessage.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark24DataprocImage14AcceptanceTest.java
│ │ │ ├── Spark24DataprocImage14DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark24DataprocImage15AcceptanceTest.java
│ │ │ └── Spark24DataprocImage15DisableConscryptAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark24DirectWriteIntegrationTest.java
│ │ ├── Spark24IndirectWriteIntegrationTest.java
│ │ ├── Spark24OpenLineageIntegrationTest.java
│ │ ├── Spark24ReadByFormatIntegrationTest.java
│ │ ├── Spark24ReadFromQueryIntegrationTest.java
│ │ └── Spark24ReadIntegrationTest.java
│ ├── spark-3.1-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark31BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark31DataprocImage20AcceptanceTest.java
│ │ │ ├── Spark31DataprocImage20DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark31ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark31WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark31DirectWriteIntegrationTest.java
│ │ ├── Spark31IndirectWriteIntegrationTest.java
│ │ ├── Spark31OpenLineageIntegrationTest.java
│ │ ├── Spark31ReadByFormatIntegrationTest.java
│ │ ├── Spark31ReadFromQueryIntegrationTest.java
│ │ └── Spark31ReadIntegrationTest.java
│ ├── spark-3.1-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── BigQueryBatchWrite.java
│ │ │ ├── BigQueryInputPartition.java
│ │ │ ├── BigQueryPartitionReader.java
│ │ │ ├── BigQueryPartitionReaderFactory.java
│ │ │ ├── BigQueryStreamingDataWriter.java
│ │ │ ├── BigQueryStreamingWrite.java
│ │ │ ├── BigQueryStreamingWriterFactory.java
│ │ │ ├── BigQueryTableCreator.java
│ │ │ ├── BigQueryWriteBuilder.java
│ │ │ ├── Spark31BigQueryDataWriter.java
│ │ │ ├── Spark31BigQueryDataWriterFactory.java
│ │ │ ├── Spark31BigQueryScanBuilder.java
│ │ │ ├── Spark31BigQueryTable.java
│ │ │ ├── Spark31BigQueryTableProvider.java
│ │ │ ├── Spark31BigQueryWriterCommitMessage.java
│ │ │ ├── Spark3Statistics.java
│ │ │ └── Spark3Util.java
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.2-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark32BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark32DataprocImage21AcceptanceTest.java
│ │ │ ├── Spark32DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark32ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark32WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark32DirectWriteIntegrationTest.java
│ │ ├── Spark32IndirectWriteIntegrationTest.java
│ │ ├── Spark32OpenLineageIntegrationTest.java
│ │ ├── Spark32ReadByFormatIntegrationTest.java
│ │ ├── Spark32ReadFromQueryIntegrationTest.java
│ │ └── Spark32ReadIntegrationTest.java
│ ├── spark-3.2-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── Spark32BigQueryPartitionReader.java
│ │ │ ├── Spark32BigQueryPartitionReaderFactory.java
│ │ │ ├── Spark32BigQueryScanBuilder.java
│ │ │ ├── Spark32BigQueryTable.java
│ │ │ └── Spark32BigQueryTableProvider.java
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.3-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark33BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark33DataprocImage21AcceptanceTest.java
│ │ │ ├── Spark33DataprocImage21DisableConscryptAcceptanceTest.java
│ │ │ ├── Spark33ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark33WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark33DirectWriteIntegrationTest.java
│ │ ├── Spark33IndirectWriteIntegrationTest.java
│ │ ├── Spark33OpenLineageIntegrationTest.java
│ │ ├── Spark33ReadByFormatIntegrationTest.java
│ │ ├── Spark33ReadFromQueryIntegrationTest.java
│ │ └── Spark33ReadIntegrationTest.java
│ ├── spark-3.3-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── Spark33BigQueryScanBuilder.java
│ │ │ ├── Spark33BigQueryTable.java
│ │ │ └── Spark33BigQueryTableProvider.java
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.4-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ ├── acceptance/
│ │ │ ├── Spark34BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ ├── Spark34ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ └── Spark34WriteStreamDataprocServerlessAcceptanceTest.java
│ │ └── integration/
│ │ ├── Spark34DirectWriteIntegrationTest.java
│ │ ├── Spark34IndirectWriteIntegrationTest.java
│ │ ├── Spark34OpenLineageIntegrationTest.java
│ │ ├── Spark34ReadByFormatIntegrationTest.java
│ │ ├── Spark34ReadFromQueryIntegrationTest.java
│ │ └── Spark34ReadIntegrationTest.java
│ ├── spark-3.4-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── Spark34BigQueryTable.java
│ │ │ │ ├── Spark34BigQueryTableProvider.java
│ │ │ │ └── TimestampNTZTypeConverter.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ └── TimestampNTZTypeConverterTest.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-3.5-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ ├── acceptance/
│ │ │ │ ├── Spark35BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ │ ├── Spark35CatalogAcceptanceTest.java
│ │ │ │ ├── Spark35DataprocImage22AcceptanceTest.java
│ │ │ │ ├── Spark35DataprocImage22DisableConscryptAcceptanceTest.java
│ │ │ │ ├── Spark35ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ │ └── Spark35WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ └── integration/
│ │ │ ├── Spark35CatalogIntegrationTest.java
│ │ │ ├── Spark35DirectWriteIntegrationTest.java
│ │ │ ├── Spark35IndirectWriteIntegrationTest.java
│ │ │ ├── Spark35OpenLineageIntegrationTest.java
│ │ │ ├── Spark35ReadByFormatIntegrationTest.java
│ │ │ ├── Spark35ReadFromQueryIntegrationTest.java
│ │ │ └── Spark35ReadIntegrationTest.java
│ │ └── resources/
│ │ └── acceptance/
│ │ ├── spark-dml-create-table-as-select-in-default-dataset.sql
│ │ ├── spark-dml-create-table-in-default-dataset.sql
│ │ ├── spark-dml-custom-dataset.sql
│ │ └── spark-dml-explicit-table.sql
│ ├── spark-3.5-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ ├── BigQueryCatalog.java
│ │ │ │ ├── NoSuchBigQueryTableException.java
│ │ │ │ └── v2/
│ │ │ │ ├── BigQueryIdentifier.java
│ │ │ │ ├── Spark35BigQueryTable.java
│ │ │ │ ├── Spark35BigQueryTableProvider.java
│ │ │ │ └── Spark35BigQueryWriteBuilder.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-4.0-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ ├── acceptance/
│ │ │ │ ├── Spark40BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ │ ├── Spark40CatalogAcceptanceTest.java
│ │ │ │ ├── Spark40DataprocImage30AcceptanceTest.java
│ │ │ │ ├── Spark40DataprocImage30DisableConscryptAcceptanceTest.java
│ │ │ │ ├── Spark40ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ │ └── Spark40WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ └── integration/
│ │ │ ├── Spark40CatalogIntegrationTest.java
│ │ │ ├── Spark40DirectWriteIntegrationTest.java
│ │ │ ├── Spark40IndirectWriteIntegrationTest.java
│ │ │ ├── Spark40OpenLineageIntegrationTest.java
│ │ │ ├── Spark40ReadByFormatIntegrationTest.java
│ │ │ ├── Spark40ReadFromQueryIntegrationTest.java
│ │ │ └── Spark40ReadIntegrationTest.java
│ │ └── resources/
│ │ └── acceptance/
│ │ ├── spark-dml-create-table-as-select-in-default-dataset.sql
│ │ ├── spark-dml-create-table-in-default-dataset.sql
│ │ ├── spark-dml-custom-dataset.sql
│ │ └── spark-dml-explicit-table.sql
│ ├── spark-4.0-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── Spark40BigQueryTable.java
│ │ │ │ ├── Spark40BigQueryTableProvider.java
│ │ │ │ └── Spark40BigQueryWriteBuilder.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-4.1-bigquery/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── build/
│ │ │ └── resources/
│ │ │ └── spark-bigquery-connector.properties
│ │ ├── main/
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ └── test/
│ │ ├── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ ├── acceptance/
│ │ │ │ ├── Spark41BigNumericDataprocServerlessAcceptanceTest.java
│ │ │ │ ├── Spark41CatalogAcceptanceTest.java
│ │ │ │ ├── Spark41DataprocImage30AcceptanceTest.java
│ │ │ │ ├── Spark41DataprocImage30DisableConscryptAcceptanceTest.java
│ │ │ │ ├── Spark41ReadSheakspeareDataprocServerlessAcceptanceTest.java
│ │ │ │ └── Spark41WriteStreamDataprocServerlessAcceptanceTest.java
│ │ │ └── integration/
│ │ │ ├── Spark41CatalogIntegrationTest.java
│ │ │ ├── Spark41DirectWriteIntegrationTest.java
│ │ │ ├── Spark41IndirectWriteIntegrationTest.java
│ │ │ ├── Spark41OpenLineageIntegrationTest.java
│ │ │ ├── Spark41ReadByFormatIntegrationTest.java
│ │ │ ├── Spark41ReadFromQueryIntegrationTest.java
│ │ │ └── Spark41ReadIntegrationTest.java
│ │ └── resources/
│ │ └── acceptance/
│ │ ├── spark-dml-create-table-as-select-in-default-dataset.sql
│ │ ├── spark-dml-create-table-in-default-dataset.sql
│ │ ├── spark-dml-custom-dataset.sql
│ │ └── spark-dml-explicit-table.sql
│ ├── spark-4.1-bigquery-lib/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── v2/
│ │ │ │ ├── Spark41BigQueryTable.java
│ │ │ │ ├── Spark41BigQueryTableProvider.java
│ │ │ │ └── Spark41BigQueryWriteBuilder.java
│ │ │ └── resources/
│ │ │ └── META-INF/
│ │ │ └── services/
│ │ │ └── com.google.cloud.spark.bigquery.TypeConverter
│ │ └── test/
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ ├── spark-bigquery-dsv2-common/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ └── java/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── v2/
│ │ │ ├── BaseBigQuerySource.java
│ │ │ └── context/
│ │ │ ├── ArrowColumnBatchPartitionReaderContext.java
│ │ │ ├── ArrowInputPartitionContext.java
│ │ │ ├── BigQueryDataSourceReaderContext.java
│ │ │ ├── BigQueryDataSourceReaderModule.java
│ │ │ ├── BigQueryInputPartitionContext.java
│ │ │ ├── BigQueryInputPartitionReaderContext.java
│ │ │ ├── EmptyProjectionInputPartitionContext.java
│ │ │ ├── EmptyProjectionInputPartitionReaderContext.java
│ │ │ ├── InputPartitionContext.java
│ │ │ ├── InputPartitionReaderContext.java
│ │ │ └── StatisticsContext.java
│ │ └── test/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── v2/
│ │ └── context/
│ │ ├── ArrowInputPartitionContextTest.java
│ │ └── BigQueryInputPartitionReaderContextTest.java
│ ├── spark-bigquery-dsv2-parent/
│ │ └── pom.xml
│ └── spark-bigquery-metrics/
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ └── java/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── v2/
│ │ └── customMetrics/
│ │ ├── MetricUtils.java
│ │ ├── SparkBigQueryBytesReadMetric.java
│ │ ├── SparkBigQueryCustomMetricConstants.java
│ │ ├── SparkBigQueryNumberOfReadStreamsMetric.java
│ │ ├── SparkBigQueryParseTimeMetric.java
│ │ ├── SparkBigQueryRowsReadMetric.java
│ │ ├── SparkBigQueryScanTimeMetric.java
│ │ ├── SparkBigQueryTaskMetric.java
│ │ └── SparkBigQueryTimeInSparkMetric.java
│ └── test/
│ └── java/
│ └── com/
│ └── google/
│ └── cloud/
│ └── spark/
│ └── bigquery/
│ └── v2/
│ └── customMetrics/
│ ├── SparkBigQueryBytesReadMetricTest.java
│ ├── SparkBigQueryNumberOfReadStreamsMetricTest.java
│ ├── SparkBigQueryParseTimeMetricTest.java
│ ├── SparkBigQueryRowsReadMetricTest.java
│ ├── SparkBigQueryScanTimeMetricTest.java
│ ├── SparkBigQueryTaskMetricTest.java
│ └── SparkBigQueryTimeInSparkMetricTest.java
├── spark-bigquery-parent/
│ └── pom.xml
├── spark-bigquery-pushdown/
│ ├── pom.xml
│ ├── pushdown_common_src/
│ │ ├── main/
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── AggregateQuery.scala
│ │ │ ├── BaseSparkBigQueryPushdown.scala
│ │ │ ├── BigQuerySQLQuery.scala
│ │ │ ├── BigQuerySQLStatement.scala
│ │ │ ├── BigQueryStrategy.scala
│ │ │ ├── BinaryOperationExtractor.scala
│ │ │ ├── CastExpressionExtractor.scala
│ │ │ ├── FilterQuery.scala
│ │ │ ├── JoinExtractor.scala
│ │ │ ├── JoinQuery.scala
│ │ │ ├── LeftSemiJoinQuery.scala
│ │ │ ├── ProjectQuery.scala
│ │ │ ├── SortLimitQuery.scala
│ │ │ ├── SourceQuery.scala
│ │ │ ├── SparkBigQueryPushdownUtil.scala
│ │ │ ├── SparkExpressionConverter.scala
│ │ │ ├── SparkExpressionFactory.scala
│ │ │ ├── SparkPlanFactory.scala
│ │ │ ├── UnaryOperationExtractor.scala
│ │ │ ├── UnionOperationExtractor.scala
│ │ │ ├── UnionQuery.scala
│ │ │ └── WindowQuery.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── AggregateQuerySuite.scala
│ │ ├── BigQuerySQLStatementSuite.scala
│ │ ├── BigQueryStrategySuite.scala
│ │ ├── CastExpressionExtractorSuite.scala
│ │ ├── FilterQuerySuite.scala
│ │ ├── JoinQuerySuite.scala
│ │ ├── ProjectQuerySuite.scala
│ │ ├── SortLimitQuerySuite.scala
│ │ ├── SourceQuerySuite.scala
│ │ ├── SparkBigQueryPushdownUtilSuite.scala
│ │ ├── SparkExpressionConverterSuite.scala
│ │ ├── TestConstants.scala
│ │ ├── UnaryOperationExtractorSuite.scala
│ │ ├── UnionOperationExtractorSuite.scala
│ │ ├── UnionQuerySuite.scala
│ │ └── WindowQuerySuite.scala
│ ├── spark-2.4-bigquery-pushdown_2.11/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark24BigQueryPushdown.scala
│ │ │ ├── Spark24BigQueryPushdownPlan.scala
│ │ │ ├── Spark24BigQueryStrategy.scala
│ │ │ ├── Spark24ExpressionConverter.scala
│ │ │ ├── Spark24ExpressionFactory.scala
│ │ │ └── Spark24PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ ├── Spark24BigQueryStrategySuite.scala
│ │ └── Spark24ExpressionConverterSuite.scala
│ ├── spark-2.4-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark24BigQueryPushdown.scala
│ │ │ ├── Spark24BigQueryPushdownPlan.scala
│ │ │ ├── Spark24BigQueryStrategy.scala
│ │ │ ├── Spark24ExpressionConverter.scala
│ │ │ ├── Spark24ExpressionFactory.scala
│ │ │ └── Spark24PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── Spark24BigQueryStrategySuite.scala
│ │ └── Spark24ExpressionConverterSuite.scala
│ ├── spark-3.1-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark31BigQueryPushdown.scala
│ │ │ ├── Spark31BigQueryPushdownPlan.scala
│ │ │ ├── Spark31BigQueryStrategy.scala
│ │ │ ├── Spark31ExpressionConverter.scala
│ │ │ ├── Spark31ExpressionFactory.scala
│ │ │ └── Spark31PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ ├── Spark31BigQueryStrategySuite.scala
│ │ └── Spark31ExpressionConverterSuite.scala
│ ├── spark-3.2-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark32BigQueryPushdown.scala
│ │ │ ├── Spark32BigQueryPushdownPlan.scala
│ │ │ ├── Spark32BigQueryStrategy.scala
│ │ │ ├── Spark32ExpressionConverter.scala
│ │ │ ├── Spark32ExpressionFactory.scala
│ │ │ └── Spark32PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark32ExpressionConverterSuite.scala
│ ├── spark-3.2-bigquery-pushdown_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark32BigQueryPushdown.scala
│ │ │ ├── Spark32BigQueryPushdownPlan.scala
│ │ │ ├── Spark32BigQueryStrategy.scala
│ │ │ ├── Spark32ExpressionConverter.scala
│ │ │ ├── Spark32ExpressionFactory.scala
│ │ │ └── Spark32PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark32ExpressionConverterSuite.scala
│ ├── spark-3.3-bigquery-pushdown_2.12/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark33BigQueryPushdown.scala
│ │ │ ├── Spark33BigQueryPushdownPlan.scala
│ │ │ ├── Spark33BigQueryStrategy.scala
│ │ │ ├── Spark33ExpressionConverter.scala
│ │ │ ├── Spark33ExpressionFactory.scala
│ │ │ └── Spark33PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark33ExpressionConverterSuite.scala
│ ├── spark-3.3-bigquery-pushdown_2.13/
│ │ ├── pom.xml
│ │ └── src/
│ │ ├── main/
│ │ │ ├── resources/
│ │ │ │ └── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── com.google.cloud.spark.bigquery.pushdowns.SparkBigQueryPushdown
│ │ │ └── scala/
│ │ │ └── com/
│ │ │ └── google/
│ │ │ └── cloud/
│ │ │ └── spark/
│ │ │ └── bigquery/
│ │ │ └── pushdowns/
│ │ │ ├── Spark33BigQueryPushdown.scala
│ │ │ ├── Spark33BigQueryPushdownPlan.scala
│ │ │ ├── Spark33BigQueryStrategy.scala
│ │ │ ├── Spark33ExpressionConverter.scala
│ │ │ ├── Spark33ExpressionFactory.scala
│ │ │ └── Spark33PlanFactory.scala
│ │ └── test/
│ │ └── scala/
│ │ └── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── pushdowns/
│ │ ├── BinaryOperationExtractorSuite.scala
│ │ ├── JoinExtractorSuite.scala
│ │ └── Spark33ExpressionConverterSuite.scala
│ ├── spark-bigquery-pushdown-common_2.11/
│ │ └── pom.xml
│ ├── spark-bigquery-pushdown-common_2.12/
│ │ └── pom.xml
│ ├── spark-bigquery-pushdown-common_2.13/
│ │ └── pom.xml
│ └── spark-bigquery-pushdown-parent/
│ └── pom.xml
├── spark-bigquery-python-lib/
│ ├── pom.xml
│ └── src/
│ ├── assembly/
│ │ └── descriptor.xml
│ └── main/
│ └── python/
│ ├── __init__.py
│ └── google/
│ ├── __init__.py
│ └── cloud/
│ ├── __init__.py
│ └── spark/
│ ├── __init__.py
│ └── bigquery/
│ ├── __init__.py
│ └── big_query_connector_utils.py
├── spark-bigquery-scala-212-support/
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ ├── java/
│ │ │ ├── com/
│ │ │ │ └── google/
│ │ │ │ └── cloud/
│ │ │ │ └── spark/
│ │ │ │ └── bigquery/
│ │ │ │ └── direct/
│ │ │ │ └── PreScala213BigQueryRDD.java
│ │ │ └── org/
│ │ │ └── apache/
│ │ │ └── spark/
│ │ │ └── sql/
│ │ │ └── PreScala213SparkSqlUtils.java
│ │ └── resources/
│ │ └── META-INF/
│ │ └── services/
│ │ └── org.apache.spark.sql.SparkSqlUtils
│ └── test/
│ └── java/
│ ├── com/
│ │ └── google/
│ │ └── cloud/
│ │ └── spark/
│ │ └── bigquery/
│ │ └── direct/
│ │ └── PreScala213BigQueryRDDTest.java
│ └── org/
│ └── apache/
│ └── spark/
│ └── sql/
│ └── PreScala213SparkSqlUtilsTest.java
└── spark-bigquery-tests/
└── pom.xml
Showing preview only (299K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (3029 symbols across 409 files)
FILE: .mvn/wrapper/MavenWrapperDownloader.java
class MavenWrapperDownloader (line 21) | public class MavenWrapperDownloader {
method main (line 48) | public static void main(String args[]) {
method downloadFileFromURL (line 97) | private static void downloadFileFromURL(String urlString, File destina...
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessToken.java
class AccessToken (line 24) | public class AccessToken extends com.google.auth.oauth2.AccessToken {
method AccessToken (line 30) | public AccessToken(String tokenValue, Date expirationTime) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessTokenProvider.java
type AccessTokenProvider (line 21) | public interface AccessTokenProvider extends Serializable {
method getAccessToken (line 22) | AccessToken getAccessToken() throws IOException;
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessTokenProviderCredentials.java
class AccessTokenProviderCredentials (line 23) | public class AccessTokenProviderCredentials extends GoogleCredentials {
method AccessTokenProviderCredentials (line 27) | public AccessTokenProviderCredentials(AccessTokenProvider accessTokenP...
method refreshAccessToken (line 31) | @Override
method getAccessTokenProvider (line 36) | @VisibleForTesting
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ArrowReaderIterator.java
class ArrowReaderIterator (line 26) | public class ArrowReaderIterator implements Iterator<VectorSchemaRoot> {
method ArrowReaderIterator (line 33) | public ArrowReaderIterator(ArrowReader reader) {
method hasNext (line 37) | @Override
method next (line 60) | @Override
method ensureClosed (line 67) | private void ensureClosed() throws IOException {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ArrowUtil.java
class ArrowUtil (line 22) | public class ArrowUtil {
method ArrowUtil (line 23) | private ArrowUtil() {}
method newRootAllocator (line 26) | public static RootAllocator newRootAllocator(long maxAllocation) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClient.java
class BigQueryClient (line 85) | public class BigQueryClient {
method BigQueryClient (line 102) | public BigQueryClient(
method createRestClient (line 121) | static Bigquery createRestClient(BigQuery bigQuery) {
method runCleanupJobs (line 145) | public static synchronized void runCleanupJobs() {
method waitForJob (line 165) | public JobInfo waitForJob(Job job) {
method createWhereClause (line 190) | private static Optional<String> createWhereClause(String[] filters) {
method getTable (line 197) | public TableInfo getTable(TableId tableId) {
method tableExists (line 207) | public boolean tableExists(TableId tableId) {
method createTable (line 229) | public TableInfo createTable(TableId tableId, Schema schema, CreateTab...
method createTempTable (line 265) | public TableInfo createTempTable(TableId destinationTableId, Schema sc...
method createTempTableAfterCheckingSchema (line 274) | public TableInfo createTempTableAfterCheckingSchema(
method createTempTableId (line 293) | public TableId createTempTableId(TableId destinationTableId) {
method deleteTable (line 310) | public boolean deleteTable(TableId tableId) {
method copyData (line 315) | private Job copyData(
method isTablePartitioned (line 333) | public boolean isTablePartitioned(TableId tableId) {
method overwriteDestinationWithTemporaryDynamicPartitons (line 355) | public Job overwriteDestinationWithTemporaryDynamicPartitons(
method overwriteDestinationWithTemporary (line 403) | public Job overwriteDestinationWithTemporary(
method appendDestinationWithTemporary (line 433) | public Job appendDestinationWithTemporary(TableId temporaryTableId, Ta...
method createTablePathForBigQueryStorage (line 444) | public String createTablePathForBigQueryStorage(TableId tableId) {
method getReadTable (line 457) | public TableInfo getReadTable(ReadTableOptions options) {
method getReadTableSchema (line 504) | public Schema getReadTableSchema(ReadTableOptions options) {
method validateViewsEnabled (line 517) | private void validateViewsEnabled(ReadTableOptions options) {
method toDatasetId (line 528) | DatasetId toDatasetId(TableId tableId) {
method getProjectId (line 532) | public String getProjectId() {
method listDatasets (line 536) | public Iterable<Dataset> listDatasets() {
method listDatasetsForProject (line 540) | public Iterable<Dataset> listDatasetsForProject(String projectId) {
method listTables (line 544) | public Iterable<Table> listTables(DatasetId datasetId, TableDefinition...
method update (line 552) | public Table update(TableInfo table) {
method createAndWaitFor (line 556) | public Job createAndWaitFor(JobConfiguration.Builder jobConfiguration) {
method createAndWaitFor (line 560) | public Job createAndWaitFor(JobConfiguration jobConfiguration) {
method create (line 589) | Job create(JobInfo jobInfo) {
method query (line 593) | public TableResult query(String sql) {
method createSql (line 608) | String createSql(
method createSql (line 625) | String createSql(
method fullTableName (line 641) | public static String fullTableName(TableId tableId) {
method calculateTableSize (line 650) | public long calculateTableSize(TableId tableId, Optional<String> filte...
method calculateTableSize (line 654) | public long calculateTableSize(TableInfo tableInfo, Optional<String> f...
method getNumberOfRows (line 691) | private long getNumberOfRows(String sql) {
method materializeQueryToTable (line 706) | public TableInfo materializeQueryToTable(
method createDestinationTableWithoutReference (line 717) | TableId createDestinationTableWithoutReference() {
method createDestinationTable (line 721) | TableId createDestinationTable(
method materializeQueryToTable (line 738) | public TableInfo materializeQueryToTable(
method materializeQueryToTable (line 758) | public TableInfo materializeQueryToTable(
method materializeViewToTable (line 790) | public TableInfo materializeViewToTable(
method getQueryResultSchema (line 803) | public Schema getQueryResultSchema(
method getQueryResultSchema (line 811) | public Schema getQueryResultSchema(
method materializeTable (line 836) | private TableInfo materializeTable(
method materializeTable (line 849) | private TableInfo materializeTable(
method materializeTable (line 876) | private TableInfo materializeTable(String querySql, TempTableBuilder t...
method loadDataIntoTable (line 888) | public JobStatistics.LoadStatistics loadDataIntoTable(
method createTableIfNeeded (line 1008) | public void createTableIfNeeded(
method getRestTable (line 1019) | public Optional<com.google.api.services.bigquery.model.Table> getRestT...
method datasetExists (line 1036) | public boolean datasetExists(DatasetId datasetId) {
method createDataset (line 1040) | public void createDataset(DatasetId datasetId, Map<String, String> met...
method deleteDataset (line 1053) | public boolean deleteDataset(DatasetId datasetId, boolean cascade) {
method getDataset (line 1061) | public DatasetInfo getDataset(DatasetId datasetId) {
type ReadTableOptions (line 1065) | public interface ReadTableOptions {
method tableId (line 1066) | TableId tableId();
method query (line 1068) | Optional<String> query();
method viewsEnabled (line 1070) | boolean viewsEnabled();
method viewEnabledParamName (line 1072) | String viewEnabledParamName();
method expirationTimeInMinutes (line 1074) | int expirationTimeInMinutes();
method getQueryParameterHelper (line 1076) | QueryParameterHelper getQueryParameterHelper();
method getKmsKeyName (line 1078) | default Optional<String> getKmsKeyName() {
type LoadDataOptions (line 1083) | public interface LoadDataOptions {
method getTableId (line 1084) | TableId getTableId();
method getCreateDisposition (line 1086) | Optional<JobInfo.CreateDisposition> getCreateDisposition();
method getPartitionField (line 1088) | Optional<String> getPartitionField();
method getPartitionType (line 1090) | Optional<TimePartitioning.Type> getPartitionType();
method getPartitionRange (line 1092) | Optional<RangePartitioning.Range> getPartitionRange();
method getPartitionTypeOrDefault (line 1094) | TimePartitioning.Type getPartitionTypeOrDefault();
method getPartitionExpirationMs (line 1096) | OptionalLong getPartitionExpirationMs();
method getPartitionRequireFilter (line 1098) | Optional<Boolean> getPartitionRequireFilter();
method getClusteredFields (line 1100) | Optional<ImmutableList<String>> getClusteredFields();
method isUseAvroLogicalTypes (line 1102) | boolean isUseAvroLogicalTypes();
method getDecimalTargetTypes (line 1104) | List<String> getDecimalTargetTypes();
method getLoadSchemaUpdateOptions (line 1106) | List<JobInfo.SchemaUpdateOption> getLoadSchemaUpdateOptions();
method getEnableModeCheckForSchemaFields (line 1108) | boolean getEnableModeCheckForSchemaFields();
method getKmsKeyName (line 1110) | Optional<String> getKmsKeyName();
type CreateTableOptions (line 1113) | public interface CreateTableOptions {
method getKmsKeyName (line 1115) | default Optional<String> getKmsKeyName() {
method getBigQueryTableLabels (line 1119) | default Map<String, String> getBigQueryTableLabels() {
method getClusteredFields (line 1123) | default Optional<ImmutableList<String>> getClusteredFields() {
method of (line 1127) | static CreateTableOptions of(
class TempTableBuilder (line 1150) | static class TempTableBuilder implements Callable<TableInfo> {
method TempTableBuilder (line 1160) | TempTableBuilder(
method TempTableBuilder (line 1179) | TempTableBuilder(
method call (line 1198) | @Override
method createTableFromQuery (line 1203) | TableInfo createTableFromQuery() {
method waitForJob (line 1242) | Job waitForJob(Job job) {
class JobConfigurationFactory (line 1268) | static class JobConfigurationFactory {
method JobConfigurationFactory (line 1272) | public JobConfigurationFactory(Map<String, String> labels, Priority ...
method createQueryJobConfigurationBuilder (line 1277) | QueryJobConfiguration.Builder createQueryJobConfigurationBuilder(
method createParameterizedQueryJobConfigurationBuilder (line 1283) | QueryJobConfiguration.Builder createParameterizedQueryJobConfigurati...
method createLoadJobConfigurationBuilder (line 1303) | LoadJobConfiguration.Builder createLoadJobConfigurationBuilder(
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClientFactory.java
class BigQueryClientFactory (line 55) | public class BigQueryClientFactory implements Serializable {
method BigQueryClientFactory (line 77) | @Inject
method getBigQueryReadClient (line 90) | public BigQueryReadClient getBigQueryReadClient() {
method getBigQueryWriteClient (line 107) | public BigQueryWriteClient getBigQueryWriteClient() {
method hashCode (line 121) | @Override
method equals (line 146) | @Override
method getCredentials (line 172) | @VisibleForTesting
method createBigQueryReadClient (line 191) | private BigQueryReadClient createBigQueryReadClient(
method createBigQueryWriteClient (line 243) | private BigQueryWriteClient createBigQueryWriteClient(Optional<String>...
method createTransportBuilder (line 257) | private InstantiatingGrpcChannelProvider.Builder createTransportBuilder(
method setProxyConfig (line 272) | private void setProxyConfig(InstantiatingGrpcChannelProvider.Builder t...
method shutdownBigQueryReadClient (line 283) | private void shutdownBigQueryReadClient(BigQueryReadClient bigQueryRea...
method shutdownBigQueryWriteClient (line 289) | private void shutdownBigQueryWriteClient(BigQueryWriteClient bigQueryW...
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClientFactoryConfig.java
class BigQueryClientFactoryConfig (line 28) | public class BigQueryClientFactoryConfig implements BigQueryConfig {
method BigQueryClientFactoryConfig (line 62) | BigQueryClientFactoryConfig(BigQueryConfig bigQueryConfig, long bigQue...
method getAccessTokenProviderFQCN (line 100) | @Override
method getAccessTokenProviderConfig (line 105) | @Override
method getCredentialsKey (line 110) | @Override
method getCredentialsFile (line 115) | @Override
method getAccessToken (line 120) | @Override
method getLoggedInUserName (line 125) | @Override
method getLoggedInUserGroups (line 130) | @Override
method getImpersonationServiceAccountsForUsers (line 135) | @Override
method getImpersonationServiceAccountsForGroups (line 140) | @Override
method getImpersonationServiceAccount (line 145) | @Override
method getCredentialsScopes (line 150) | @Override
method getParentProjectId (line 155) | @Override
method getCatalogProjectId (line 160) | @Override
method getCatalogLocation (line 165) | @Override
method useParentProjectForMetadataOperations (line 170) | @Override
method isViewsEnabled (line 175) | @Override
method getMaterializationProject (line 180) | @Override
method getMaterializationDataset (line 185) | @Override
method getBigQueryClientConnectTimeout (line 190) | @Override
method getBigQueryClientReadTimeout (line 195) | @Override
method getBigQueryClientRetrySettings (line 200) | @Override
method getBigQueryProxyConfig (line 205) | @Override
method getBigQueryStorageGrpcEndpoint (line 210) | @Override
method getBigQueryHttpEndpoint (line 215) | @Override
method getCacheExpirationTimeInMinutes (line 220) | @Override
method getBigQueryJobLabels (line 225) | @Override
method getCreateReadSessionTimeoutInSeconds (line 230) | @Override
method getChannelPoolSize (line 235) | @Override
method getFlowControlWindowBytes (line 240) | @Override
method getQueryJobPriority (line 245) | @Override
method getBigQueryJobTimeoutInMinutes (line 250) | public long getBigQueryJobTimeoutInMinutes() {
method equals (line 254) | @Override
method hashCode (line 284) | @Override
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClientModule.java
class BigQueryClientModule (line 32) | public class BigQueryClientModule implements com.google.inject.Module {
method createHeaderProvider (line 42) | @Provides
method configure (line 49) | @Override
method provideBigQueryCredentialsSupplier (line 58) | @Provides
method provideDestinationTableCache (line 79) | @Provides
method provideBigQueryClient (line 97) | @Provides
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryConfig.java
type BigQueryConfig (line 27) | public interface BigQueryConfig {
method getAccessTokenProviderFQCN (line 29) | Optional<String> getAccessTokenProviderFQCN();
method getAccessTokenProviderConfig (line 31) | Optional<String> getAccessTokenProviderConfig();
method getCredentialsKey (line 33) | Optional<String> getCredentialsKey();
method getCredentialsFile (line 35) | Optional<String> getCredentialsFile();
method getAccessToken (line 37) | Optional<String> getAccessToken();
method getLoggedInUserName (line 39) | String getLoggedInUserName();
method getLoggedInUserGroups (line 41) | Set<String> getLoggedInUserGroups();
method getImpersonationServiceAccountsForUsers (line 43) | Optional<Map<String, String>> getImpersonationServiceAccountsForUsers();
method getImpersonationServiceAccountsForGroups (line 45) | Optional<Map<String, String>> getImpersonationServiceAccountsForGroups();
method getImpersonationServiceAccount (line 47) | Optional<String> getImpersonationServiceAccount();
method getParentProjectId (line 49) | String getParentProjectId();
method getCatalogProjectId (line 51) | Optional<String> getCatalogProjectId();
method getCatalogLocation (line 53) | Optional<String> getCatalogLocation();
method useParentProjectForMetadataOperations (line 55) | boolean useParentProjectForMetadataOperations();
method isViewsEnabled (line 57) | boolean isViewsEnabled();
method getMaterializationProject (line 59) | Optional<String> getMaterializationProject();
method getMaterializationDataset (line 61) | Optional<String> getMaterializationDataset();
method getBigQueryClientConnectTimeout (line 63) | int getBigQueryClientConnectTimeout();
method getBigQueryClientReadTimeout (line 65) | int getBigQueryClientReadTimeout();
method getBigQueryClientRetrySettings (line 67) | RetrySettings getBigQueryClientRetrySettings();
method getBigQueryProxyConfig (line 69) | BigQueryProxyConfig getBigQueryProxyConfig();
method getBigQueryStorageGrpcEndpoint (line 71) | Optional<String> getBigQueryStorageGrpcEndpoint();
method getBigQueryHttpEndpoint (line 73) | Optional<String> getBigQueryHttpEndpoint();
method getCacheExpirationTimeInMinutes (line 75) | int getCacheExpirationTimeInMinutes();
method getBigQueryJobLabels (line 77) | ImmutableMap<String, String> getBigQueryJobLabels();
method getCreateReadSessionTimeoutInSeconds (line 79) | Optional<Long> getCreateReadSessionTimeoutInSeconds();
method getChannelPoolSize (line 81) | int getChannelPoolSize();
method getFlowControlWindowBytes (line 85) | Optional<Integer> getFlowControlWindowBytes();
method getQueryJobPriority (line 87) | Priority getQueryJobPriority();
method getBigQueryJobTimeoutInMinutes (line 89) | long getBigQueryJobTimeoutInMinutes();
method getCredentialsScopes (line 91) | Optional<ImmutableList<String>> getCredentialsScopes();
method getClientCreationHashCode (line 93) | default int getClientCreationHashCode() {
method areClientCreationConfigsEqual (line 109) | default boolean areClientCreationConfigsEqual(BigQueryConfig b) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryConfigurationUtil.java
class BigQueryConfigurationUtil (line 33) | public class BigQueryConfigurationUtil {
method BigQueryConfigurationUtil (line 38) | private BigQueryConfigurationUtil() {}
method defaultBilledProject (line 40) | public static com.google.common.base.Supplier<String> defaultBilledPro...
method getRequiredOption (line 44) | public static String getRequiredOption(Map<String, String> options, St...
method getRequiredOption (line 50) | public static String getRequiredOption(
method getOption (line 55) | public static com.google.common.base.Optional<String> getOption(
method getOption (line 60) | public static com.google.common.base.Optional<String> getOption(
method getOptionFromMultipleParams (line 68) | public static com.google.common.base.Optional<String> getOptionFromMul...
method getMapEntriesWithPrefix (line 79) | public static Map<String, String> getMapEntriesWithPrefix(
method removePrefixFromMapKeys (line 90) | public static com.google.common.base.Optional<Map<String, String>> rem...
method getAnyOptionsWithPrefix (line 104) | public static com.google.common.base.Optional<Map<String, String>> get...
method getAnyOption (line 112) | public static com.google.common.base.Optional<String> getAnyOption(
method getAnyOption (line 120) | public static com.google.common.base.Optional<String> getAnyOption(
method getAnyBooleanOption (line 131) | public static boolean getAnyBooleanOption(
method empty (line 139) | public static com.google.common.base.Optional empty() {
method fromJavaUtil (line 143) | public static com.google.common.base.Optional fromJavaUtil(java.util.O...
method parseSimpleTableId (line 148) | public static TableId parseSimpleTableId(
method parseSimpleTableId (line 164) | public static TableId parseSimpleTableId(
method parseSimpleTableId (line 172) | public static TableId parseSimpleTableId(
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryConnectorException.java
class BigQueryConnectorException (line 24) | public class BigQueryConnectorException extends RuntimeException {
method BigQueryConnectorException (line 28) | public BigQueryConnectorException(String message) {
method BigQueryConnectorException (line 32) | public BigQueryConnectorException(String message, StatusException caus...
method BigQueryConnectorException (line 36) | public BigQueryConnectorException(String message, StatusRuntimeExcepti...
method BigQueryConnectorException (line 40) | public BigQueryConnectorException(String message, Throwable cause) {
method BigQueryConnectorException (line 44) | public BigQueryConnectorException(BigQueryErrorCode errorCode, String ...
method BigQueryConnectorException (line 49) | public BigQueryConnectorException(
method BigQueryConnectorException (line 54) | public BigQueryConnectorException(
method BigQueryConnectorException (line 59) | public BigQueryConnectorException(BigQueryErrorCode errorCode, String ...
method getErrorCode (line 64) | public BigQueryErrorCode getErrorCode() {
class InvalidSchemaException (line 69) | public static class InvalidSchemaException extends BigQueryConnectorEx...
method InvalidSchemaException (line 70) | public InvalidSchemaException(String message) {
method InvalidSchemaException (line 74) | public InvalidSchemaException(String message, Throwable t) {
class SerializableStatusException (line 83) | public static class SerializableStatusException extends RuntimeExcepti...
method SerializableStatusException (line 88) | SerializableStatusException(StatusException wrapped) {
method SerializableStatusException (line 94) | SerializableStatusException(StatusRuntimeException wrapped) {
method getMessage (line 100) | @Override
method toString (line 105) | @Override
method getStackTrace (line 110) | @Override
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryCredentialsSupplier.java
class BigQueryCredentialsSupplier (line 44) | public class BigQueryCredentialsSupplier {
method BigQueryCredentialsSupplier (line 51) | public BigQueryCredentialsSupplier(
method createCredentialsFromAccessToken (line 105) | private static GoogleCredentials createCredentialsFromAccessToken(Stri...
method createCredentialsFromImpersonation (line 109) | private static Optional<GoogleCredentials> createCredentialsFromImpers...
method getServiceAccountToImpersonateByKeys (line 152) | private static Optional<String> getServiceAccountToImpersonateByKeys(
method createCredentialsFromKey (line 162) | private static GoogleCredentials createCredentialsFromKey(
method createCredentialsFromFile (line 182) | private static GoogleCredentials createCredentialsFromFile(
method createDefaultCredentials (line 201) | public static GoogleCredentials createDefaultCredentials() {
method getCredentials (line 209) | public Credentials getCredentials() {
method getUniverseDomain (line 213) | public String getUniverseDomain() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryDirectDataWriterHelper.java
class BigQueryDirectDataWriterHelper (line 50) | public class BigQueryDirectDataWriterHelper {
method BigQueryDirectDataWriterHelper (line 76) | public BigQueryDirectDataWriterHelper(
method retryCreateWriteStream (line 117) | private String retryCreateWriteStream() throws ExecutionException, Int...
method retryCallable (line 143) | private <V> V retryCallable(Callable<V> callable)
method createStreamWriter (line 154) | private StreamWriter createStreamWriter(String writeStreamName) {
method addRow (line 179) | public void addRow(ByteString message) throws IOException {
method checkForFailedResponse (line 207) | private void checkForFailedResponse(boolean waitForResponse) {
method sendAppendRowsRequest (line 256) | private void sendAppendRowsRequest() throws IOException {
method validateAppendRowsResponse (line 284) | private ApiFuture<AppendRowsResponse> validateAppendRowsResponse(
method finalizeStream (line 315) | public WriteStreamStatistics finalizeStream() throws IOException {
method retryFinalizeWriteStream (line 365) | private FinalizeWriteStreamResponse retryFinalizeWriteStream(
method waitBeforeFinalization (line 376) | private void waitBeforeFinalization() {
method abort (line 393) | public void abort() {
method clean (line 399) | private void clean() {
method clearProtoRows (line 406) | private void clearProtoRows() {
method getWriteStreamName (line 412) | public String getWriteStreamName() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryErrorCode.java
type BigQueryErrorCode (line 18) | public enum BigQueryErrorCode {
method BigQueryErrorCode (line 29) | BigQueryErrorCode(int code) {
method getCode (line 33) | public int getCode() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryJobCompletionListener.java
type BigQueryJobCompletionListener (line 20) | public interface BigQueryJobCompletionListener {
method accept (line 22) | void accept(JobInfo completedJob);
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryMetrics.java
type BigQueryMetrics (line 3) | public interface BigQueryMetrics {
method incrementBytesReadCounter (line 4) | void incrementBytesReadCounter(long val);
method incrementRowsReadCounter (line 6) | void incrementRowsReadCounter(long val);
method updateScanTime (line 8) | void updateScanTime(long val);
method updateParseTime (line 10) | void updateParseTime(long val);
method updateTimeInSpark (line 12) | void updateTimeInSpark(long val);
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryProxyConfig.java
type BigQueryProxyConfig (line 25) | public interface BigQueryProxyConfig {
method getProxyUri (line 33) | Optional<URI> getProxyUri();
method getProxyUsername (line 40) | Optional<String> getProxyUsername();
method getProxyPassword (line 47) | Optional<String> getProxyPassword();
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryProxyTransporterBuilder.java
class BigQueryProxyTransporterBuilder (line 38) | public class BigQueryProxyTransporterBuilder {
method createGrpcChannelConfigurator (line 40) | public static ApiFunction<ManagedChannelBuilder, ManagedChannelBuilder>
method createHttpTransportFactory (line 80) | public static HttpTransportFactory createHttpTransportFactory(
class BigQueryHttpTransportFactory (line 105) | public static class BigQueryHttpTransportFactory implements HttpTransp...
method BigQueryHttpTransportFactory (line 108) | public BigQueryHttpTransportFactory() {
method BigQueryHttpTransportFactory (line 113) | public BigQueryHttpTransportFactory(HttpClientBuilder httpClientBuil...
method create (line 117) | @Override
method checkProxyParamsValidity (line 123) | public static void checkProxyParamsValidity(
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryPushdownException.java
class BigQueryPushdownException (line 22) | public class BigQueryPushdownException extends BigQueryConnectorException {
method BigQueryPushdownException (line 24) | public BigQueryPushdownException(String message) {
method BigQueryPushdownException (line 28) | public BigQueryPushdownException(String message, Throwable t) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryPushdownUnsupportedException.java
class BigQueryPushdownUnsupportedException (line 22) | public class BigQueryPushdownUnsupportedException extends BigQueryConnec...
method BigQueryPushdownUnsupportedException (line 24) | public BigQueryPushdownUnsupportedException(String message) {
method BigQueryPushdownUnsupportedException (line 28) | public BigQueryPushdownUnsupportedException(String message, Throwable ...
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryStorageReadRowsTracer.java
type BigQueryStorageReadRowsTracer (line 30) | public interface BigQueryStorageReadRowsTracer extends Serializable {
method startStream (line 32) | void startStream();
method rowsParseStarted (line 34) | void rowsParseStarted();
method rowsParseFinished (line 37) | void rowsParseFinished(long rowsParsed);
method readRowsResponseRequested (line 40) | void readRowsResponseRequested();
method readRowsResponseObtained (line 43) | void readRowsResponseObtained(long bytesReceived);
method finished (line 46) | void finished();
method nextBatchNeeded (line 49) | void nextBatchNeeded();
method forkWithPrefix (line 59) | BigQueryStorageReadRowsTracer forkWithPrefix(String id);
method getBytesRead (line 61) | long getBytesRead();
method getRowsRead (line 63) | long getRowsRead();
method getScanTimeInMilliSec (line 65) | long getScanTimeInMilliSec();
method getParseTimeInMilliSec (line 67) | long getParseTimeInMilliSec();
method getTimeInSparkInMilliSec (line 69) | long getTimeInSparkInMilliSec();
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryTracerFactory.java
type BigQueryTracerFactory (line 22) | public interface BigQueryTracerFactory extends Serializable {
method newReadRowsTracer (line 23) | BigQueryStorageReadRowsTracer newReadRowsTracer(
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryUtil.java
class BigQueryUtil (line 82) | public class BigQueryUtil {
method BigQueryUtil (line 128) | private BigQueryUtil() {}
method isRetryable (line 130) | public static boolean isRetryable(Throwable cause) {
method isRetryableInternalError (line 134) | static boolean isRetryableInternalError(Throwable t) {
method isReadSessionExpired (line 144) | public static boolean isReadSessionExpired(Throwable cause) {
method isReadSessionExpiredInternalError (line 148) | static boolean isReadSessionExpiredInternalError(Throwable t) {
method convertToBigQueryException (line 157) | static BigQueryException convertToBigQueryException(BigQueryError erro...
method areCredentialsEqual (line 161) | static boolean areCredentialsEqual(Credentials credentials1, Credentia...
method getCredentialsByteArray (line 171) | static byte[] getCredentialsByteArray(Credentials credentials) {
method getCredentialsFromByteArray (line 185) | static Credentials getCredentialsFromByteArray(byte[] byteArray) {
method firstPresent (line 196) | public static <T> Optional<T> firstPresent(Optional<T>... optionals) {
method parseTableId (line 205) | public static TableId parseTableId(String rawTable) {
method parseTableId (line 209) | public static TableId parseTableId(
method parseTableId (line 214) | public static TableId parseTableId(
method friendlyTableName (line 244) | public static String friendlyTableName(TableId tableId) {
method convertAndThrow (line 250) | public static void convertAndThrow(BigQueryError error) {
method optimizeLoadUriList (line 263) | public static List<String> optimizeLoadUriList(
method trimUris (line 288) | private static Multimap<String, String> trimUris(Pattern pattern, List...
method schemaWritable (line 310) | public static ComparisonResult schemaWritable(
method getStreamNames (line 337) | public static List<String> getStreamNames(ReadSession readSession) {
method fieldWritable (line 357) | @VisibleForTesting
method formatPrecisionAndScale (line 459) | private static String formatPrecisionAndScale(Field field) {
method typeWriteable (line 465) | @VisibleForTesting
method getPrecision (line 476) | @VisibleForTesting
method getScale (line 485) | @VisibleForTesting
method getValueOrDefault (line 491) | private static int getValueOrDefault(
method isModeWritable (line 506) | @VisibleForTesting
method fieldListWritable (line 518) | @VisibleForTesting
method nullableIfNull (line 560) | static Field.Mode nullableIfNull(Field.Mode mode) {
method emptyIfNeeded (line 564) | public static Optional<String> emptyIfNeeded(String value) {
method createVerifiedInstance (line 572) | public static <T> T createVerifiedInstance(
method verifySerialization (line 607) | public static <T> T verifySerialization(T obj) {
method getPartitionFields (line 624) | public static ImmutableList<String> getPartitionFields(TableInfo table...
method getClusteringFields (line 662) | public static ImmutableList<String> getClusteringFields(TableInfo tabl...
method filterLengthInLimit (line 675) | public static boolean filterLengthInLimit(Optional<String> filter) {
method adjustSchemaIfNeeded (line 688) | public static Schema adjustSchemaIfNeeded(
method adjustField (line 712) | @VisibleForTesting
method prepareQueryForLog (line 746) | public static String prepareQueryForLog(String query, int maxLength) {
method getQueryForTimePartitionedTable (line 753) | static String getQueryForTimePartitionedTable(
method createOptimizedMergeQuery (line 788) | private static String createOptimizedMergeQuery(
method getQueryForRangePartitionedTable (line 829) | static String getQueryForRangePartitionedTable(
method sanitizeLabelValue (line 867) | public static String sanitizeLabelValue(String value) {
method isBigLakeManagedTable (line 890) | public static boolean isBigLakeManagedTable(TableInfo table) {
method isBigQueryNativeTable (line 906) | public static boolean isBigQueryNativeTable(TableInfo table) {
method parseQueryParameters (line 922) | public static QueryParameterHelper parseQueryParameters(Map<String, St...
method processNamedParameter (line 965) | private static void processNamedParameter(
method processPositionalParameter (line 988) | private static void processPositionalParameter(
method buildPositionalParameterList (line 1033) | private static List<QueryParameterValue> buildPositionalParameterList(
method parseSingleParameterValue (line 1059) | private static QueryParameterValue parseSingleParameterValue(
method formatTableResult (line 1105) | public static String[] formatTableResult(TableResult result, boolean w...
method fieldValueToString (line 1132) | private static String fieldValueToString(FieldValue fieldValue) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ComparisonResult.java
class ComparisonResult (line 7) | public class ComparisonResult {
method fromEqualsResult (line 14) | static ComparisonResult fromEqualsResult(boolean equal) {
method differentWithDescription (line 18) | static ComparisonResult differentWithDescription(List<String> facts) {
method equal (line 22) | static ComparisonResult equal() {
method differentNoDescription (line 26) | static ComparisonResult differentNoDescription() {
method ComparisonResult (line 30) | private ComparisonResult(ImmutableList<String> facts) {
method valuesAreEqual (line 34) | public boolean valuesAreEqual() {
method makeMessage (line 38) | public String makeMessage() {
method toString (line 45) | public String toString() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/DecompressReadRowsResponse.java
class DecompressReadRowsResponse (line 12) | public class DecompressReadRowsResponse {
method decompressArrowRecordBatch (line 14) | public static InputStream decompressArrowRecordBatch(
method decompressAvroRecordBatch (line 20) | public static InputStream decompressAvroRecordBatch(
method decompressRecordBatchInternal (line 26) | private static InputStream decompressRecordBatchInternal(
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/DurationTimer.java
class DurationTimer (line 28) | final class DurationTimer implements Serializable {
method start (line 34) | public void start() {
method finish (line 38) | public void finish() {
method getAccumulatedTime (line 47) | public Duration getAccumulatedTime() {
method getSamples (line 51) | public long getSamples() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/EnvironmentContext.java
class EnvironmentContext (line 24) | public class EnvironmentContext {
method EnvironmentContext (line 28) | public EnvironmentContext(ImmutableMap<String, String> bigQueryJobLabe...
method getBigQueryJobLabels (line 32) | public ImmutableMap<String, String> getBigQueryJobLabels() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/GcpUtil.java
class GcpUtil (line 34) | public class GcpUtil {
method getGcpRegion (line 39) | public static Optional<String> getGcpRegion() {
method getGcpZone (line 43) | public static Optional<String> getGcpZone() {
method provideGcpZone (line 48) | @VisibleForTesting
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/HttpUtil.java
class HttpUtil (line 23) | public class HttpUtil {
method HttpUtil (line 25) | private HttpUtil() {}
method createHeaderProvider (line 27) | public static HeaderProvider createHeaderProvider(BigQueryConfig confi...
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/IdentityTokenSupplier.java
class IdentityTokenSupplier (line 14) | public class IdentityTokenSupplier implements Serializable {
method fetchIdentityToken (line 20) | public static Optional<String> fetchIdentityToken(String audience) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/IteratorMultiplexer.java
class IteratorMultiplexer (line 33) | public class IteratorMultiplexer<T> implements AutoCloseable {
method IteratorMultiplexer (line 47) | public IteratorMultiplexer(Iterator<T> iterator, int splits) {
method close (line 58) | @Override
method readAhead (line 74) | void readAhead() {
method getSplit (line 102) | public synchronized Iterator<T> getSplit(int split) {
class QueueIterator (line 111) | private class QueueIterator<T> implements Iterator<T> {
method hasNext (line 117) | @Override
method next (line 134) | @Override
method markDone (line 149) | public synchronized void markDone(Throwable e) {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/LazyInitializationSupplier.java
class LazyInitializationSupplier (line 28) | public class LazyInitializationSupplier<T extends @Nullable Object> impl...
method LazyInitializationSupplier (line 33) | public LazyInitializationSupplier(Supplier<T> delegate) {
method get (line 37) | @Override
method toString (line 52) | @Override
method isInitialized (line 59) | public boolean isInitialized() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/LoggingBigQueryStorageReadRowsTracer.java
class LoggingBigQueryStorageReadRowsTracer (line 30) | public class LoggingBigQueryStorageReadRowsTracer implements BigQuerySto...
method LoggingBigQueryStorageReadRowsTracer (line 50) | LoggingBigQueryStorageReadRowsTracer(
method startStream (line 61) | @Override
method rowsParseStarted (line 66) | @Override
method rowsParseFinished (line 71) | @Override
method readRowsResponseRequested (line 77) | @Override
method readRowsResponseObtained (line 82) | @Override
method finished (line 88) | @Override
method average (line 94) | private static Duration average(DurationTimer durationTimer) {
method format (line 102) | private static String format(DurationTimer durationTimer) {
method difference (line 111) | private static String difference(DurationTimer d1, DurationTimer d2) {
method perSecond (line 118) | private static long perSecond(DurationTimer timer, long metric) {
method logData (line 130) | private void logData() {
method nextBatchNeeded (line 159) | @Override
method forkWithPrefix (line 167) | @Override
method getBytesRead (line 173) | @Override
method getRowsRead (line 178) | @Override
method getScanTimeInMilliSec (line 183) | @Override
method getParseTimeInMilliSec (line 188) | @Override
method getTimeInSparkInMilliSec (line 193) | @Override
method getStreamName (line 198) | String getStreamName() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/LoggingBigQueryTracerFactory.java
class LoggingBigQueryTracerFactory (line 21) | public class LoggingBigQueryTracerFactory implements BigQueryTracerFacto...
method LoggingBigQueryTracerFactory (line 24) | @Inject
method LoggingBigQueryTracerFactory (line 29) | LoggingBigQueryTracerFactory(int logIntervalPowerOf2) {
method newReadRowsTracer (line 33) | @Override
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/MaterializationConfiguration.java
class MaterializationConfiguration (line 23) | public class MaterializationConfiguration {
method from (line 30) | public static MaterializationConfiguration from(
method MaterializationConfiguration (line 57) | private MaterializationConfiguration(
method getMaterializationProject (line 66) | public Optional<String> getMaterializationProject() {
method getMaterializationDataset (line 70) | public Optional<String> getMaterializationDataset() {
method getMaterializationExpirationTimeInMinutes (line 74) | public int getMaterializationExpirationTimeInMinutes() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/NonInterruptibleBlockingBytesChannel.java
class NonInterruptibleBlockingBytesChannel (line 32) | public class NonInterruptibleBlockingBytesChannel implements ReadableByt...
method NonInterruptibleBlockingBytesChannel (line 38) | public NonInterruptibleBlockingBytesChannel(InputStream is) {
method read (line 42) | @Override
method isOpen (line 65) | @Override
method close (line 70) | @Override
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ParallelArrowReader.java
class ParallelArrowReader (line 50) | public class ParallelArrowReader implements AutoCloseable {
method ParallelArrowReader (line 72) | public ParallelArrowReader(
method next (line 92) | public boolean next() throws IOException {
method start (line 128) | private void start() {
method consumeReaders (line 135) | private void consumeReaders() {
method close (line 219) | @Override
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ParameterMode.java
type ParameterMode (line 20) | public enum ParameterMode implements Serializable {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/QueryParameterHelper.java
class QueryParameterHelper (line 35) | public final class QueryParameterHelper implements Serializable {
method QueryParameterHelper (line 43) | public QueryParameterHelper(
method none (line 53) | static QueryParameterHelper none() {
method named (line 58) | static QueryParameterHelper named(Map<String, QueryParameterValue> nam...
method positional (line 64) | static QueryParameterHelper positional(List<QueryParameterValue> posit...
method getMode (line 75) | public ParameterMode getMode() {
method getNamedParameters (line 85) | public Optional<Map<String, QueryParameterValue>> getNamedParameters() {
method getPositionalParameters (line 95) | public Optional<List<QueryParameterValue>> getPositionalParameters() {
method isEmpty (line 100) | public boolean isEmpty() {
method configureBuilder (line 104) | public QueryJobConfiguration.Builder configureBuilder(QueryJobConfigur...
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadRowsHelper.java
class ReadRowsHelper (line 32) | public class ReadRowsHelper implements AutoCloseable {
class Options (line 37) | public static final class Options implements Serializable {
method Options (line 44) | public Options(
method getMaxReadRowsRetries (line 55) | public int getMaxReadRowsRetries() {
method getEndpoint (line 59) | public Optional<String> getEndpoint() {
method numBackgroundThreads (line 63) | public int numBackgroundThreads() {
method numPrebufferResponses (line 67) | public int numPrebufferResponses() {
method ReadRowsHelper (line 76) | public ReadRowsHelper(
method ReadRowsHelper (line 88) | public ReadRowsHelper(
method readRows (line 99) | public Iterator<ReadRowsResponse> readRows() {
method toString (line 115) | @Override
method close (line 120) | @Override
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadRowsResponseInputStreamEnumeration.java
class ReadRowsResponseInputStreamEnumeration (line 30) | public class ReadRowsResponseInputStreamEnumeration implements java.util...
method ReadRowsResponseInputStreamEnumeration (line 37) | public ReadRowsResponseInputStreamEnumeration(
method hasMoreElements (line 47) | public boolean hasMoreElements() {
method nextElement (line 51) | public InputStream nextElement() {
method loadNextResponse (line 65) | void loadNextResponse() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionCreator.java
class ReadSessionCreator (line 45) | public class ReadSessionCreator {
method initializeCache (line 59) | private static synchronized void initializeCache(long readSessionCache...
method ReadSessionCreator (line 70) | public ReadSessionCreator(
method create (line 91) | public ReadSessionResponse create(
method toTablePath (line 242) | static String toTablePath(TableId tableId) {
method getActualTable (line 248) | public TableInfo getActualTable(
method getActualTable (line 254) | TableInfo getActualTable(
method isInputTableAView (line 281) | public boolean isInputTableAView(TableInfo table) {
method getReadSessionCache (line 301) | Cache<CreateReadSessionRequest, ReadSession> getReadSessionCache() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionCreatorConfig.java
class ReadSessionCreatorConfig (line 25) | public class ReadSessionCreatorConfig {
method ReadSessionCreatorConfig (line 50) | ReadSessionCreatorConfig(
method isViewsEnabled (line 99) | public boolean isViewsEnabled() {
method getViewEnabledParamName (line 103) | public String getViewEnabledParamName() {
method getMaterializationProject (line 107) | public Optional<String> getMaterializationProject() {
method getMaterializationDataset (line 111) | public Optional<String> getMaterializationDataset() {
method getMaterializationExpirationTimeInMinutes (line 115) | public int getMaterializationExpirationTimeInMinutes() {
method getReadDataFormat (line 119) | public DataFormat getReadDataFormat() {
method getArrowCompressionCodec (line 123) | public CompressionCodec getArrowCompressionCodec() {
method getResponseCompressionCodec (line 127) | public ResponseCompressionCodec getResponseCompressionCodec() {
method getMaxReadRowsRetries (line 131) | public int getMaxReadRowsRetries() {
method getMaxParallelism (line 135) | public OptionalInt getMaxParallelism() {
method getDefaultParallelism (line 139) | public int getDefaultParallelism() {
method getRequestEncodedBase (line 143) | public Optional<String> getRequestEncodedBase() {
method getBigQueryStorageGrpcEndpoint (line 147) | public Optional<String> getBigQueryStorageGrpcEndpoint() {
method getBigQueryHttpEndpoint (line 151) | public Optional<String> getBigQueryHttpEndpoint() {
method backgroundParsingThreads (line 155) | public int backgroundParsingThreads() {
method getPushAllFilters (line 159) | public boolean getPushAllFilters() {
method toReadRowsHelperOptions (line 163) | public ReadRowsHelper.Options toReadRowsHelperOptions() {
method streamsPerPartition (line 171) | public int streamsPerPartition() {
method getPrebufferResponses (line 175) | public int getPrebufferResponses() {
method getTraceId (line 179) | public Optional<String> getTraceId() {
method getPreferredMinParallelism (line 183) | public OptionalInt getPreferredMinParallelism() {
method isReadSessionCachingEnabled (line 187) | public boolean isReadSessionCachingEnabled() {
method getReadSessionCacheDurationMins (line 191) | public long getReadSessionCacheDurationMins() {
method getSnapshotTimeMillis (line 195) | public OptionalLong getSnapshotTimeMillis() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionCreatorConfigBuilder.java
class ReadSessionCreatorConfigBuilder (line 26) | public class ReadSessionCreatorConfigBuilder {
method setViewsEnabled (line 53) | @CanIgnoreReturnValue
method setMaterializationProject (line 59) | @CanIgnoreReturnValue
method setMaterializationDataset (line 66) | @CanIgnoreReturnValue
method setMaterializationExpirationTimeInMinutes (line 73) | @CanIgnoreReturnValue
method setReadDataFormat (line 80) | @CanIgnoreReturnValue
method setMaxReadRowsRetries (line 86) | @CanIgnoreReturnValue
method setViewEnabledParamName (line 92) | @CanIgnoreReturnValue
method setMaxParallelism (line 98) | @CanIgnoreReturnValue
method setPreferredMinParallelism (line 104) | @CanIgnoreReturnValue
method setDefaultParallelism (line 111) | @CanIgnoreReturnValue
method setRequestEncodedBase (line 117) | @CanIgnoreReturnValue
method setBigQueryStorageGrpcEndpoint (line 124) | @CanIgnoreReturnValue
method setBigQueryHttpEndpoint (line 131) | @CanIgnoreReturnValue
method setBackgroundParsingThreads (line 138) | @CanIgnoreReturnValue
method setPushAllFilters (line 144) | @CanIgnoreReturnValue
method setPrebufferReadRowsResponses (line 150) | @CanIgnoreReturnValue
method setStreamsPerPartition (line 156) | @CanIgnoreReturnValue
method setArrowCompressionCodec (line 162) | @CanIgnoreReturnValue
method setResponseCompressionCodec (line 169) | @CanIgnoreReturnValue
method setTraceId (line 176) | @CanIgnoreReturnValue
method setEnableReadSessionCaching (line 182) | public ReadSessionCreatorConfigBuilder setEnableReadSessionCaching(
method setReadSessionCacheDurationMins (line 188) | public ReadSessionCreatorConfigBuilder setReadSessionCacheDurationMins(
method setSnapshotTimeMillis (line 194) | @CanIgnoreReturnValue
method build (line 200) | public ReadSessionCreatorConfig build() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionMetrics.java
type ReadSessionMetrics (line 18) | public interface ReadSessionMetrics {
method incrementBytesReadAccumulator (line 19) | void incrementBytesReadAccumulator(long value);
method incrementRowsReadAccumulator (line 21) | void incrementRowsReadAccumulator(long value);
method incrementScanTimeAccumulator (line 23) | void incrementScanTimeAccumulator(long value);
method incrementParseTimeAccumulator (line 25) | void incrementParseTimeAccumulator(long value);
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionResponse.java
class ReadSessionResponse (line 21) | public class ReadSessionResponse {
method ReadSessionResponse (line 26) | public ReadSessionResponse(ReadSession readSession, TableInfo readTabl...
method getReadSession (line 31) | public ReadSession getReadSession() {
method getReadTableInfo (line 35) | public TableInfo getReadTableInfo() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/StreamCombiningIterator.java
class StreamCombiningIterator (line 53) | public class StreamCombiningIterator implements Iterator<ReadRowsRespons...
method StreamCombiningIterator (line 75) | StreamCombiningIterator(
method stopWithError (line 100) | synchronized void stopWithError(Throwable error) {
method hasActiveObservers (line 115) | private boolean hasActiveObservers() {
method next (line 125) | @Override
method hasNext (line 152) | @Override
method cancel (line 180) | public void cancel() {
method maybeFinished (line 189) | private void maybeFinished() {
method completeStream (line 204) | private void completeStream(boolean addEos) {
method parseReadSessionId (line 218) | private String parseReadSessionId(String stream) {
method newConnection (line 228) | private void newConnection(Observer observer, ReadRowsRequest.Builder ...
class Observer (line 252) | class Observer implements ResponseObserver<ReadRowsResponse> {
method Observer (line 270) | Observer(ReadRowsRequest.Builder builder) {
method onResponse (line 275) | @Override
method onStart (line 286) | @Override
method onError (line 303) | @Override
method onComplete (line 330) | @Override
method request (line 339) | public synchronized void request() {
method cancel (line 386) | public void cancel() {
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/UserAgentProvider.java
type UserAgentProvider (line 18) | public interface UserAgentProvider {
method getUserAgent (line 20) | String getUserAgent();
method getConnectorInfo (line 22) | String getConnectorInfo();
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/VersionProvider.java
type VersionProvider (line 18) | @FunctionalInterface
method getVersion (line 21) | String getVersion();
FILE: bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/WriteStreamStatistics.java
class WriteStreamStatistics (line 3) | public class WriteStreamStatistics {
method WriteStreamStatistics (line 7) | public WriteStreamStatistics(long rowCount, long bytesWritten) {
method getRowCount (line 12) | public long getRowCount() {
method getBytesWritten (line 16) | public long getBytesWritten() {
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryClientFactoryTest.java
class BigQueryClientFactoryTest (line 54) | public class BigQueryClientFactoryTest {
method getProxyUri (line 70) | @Override
method getProxyUsername (line 75) | @Override
method getProxyPassword (line 80) | @Override
method BigQueryClientFactoryTest (line 86) | public BigQueryClientFactoryTest() {
method testGetReadClientForSameClientFactory (line 91) | @Test
method testGetReadClientWithUserAgent (line 108) | @Test
method testGetReadClientWithBigQueryConfig (line 152) | @Test
method testGetReadClientWithServiceAccountCredentials (line 187) | @Test
method testGetWriteClientForSameClientFactory (line 234) | @Test
method testGetWriteClientWithUserAgent (line 250) | @Test
method testGetWriteClientWithBigQueryConfig (line 291) | @Test
method testGetWriteClientWithServiceAccountCredentials (line 326) | @Test
method testGetReadClientWithSameAndDifferentBQConfig (line 370) | @Test
method testGetWriteClientWithSameAndDifferentBQConfig (line 404) | @Test
method testHashCodeWithExternalAccountCredentials (line 437) | @Test
method testGetCredentials_ImpersonatedCredentials_calendarNotNullAfterDeserialization (line 457) | @Test
method createServiceAccountCredentials (line 522) | private ServiceAccountCredentials createServiceAccountCredentials(Stri...
class TestBigQueryConfig (line 534) | private class TestBigQueryConfig implements BigQueryConfig {
method TestBigQueryConfig (line 538) | TestBigQueryConfig(Optional<String> bigQueryStorageGrpcEndpoint) {
method getAccessTokenProviderFQCN (line 542) | @Override
method getAccessTokenProviderConfig (line 547) | @Override
method getCredentialsKey (line 552) | @Override
method getCredentialsFile (line 557) | @Override
method getLoggedInUserName (line 562) | @Override
method getLoggedInUserGroups (line 567) | @Override
method getImpersonationServiceAccountsForUsers (line 572) | @Override
method getImpersonationServiceAccountsForGroups (line 577) | @Override
method getImpersonationServiceAccount (line 582) | @Override
method getAccessToken (line 587) | @Override
method getParentProjectId (line 592) | @Override
method getCatalogProjectId (line 597) | @Override
method getCatalogLocation (line 602) | @Override
method useParentProjectForMetadataOperations (line 607) | @Override
method isViewsEnabled (line 612) | @Override
method getMaterializationProject (line 617) | @Override
method getMaterializationDataset (line 622) | @Override
method getBigQueryClientConnectTimeout (line 627) | @Override
method getBigQueryClientReadTimeout (line 632) | @Override
method getBigQueryClientRetrySettings (line 637) | @Override
method getBigQueryProxyConfig (line 642) | @Override
method getBigQueryStorageGrpcEndpoint (line 647) | @Override
method getBigQueryHttpEndpoint (line 652) | @Override
method getCacheExpirationTimeInMinutes (line 657) | @Override
method getBigQueryJobLabels (line 662) | @Override
method getCreateReadSessionTimeoutInSeconds (line 667) | @Override
method getChannelPoolSize (line 672) | @Override
method getFlowControlWindowBytes (line 677) | @Override
method getQueryJobPriority (line 682) | @Override
method getBigQueryJobTimeoutInMinutes (line 687) | @Override
method getCredentialsScopes (line 692) | @Override
method equals (line 697) | @Override
method hashCode (line 709) | @Override
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryConfigurationUtilTest.java
class BigQueryConfigurationUtilTest (line 25) | public class BigQueryConfigurationUtilTest {
method testParseSimpleTableId_tableOnly (line 30) | @Test
method testParseSimpleTableId_pathOnly (line 40) | @Test
method testParseSimpleTableId_tableAndDataset (line 50) | @Test
method testParseSimpleTableId_allParams (line 60) | @Test
method testParseSimpleTableId_fallbackDatasetIgnored (line 70) | @Test
method testParseSimpleTableId_fallbackDatasetUsed (line 81) | @Test
method testParseSimpleTableId_fallbackProjectIgnored (line 92) | @Test
method testParseSimpleTableId_fallbackProjectUsed (line 103) | @Test
method testParseSimpleTableId_missingDataset (line 114) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryConnectorExceptionTest.java
class BigQueryConnectorExceptionTest (line 25) | public class BigQueryConnectorExceptionTest {
method testStatusExceptionSerialization (line 27) | @Test
method testStatusRuntimeExceptionSerialization (line 37) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryCredentialsSupplierTest.java
class BigQueryCredentialsSupplierTest (line 43) | public class BigQueryCredentialsSupplierTest {
method testCredentialsFromAccessToken (line 82) | @Test
method testCredentialsFromKey (line 131) | @Test
method testCredentialsFromKeyWithErrors (line 187) | @Test
method testCredentialsFromFile (line 245) | @Test
method testCredentialsFromFileWithErrors (line 302) | @Test
method createServiceAccountJson (line 361) | private String createServiceAccountJson(String projectId) throws Excep...
method createImpersonatedCredentials (line 375) | Credentials createImpersonatedCredentials(
method testSingleServiceAccountImpersonation (line 399) | @Test
method testImpersonationForUsers (line 409) | @Test
method testImpersonationForGroups (line 437) | @Test
method testImpersonationForUsersAndGroups (line 470) | @Test
method testFallbackToDefault (line 497) | @Test
method testExceptionIsThrownOnFile (line 519) | @Test
method testExceptionIsThrownOnKey (line 545) | @Test
method testCustomScopes (line 571) | @Test
method testUniverseDomain (line 599) | @Test
method testDefaultUniverseDomain (line 627) | @Test
method testUniverseDomainOnFailure (line 650) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryUtilTest.java
class BigQueryUtilTest (line 69) | public class BigQueryUtilTest {
method checkFailureMessage (line 76) | private static void checkFailureMessage(ComparisonResult result, Strin...
method testParseFullyQualifiedTable (line 81) | @Test
method testParseFullyQualifiedLegacyTable (line 87) | @Test
method testParseInvalidTable (line 93) | @Test
method testParseFullyQualifiedTableWithDefaults (line 100) | @Test
method testParsePartiallyQualifiedTable (line 108) | @Test
method testParsePartiallyQualifiedTableWithDefaults (line 114) | @Test
method testParseUnqualifiedTableWithDefaults (line 124) | @Test
method testParseFullyQualifiedPartitionedTable (line 132) | @Test
method testParseUnqualifiedPartitionedTable (line 139) | @Test
method testParseTableWithDatePartition (line 147) | @Test
method testParseFullyQualifiedTableWithSpaces (line 158) | @Test
method testParseAmbiguousTableWithSpaces (line 164) | @Test
method testParseAmbiguousTableWithBackticks (line 173) | @Test
method testUnparsableTable (line 182) | @Test
method testFriendlyName (line 187) | @Test
method testShortFriendlyName (line 193) | @Test
method testConvertAndThrows (line 199) | @Test
method testFirstPresent (line 208) | @Test
method testSchemaEqualsWithFieldOrder (line 219) | @Test
method testSchemaWritableNoFieldOrder (line 235) | @Test
method testNullableField (line 252) | @Test
method testSchemaWritableWithNulls (line 260) | @Test
method testFieldWritableWithNulls (line 273) | @Test
method testRequiredFieldNotFound (line 282) | @Test
method testFieldNameMismatch (line 289) | @Test
method testSubfieldsMismatch (line 298) | @Test
method testFieldWritable (line 314) | @Test
method testFieldWritable_notTypeWritable (line 327) | @Test
method testFieldWritableMaxLength (line 336) | @Test
method testFieldWritableScaleAndPrecision (line 354) | @Test
method testSchemaWritableWithEnableModeCheckForSchemaFields (line 378) | @Test
method testSchemaWritableWithDisableNullableFieldCheck (line 414) | @Test
method testSchemaWritableWithMoreUnEqualNumberOfFields (line 446) | @Test
method testIsModeWritable (line 470) | @Test
method testCreateVerifiedInstanceNoClass (line 483) | @Test
method testCreateVerifiedInstanceFailedInheritance (line 490) | @Test
method testCreateVerifiedInstance (line 497) | @Test
method testCreateVerifiedInstanceWithArg (line 504) | @Test
method testVerifySerialization (line 511) | @Test
method testVerifySerializationFail (line 518) | @Test
method testGetStreamNames (line 525) | @Test
method testEmptyGetStreamNames (line 541) | @Test
method testGetPartitionField_not_standard_table (line 552) | @Test
method testGetPartitionField_no_partitioning (line 558) | @Test
method testGetPartitionField_time_partitioning (line 565) | @Test
method testGetPartitionField_time_partitioning_pseudoColumn (line 579) | @Test
method testGetPartitionField_time_partitioning_pseudoColumn_day (line 593) | @Test
method testGetPartitionField_range_partitioning (line 607) | @Test
method testGetPartitionField_hive_partitioning (line 620) | @Test
method testGetClusteringFields_not_standard_table (line 640) | @Test
method ttestGetClusteringFields_no_clustering (line 646) | @Test
method testGetClusteringFields_time_partitioning (line 653) | @Test
method testFilterLengthInLimit_no_filter (line 668) | @Test
method testFilterLengthInLimit_small_filter (line 673) | @Test
method testFilterLengthInLimit_very_large_filter (line 678) | @Test
method testGetPrecision (line 685) | @Test
method testGetScale (line 702) | @Test
method testAdjustSchemaIfNeeded (line 717) | @Test
method testAdjustSchemaForNewField (line 744) | @Test
method testAdjustField_no_op (line 761) | @Test
method testAdjustField_numeric_to_big_numeric (line 769) | @Test
method testAdjustFieldRecursive (line 777) | @Test
method testAdjustFieldRecursive_with_bignumeric_conversion (line 791) | @Test
method testAdjustField_nullExistingField (line 806) | @Test
method testAdjustField_nullExistingFieldWithRecordType (line 813) | @Test
method testPrepareQueryForLog_withNewLine (line 822) | @Test
method testPrepareQueryForLog_withoutNewLine (line 828) | @Test
method testPrepareQueryForLog_withTruncating (line 834) | @Test
method testSanitizeLabelValue (line 840) | @Test
method testIsBigLakeManagedTable_with_BigLakeManagedTable (line 851) | @Test
method testIsBigLakeManagedTable_with_BigQueryExternalTable (line 869) | @Test
method testIsBigLakeManagedTable_with_BigQueryNativeTable (line 881) | @Test
method testIsBigQueryNativeTable_with_BigLakeManagedTable (line 891) | @Test
method testIsBigQueryNativeTable_with_BigQueryExternalTable (line 909) | @Test
method testIsBigQueryNativeTable_with_BigQueryNativeTable (line 921) | @Test
method testAdjustField_nullable_allowRelaxation (line 931) | @Test
method testAdjustField_exisitingFieldNullable_allowRelaxation (line 941) | @Test
method testAdjustField_nullable_dontAllowRelaxation (line 951) | @Test
method testAdjustField_numeric_to_bigNumeric (line 961) | @Test
method testCredentialSerialization (line 971) | @Test
method testParseNamedParameters_SuccessAllTypes (line 986) | @Test
method testParseNamedParameters_EmptyStringValue (line 1036) | @Test
method testParseNamedParameters_SpacesInValue (line 1048) | @Test
method testParseNamedParameters_DuplicateKeysDifferentCase (line 1064) | @Test
method testParseNamedParameters_IdenticalKeys (line 1081) | @Test
method testParsePositionalParameters_Success (line 1094) | @Test
method testParsePositionalParameters_SingleParameter (line 1117) | @Test
method testParseParameters_NoParameterOptions (line 1130) | @Test
method testParseParameters_EmptyOptionsMap (line 1145) | @Test
method testParseParameters_ErrorMixedNamedAndPositional (line 1157) | @Test
method testParseParameters_ErrorMixedPositionalAndNamed (line 1170) | @Test
method testParseParameters_ErrorNullValueString (line 1183) | @Test
method testParseParameters_ErrorInvalidFormatNoColon (line 1196) | @Test
method testParseParameters_ErrorInvalidFormatEmptyType (line 1207) | @Test
method testParseParameters_ErrorUnknownType (line 1218) | @Test
method testParseParameters_ErrorUnsupportedTypeArray (line 1230) | @Test
method testParseParameters_ErrorUnsupportedTypeStruct (line 1240) | @Test
method testParseNamedParameters_ErrorEmptyName (line 1254) | @Test
method testParsePositionalParameters_ErrorEmptyIndex (line 1264) | @Test
method testParsePositionalParameters_ErrorNonNumericIndex (line 1275) | @Test
method testParsePositionalParameters_ErrorZeroIndex (line 1287) | @Test
method testParsePositionalParameters_ErrorNegativeIndexMinusOne (line 1301) | @Test
method testParsePositionalParameters_ErrorIndexGap (line 1317) | @Test
method testParsePositionalParameters_ErrorOnlyGap (line 1333) | @Test
method formatTableResult_whenSchemaIsNull_returnsEmptyArray (line 1349) | @Test
method formatTableResult_whenNoRows_returnsHeaderOnly (line 1362) | @Test
method formatTableResult_whenSchemaIsEmpty_returnsEmptyHeader (line 1382) | @Test
method formatTableResult_whenHasRowsWithDataAndNulls_returnsFormattedTable (line 1400) | @Test
method testGetQueryForTimePartitionedTable (line 1461) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/DurationTimerTest.java
class DurationTimerTest (line 24) | public class DurationTimerTest {
method testStartStopPairedCall (line 25) | @Test
method testFinishedByItselfCall (line 44) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/HttpUtilTest.java
class HttpUtilTest (line 26) | public class HttpUtilTest {
method testCreateHeaderProviderWithParentProject (line 28) | @Test
method testCreateHeaderProviderNoParentProject (line 41) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/IteratorMultiplexerTest.java
class IteratorMultiplexerTest (line 30) | public class IteratorMultiplexerTest {
method testIteratorRoundRobins (line 32) | @Test
method testIteratorRoundRobinsOneValue (line 62) | @Test
method testIteratorClosedGracefullyWhenSubIteratorsAreInterrupted (line 92) | @Test
method testIteratorClosedGracefullyWhenMultiplexerClosed (line 130) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/LoggingBigQueryStorageReadRowsTracerTest.java
class LoggingBigQueryStorageReadRowsTracerTest (line 26) | public class LoggingBigQueryStorageReadRowsTracerTest {
method setup (line 30) | @Before
method testStartAndFinish (line 38) | @Test
method testWaitingForSpark (line 54) | @Test
method testWaitingForService (line 65) | @Test
method testParseTime (line 80) | @Test
method testLogsAppropriatelyFinished (line 95) | @Test
method testFinishedNoLogs (line 106) | @Test
method testForkWithPrefix (line 112) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/MockResponsesBatch.java
class MockResponsesBatch (line 23) | class MockResponsesBatch implements Iterator<ReadRowsResponse> {
method addResponse (line 26) | void addResponse(ReadRowsResponse response) {
method addException (line 30) | void addException(RuntimeException exception) {
method hasNext (line 34) | @Override
method next (line 39) | @Override
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/ParallelArrowReaderTest.java
class ParallelArrowReaderTest (line 48) | public class ParallelArrowReaderTest {
method initializeAllocator (line 52) | @Before
method closeAllocator (line 57) | @After
method getReaderWithSequence (line 62) | ArrowReader getReaderWithSequence(int... values) throws IOException {
method testExceptionIsPropagatedFromNext (line 140) | @Test
method testInterruptsOnClose (line 163) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/ReadRowsHelperTest.java
class ReadRowsHelperTest (line 20) | @Ignore
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/ReadSessionCreatorTest.java
class ReadSessionCreatorTest (line 70) | public class ReadSessionCreatorTest {
method startStaticServer (line 100) | @BeforeClass
method stopServer (line 109) | @AfterClass
method setUp (line 114) | @Before
method tearDown (line 126) | @After
method testSerializedInstanceIsPropagated (line 131) | @Test
method testDefaultMinMaxStreamCount (line 164) | @Test
method testCustomMinStreamCount (line 192) | @Test
method testCustomMaxStreamCount (line 220) | @Test
method testMinStreamCountGreaterThanMaxStreamCount (line 249) | @Test
method testMaxStreamCountWithoutMinStreamCount (line 277) | @Test
method testSnapshotTimeMillis (line 305) | @Test
method testViewSnapshotTimeMillis (line 331) | @Test
method testCacheMissScenario (line 363) | private void testCacheMissScenario(
method testReadSessionCacheMiss (line 375) | @Test
method addCacheEntry (line 407) | private ReadSession addCacheEntry(
method testCacheHitScenario (line 441) | private void testCacheHitScenario(
method testReadSessionCacheHit (line 450) | @Test
method createMockBigQueryReadClient (line 498) | private static BigQueryReadClient createMockBigQueryReadClient(Enhance...
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/integration/CustomCredentialsIntegrationTest.java
class CustomCredentialsIntegrationTest (line 30) | public class CustomCredentialsIntegrationTest {
method testAccessTokenProvider (line 35) | @Test
method testAccessTokenProvider_withConfig (line 71) | @Test
FILE: bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/integration/DefaultCredentialsDelegateAccessTokenProvider.java
class DefaultCredentialsDelegateAccessTokenProvider (line 31) | public class DefaultCredentialsDelegateAccessTokenProvider implements Ac...
method DefaultCredentialsDelegateAccessTokenProvider (line 37) | public DefaultCredentialsDelegateAccessTokenProvider() {
method DefaultCredentialsDelegateAccessTokenProvider (line 41) | public DefaultCredentialsDelegateAccessTokenProvider(String config) {
method getAccessToken (line 50) | @Override
method getCallCount (line 58) | int getCallCount() {
method getConfig (line 62) | String getConfig() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/ArrowBinaryIterator.java
class ArrowBinaryIterator (line 47) | public class ArrowBinaryIterator implements Iterator<InternalRow> {
method ArrowBinaryIterator (line 56) | public ArrowBinaryIterator(
method hasNext (line 95) | @Override
method next (line 107) | @Override
method toArrowRows (line 112) | private Iterator<InternalRow> toArrowRows(VectorSchemaRoot root, List<...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/AvroBinaryIterator.java
class AvroBinaryIterator (line 38) | public class AvroBinaryIterator implements Iterator<InternalRow> {
method AvroBinaryIterator (line 59) | public AvroBinaryIterator(
method hasNext (line 87) | @Override
method next (line 104) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryConnectorUtils.java
class BigQueryConnectorUtils (line 22) | public class BigQueryConnectorUtils {
method BigQueryConnectorUtils (line 26) | private BigQueryConnectorUtils() {}
method enablePushdownSession (line 28) | public static void enablePushdownSession(SparkSession spark) {
method disablePushdownSession (line 33) | public static void disablePushdownSession(SparkSession spark) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryRelation.java
class BigQueryRelation (line 26) | public class BigQueryRelation extends BaseRelation {
method BigQueryRelation (line 34) | public BigQueryRelation(SparkBigQueryConfig options, TableInfo table, ...
method sqlContext (line 42) | @Override
method schema (line 47) | @Override
method getTableId (line 53) | public TableId getTableId() {
method getTableName (line 57) | public String getTableName() {
method getTableNameForLogging (line 62) | protected String getTableNameForLogging() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryRelationProviderBase.java
class BigQueryRelationProviderBase (line 42) | public class BigQueryRelationProviderBase implements DataSourceRegister {
method BigQueryRelationProviderBase (line 46) | public BigQueryRelationProviderBase(Supplier<GuiceInjectorCreator> get...
method BigQueryRelationProviderBase (line 51) | public BigQueryRelationProviderBase() {
method createRelation (line 55) | public BaseRelation createRelation(SQLContext sqlContext, Map<String, ...
method createRelation (line 59) | public BaseRelation createRelation(
method createSink (line 64) | public Sink createSink(
method createRelationInternal (line 77) | protected BigQueryRelation createRelationInternal(
method createRelation (line 129) | public BaseRelation createRelation(
method shortName (line 136) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryStreamWriter.java
class BigQueryStreamWriter (line 34) | public final class BigQueryStreamWriter {
method BigQueryStreamWriter (line 38) | private BigQueryStreamWriter() {}
method writeBatch (line 48) | public static void writeBatch(
method getSaveMode (line 82) | private static SaveMode getSaveMode(OutputMode outputMode) {
method dataFrameToRDDConverterFactory (line 92) | public static DataFrameToRDDConverter dataFrameToRDDConverterFactory(S...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryStreamingSink.java
class BigQueryStreamingSink (line 32) | public class BigQueryStreamingSink implements Sink {
method BigQueryStreamingSink (line 45) | public BigQueryStreamingSink(
method addBatch (line 65) | @Override
method getSqlContext (line 79) | public SQLContext getSqlContext() {
method getParameters (line 83) | public Map<String, String> getParameters() {
method getPartitionColumns (line 87) | public List<String> getPartitionColumns() {
method getOutputMode (line 91) | public OutputMode getOutputMode() {
method getOpts (line 95) | public SparkBigQueryConfig getOpts() {
method getBigQueryClient (line 99) | public BigQueryClient getBigQueryClient() {
method getLatestBatchId (line 103) | public long getLatestBatchId() {
method equals (line 108) | @Override
method hashCode (line 123) | @Override
method toString (line 129) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryUtilScala.java
class BigQueryUtilScala (line 26) | public final class BigQueryUtilScala { // Renamed from BigQueryUtilScala...
method BigQueryUtilScala (line 28) | private BigQueryUtilScala() {}
method validateScalaVersionCompatibility (line 32) | public static void validateScalaVersionCompatibility() {
method trimVersion (line 91) | private static String trimVersion(String version) {
method toOption (line 95) | public static <T> Optional<T> toOption(java.util.Optional<T> javaOptio...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/DataFrameToRDDConverter.java
type DataFrameToRDDConverter (line 22) | public interface DataFrameToRDDConverter {
method convertToRDD (line 23) | RDD<Row> convertToRDD(Dataset<Row> data);
method supports (line 25) | boolean supports(String sparkVersion);
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/DataSourceVersion.java
type DataSourceVersion (line 24) | public enum DataSourceVersion {
method updateOptionsMap (line 28) | public void updateOptionsMap(Map<String, String> optionsMap) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/GuiceInjectorCreator.java
type GuiceInjectorCreator (line 28) | interface GuiceInjectorCreator {
method createGuiceInjector (line 29) | default Injector createGuiceInjector(
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InjectorBuilder.java
class InjectorBuilder (line 27) | public class InjectorBuilder {
method withSpark (line 37) | public InjectorBuilder withSpark(SparkSession spark) {
method withSchema (line 42) | public InjectorBuilder withSchema(StructType schema) {
method withOptions (line 47) | public InjectorBuilder withOptions(Map<String, String> options) {
method withTableIsMandatory (line 52) | public InjectorBuilder withTableIsMandatory(boolean tableIsMandatory) {
method withDataSourceVersion (line 57) | public InjectorBuilder withDataSourceVersion(DataSourceVersion dataSou...
method withCustomDefaults (line 62) | public InjectorBuilder withCustomDefaults(Map<String, String> customDe...
method withConfig (line 67) | public InjectorBuilder withConfig(SparkBigQueryConfig config) {
method build (line 72) | public Injector build() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InjectorFactory.java
class InjectorFactory (line 27) | public class InjectorFactory {
method InjectorFactory (line 28) | private InjectorFactory() {}
method createInjector (line 30) | public static Injector createInjector(
method createInjector (line 36) | public static Injector createInjector(
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InternalRowIterator.java
class InternalRowIterator (line 29) | public class InternalRowIterator implements Iterator<InternalRow> {
method InternalRowIterator (line 37) | public InternalRowIterator(
method hasNext (line 48) | @Override
method next (line 73) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/PartitionOverwriteMode.java
type PartitionOverwriteMode (line 3) | public enum PartitionOverwriteMode {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/ProtobufUtils.java
class ProtobufUtils (line 75) | public class ProtobufUtils {
class ProtobufSchemaFieldCacheEntry (line 77) | public static final class ProtobufSchemaFieldCacheEntry {
method ProtobufSchemaFieldCacheEntry (line 85) | public ProtobufSchemaFieldCacheEntry(
method getSparkType (line 100) | public DataType getSparkType() {
method getNullable (line 104) | public boolean getNullable() {
method getNestedTypeDescriptor (line 108) | public Descriptors.Descriptor getNestedTypeDescriptor() {
method getFieldDescriptor (line 112) | public Descriptors.FieldDescriptor getFieldDescriptor() {
method getTypeConverterOptional (line 116) | public Optional<TypeConverter> getTypeConverterOptional() {
method getCustomDataTypeOptional (line 120) | public Optional<SupportedCustomDataType> getCustomDataTypeOptional() {
method computeProtobufSchemaFieldEntry (line 125) | private static ProtobufSchemaFieldCacheEntry computeProtobufSchemaFiel...
method load (line 154) | @Override
method toProtoSchema (line 226) | public static ProtoSchema toProtoSchema(Schema schema) throws IllegalA...
method toProtoSchema (line 235) | public static ProtoSchema toProtoSchema(StructType schema) throws Ille...
method toDescriptor (line 244) | private static Descriptors.Descriptor toDescriptor(Schema schema)
method createDescriptorFromProto (line 258) | private static Descriptors.Descriptor createDescriptorFromProto(
method buildDescriptorProtoWithFields (line 273) | @VisibleForTesting
method createProtoFieldBuilder (line 309) | private static DescriptorProtos.FieldDescriptorProto.Builder createPro...
method createProtoFieldBuilder (line 317) | @VisibleForTesting
method toProtoFieldLabel (line 326) | private static DescriptorProtos.FieldDescriptorProto.Label toProtoFiel...
method toProtoFieldType (line 337) | private static DescriptorProtos.FieldDescriptorProto.Type toProtoField...
method toProtoRows (line 352) | public static ProtoRows toProtoRows(StructType sparkSchema, InternalRo...
method buildSingleRowMessage (line 375) | public static DynamicMessage buildSingleRowMessage(
method toDescriptor (line 416) | public static Descriptors.Descriptor toDescriptor(StructType schema)
method convertSparkValueToProtoRowValue (line 431) | private static Object convertSparkValueToProtoRowValue(
method buildDescriptorProtoWithFields (line 651) | private static DescriptorProtos.DescriptorProto buildDescriptorProtoWi...
method createMapStructType (line 713) | static StructType createMapStructType(MapType mapType) {
method createMapStructFields (line 717) | @NotNull
method toProtoFieldType (line 733) | private static DescriptorProtos.FieldDescriptorProto.Type toProtoField...
method convertDecimalToString (line 753) | private static String convertDecimalToString(Object decimal) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/ReadRowsResponseToInternalRowIteratorConverter.java
type ReadRowsResponseToInternalRowIteratorConverter (line 32) | public interface ReadRowsResponseToInternalRowIteratorConverter {
method avro (line 34) | static ReadRowsResponseToInternalRowIteratorConverter avro(
method arrow (line 52) | static ReadRowsResponseToInternalRowIteratorConverter arrow(
method convert (line 66) | Iterator<InternalRow> convert(ReadRowsResponse response);
method getBatchSizeInBytes (line 68) | int getBatchSizeInBytes(ReadRowsResponse response);
class Avro (line 70) | class Avro implements ReadRowsResponseToInternalRowIteratorConverter, ...
method Avro (line 82) | public Avro(
method convert (line 100) | @Override
method getBatchSizeInBytes (line 113) | @Override
class Arrow (line 122) | class Arrow implements ReadRowsResponseToInternalRowIteratorConverter,...
method Arrow (line 132) | public Arrow(
method convert (line 146) | @Override
method getBatchSizeInBytes (line 158) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SchemaConverters.java
class SchemaConverters (line 45) | public class SchemaConverters {
method SchemaConverters (line 58) | private SchemaConverters(SchemaConvertersConfiguration configuration) {
method from (line 62) | public static SchemaConverters from(SchemaConvertersConfiguration conf...
method toSpark (line 67) | public StructType toSpark(Schema schema) {
method getSchemaWithPseudoColumns (line 79) | public Schema getSchemaWithPseudoColumns(TableInfo tableInfo) {
method convertToInternalRow (line 109) | public InternalRow convertToInternalRow(
method convert (line 121) | Object convert(Field field, Object value, StructField userProvidedFiel...
method getStructFieldForRepeatedMode (line 148) | private StructField getStructFieldForRepeatedMode(StructField field) {
method convertByBigQueryType (line 163) | Object convertByBigQueryType(Field bqField, Object value, StructField ...
method getBytes (line 237) | private byte[] getBytes(ByteBuffer buf) {
method convertAll (line 245) | GenericInternalRow convertAll(
method convert (line 284) | @VisibleForTesting
method convertMap (line 310) | Optional<StructField> convertMap(Field field, Metadata metadata) {
method getDataType (line 342) | private DataType getDataType(Field field) {
method getCustomDataType (line 348) | @VisibleForTesting
method getStandardDataType (line 363) | private DataType getStandardDataType(Field field) {
method createDecimalTypeFromNumericField (line 442) | @VisibleForTesting
method toBigQuerySchema (line 472) | public Schema toBigQuerySchema(StructType sparkSchema) {
method sparkToBigQueryFields (line 480) | private FieldList sparkToBigQueryFields(StructType sparkStruct, int de...
method createBigQueryColumn (line 491) | @VisibleForTesting
method getDescriptionOrCommentOfField (line 564) | public static Optional<String> getDescriptionOrCommentOfField(
method buildMapTypeField (line 588) | private Field buildMapTypeField(
method toBigQueryType (line 599) | @VisibleForTesting
method createBigQueryFieldBuilder (line 643) | private Field.Builder createBigQueryFieldBuilder(
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SchemaConvertersConfiguration.java
class SchemaConvertersConfiguration (line 22) | public class SchemaConvertersConfiguration implements Serializable {
method SchemaConvertersConfiguration (line 29) | private SchemaConvertersConfiguration(
method from (line 36) | public static SchemaConvertersConfiguration from(SparkBigQueryConfig c...
method of (line 43) | public static SchemaConvertersConfiguration of(boolean allowMapTypeCon...
method of (line 50) | public static SchemaConvertersConfiguration of(
method createDefault (line 56) | public static SchemaConvertersConfiguration createDefault() {
method getAllowMapTypeConversion (line 63) | public boolean getAllowMapTypeConversion() {
method getBigNumericDefaultPrecision (line 67) | public int getBigNumericDefaultPrecision() {
method getBigNumericDefaultScale (line 71) | public int getBigNumericDefaultScale() {
method equals (line 75) | @Override
method hashCode (line 85) | @Override
method toString (line 91) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java
class SparkBigQueryConfig (line 90) | public class SparkBigQueryConfig
type WriteMethod (line 101) | public enum WriteMethod {
method from (line 105) | public static WriteMethod from(@Nullable String writeMethod) {
method SparkBigQueryConfig (line 272) | @VisibleForTesting
method from (line 279) | public static SparkBigQueryConfig from(
method from (line 296) | public static SparkBigQueryConfig from(
method from (line 319) | @VisibleForTesting
method from (line 343) | @VisibleForTesting
method splitOnComma (line 717) | private static ImmutableList<String> splitOnComma(String value) {
method stripPrefix (line 726) | private static com.google.common.base.Optional<String> stripPrefix(
method parseBigQueryLabels (line 738) | @VisibleForTesting
method toLowerCaseKeysMap (line 765) | private static ImmutableMap<String, String> toLowerCaseKeysMap(Map<Str...
method isQuery (line 773) | @VisibleForTesting
method validateDateFormat (line 795) | private static void validateDateFormat(
method normalizeConf (line 812) | static ImmutableMap<String, String> normalizeConf(Map<String, String> ...
method createCredentials (line 824) | public Credentials createCredentials() {
method getTableId (line 844) | public TableId getTableId() {
method getTableIdWithExplicitProject (line 849) | public TableId getTableIdWithExplicitProject() {
method getTableIdWithoutThePartition (line 859) | public TableId getTableIdWithoutThePartition() {
method getQuery (line 871) | public Optional<String> getQuery() {
method getQueryParameterHelper (line 875) | public QueryParameterHelper getQueryParameterHelper() {
method getParentProjectId (line 879) | @Override
method getCatalogProjectId (line 884) | @Override
method getCatalogLocation (line 889) | @Override
method useParentProjectForMetadataOperations (line 894) | @Override
method getAccessTokenProviderFQCN (line 899) | @Override
method getAccessTokenProviderConfig (line 904) | @Override
method getLoggedInUserName (line 909) | @Override
method getLoggedInUserGroups (line 914) | @Override
method getImpersonationServiceAccountsForUsers (line 919) | @Override
method getImpersonationServiceAccountsForGroups (line 924) | @Override
method getImpersonationServiceAccount (line 929) | @Override
method getCredentialsKey (line 934) | @Override
method getCredentialsFile (line 939) | @Override
method getAccessToken (line 944) | @Override
method getCredentialsScopes (line 949) | @Override
method getFilter (line 954) | public Optional<String> getFilter() {
method getSchema (line 958) | public Optional<StructType> getSchema() {
method getMaxParallelism (line 962) | public OptionalInt getMaxParallelism() {
method getPreferredMinParallelism (line 966) | public OptionalInt getPreferredMinParallelism() {
method getDefaultParallelism (line 972) | public int getDefaultParallelism() {
method getTemporaryGcsBucket (line 976) | public Optional<String> getTemporaryGcsBucket() {
method getPersistentGcsBucket (line 980) | public Optional<String> getPersistentGcsBucket() {
method getPersistentGcsPath (line 984) | public Optional<String> getPersistentGcsPath() {
method getIntermediateFormat (line 988) | public IntermediateFormat getIntermediateFormat() {
method getReadDataFormat (line 992) | public DataFormat getReadDataFormat() {
method getArrowCompressionCodec (line 996) | public CompressionCodec getArrowCompressionCodec() {
method getResponseCompressionCodec (line 1000) | public ResponseCompressionCodec getResponseCompressionCodec() {
method isCombinePushedDownFilters (line 1004) | public boolean isCombinePushedDownFilters() {
method isUseAvroLogicalTypes (line 1008) | @Override
method getDecimalTargetTypes (line 1013) | public ImmutableList<String> getDecimalTargetTypes() {
method isViewsEnabled (line 1017) | public boolean isViewsEnabled() {
method getMaterializationProject (line 1021) | @Override
method getMaterializationDataset (line 1026) | @Override
method getMaterializationExpirationTimeInMinutes (line 1031) | public int getMaterializationExpirationTimeInMinutes() {
method getPartitionField (line 1035) | public Optional<String> getPartitionField() {
method getPartitionExpirationMs (line 1039) | public OptionalLong getPartitionExpirationMs() {
method getPartitionRequireFilter (line 1045) | public Optional<Boolean> getPartitionRequireFilter() {
method getPartitionType (line 1049) | public Optional<TimePartitioning.Type> getPartitionType() {
method getPartitionRange (line 1053) | public Optional<RangePartitioning.Range> getPartitionRange() {
method getPartitionTypeOrDefault (line 1067) | public TimePartitioning.Type getPartitionTypeOrDefault() {
method getClusteredFields (line 1071) | public Optional<ImmutableList<String>> getClusteredFields() {
method getCreateDisposition (line 1075) | public Optional<JobInfo.CreateDisposition> getCreateDisposition() {
method isOptimizedEmptyProjection (line 1079) | public boolean isOptimizedEmptyProjection() {
method getLoadSchemaUpdateOptions (line 1083) | public ImmutableList<JobInfo.SchemaUpdateOption> getLoadSchemaUpdateOp...
method getMaxReadRowsRetries (line 1087) | public int getMaxReadRowsRetries() {
method getPushAllFilters (line 1091) | public boolean getPushAllFilters() {
method getEnableModeCheckForSchemaFields (line 1095) | public boolean getEnableModeCheckForSchemaFields() {
method getBigQueryClientConnectTimeout (line 1102) | @Override
method getBigQueryClientReadTimeout (line 1109) | @Override
method getBigQueryProxyConfig (line 1116) | @Override
method getBigQueryStorageGrpcEndpoint (line 1121) | @Override
method getBigQueryHttpEndpoint (line 1126) | @Override
method getCacheExpirationTimeInMinutes (line 1131) | @Override
method getCreateReadSessionTimeoutInSeconds (line 1136) | @Override
method getPartitionOverwriteModeValue (line 1141) | public PartitionOverwriteMode getPartitionOverwriteModeValue() {
method getChannelPoolSize (line 1145) | @Override
method getFlowControlWindowBytes (line 1150) | @Override
method getQueryJobPriority (line 1155) | @Override
method getKmsKeyName (line 1160) | @Override
method getBigQueryClientRetrySettings (line 1165) | @Override
method getRetrySettings (line 1172) | private static RetrySettings getRetrySettings(int maxAttempts) {
method getBigqueryDataWriteHelperRetrySettings (line 1186) | public RetrySettings getBigqueryDataWriteHelperRetrySettings() {
method getWriteMethod (line 1190) | public WriteMethod getWriteMethod() {
method isWriteAtLeastOnce (line 1194) | public boolean isWriteAtLeastOnce() {
method getTraceId (line 1198) | public Optional<String> getTraceId() {
method getBigQueryJobLabels (line 1202) | @Override
method getAllowMapTypeConversion (line 1207) | public boolean getAllowMapTypeConversion() {
method getBigQueryJobTimeoutInMinutes (line 1211) | public long getBigQueryJobTimeoutInMinutes() {
method getBigQueryTableLabels (line 1215) | public ImmutableMap<String, String> getBigQueryTableLabels() {
method getGpn (line 1219) | public Optional<String> getGpn() {
method getSnapshotTimeMillis (line 1223) | public OptionalLong getSnapshotTimeMillis() {
method getBigNumericDefaultPrecision (line 1227) | public int getBigNumericDefaultPrecision() {
method getBigNumericDefaultScale (line 1231) | public int getBigNumericDefaultScale() {
method toReadSessionCreatorConfig (line 1235) | public ReadSessionCreatorConfig toReadSessionCreatorConfig() {
method toReadTableOptions (line 1263) | public BigQueryClient.ReadTableOptions toReadTableOptions() {
type IntermediateFormat (line 1302) | public enum IntermediateFormat {
method IntermediateFormat (line 1319) | IntermediateFormat(String dataSource, FormatOptions formatOptions) {
method from (line 1324) | public static IntermediateFormat from(
method isSpark24OrAbove (line 1360) | static boolean isSpark24OrAbove(String sparkVersion) {
method missingAvroException (line 1365) | @VisibleForTesting
method getDataSource (line 1385) | public String getDataSource() {
method getFormatOptions (line 1389) | public FormatOptions getFormatOptions() {
method getFileSuffix (line 1393) | public String getFileSuffix() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorModule.java
class SparkBigQueryConnectorModule (line 38) | public class SparkBigQueryConnectorModule implements Module {
method SparkBigQueryConnectorModule (line 51) | public SparkBigQueryConnectorModule(
method configure (line 68) | @Override
method provideSparkSession (line 87) | @Singleton
method provideDataSourceVersion (line 93) | @Singleton
method provideSparkBigQueryConfig (line 99) | @Singleton
method provideUserAgentProvider (line 113) | @Singleton
method provideEnvironmentContext (line 120) | @Singleton
method provideBigQueryJobCompletionListener (line 127) | @Singleton
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorUserAgentProvider.java
class SparkBigQueryConnectorUserAgentProvider (line 28) | public class SparkBigQueryConnectorUserAgentProvider implements UserAgen...
method SparkBigQueryConnectorUserAgentProvider (line 53) | public SparkBigQueryConnectorUserAgentProvider(String dataSourceVersio...
method getUserAgent (line 58) | @Override
method getConnectorInfo (line 71) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorVersionProvider.java
class SparkBigQueryConnectorVersionProvider (line 24) | public class SparkBigQueryConnectorVersionProvider implements VersionPro...
method SparkBigQueryConnectorVersionProvider (line 28) | public SparkBigQueryConnectorVersionProvider(SparkContext sparkContext) {
method getVersion (line 32) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryLineageProvider.java
class SparkBigQueryLineageProvider (line 21) | public class SparkBigQueryLineageProvider implements OpenLineageExtensio...
method getVisitorClassName (line 22) | @Override
method shadedPackage (line 27) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryProxyAndHttpConfig.java
class SparkBigQueryProxyAndHttpConfig (line 36) | public class SparkBigQueryProxyAndHttpConfig implements BigQueryProxyCon...
method SparkBigQueryProxyAndHttpConfig (line 63) | @VisibleForTesting
method from (line 68) | @VisibleForTesting
method checkProxyParamsValidity (line 129) | private static void checkProxyParamsValidity(SparkBigQueryProxyAndHttp...
method checkHttpParamsValidity (line 142) | private static void checkHttpParamsValidity(SparkBigQueryProxyAndHttpC...
method getProperty (line 152) | private static com.google.common.base.Optional<String> getProperty(
method getFirstOrSecondOption (line 164) | private static com.google.common.base.Optional<String> getFirstOrSecon...
method parseProxyAddress (line 170) | @VisibleForTesting
method getProxyUri (line 199) | public Optional<URI> getProxyUri() {
method getProxyUsername (line 203) | public Optional<String> getProxyUsername() {
method getProxyPassword (line 207) | public Optional<String> getProxyPassword() {
method getHttpMaxRetry (line 211) | Optional<Integer> getHttpMaxRetry() {
method getHttpConnectTimeout (line 215) | Optional<Integer> getHttpConnectTimeout() {
method getHttpReadTimeout (line 219) | Optional<Integer> getHttpReadTimeout() {
method equals (line 223) | @Override
method hashCode (line 240) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryUtil.java
class SparkBigQueryUtil (line 56) | public class SparkBigQueryUtil {
method loadBuildProperties (line 74) | private static Properties loadBuildProperties() {
method optimizeLoadUriListForSpark (line 92) | public static List<String> optimizeLoadUriListForSpark(List<String> ur...
method createGcsPath (line 106) | public static Path createGcsPath(
method getUniqueGcsPath (line 141) | private static Path getUniqueGcsPath(String gcsBucket, String applicat...
method getBucketAndScheme (line 159) | private static String getBucketAndScheme(String gcsBucket) {
method getJobId (line 163) | public static String getJobId(SQLConf sqlConf) {
method getJobIdInternal (line 169) | @VisibleForTesting
method saveModeToWriteDisposition (line 178) | public static JobInfo.WriteDisposition saveModeToWriteDisposition(Save...
method parseSimpleTableId (line 193) | public static TableId parseSimpleTableId(SparkSession spark, Map<Strin...
method sparkTimestampToBigQuery (line 199) | public static long sparkTimestampToBigQuery(Object sparkValue) {
method sparkDateToBigQuery (line 224) | public static int sparkDateToBigQuery(Object sparkValue) {
method getTableNameFromOptions (line 238) | public static String getTableNameFromOptions(Map<String, String> optio...
method scalaMapToJavaMap (line 250) | public static <K, V> ImmutableMap<K, V> scalaMapToJavaMap(
method isDataFrameShowMethodInStackTrace (line 257) | public static boolean isDataFrameShowMethodInStackTrace() {
method isJson (line 268) | public static boolean isJson(Metadata metadata) {
method extractPartitionAndClusteringFilters (line 272) | public static ImmutableList<Filter> extractPartitionAndClusteringFilters(
method filtersOnField (line 287) | @VisibleForTesting
method getTypeConverterStream (line 295) | public static Stream<TypeConverter> getTypeConverterStream() {
method extractJobLabels (line 299) | @NotNull
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkFilterUtils.java
class SparkFilterUtils (line 39) | public class SparkFilterUtils {
method SparkFilterUtils (line 41) | private SparkFilterUtils() {}
method isTopLevelFieldHandled (line 44) | public static boolean isTopLevelFieldHandled(
method isFilterWithNamedFieldHandled (line 131) | static boolean isFilterWithNamedFieldHandled(
method isHandled (line 146) | public static boolean isHandled(
method handledFilters (line 182) | public static Iterable<Filter> handledFilters(
method handledFilters (line 187) | public static Iterable<Filter> handledFilters(
method unhandledFilters (line 194) | public static Iterable<Filter> unhandledFilters(
method unhandledFilters (line 199) | public static Iterable<Filter> unhandledFilters(
method getCompiledFilter (line 206) | public static String getCompiledFilter(
method compileFilter (line 226) | public static String compileFilter(Filter filter) {
method compileFilters (line 303) | public static String compileFilters(Iterable<Filter> filters) {
method compileValue (line 312) | static String compileValue(Object value) {
method compileValue (line 317) | static String compileValue(Object value, char arrayStart, char arrayEn...
method escape (line 351) | static String escape(String value) {
method quote (line 359) | static String quote(String value) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SupportedCustomDataType.java
type SupportedCustomDataType (line 26) | public enum SupportedCustomDataType {
method SupportedCustomDataType (line 33) | SupportedCustomDataType(String typeMarker, DataType sparkDataType) {
method getSparkDataType (line 38) | public UserDefinedType getSparkDataType() {
method getTypeMarker (line 42) | public String getTypeMarker() {
method getSqlType (line 46) | public DataType getSqlType() {
method serialize (line 50) | public InternalRow serialize(Object obj) {
method of (line 54) | public static Optional<SupportedCustomDataType> of(DataType dataType) {
method forDescription (line 61) | public static Optional<SupportedCustomDataType> forDescription(String ...
method toSqlType (line 68) | public static DataType toSqlType(DataType dataType) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SupportsQueryPushdown.java
type SupportsQueryPushdown (line 22) | public interface SupportsQueryPushdown {
method getBigQueryRDDFactory (line 23) | BigQueryRDDFactory getBigQueryRDDFactory();
method getPushdownFilters (line 27) | Optional<String> getPushdownFilters();
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/TypeConverter.java
type TypeConverter (line 22) | public interface TypeConverter<T> {
method toSparkType (line 23) | DataType toSparkType(LegacySQLTypeName bigQueryType);
method toBigQueryType (line 25) | LegacySQLTypeName toBigQueryType(DataType sparkType);
method toProtoFieldType (line 27) | DescriptorProtos.FieldDescriptorProto.Type toProtoFieldType(DataType s...
method supportsBigQueryType (line 29) | boolean supportsBigQueryType(LegacySQLTypeName bigQueryType);
method supportsSparkType (line 31) | boolean supportsSparkType(DataType sparkType);
method sparkToProtoValue (line 33) | T sparkToProtoValue(Object sparkValue);
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/BigQueryPartition.java
class BigQueryPartition (line 21) | public class BigQueryPartition implements Partition {
method BigQueryPartition (line 25) | public BigQueryPartition(String stream, int index) {
method getStream (line 30) | public String getStream() {
method index (line 34) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/BigQueryRDDContext.java
class BigQueryRDDContext (line 48) | class BigQueryRDDContext implements Serializable {
method BigQueryRDDContext (line 62) | public BigQueryRDDContext(
method compute (line 81) | public scala.collection.Iterator<InternalRow> compute(Partition split,...
method getPartitions (line 135) | public Partition[] getPartitions() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/BigQueryRDDFactory.java
class BigQueryRDDFactory (line 59) | public class BigQueryRDDFactory {
method BigQueryRDDFactory (line 72) | public BigQueryRDDFactory(
method buildScanFromSQL (line 89) | public RDD<InternalRow> buildScanFromSQL(String sql) {
method createRddFromTable (line 136) | public RDD<?> createRddFromTable(
method createRDD (line 179) | @VisibleForTesting
method getNumBytes (line 231) | public long getNumBytes(TableDefinition tableDefinition) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/DirectBigQueryRelation.java
class DirectBigQueryRelation (line 55) | public class DirectBigQueryRelation extends BigQueryRelation
method DirectBigQueryRelation (line 72) | public DirectBigQueryRelation(
method needConversion (line 92) | @Override
method sizeInBytes (line 97) | @Override
method buildScan (line 102) | @Override
method buildScan (line 107) | @Override
method buildScan (line 112) | @Override
method unhandledFilters (line 143) | @Override
method getBigQueryRDDFactory (line 160) | public BigQueryRDDFactory getBigQueryRDDFactory() {
method getCompiledFilter (line 165) | String getCompiledFilter(Filter[] filters) {
method generateEmptyRowRDD (line 185) | private RDD<?> generateEmptyRowRDD(TableInfo tableInfo, String filter) {
method insert (line 202) | @Override
class ObjectToInternalRowConverter (line 211) | private static class ObjectToInternalRowConverter extends AbstractFunc...
method apply (line 216) | @Override
method toSqlTableReference (line 222) | static String toSqlTableReference(TableId tableId) {
method equals (line 226) | @Override
method hashCode (line 243) | @Override
method toString (line 248) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/Scala213BigQueryRDD.java
class Scala213BigQueryRDD (line 31) | class Scala213BigQueryRDD extends RDD<InternalRow> {
method Scala213BigQueryRDD (line 36) | public Scala213BigQueryRDD(SparkContext sparkContext, BigQueryRDDConte...
method compute (line 45) | @Override
method getPartitions (line 50) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/ScalaIterator.java
class ScalaIterator (line 22) | class ScalaIterator<T> extends AbstractIterator<T> {
method ScalaIterator (line 26) | public ScalaIterator(Iterator<T> underlying) {
method hasNext (line 30) | @Override
method next (line 35) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/events/BigQueryJobCompletedEvent.java
class BigQueryJobCompletedEvent (line 27) | public abstract class BigQueryJobCompletedEvent implements SparkListener...
method BigQueryJobCompletedEvent (line 32) | protected BigQueryJobCompletedEvent(JobInfo jobInfo, JobConfiguration....
method from (line 42) | public static Optional<BigQueryJobCompletedEvent> from(JobInfo complet...
method getJobInfo (line 53) | public JobInfo getJobInfo() {
method getEtag (line 57) | public String getEtag() {
method getGeneratedId (line 61) | public String getGeneratedId() {
method getJobId (line 65) | public JobId getJobId() {
method getSelfLink (line 69) | public String getSelfLink() {
method getStatus (line 73) | public JobStatus getStatus() {
method getUserEmail (line 77) | public String getUserEmail() {
method logEvent (line 81) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/events/LoadJobCompletedEvent.java
class LoadJobCompletedEvent (line 23) | public class LoadJobCompletedEvent extends BigQueryJobCompletedEvent {
method LoadJobCompletedEvent (line 25) | public LoadJobCompletedEvent(JobInfo jobInfo) {
method getLoadJobConfiguration (line 29) | public LoadJobConfiguration getLoadJobConfiguration() {
method getLoadJobStatistics (line 33) | public JobStatistics.LoadStatistics getLoadJobStatistics() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/events/QueryJobCompletedEvent.java
class QueryJobCompletedEvent (line 23) | public class QueryJobCompletedEvent extends BigQueryJobCompletedEvent {
method QueryJobCompletedEvent (line 25) | public QueryJobCompletedEvent(JobInfo jobInfo) {
method getQueryJobConfiguration (line 29) | public QueryJobConfiguration getQueryJobConfiguration() {
method getQueryJobStatistics (line 33) | public JobStatistics.QueryStatistics getQueryJobStatistics() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/examples/JavaShakespeare.java
class JavaShakespeare (line 23) | public class JavaShakespeare {
method main (line 25) | public static void main(String[] args) {
method usage (line 65) | private static void usage() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/DataOrigin.java
type DataOrigin (line 18) | public enum DataOrigin {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/SparkBigQueryConnectorMetricsUtils.java
class SparkBigQueryConnectorMetricsUtils (line 26) | public class SparkBigQueryConnectorMetricsUtils {
method postInputFormatEvent (line 30) | public static void postInputFormatEvent(SparkContext sparkContext) {
method postConnectorVersion (line 44) | public static void postConnectorVersion(SparkContext sparkContext, Str...
method postWriteSessionMetrics (line 65) | public static void postWriteSessionMetrics(
method getAccumulatorNameForMetric (line 125) | public static String getAccumulatorNameForMetric(String metricName, St...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/SparkBigQueryReadSessionMetrics.java
class SparkBigQueryReadSessionMetrics (line 32) | public class SparkBigQueryReadSessionMetrics extends SparkListener
method SparkBigQueryReadSessionMetrics (line 52) | private SparkBigQueryReadSessionMetrics(
method hashCode (line 91) | @Override
method equals (line 101) | @Override
method from (line 114) | public static SparkBigQueryReadSessionMetrics from(
method incrementBytesReadAccumulator (line 125) | public void incrementBytesReadAccumulator(long value) {
method incrementRowsReadAccumulator (line 129) | public void incrementRowsReadAccumulator(long value) {
method incrementScanTimeAccumulator (line 133) | public void incrementScanTimeAccumulator(long value) {
method incrementParseTimeAccumulator (line 137) | public void incrementParseTimeAccumulator(long value) {
method getBytesRead (line 141) | public long getBytesRead() {
method getRowsRead (line 145) | public long getRowsRead() {
method getScanTime (line 149) | public long getScanTime() {
method getParseTime (line 153) | public long getParseTime() {
method getNumReadStreams (line 157) | public long getNumReadStreams() {
method getBytesReadAccumulator (line 161) | public LongAccumulator getBytesReadAccumulator() {
method getRowsReadAccumulator (line 165) | public LongAccumulator getRowsReadAccumulator() {
method getParseTimeAccumulator (line 169) | public LongAccumulator getParseTimeAccumulator() {
method getScanTimeAccumulator (line 173) | public LongAccumulator getScanTimeAccumulator() {
method onJobEnd (line 177) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/SparkMetricsSource.java
class SparkMetricsSource (line 11) | public class SparkMetricsSource implements Source, Serializable, BigQuer...
method SparkMetricsSource (line 20) | public SparkMetricsSource() {
method sourceName (line 34) | @Override
method metricRegistry (line 39) | @Override
method updateParseTime (line 44) | @Override
method updateTimeInSpark (line 49) | @Override
method incrementBytesReadCounter (line 54) | @Override
method incrementRowsReadCounter (line 59) | @Override
method updateScanTime (line 64) | @Override
method getParseTime (line 69) | public Timer getParseTime() {
method getTimeInSpark (line 73) | public Timer getTimeInSpark() {
method getScanTime (line 77) | public Timer getScanTime() {
method getBytesRead (line 81) | public Counter getBytesRead() {
method getRowsRead (line 85) | public Counter getRowsRead() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/pushdowns/SparkBigQueryPushdown.java
type SparkBigQueryPushdown (line 20) | public interface SparkBigQueryPushdown {
method supportsSparkVersion (line 22) | boolean supportsSparkVersion(String sparkVersion);
method enable (line 24) | void enable(SparkSession spark);
method disable (line 26) | void disable(SparkSession spark);
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/util/HdfsUtils.java
class HdfsUtils (line 23) | public class HdfsUtils {
method toJavaUtilIterator (line 26) | public static <T> Iterator<T> toJavaUtilIterator(final RemoteIterator<...
method toJavaUtilIterable (line 48) | public static <T> Iterable<T> toJavaUtilIterable(final RemoteIterator<...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryDataSourceWriterInsertableRelation.java
class BigQueryDataSourceWriterInsertableRelation (line 37) | public class BigQueryDataSourceWriterInsertableRelation extends BigQuery...
method BigQueryDataSourceWriterInsertableRelation (line 41) | public BigQueryDataSourceWriterInsertableRelation(
method insert (line 50) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryDeprecatedIndirectInsertableRelation.java
class BigQueryDeprecatedIndirectInsertableRelation (line 29) | public class BigQueryDeprecatedIndirectInsertableRelation extends BigQue...
method BigQueryDeprecatedIndirectInsertableRelation (line 31) | public BigQueryDeprecatedIndirectInsertableRelation(
method insert (line 36) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryInsertableRelationBase.java
class BigQueryInsertableRelationBase (line 33) | public abstract class BigQueryInsertableRelationBase extends BaseRelation
method BigQueryInsertableRelationBase (line 43) | protected BigQueryInsertableRelationBase(
method sqlContext (line 51) | @Override
method schema (line 56) | @Override
method exists (line 63) | public boolean exists() {
method isEmpty (line 68) | public boolean isEmpty() {
method numberOfRows (line 73) | private Optional<BigInteger> numberOfRows() {
method getTableId (line 77) | public TableId getTableId() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryWriteHelper.java
class BigQueryWriteHelper (line 59) | public class BigQueryWriteHelper {
method BigQueryWriteHelper (line 78) | public BigQueryWriteHelper(
method writeDataFrameToBigQuery (line 114) | public void writeDataFrameToBigQuery() {
method loadDataToBigQuery (line 164) | void loadDataToBigQuery() throws IOException {
method friendlyTableName (line 197) | String friendlyTableName() {
method updateMetadataIfNeeded (line 201) | void updateMetadataIfNeeded() {
method updateTableMetadataIfNeeded (line 205) | public static void updateTableMetadataIfNeeded(
method updatedField (line 249) | static Field updatedField(Field field, StructField sparkSchemaField) {
method cleanTemporaryGcsPathIfNeeded (line 258) | void cleanTemporaryGcsPathIfNeeded() {
method verifySaveMode (line 264) | static void verifySaveMode(SaveMode saveMode) {
method tableExists (line 270) | private boolean tableExists() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/CreatableRelationProviderHelper.java
class CreatableRelationProviderHelper (line 34) | public class CreatableRelationProviderHelper {
method createRelation (line 36) | public BaseRelation createRelation(
method createRelation (line 46) | public BaseRelation createRelation(
method createBigQueryInsertableRelation (line 84) | @VisibleForTesting
method createBigQueryInsertableRelation (line 104) | public BigQueryInsertableRelationBase createBigQueryInsertableRelation(
method createBigQueryInsertableRelationFromInjector (line 118) | public BigQueryInsertableRelationBase createBigQueryInsertableRelation...
method createBigQueryInsertableRelationInternal (line 123) | private BigQueryInsertableRelationBase createBigQueryInsertableRelatio...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/DataSourceWriterContextPartitionHandler.java
class DataSourceWriterContextPartitionHandler (line 34) | public class DataSourceWriterContextPartitionHandler
method DataSourceWriterContextPartitionHandler (line 46) | public DataSourceWriterContextPartitionHandler(
method call (line 54) | @Override
method writerCommitMessageWithError (line 80) | private static WriterCommitMessageContext writerCommitMessageWithError...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/IntermediateDataCleaner.java
class IntermediateDataCleaner (line 28) | public class IntermediateDataCleaner extends Thread {
method IntermediateDataCleaner (line 36) | public IntermediateDataCleaner(Path path, Configuration conf) {
method run (line 41) | @Override
method deletePath (line 46) | public void deletePath() {
method deleteEpochPath (line 69) | public void deleteEpochPath(long epochId) {
method pathExists (line 84) | private boolean pathExists(FileSystem fs, Path path) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/AvroIntermediateRecordWriter.java
class AvroIntermediateRecordWriter (line 26) | public class AvroIntermediateRecordWriter implements IntermediateRecordW...
method AvroIntermediateRecordWriter (line 32) | AvroIntermediateRecordWriter(Schema schema, OutputStream outputStream)...
method write (line 39) | @Override
method close (line 44) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDataSourceWriterModule.java
class BigQueryDataSourceWriterModule (line 37) | public class BigQueryDataSourceWriterModule implements Module {
method BigQueryDataSourceWriterModule (line 44) | public BigQueryDataSourceWriterModule(
method configure (line 52) | @Override
method provideDirectDataSourceWriterContext (line 57) | @Singleton
method provideIndirectDataSourceWriterContext (line 89) | @Singleton
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataSourceWriterContext.java
class BigQueryDirectDataSourceWriterContext (line 51) | public class BigQueryDirectDataSourceWriterContext implements DataSource...
type WritingMode (line 77) | enum WritingMode {
method BigQueryDirectDataSourceWriterContext (line 87) | public BigQueryDirectDataSourceWriterContext(
method getOrCreateTable (line 147) | private BigQueryTable getOrCreateTable(
method createWriterContextFactory (line 199) | @Override
method onDataWriterCommit (line 212) | @Override
method commit (line 229) | @Override
method abort (line 297) | @Override
method setTableInfo (line 308) | @Override
class BigQueryTable (line 314) | static class BigQueryTable {
method BigQueryTable (line 318) | public BigQueryTable(TableId tableId, boolean toDeleteOnAbort) {
method getTableId (line 323) | public TableId getTableId() {
method toDeleteOnAbort (line 327) | public boolean toDeleteOnAbort() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataWriterContext.java
class BigQueryDirectDataWriterContext (line 40) | public class BigQueryDirectDataWriterContext implements DataWriterContex...
method BigQueryDirectDataWriterContext (line 58) | public BigQueryDirectDataWriterContext(
method write (line 94) | @Override
method commit (line 107) | @Override
method abort (line 129) | @Override
method close (line 135) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataWriterContextFactory.java
class BigQueryDirectDataWriterContextFactory (line 25) | public class BigQueryDirectDataWriterContextFactory
method BigQueryDirectDataWriterContextFactory (line 36) | public BigQueryDirectDataWriterContextFactory(
method createDataWriterContext (line 66) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectWriterCommitMessageContext.java
class BigQueryDirectWriterCommitMessageContext (line 18) | public class BigQueryDirectWriterCommitMessageContext implements WriterC...
method BigQueryDirectWriterCommitMessageContext (line 29) | public BigQueryDirectWriterCommitMessageContext(
method getWriteStreamName (line 46) | public String getWriteStreamName() {
method getPartitionId (line 50) | public int getPartitionId() {
method getTaskId (line 54) | public long getTaskId() {
method getEpochId (line 58) | public long getEpochId() {
method getTablePath (line 62) | public String getTablePath() {
method getRowCount (line 66) | public long getRowCount() {
method getBytesWritten (line 70) | public long getBytesWritten() {
method toString (line 74) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectDataSourceWriterContext.java
class BigQueryIndirectDataSourceWriterContext (line 57) | public class BigQueryIndirectDataSourceWriterContext implements DataSour...
method BigQueryIndirectDataSourceWriterContext (line 78) | public BigQueryIndirectDataSourceWriterContext(
method createWriterContextFactory (line 100) | @Override
method onDataStreamingWriterCommit (line 110) | @Override
method onDataStreamingWriterAbort (line 115) | @Override
method commit (line 128) | @Override
method abort (line 133) | @Override
method setTableInfo (line 145) | @Override
method commitMessages (line 150) | private void commitMessages(WriterCommitMessageContext[] messages, lon...
method loadDataToBigQuery (line 201) | void loadDataToBigQuery(List<String> sourceUris, Schema schema) throws...
method updateMetadataIfNeeded (line 223) | void updateMetadataIfNeeded() {
method cleanTemporaryGcsPathIfNeeded (line 227) | void cleanTemporaryGcsPathIfNeeded(long epochId) {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectDataWriterContext.java
class BigQueryIndirectDataWriterContext (line 30) | class BigQueryIndirectDataWriterContext implements DataWriterContext<Int...
method BigQueryIndirectDataWriterContext (line 42) | protected BigQueryIndirectDataWriterContext(
method write (line 57) | @Override
method commit (line 64) | @Override
method abort (line 70) | @Override
method close (line 77) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectDataWriterContextFactory.java
class BigQueryIndirectDataWriterContextFactory (line 28) | class BigQueryIndirectDataWriterContextFactory implements DataWriterCont...
method BigQueryIndirectDataWriterContextFactory (line 35) | public BigQueryIndirectDataWriterContextFactory(
method createDataWriterContext (line 46) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectWriterCommitMessageContext.java
class BigQueryIndirectWriterCommitMessageContext (line 18) | class BigQueryIndirectWriterCommitMessageContext implements WriterCommit...
method BigQueryIndirectWriterCommitMessageContext (line 23) | public BigQueryIndirectWriterCommitMessageContext(String uri) {
method getUri (line 27) | public String getUri() {
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/DataSourceWriterContext.java
type DataSourceWriterContext (line 34) | public interface DataSourceWriterContext {
method createWriterContextFactory (line 36) | DataWriterContextFactory<InternalRow> createWriterContextFactory();
method useCommitCoordinator (line 38) | default boolean useCommitCoordinator() {
method onDataWriterCommit (line 42) | default void onDataWriterCommit(WriterCommitMessageContext message) {}
method onDataStreamingWriterCommit (line 44) | default void onDataStreamingWriterCommit(long epochId, WriterCommitMes...
method onDataStreamingWriterAbort (line 46) | default void onDataStreamingWriterAbort(long epochId, WriterCommitMess...
method commit (line 48) | void commit(WriterCommitMessageContext[] messages);
method abort (line 50) | void abort(WriterCommitMessageContext[] messages);
method setTableInfo (line 52) | void setTableInfo(TableInfo tableInfo);
method create (line 54) | static Optional<DataSourceWriterContext> create(
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/DataWriterContext.java
type DataWriterContext (line 26) | public interface DataWriterContext<T> extends Closeable {
method write (line 27) | void write(T row) throws IOException;
method commit (line 29) | WriterCommitMessageContext commit() throws IOException;
method abort (line 31) | void abort() throws IOException;
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/DataWriterContextFactory.java
type DataWriterContextFactory (line 20) | public interface DataWriterContextFactory<T> extends Serializable {
method createDataWriterContext (line 22) | DataWriterContext<T> createDataWriterContext(int partitionId, long tas...
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/IntermediateRecordWriter.java
type IntermediateRecordWriter (line 22) | public interface IntermediateRecordWriter extends Closeable {
method write (line 24) | void write(GenericRecord avroRecord) throws IOException;
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/NoOpDataWriterContext.java
class NoOpDataWriterContext (line 21) | public class NoOpDataWriterContext implements DataWriterContext<Internal...
method write (line 22) | @Override
method commit (line 25) | @Override
method abort (line 30) | @Override
method close (line 33) | @Override
FILE: spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/WriterCommitMessageContext.java
type WriterCommitMessageContext (line 21) | public interface WriterCommitMessageContext extends Serializable {
method getError (line 23) | default Optional<Exception> getError() {
FILE: spark-bigquery-connector-common/src/main/java/org/apache/spark/sql/Scala213SparkSqlUtils.java
class Scala213SparkSqlUtils (line 32) | public class Scala213SparkSqlUtils extends SparkSqlUtils {
method supportsScalaVersion (line 34) | @Override
method rowToInternalRow (line 39) | @Override
method createExpressionEncoder (line 47) | @Override
method toAttributes (line 63) | public static scala.collection.immutable.Seq<AttributeReference> toAtt...
FILE: spark-bigquery-connector-common/src/main/java/org/apache/spark/sql/SparkSqlUtils.java
class SparkSqlUtils (line 24) | public abstract class SparkSqlUtils {
method getInstance (line 27) | public static SparkSqlUtils getInstance() {
method supportsScalaVersion (line 45) | public abstract boolean supportsScalaVersion(String scalaVersion);
method rowToInternalRow (line 47) | public abstract InternalRow rowToInternalRow(Row row);
method createExpressionEncoder (line 49) | public abstract ExpressionEncoder<Row> createExpressionEncoder(StructT...
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/AvroSchemaConverterTest.java
class AvroSchemaConverterTest (line 36) | public class AvroSchemaConverterTest {
method testSchemaConversion (line 38) | @Test
method testConvertIntegers (line 112) | @Test
method testConvertNull (line 152) | @Test
method testConvertNullable (line 177) | @Test
method testConvertDecimal (line 202) | @Test
method testConvertDoubles (line 236) | @Test
method testConvertDateTime (line 263) | @Test
method testComparisonToSparkAvro (line 289) | @Test
method checkField (line 292) | private void checkField(Schema.Field field, String name, Schema schema) {
method numericDecimal (line 297) | private Schema numericDecimal(String name) {
method bignumericDecimal (line 301) | private Schema bignumericDecimal(String name) {
method nullable (line 305) | Schema nullable(Schema schema) {
method nullable (line 309) | Schema nullable(Schema.Type type) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/BigQueryRelationProviderTestBase.java
class BigQueryRelationProviderTestBase (line 58) | public abstract class BigQueryRelationProviderTestBase {
method setUp (line 84) | @Before
method tearDown (line 136) | @After
method tableExists (line 144) | @Test
method tableDoesNotExist (line 160) | @Test
method tableDoesNotExistWithExpectedExceptionAnnotation (line 180) | @Test(expected = RuntimeException.class)
method credentialsParameterIsUsedToInitializeBigQueryOptions (line 189) | @Test
method createProvider (line 217) | abstract BigQueryRelationProviderBase createProvider(
method createProvider (line 220) | abstract BigQueryRelationProviderBase createProvider();
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/BigQueryRelationTest.java
class BigQueryRelationTest (line 33) | public class BigQueryRelationTest {
method testCreateTableNameForLogging_withTable (line 35) | @Test
method testCreateTableNameForLogging_fromQuery (line 56) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/DataSourceOptions.java
class DataSourceOptions (line 22) | class DataSourceOptions {
method DataSourceOptions (line 25) | public DataSourceOptions(Map<String, String> originalMap) {
method asMap (line 32) | public Map<String, String> asMap() {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/InjectorBuilderTest.java
class InjectorBuilderTest (line 27) | public class InjectorBuilderTest {
method testDefaults (line 29) | @Test
method testParams (line 40) | @Test
method testConnectorInfo (line 65) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/MockResponsesBatch.java
class MockResponsesBatch (line 23) | class MockResponsesBatch implements Iterator<ReadRowsResponse> {
method addResponse (line 27) | void addResponse(ReadRowsResponse response) {
method addException (line 31) | void addException(RuntimeException exception) {
method hasNext (line 35) | @Override
method next (line 40) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/MockSparkBigQueryPushdown.java
class MockSparkBigQueryPushdown (line 21) | public class MockSparkBigQueryPushdown implements SparkBigQueryPushdown {
method supportsSparkVersion (line 26) | @Override
method enable (line 31) | @Override
method disable (line 36) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/OptimizeLoadUriListTest.java
class OptimizeLoadUriListTest (line 28) | @RunWith(Parameterized.class)
method OptimizeLoadUriListTest (line 34) | public OptimizeLoadUriListTest(List<String> input, List<String> expect...
method data (line 39) | @Parameterized.Parameters(name = "{index}: Should get {1}")
method test (line 115) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/ProtobufUtilsTest.java
class ProtobufUtilsTest (line 54) | public class ProtobufUtilsTest {
method testBigQueryRecordToDescriptor (line 68) | @Test
method testBigQueryToProtoSchema (line 80) | @Test
method testSparkStructRowToDynamicMessage (line 117) | @Test
method getDummyRow (line 143) | private InternalRow getDummyRow() {
method testSparkRowToProtoRow (line 165) | @Test
method testSettingARequiredFieldAsNull (line 178) | @Test
method createStructSchemaDescriptor (line 361) | public Descriptors.Descriptor createStructSchemaDescriptor() {
method createStructDescriptor (line 371) | public Descriptors.Descriptor createStructDescriptor() throws Assumpti...
method createBigSchemaRowDescriptor (line 393) | public Descriptors.Descriptor createBigSchemaRowDescriptor() {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SchemaConverterTest.java
class SchemaConverterTest (line 38) | public class SchemaConverterTest {
method testEmptySchemaBigQueryToSparkConversion (line 58) | @Test
method testSingleFieldSchemaBigQueryToSparkConversion (line 66) | @Test
method testFullFieldSchemaBigQueryToSparkConversion (line 75) | @Test
method testFieldHasDescriptionBigQueryToSpark (line 85) | @Test
method testGetSchemaWithPseudoColumns (line 109) | @Test
method testGetSchemaWithPseudoColumnsOfNoneDailyPartitioning (line 134) | @Test
method buildTableInfo (line 149) | public TableInfo buildTableInfo(Schema schema, TimePartitioning timePa...
method testSparkToBQSchema (line 161) | @Test
method testDecimalTypeConversionFromSparkToBigQuery (line 175) | @Test
method VerifyDecimalConversion (line 186) | private void VerifyDecimalConversion(int precision, int scale, LegacyS...
method getKeyValueRepeatedField (line 196) | private Field getKeyValueRepeatedField() {
method testFailureOnTooWideBigNumericConversion (line 206) | @Test
method testTimeTypesConversions (line 220) | @Test
method testDescriptionConversion (line 231) | @Test
method testDescriptionConversionForSparkML (line 247) | @Test
method testSparkMLConversionNoDescription (line 264) | @Test
method testNoDescriptionConversion (line 275) | @Test
method testCommentConversion (line 285) | @Test
method testMaximumNestingDepthError (line 298) | @Test
method testGetCustomDataType (line 317) | @Test
method testConvertBigQueryMapToSparkMap_not_repeated (line 335) | @Test
method testConvertBigQueryMapToSparkMap_not_record (line 345) | @Test
method testConvertBigQueryMapToSparkMap_wrong_record_size (line 355) | @Test
method testConvertBigQueryMapToSparkMap_wrong_record_fields (line 368) | @Test
method testConvertBigQueryMapToSparkMap_with_actual_map (line 384) | @Test
method testConvertSparkMapToBigQueryMap_nested (line 396) | @Test
method testConvertBigQueryMapToSparkMap_nested (line 423) | @Test
method testConvertBigQueryMapToSparkMap_mapTypeConversionDisabled (line 455) | @Test
method testConvertBigQueryToSparkArray_mapTypeConversionDisabled (line 463) | @Test
method testCreateDecimalTypeFromNumericField (line 477) | @Test
method testCreateDecimalTypeFromCustomBigNumericField (line 487) | @Test
method testCreateDecimalTypeFromCustomBigNumericField_wide (line 496) | @Test
method numeric (line 507) | private Field.Builder numeric() {
method assertDecimal (line 511) | private void assertDecimal(Field.Builder numeric, int expectedPrecisio...
method simpleStructField (line 746) | private StructField simpleStructField(String name, DataType dataType) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java
class SparkBigQueryConfigTest (line 54) | public class SparkBigQueryConfigTest {
method testSerializability (line 63) | @Test
method testDefaults (line 82) | @Test
method testConfigFromOptions (line 141) | @Test
method testConfigFromOptions_rangePartitioning (line 250) | @Test
method testCacheExpirationSetToZero (line 282) | @Test
method testCacheExpirationSetToNegative (line 307) | @Test
method testInvalidCompressionCodec (line 340) | @Test
method testConfigFromGlobalOptions (line 376) | @Test
method testGetTableIdWithoutThePartition_PartitionExists (line 414) | @Test
method testGetTableIdWithoutThePartition_PartitionMissing (line 440) | @Test
method testQueryMatching (line 464) | @Test
method testQueryMatchingWithSpacedTables (line 486) | @Test
method testJobLabelOverride (line 523) | @Test
method testTableLabelOverride (line 544) | @Test
method testCustomDefaults (line 566) | @Test
method asDataSourceOptionsMap (line 599) | private static Map<String, String> asDataSourceOptionsMap(Map<String, ...
method withParameter (line 608) | private Map<String, String> withParameter(String key, String value) {
method withParameters (line 612) | private Map<String, String> withParameters(
method testTakingCredentialsFileFromGcsHadoopConfig (line 621) | @Test
method testTakingCredentialsFilefromTheProperties (line 637) | @Test
method testNoCredentialsFileIsProvided (line 653) | @Test
method testTakingProjectIdFromGcsHadoopConfig (line 669) | @Test
method testTakingProjectIdFromTheProperties (line 685) | @Test
method testNoProjectIdIsProvided (line 701) | @Test
method testInvalidDataFormat (line 717) | @Test
method testDataFormatNoValueIsSet (line 737) | @Test
method testSetReadDataFormatAsAvro (line 753) | @Test
method testGetAnyOptionWithFallbackOnlyNewConfigExist (line 769) | @Test
method testKmsKeyPropagationToReadTableOptions (line 786) | @Test
method testGetAnyOptionWithFallbackBothConfigsExist (line 808) | @Test
method testGetAnyOptionWithFallbackOnlyFallbackExists (line 826) | @Test
method testGetAnyOptionWithFallbackNoConfigExists (line 842) | @Test
method testMaxParallelismOnlyNewConfigExist (line 858) | @Test
method testMaxParallelismBothConfigsExist (line 874) | @Test
method testMaxParallelismOnlyOldConfigExists (line 890) | @Test
method testMaxParallelismNoConfigExists (line 906) | @Test
method testLoadSchemaUpdateOptionAllowFieldAddition (line 922) | @Test
method testLoadSchemaUpdateOptionAllowFieldRelaxation (line 939) | @Test
method testLoadSchemaUpdateOptionBoth (line 956) | @Test
method testLoadSchemaUpdateOptionNone (line 976) | @Test
method testNormalizeAllConf (line 992) | @Test
method testSetPersistentGcsPath (line 1007) | @Test
method testSetPersistentGcsBucket (line 1023) | @Test
method testSetPersistentGcsBucketWithPrefix (line 1039) | @Test
method testSetTemporaryGcsBucket (line 1055) | @Test
method testSetTemporaryGcsBucketWithPrefix (line 1071) | @Test
method testBqChannelPoolSize (line 1087) | @Test
method testBqFlowControWindow (line 1103) | @Test
method testBadCredentials (line 1119) | @Test
method testImpersonationGlobal (line 1140) | @Test
method testImpersonationGlobalForUser (line 1159) | @Test
method testImpersonationGlobalForGroup (line 1187) | @Test
method testMissingAvroMessage (line 1216) | @Test
method testEnableListInferenceWithDefaultIntermediateFormat (line 1227) | @Test
method testSystemBucketAsDefaultTemporaryGcsBucket (line 1244) | @Test
method testLoadFromQueryConfig (line 1262) | @Test
method testCatalogProjectId (line 1283) | @Test
method testCatalogLocation (line 1318) | @Test
method testCatalogProjectIdAndLocation (line 1353) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryProxyAndHttpConfigTest.java
class SparkBigQueryProxyAndHttpConfigTest (line 37) | public class SparkBigQueryProxyAndHttpConfigTest {
method getHadoopConfiguration (line 61) | private Configuration getHadoopConfiguration() {
method testSerializability (line 79) | @Test
method testConfigFromOptions (line 89) | @Test
method testConfigFromGlobalOptions (line 108) | @Test
method testConfigFromHadoopConfigurationOptions (line 128) | @Test
method testConfigWithAllThreeParameters (line 145) | @Test
method testConfigWithGlobalParametersAndHadoopConfig (line 163) | @Test
method testConfigViaSparkBigQueryConfigWithAllThreeParameters (line 182) | @Test
method testConfigViaSparkBigQueryConfigWithGlobalOptionsAndHadoopConfiguration (line 213) | @Test
method testConfigViaSparkBigQueryConfigWithHadoopConfiguration (line 245) | @Test
method testWhenProxyIsNotSetAndUserNamePasswordAreNotNull (line 274) | @Test
method testWhenProxyIsSetAndUserNameIsNull (line 299) | @Test
method testWhenProxyIsSetAndPasswordIsNull (line 322) | @Test
method testParseProxyAddress (line 345) | @Test
method testParseProxyAddressIllegalPath (line 362) | @Test
method testParseProxyAddressNoPort (line 379) | @Test
method testParseProxyAddressWrongScheme (line 397) | @Test
method testParseProxyAddressNoHost (line 414) | @Test
method getURI (line 428) | private URI getURI(String scheme, String host, int port) throws URISyn...
method testBigQueryProxyTransporterBuilder (line 432) | @Test
method testBigQueryProxyTransporterBuilderWithErrors (line 448) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryUtilTest.java
class SparkBigQueryUtilTest (line 42) | public class SparkBigQueryUtilTest {
method testGetJobIdInternal_hasTagsAndAppId (line 46) | @Test
method testGetJobIdInternal_missingTags_hasAppId (line 55) | @Test
method testGetJobIdInternal_missingBoth (line 61) | @Test
method testSaveModeToWriteDisposition (line 67) | @Test
method testParseSimpleTableId (line 81) | @Test
method testIsJson_SqlTypeJson (line 90) | @Test
method testIsJson_SqlTypeOther (line 96) | @Test
method testIsJson_NoSqlType (line 102) | @Test
method testExtractPartitionFilters_no_match (line 108) | @Test
method testExtractPartitionFilters_has_match (line 124) | @Test
method testExtractJobLabels_no_labels (line 141) | @Test
method testExtractJobLabels_with_labels (line 147) | @Test
method testSparkDateToBigQuery (line 159) | @Test
method testSparkTimestampToBigQuery (line 166) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkFilterUtilsTest.java
class SparkFilterUtilsTest (line 38) | @RunWith(Parameterized.class)
method data (line 45) | @Parameterized.Parameters(name = "{index}: dataFormat={0}, pushAllFilt...
method SparkFilterUtilsTest (line 57) | public SparkFilterUtilsTest(DataFormat dataFormat, boolean pushAllFilt...
method resetDefaultTimeZone (line 62) | @After
method testValidFilters (line 67) | @Test
method testMultipleValidFiltersAreHandled (line 95) | @Test
method testInvalidFilters (line 103) | @Test
method testNewFilterBehaviourWithFilterOption (line 121) | @Test
method testNewFilterBehaviourNoFilterOption (line 136) | @Test
method checkFilters (line 146) | private void checkFilters(
method testStringFilters (line 161) | @Test
method testNumericAndNullFilters (line 178) | @Test
method testDateFilters (line 206) | @Test
method testDateFilters_java8Time (line 216) | @Test
method testTimestampFilters (line 226) | @Test
method testTimestampFilters_java8Time (line 236) | @Test
method testTimestampFilters_timezone (line 245) | @Test
method testFiltersWithNestedOrAnd_1 (line 273) | @Test
method testFiltersWithNestedOrAnd_2 (line 310) | @Test
method testFiltersWithNestedOrAnd_3 (line 334) | @Test
method testFiltersWithNestedOrAnd_4 (line 380) | @Test
method testQuote (line 413) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SupportedCustomDataTypeTest.java
class SupportedCustomDataTypeTest (line 24) | public class SupportedCustomDataTypeTest {
method testVector (line 26) | @Test
method testMatrix (line 33) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/TestConstants.java
class TestConstants (line 38) | public class TestConstants {
method TestConstants (line 193) | private TestConstants() {}
method copy (line 195) | private static <T> T[] copy(T... elements) {
class StringStruct (line 199) | public static class StringStruct {
method StringStruct (line 204) | public StringStruct(String str3, String str1, String str2) {
method getStr3 (line 210) | public String getStr3() {
method setStr3 (line 214) | public void setStr3(String str3) {
method getStr1 (line 218) | public String getStr1() {
method setStr1 (line 222) | public void setStr1(String str1) {
method getStr2 (line 226) | public String getStr2() {
method setStr2 (line 230) | public void setStr2(String str2) {
class NumStruct (line 235) | public static class NumStruct {
method NumStruct (line 241) | public NumStruct(Long num3, Long num2, Long num1, List<StringStruct>...
method getNum3 (line 248) | public Long getNum3() {
method setNum3 (line 252) | public void setNum3(Long num3) {
method getNum2 (line 256) | public Long getNum2() {
method setNum2 (line 260) | public void setNum2(Long num2) {
method getNum1 (line 264) | public Long getNum1() {
method setNum1 (line 268) | public void setNum1(Long num1) {
method getStringStructArr (line 272) | public List<StringStruct> getStringStructArr() {
method setStringStructArr (line 276) | public void setStringStructArr(List<StringStruct> stringStructArr) {
class ColumnOrderTestClass (line 281) | public static class ColumnOrderTestClass {
method ColumnOrderTestClass (line 285) | public ColumnOrderTestClass(NumStruct nums, String str) {
method getNums (line 290) | public NumStruct getNums() {
method setNums (line 294) | public void setNums(NumStruct nums) {
method getStr (line 298) | public String getStr() {
method setStr (line 302) | public void setStr(String str) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java
class AcceptanceTestConstants (line 20) | public class AcceptanceTestConstants {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestContext.java
class AcceptanceTestContext (line 18) | public class AcceptanceTestContext {
method AcceptanceTestContext (line 28) | public AcceptanceTestContext(
method getScriptUri (line 39) | public String getScriptUri(String testName) {
method getResultsDirUri (line 43) | public String getResultsDirUri(String testName) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestUtils.java
class AcceptanceTestUtils (line 41) | public class AcceptanceTestUtils {
method getArtifact (line 50) | public static Path getArtifact(Path targetDir, String prefix, String s...
method prefixSuffixChecker (line 63) | private static Predicate<Path> prefixSuffixChecker(final String prefix...
method lastModifiedTime (line 70) | private static FileTime lastModifiedTime(Path path) {
method copyToGcs (line 78) | public static BlobId copyToGcs(Path source, String destinationUri, Str...
method uploadToGcs (line 92) | public static BlobId uploadToGcs(InputStream source, String destinatio...
method uploadToGcs (line 102) | public static BlobId uploadToGcs(ByteBuffer content, String destinatio...
method createTestBaseGcsDir (line 115) | public static String createTestBaseGcsDir(String testId) {
method getCsv (line 119) | public static String getCsv(String resultsDirUri) throws Exception {
method deleteGcsDir (line 136) | public static void deleteGcsDir(String testBaseGcsDir) throws Exception {
method createBqDataset (line 154) | public static void createBqDataset(String dataset) {
method getNumOfRowsOfBqTable (line 159) | public static int getNumOfRowsOfBqTable(String dataset, String table) {
method runBqQuery (line 163) | public static void runBqQuery(String query) throws Exception {
method deleteBqDatasetAndTables (line 167) | public static void deleteBqDatasetAndTables(String dataset) {
method uploadConnectorJar (line 171) | static void uploadConnectorJar(String targetDir, String prefix, String...
method generateClusterName (line 178) | public static String generateClusterName(String testId) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/BigNumericDataprocServerlessAcceptanceTestBase.java
class BigNumericDataprocServerlessAcceptanceTestBase (line 35) | public class BigNumericDataprocServerlessAcceptanceTestBase
method BigNumericDataprocServerlessAcceptanceTestBase (line 38) | public BigNumericDataprocServerlessAcceptanceTestBase(
method testBatch (line 43) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/DataprocAcceptanceTestBase.java
class DataprocAcceptanceTestBase (line 47) | public class DataprocAcceptanceTestBase {
method DataprocAcceptanceTestBase (line 57) | protected DataprocAcceptanceTestBase(AcceptanceTestContext context) {
method DataprocAcceptanceTestBase (line 61) | protected DataprocAcceptanceTestBase(
method setup (line 67) | protected static AcceptanceTestContext setup(
method tearDown (line 100) | protected static void tearDown(AcceptanceTestContext context) throws E...
method createClusterIfNeeded (line 108) | protected static String createClusterIfNeeded(
method terminateCluster (line 126) | protected static void terminateCluster(String clusterName) throws Exce...
method cluster (line 130) | private static void cluster(ThrowingConsumer<ClusterControllerClient> ...
method createCluster (line 138) | private static Cluster createCluster(
method testRead (line 179) | @Test
method writeStream (line 193) | @Test
method testBigNumeric (line 222) | @Test
method createAndRunPythonJob (line 253) | private Job createAndRunPythonJob(
method createPySparkJobBuilder (line 269) | private PySparkJob.Builder createPySparkJobBuilder(
method runAndWait (line 288) | private Job runAndWait(Job job, Duration timeout) throws Exception {
method waitForJobCompletion (line 302) | Job waitForJobCompletion(
type ThrowingConsumer (line 323) | @FunctionalInterface
method accept (line 325) | void accept(T t) throws Exception;
class ClusterProperty (line 328) | protected static class ClusterProperty {
method ClusterProperty (line 333) | private ClusterProperty(String key, String value, String marker) {
method of (line 339) | protected static ClusterProperty of(String key, String value, String...
method getKey (line 343) | public String getKey() {
method getValue (line 347) | public String getValue() {
method getMarker (line 351) | public String getMarker() {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/DataprocServerlessAcceptanceTestBase.java
class DataprocServerlessAcceptanceTestBase (line 44) | public class DataprocServerlessAcceptanceTestBase {
method DataprocServerlessAcceptanceTestBase (line 62) | public DataprocServerlessAcceptanceTestBase(String connectorJarPrefix,...
method createBatchControllerClient (line 67) | @Before
method tearDown (line 92) | @After
method createAndRunPythonBatch (line 99) | protected Batch createAndRunPythonBatch(
method createPySparkBatchBuilder (line 141) | protected PySparkBatch.Builder createPySparkBatchBuilder(
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/ReadSheakspeareDataprocServerlessAcceptanceTestBase.java
class ReadSheakspeareDataprocServerlessAcceptanceTestBase (line 26) | public class ReadSheakspeareDataprocServerlessAcceptanceTestBase
method ReadSheakspeareDataprocServerlessAcceptanceTestBase (line 29) | public ReadSheakspeareDataprocServerlessAcceptanceTestBase(
method testBatch (line 34) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/WriteStreamDataprocServerlessAcceptanceTestBase.java
class WriteStreamDataprocServerlessAcceptanceTestBase (line 27) | public class WriteStreamDataprocServerlessAcceptanceTestBase
method WriteStreamDataprocServerlessAcceptanceTestBase (line 30) | public WriteStreamDataprocServerlessAcceptanceTestBase(
method testBatch (line 35) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/direct/Scala213BigQueryRDDTest.java
class Scala213BigQueryRDDTest (line 25) | public class Scala213BigQueryRDDTest {
method testCreateScala213BigQueryRDD (line 27) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/CatalogIntegrationTestBase.java
class CatalogIntegrationTestBase (line 39) | public class CatalogIntegrationTestBase {
method setupGlobalSpark (line 50) | @BeforeClass
method teardownGlobalSpark (line 64) | @AfterClass
method setupSparkSession (line 72) | @Before
method teardownSparkSession (line 82) | @After
method renameTestTable (line 87) | @Before
method cleanTestTable (line 95) | @After
method testCreateTableInDefaultNamespace (line 103) | @Test
method testCreateTableInCustomNamespace (line 108) | @Test
method internalTestCreateTable (line 113) | private void internalTestCreateTable(String dataset) throws Interrupte...
method testCreateTableAndInsertInDefaultNamespace (line 121) | @Test
method testCreateTableAndInsertInCustomNamespace (line 126) | @Test
method internalTestCreateTableAndInsert (line 131) | private void internalTestCreateTableAndInsert(String dataset) throws I...
method testCreateTableAsSelectInDefaultNamespace (line 140) | @Test
method testCreateTableAsSelectInCustomNamespace (line 145) | @Test
method internalTestCreateTableAsSelect (line 150) | private void internalTestCreateTableAsSelect(String dataset) throws In...
method testCreateTableWithExplicitTargetInDefaultNamespace (line 158) | @Test
method testCreateTableWithExplicitTargetInCustomNamespace (line 164) | @Test
method internalTestCreateTableWithExplicitTarget (line 170) | private void internalTestCreateTableWithExplicitTarget(String dataset)
method fullTableName (line 190) | private String fullTableName(String dataset) {
method selectCountStarFrom (line 198) | private long selectCountStarFrom(String dataset, String table) throws ...
method testReadFromDifferentBigQueryProject (line 210) | @Test
method testListNamespaces (line 219) | @Test
method testCreateNamespace (line 230) | @Test
method testCreateNamespaceWithLocation (line 241) | @Test
method testDropDatabase (line 257) | @Test
method testCatalogInitializationWithProject (line 268) | @Test
method testCreateCatalogWithLocation (line 311) | @Test
method testCreateTableAsSelectWithProjectAndLocation (line 341) | @Test
method createSparkSession (line 396) | private static SparkSession createSparkSession() {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/IntegrationTestUtils.java
class IntegrationTestUtils (line 51) | public class IntegrationTestUtils {
method getBigquery (line 58) | public static BigQuery getBigquery() {
method getBigQueryClient (line 62) | private static BigQueryClient getBigQueryClient() {
method createDataset (line 74) | public static void createDataset(String dataset) {
method runQuery (line 81) | public static TableResult runQuery(String query) {
method runQuery (line 85) | public static void runQuery(String query, Object... args) {
method listTables (line 89) | public static Iterable<Table> listTables(DatasetId datasetId, TableDef...
method createBigLakeTable (line 93) | public static void createBigLakeTable(
method deleteDatasetAndTables (line 115) | public static void deleteDatasetAndTables(String dataset) {
method metadata (line 121) | static Metadata metadata(String key, String value) {
method getOrCreateSparkSession (line 127) | static SparkSession getOrCreateSparkSession(String applicationName) {
method createView (line 131) | static void createView(String dataset, String view) {
method compareRows (line 139) | public static void compareRows(Row row, Row expected) {
method compareBigNumericDataSetSchema (line 160) | public static void compareBigNumericDataSetSchema(
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/OpenLineageIntegrationTestBase.java
class OpenLineageIntegrationTestBase (line 37) | public class OpenLineageIntegrationTestBase {
method OpenLineageIntegrationTestBase (line 46) | public OpenLineageIntegrationTestBase() {
method createTestTable (line 51) | @Before
method clearLineageFile (line 56) | @After
class CustomSessionFactory (line 62) | protected static class CustomSessionFactory extends ExternalResource {
method before (line 66) | @Override
method parseEventLogs (line 84) | private List<JSONObject> parseEventLogs(File file) throws Exception {
method getFieldName (line 102) | private String getFieldName(JSONObject event, String field) {
method testLineageEvent (line 107) | @Test
method testLineageEventWithQueryInput (line 134) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadByFormatIntegrationTestBase.java
class ReadByFormatIntegrationTestBase (line 58) | public class ReadByFormatIntegrationTestBase extends SparkBigQueryIntegr...
method ReadByFormatIntegrationTestBase (line 65) | public ReadByFormatIntegrationTestBase(String dataFormat) {
method ReadByFormatIntegrationTestBase (line 69) | public ReadByFormatIntegrationTestBase(String dataFormat, boolean user...
method ReadByFormatIntegrationTestBase (line 73) | public ReadByFormatIntegrationTestBase(
method ReadByFormatIntegrationTestBase (line 78) | public ReadByFormatIntegrationTestBase(
method testViewWithDifferentColumnsForSelectAndFilter (line 86) | @Test
method testCachedViewWithDifferentColumnsForSelectAndFilter (line 102) | @Test
method testOutOfOrderColumns (line 119) | @Test
method testSelectAllColumnsFromATable (line 134) | @Test
method testNumberOfPartitions (line 151) | @Test
method testDefaultNumberOfPartitions (line 165) | @Test
method testBalancedPartitions (line 178) | @Test(timeout = 300_000)
method testKeepingFiltersBehaviour (line 212) | @Test
method testColumnOrderOfStruct (line 239) | @Test
method testConvertBigQueryMapToSparkMap (line 259) | @Test
method testTimestampNTZReadFromBigQuery (line 301) | @Test
method testWindowFunctionPartitionBy (line 328) | @Test
method testWindowFunctionPartitionByWithArray (line 355) | @Test
method scalaMapToJavaMap (line 383) | static <K, V> Map<K, V> scalaMapToJavaMap(scala.collection.Map<K, V> m...
method getViewDataFrame (line 389) | Dataset<Row> getViewDataFrame() {
method readAllTypesTable (line 402) | Dataset<Row> readAllTypesTable() {
method extractWords (line 411) | protected Set<String> extractWords(Dataset<Row> df) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadFromQueryIntegrationTestBase.java
class ReadFromQueryIntegrationTestBase (line 50) | class ReadFromQueryIntegrationTestBase extends SparkBigQueryIntegrationT...
method addListener (line 58) | @Before
method removeListener (line 64) | @After
method ReadFromQueryIntegrationTestBase (line 69) | protected ReadFromQueryIntegrationTestBase() {
method ReadFromQueryIntegrationTestBase (line 73) | protected ReadFromQueryIntegrationTestBase(boolean isDsv2OnSpark3AndAb...
method testReadFromQuery_nomMterializationDataset (line 79) | @Test
method internalTestReadFromQueryToMaterializationDataset (line 91) | private void internalTestReadFromQueryToMaterializationDataset(String ...
method internalTestReadFromQueryWithAdditionalOptions (line 97) | private void internalTestReadFromQueryWithAdditionalOptions(
method testReadFromQuery (line 115) | @Test
method testReadFromQueryWithNewLine (line 127) | @Test
method testQueryOption (line 140) | @Test
method testMaterializtionToAutoGeneratedTable (line 181) | @Test
method validateResult (line 201) | private void validateResult(Dataset<Row> df) {
method testBadQuery (line 220) | @Test
method testQueryJobPriority (line 237) | @Test
method testReadFromLongQueryWithBigQueryJobTimeout (line 256) | @Test
method testReadWithNamedParameters (line 277) | @Test
method testReadWithPositionalParameters (line 312) | @Test
method testReadWithMixedParametersFails (line 347) | @Test
method testReadFromQueryWithKmsKey (line 382) | @Test
class TestBigQueryJobCompletionListener (line 412) | class TestBigQueryJobCompletionListener extends SparkListener {
method onOtherEvent (line 416) | @Override
method getJobInfos (line 423) | public ImmutableList<JobInfo> getJobInfos() {
method reset (line 427) | public void reset() {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadIntegrationTestBase.java
class ReadIntegrationTestBase (line 60) | public class ReadIntegrationTestBase extends SparkBigQueryIntegrationTes...
method ReadIntegrationTestBase (line 118) | public ReadIntegrationTestBase() {
method ReadIntegrationTestBase (line 122) | public ReadIntegrationTestBase(boolean userProvidedSchemaAllowed) {
method ReadIntegrationTestBase (line 126) | public ReadIntegrationTestBase(boolean userProvidedSchemaAllowed, Data...
method ReadIntegrationTestBase (line 130) | public ReadIntegrationTestBase(
method intializeSchema (line 137) | private void intializeSchema(Optional<DataType> timeStampNTZType) {
method clearGcsObjectsToCleanList (line 150) | @Before
method cleanGcsObjects (line 155) | @After
method resetDefaultTimeZone (line 162) | @After
method testShakespeare (line 171) | private void testShakespeare(Dataset<Row> df) {
method testReadWithOption (line 186) | @Test
method testReadWithSimplifiedApi (line 192) | @Test
method testReadCompressed (line 197) | @Test
method testReadCompressedWith1BackgroundThreads (line 212) | @Test
method testReadCompressedWith4BackgroundThreads (line 228) | @Test
method testReadSchemaPruned (line 244) | @Test
method testFilters (line 256) | @Test
method readAllTypesTable (line 276) | Dataset<Row> readAllTypesTable() {
method testCountWithFilters (line 285) | @Test
method testKnownSizeInBytes (line 311) | @Test
method testKnownSchema (line 319) | @Test
method testUserDefinedSchema (line 325) | @Test
method testNonExistentSchema (line 344) | @Test
method testHeadDoesNotTimeoutAndOOM (line 354) | @Test(timeout = 10_000) // 10 seconds
method testUnhandledFilterOnStruct (line 365) | @Test
method testQueryMaterializedView (line 382) | @Test
method testQueryMaterializedView_noMaterializationDataset (line 397) | @Test
method testOrAcrossColumnsAndFormats (line 411) | @Test
method testArrowResponseCompressionCodec (line 436) | @Test
method testAvroResponseCompressionCodec (line 473) | @Test
method testArrowCompressionCodec (line 510) | @Test
method uploadFileToGCS (line 549) | private void uploadFileToGCS(String resourceName, String destinationUR...
method testReadFromBigLakeTable_csv (line 560) | @Test
method testReadFromBigLakeTable_json (line 565) | @Test
method testReadFromBigLakeTable_parquet (line 571) | @Test
method testReadFromBigLakeTable_avro (line 579) | @Test
method testBigLakeTable (line 585) | private void testBigLakeTable(FormatOptions formatOptions, String data...
method testReadFromTableSnapshot (line 607) | @Test
method testReadFromTableWithSpacesInName (line 637) | @Test
method testCreateReadSessionTimeout (line 676) | @Test
method testCreateReadSessionTimeoutWithLessTimeOnHugeData (line 695) | @Test
method testNestedFieldProjection (line 717) | @Test
method testReadFilteredTimestampField (line 735) | @Test
method testPushDateTimePredicate (line 759) | @Test
method testPseudoColumnsRuntimeFilteringDate (line 778) | @Test
method testPseudoColumnsRuntimeFilteringHour (line 808) | @Test
method testExecuteCommand (line 838) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/SparkBigQueryIntegrationTestBase.java
class SparkBigQueryIntegrationTestBase (line 24) | public class SparkBigQueryIntegrationTestBase {
method SparkBigQueryIntegrationTestBase (line 32) | public SparkBigQueryIntegrationTestBase() {
method createTestTable (line 36) | @Before
class SparkFactory (line 41) | protected static class SparkFactory extends ExternalResource {
method before (line 44) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestConstants.java
class TestConstants (line 62) | public class TestConstants {
method copy (line 399) | private static <T> T[] copy(T... elements) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/TestDataset.java
class TestDataset (line 22) | public class TestDataset extends ExternalResource {
method apply (line 26) | @Override
method before (line 36) | @Override
method after (line 63) | @Override
method toString (line 68) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/WriteIntegrationTestBase.java
class WriteIntegrationTestBase (line 110) | abstract class WriteIntegrationTestBase extends SparkBigQueryIntegration...
method WriteIntegrationTestBase (line 119) | public WriteIntegrationTestBase(SparkBigQueryConfig.WriteMethod writeM...
method WriteIntegrationTestBase (line 123) | public WriteIntegrationTestBase(
method WriteIntegrationTestBase (line 128) | public WriteIntegrationTestBase(
method metadata (line 139) | private Metadata metadata(Map<String, String> map) {
method createTestTableName (line 145) | @Before
method resetDefaultTimeZone (line 151) | @After
method createDiffInSchemaDestTable (line 156) | private String createDiffInSchemaDestTable(String schema) {
method initialData (line 166) | protected Dataset<Row> initialData() {
method additonalData (line 179) | protected Dataset<Row> additonalData() {
method testTableNumberOfRows (line 191) | protected int testTableNumberOfRows() throws InterruptedException {
method testTableNumberOfRows (line 195) | protected int testTableNumberOfRows(String table) throws InterruptedEx...
method testPartitionedTableDefinition (line 200) | private StandardTableDefinition testPartitionedTableDefinition() {
method writeToBigQueryAvroFormat (line 204) | protected void writeToBigQueryAvroFormat(
method writeToBigQuery (line 209) | protected void writeToBigQuery(Dataset<Row> df, SaveMode mode, String ...
method writeToBigQuery (line 213) | protected void writeToBigQuery(
method readAllTypesTable (line 226) | Dataset<Row> readAllTypesTable() {
method writeToBigQuery_AppendSaveMode_Internal (line 235) | private void writeToBigQuery_AppendSaveMode_Internal(String writeAtLea...
method testWriteToBigQuery_AppendSaveMode (line 247) | @Test
method testWriteToBigQuery_AppendSaveMode_AtLeastOnce (line 252) | @Test
method writeToBigQuery_WithTableLabels_Internal (line 258) | private void writeToBigQuery_WithTableLabels_Internal(String writeAtLe...
method testWriteToBigQuery_WithTableLabels (line 280) | @Test
method testWriteToBigQuery_WithTableLabels_AtLeastOnce (line 285) | @Test
method writeToBigQuery_EnableListInference_Internal (line 291) | private void writeToBigQuery_EnableListInference_Internal(String write...
method testWriteToBigQuery_EnableListInference (line 318) | @Test
method testWriteToBigQuery_EnableListInference_AtLeastOnce (line 323) | @Test
method writeToBigQuery_ErrorIfExistsSaveMode_Internal (line 329) | private void writeToBigQuery_ErrorIfExistsSaveMode_Internal(String wri...
method testWriteToBigQuery_ErrorIfExistsSaveMode (line 340) | @Test
method testWriteToBigQuery_ErrorIfExistsSaveMode_AtLeastOnce (line 345) | @Test
method writeToBigQuery_IgnoreSaveMode_Internal (line 351) | private void writeToBigQuery_IgnoreSaveMode_Internal(String writeAtLea...
method testWriteToBigQuery_IgnoreSaveMode (line 364) | @Test
method testWriteToBigQuery_IgnoreSaveMode_AtLeastOnce (line 369) | @Test
method writeToBigQuery_OverwriteSaveMode_Internal (line 375) | private void writeToBigQuery_OverwriteSaveMode_Internal(String writeAt...
method testWriteToBigQuery_OverwriteSaveMode (line 395) | @Test
method testWriteToBigQuery_OverwriteSaveMode_AtLeastOnce (line 400) | @Test
method testWriteToBigQuery_AvroFormat (line 406) | @Test
method writeToBigQuerySimplifiedApi_Internal (line 413) | private void writeToBigQuerySimplifiedApi_Internal(String writeAtLeast...
method testWriteToBigQuerySimplifiedApi (line 426) | @Test
method testWriteToBigQuerySimplifiedApi_AtLeastOnce (line 431) | @Test
method testWriteToBigQueryAddingTheSettingsToSparkConf (line 437) | @Test
method directWriteToBigQueryWithDiffInSchema_Internal (line 450) | private void directWriteToBigQueryWithDiffInSchema_Internal(String wri...
method testDirectWriteToBigQueryWithDiffInSchema (line 473) | @Test
method testDirectWriteToBigQueryWithDiffInSchema_AtLeastOnce (line 478) | @Test
method directWriteToBigQueryWithDiffInSchemaAndDisableModeCheck_Internal (line 484) | private void directWriteToBigQueryWithDiffInSchemaAndDisableModeCheck_...
method testDirectWriteToBigQueryWithDiffInSchemaAndDisableModeCheck (line 506) | @Test
method testDirectWriteToBigQueryWithDiffInSchemaAndDisableModeCheck_AtLeastOnce (line 511) | @Test
method directWriteToBigQueryWithDiffInDescription_Internal (line 518) | private void directWriteToBigQueryWithDiffInDescription_Internal(Strin...
method testDirectWriteToBigQueryWithDiffInDescription (line 543) | @Test
method testDirectWriteToBigQueryWithDiffInDescription_AtLeastOnce (line 548) | @Test
method testInDirectWriteToBigQueryWithDiffInSchemaAndModeCheck (line 554) | @Test
method testIndirectWriteToBigQueryWithDiffInSchemaNullableFieldAndDisableModeCheck (line 576) | @Test
method testInDirectWriteToBigQueryWithDiffInDescription (line 599) | @Test
method testInDirectWriteToBigQueryWithStreaming (line 622) | @Test
method testInDirectWriteToBigQueryWithStreaming_AllTypes (line 661) | @Test
method writeDFNullableToBigQueryNullable_Internal (line 707) | private void writeDFNullableToBigQueryNullable_Internal(String writeAt...
method testWriteDFNullableToBigQueryNullable (line 727) | @Test
method testWriteDFNullableToBigQueryNullable_AtLeastOnce (line 732) | @Test
method writeDFNullableWithNonNullDataToBigQueryRequired_Internal (line 738) | private void writeDFNullableWithNonNullDataToBigQueryRequired_Internal...
method testWriteDFNullableWithNonNullDataToBigQueryRequired (line 759) | @Test
method testWriteDFNullableWithNonNullDataToBigQueryRequired_AtLeastOnce (line 764) | @Test
method testWriteNullableDFWithNullDataToBigQueryRequired (line 770) | @Test
method testWriteNullableDFToBigQueryRepeated (line 791) | @Test
method testWriteRequiredDFToBigQueryNullable (line 811) | @Test
method testWriteRequiredDFToBigQueryRequired (line 830) | @Test
method testWriteRequiredDFToBigQueryRepeated (line 849) | @Test
method testWriteRepeatedDFToBigQueryNullable (line 869) | @Test
method testWriteRepeatedDFToBigQueryRequired (line 895) | @Test
method testWriteRepeatedDFToBigQueryRepeated (line 921) | @Test
method testWriteToBigQueryPartitionedAndClusteredTable (line 944) | @Test
method testWriteToBigQueryClusteredTable (line 972) | @Test
method testWriteWithTableLabels (line 996) | @Test
method overwriteSinglePartition (line 1017) | protected Dataset<Row> overwriteSinglePartition(StructField dateField) {
method testOverwriteSinglePartition (line 1075) | public void testOverwriteSinglePartition() {
method testOverwriteSinglePartitionWithComment (line 1082) | public void testOverwriteSinglePartitionWithComment() {
method testWriteToBigQueryWithDescription (line 1091) | @Test
method testWriteEmptyDataFrame (line 1148) | @Test
method structType (line 1155) | protected StructType structType(StructField... fields) {
method testPartition_Hourly (line 1159) | @Test
method testPartition_Daily (line 1164) | @Test
method testPartition_Monthly (line 1169) | @Test
method testPartition_Yearly (line 1174) | @Test
method testPartition (line 1179) | private void testPartition(String partitionType) {
method testPartitionRange (line 1204) | @Test
method testCacheDataFrameInDataSource (line 1239) | @Test
method testWriteJsonToANewTable (line 1267) | @Test
method testWriteJsonToAnExistingTable (line 1312) | @Test
method testWriteMapToANewTable (line 1356) | @Test
method testAllowFieldAddition (line 1417) | @Test
method testWriteToCmekManagedTable (line 1480) | @Test
method testWriteNumericsToWiderFields (line 1504) | @Test
method testWriteStringToTimeField_internal (line 1543) | private void testWriteStringToTimeField_internal(SaveMode saveMode) {
method testWriteStringToTimeField_OverwriteSaveMode (line 1579) | @Test
method testWriteStringToTimeField_AppendSaveMode (line 1584) | @Test
method testWriteStringToDateTimeField_internal (line 1589) | private void testWriteStringToDateTimeField_internal(SaveMode saveMode) {
method testWriteStringToDateTimeField_OverwriteSaveMode (line 1631) | @Test
method testWriteStringToDateTimeField_AppendSaveMode (line 1636) | @Test
method testWriteToTimestampField (line 1641) | @Test
method writeAndLoadDatasetOverwriteDynamicPartition (line 1682) | protected Dataset<Row> writeAndLoadDatasetOverwriteDynamicPartition(
method testOverwriteDynamicPartition_partitionTimestampByHour (line 1710) | @Test
method testOverwriteDynamicPartition_partitionTimestampByDay (line 1754) | @Test
method testOverwriteDynamicPartition_partitionTimestampByMonth (line 1798) | @Test
method testOverwriteDynamicPartition_partitionTimestampByYear (line 1842) | @Test
method testOverwriteDynamicPartition_partitionDateByDay (line 1886) | @Test
method testOverwriteDynamicPartition_partitionDateByMonth (line 1926) | @Test
method testOverwriteDynamicPartition_partitionDateByYear (line 1967) | @Test
method testOverwriteDynamicPartition_partitionDateTimeByHour (line 2008) | @Test
method testOverwriteDynamicPartition_partitionDateTimeByDay (line 2050) | @Test
method testOverwriteDynamicPartition_partitionDateTimeByMonth (line 2092) | @Test
method testOverwriteDynamicPartition_partitionDateTimeByYear (line 2134) | @Test
method testOverwriteDynamicPartition_noTimePartitioning (line 2176) | @Test
method testOverwriteDynamicPartition_rangePartitioned (line 2214) | @Test
method testOverwriteDynamicPartition_rangePartitionedOutsideRangeLessThanStart (line 2265) | @Test
method testOverwriteDynamicPartition_rangePartitionedOutsideRangeGreaterThanEnd (line 2300) | @Test
method testOverwriteDynamicPartition_rangePartitionedBoundaryCondition (line 2335) | @Test
method testOverwriteDynamicPartition_rangePartitionedWithNulls (line 2374) | @Test
method testWriteSchemaSubset (line 2425) | public void testWriteSchemaSubset() throws Exception {
method allowFieldAdditionWithNestedColumns (line 2466) | @Test
method allowFieldAdditionIntoNestedColumns (line 2542) | @Test
method hasTwoValues (line 2622) | private boolean hasTwoValues(FieldValue record) {
method hasThreeValues (line 2627) | private boolean hasThreeValues(FieldValue record) {
method testWriteSparkMlTypes (line 2632) | @Test
method testTimestampNTZDirectWriteToBigQuery (line 2695) | @Test
method testTimestampNTZIndirectWriteToBigQueryAvroFormat (line 2725) | @Test
method testTimestampNTZIndirectWriteToBigQueryParquetFormat (line 2737) | @Test
method testTableDescriptionRemainsUnchanged (line 2749) | @Test
method testCountAfterWrite (line 2774) | @Test
method insertAndGetTimestampNTZToBigQuery (line 2791) | private TableResult insertAndGetTimestampNTZToBigQuery(LocalDateTime t...
method numberOfRowsWith (line 2810) | protected long numberOfRowsWith(String name) {
method fullTableName (line 2821) | protected String fullTableName() {
method fullTableNamePartitioned (line 2825) | protected String fullTableNamePartitioned() {
method additionalDataValuesExist (line 2829) | protected boolean additionalDataValuesExist() {
method initialDataValuesExist (line 2833) | protected boolean initialDataValuesExist() {
method not (line 2838) | static <T> Predicate<T> not(Predicate<T> predicate) {
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/ColumnOrderTestClass.java
class ColumnOrderTestClass (line 20) | public class ColumnOrderTestClass {
method ColumnOrderTestClass (line 25) | public ColumnOrderTestClass() {}
method ColumnOrderTestClass (line 27) | public ColumnOrderTestClass(NumStruct nums, String str) {
method getNums (line 32) | public NumStruct getNums() {
method setNums (line 36) | public void setNums(NumStruct nums) {
method getStr (line 40) | public String getStr() {
method setStr (line 44) | public void setStr(String str) {
method equals (line 48) | @Override
method hashCode (line 60) | @Override
method toString (line 65) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/Data.java
class Data (line 22) | public class Data implements Serializable {
method Data (line 28) | public Data(String str, Timestamp ts) {
method getStr (line 33) | public String getStr() {
method setStr (line 37) | public void setStr(String str) {
method getTs (line 41) | public Timestamp getTs() {
method setTs (line 45) | public void setTs(Timestamp ts) {
method equals (line 49) | @Override
method hashCode (line 61) | @Override
method toString (line 66) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/Friend.java
class Friend (line 21) | public class Friend {
method Friend (line 26) | public Friend() {}
method Friend (line 28) | public Friend(int age, List<Link> links) {
method getAge (line 33) | public int getAge() {
method setAge (line 37) | public void setAge(int age) {
method getLinks (line 41) | public List<Link> getLinks() {
method setLinks (line 45) | public void setLinks(List<Link> links) {
method equals (line 49) | @Override
method hashCode (line 61) | @Override
method toString (line 66) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/Link.java
class Link (line 21) | public class Link implements Serializable {
method Link (line 26) | public Link() {}
method Link (line 28) | public Link(String uri) {
method getUri (line 32) | public String getUri() {
method setUri (line 36) | public void setUri(String uri) {
method equals (line 40) | @Override
method hashCode (line 52) | @Override
method toString (line 57) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/NumStruct.java
class NumStruct (line 21) | public class NumStruct {
method NumStruct (line 28) | public NumStruct() {}
method NumStruct (line 30) | public NumStruct(Long num3, Long num2, Long num1, List<StringStruct> s...
method getNum3 (line 37) | public Long getNum3() {
method setNum3 (line 41) | public void setNum3(Long num3) {
method getNum2 (line 45) | public Long getNum2() {
method setNum2 (line 49) | public void setNum2(Long num2) {
method getNum1 (line 53) | public Long getNum1() {
method setNum1 (line 57) | public void setNum1(Long num1) {
method getStrings (line 61) | public List<StringStruct> getStrings() {
method setStrings (line 65) | public void setStrings(List<StringStruct> strings) {
method equals (line 69) | @Override
method hashCode (line 84) | @Override
method toString (line 89) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/Person.java
class Person (line 21) | public class Person {
method Person (line 26) | public Person() {}
method Person (line 28) | public Person(String name, List<Friend> friends) {
method getName (line 33) | public String getName() {
method setName (line 37) | public void setName(String name) {
method getFriends (line 41) | public List<Friend> getFriends() {
method setFriends (line 45) | public void setFriends(List<Friend> friends) {
method equals (line 49) | @Override
method hashCode (line 61) | @Override
method toString (line 66) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/RangeData.java
class RangeData (line 21) | public class RangeData implements Serializable {
method RangeData (line 27) | public RangeData(String str, Long rng) {
method getStr (line 32) | public String getStr() {
method setStr (line 36) | public void setStr(String str) {
method getRng (line 40) | public Long getRng() {
method setRng (line 44) | public void setRng(Long rng) {
method equals (line 48) | @Override
method hashCode (line 60) | @Override
method toString (line 65) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/model/StringStruct.java
class StringStruct (line 20) | public class StringStruct {
method StringStruct (line 26) | public StringStruct() {}
method StringStruct (line 28) | public StringStruct(String str3, String str1, String str2) {
method getStr3 (line 34) | public String getStr3() {
method setStr3 (line 38) | public void setStr3(String str3) {
method getStr1 (line 42) | public String getStr1() {
method setStr1 (line 46) | public void setStr1(String str1) {
method getStr2 (line 50) | public String getStr2() {
method setStr2 (line 54) | public void setStr2(String str2) {
method equals (line 58) | @Override
method hashCode (line 72) | @Override
method toString (line 77) | @Override
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/metrics/SparkBigQueryReadSessionMetricsTest.java
class SparkBigQueryReadSessionMetricsTest (line 13) | public class SparkBigQueryReadSessionMetricsTest {
method setup (line 18) | @Before
method tearDown (line 29) | @After
method testReadSessionMetricsAccumulator (line 36) | @Test
method testSerialization (line 67) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/metrics/SparkMetricsSourceTest.java
class SparkMetricsSourceTest (line 7) | public class SparkMetricsSourceTest {
method testNew (line 9) | @Test
method testParseTime (line 15) | @Test
method testTimeInSpark (line 23) | @Test
method testBytesReadCounter (line 33) | @Test
method testRowsReadCounter (line 43) | @Test
method testScanTime (line 53) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/util/HdfsUtilsTest.java
class HdfsUtilsTest (line 33) | public class HdfsUtilsTest {
method createTestDirectory (line 37) | @BeforeClass
method toIteratorTest (line 49) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/write/DataSourceWriterContextPartitionHandlerTest.java
class DataSourceWriterContextPartitionHandlerTest (line 44) | @RunWith(MockitoJUnitRunner.class)
method testGoodWrite (line 49) | @Test
method testBadWrite (line 72) | @Test
FILE: spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataSourceWriterContextTest.java
class BigQueryDirectDataSourceWriterContextTest (line 44) | @RunWith(MockitoJUnitRunner.class)
method testDeleteOnAbort_saveModeAppend (line 84) | @Test
method testDeleteOnAbort_saveModeErrorIfExists (line 95) | @Test
method testDeleteOnAbort_saveModeIgnore (line 107) | @Test
method testDeleteOnAbort_saveModeOverwrite (line 118) | @Test
method testDeleteOnAbort_newTable (line 130) | @Test
method createBigQueryDirectDataSourceWriterContext (line 141) | private BigQueryDirectDataSourceWriterContext createBigQueryDirectData...
FILE: spark-bigquery-connector-common/src/test/java/org/apache/spark/sql/Scala213SparkSqlUtilsTest.java
class Scala213SparkSqlUtilsTest (line 25) | public class Scala213SparkSqlUtilsTest {
method testRowToInternalRow (line 27) | @Test
FILE: spark-bigquery-connector-common/third_party/apache-spark/src/main/java/com/google/cloud/spark/bigquery/ArrowSchemaConverter.java
class ArrowSchemaConverter (line 47) | public abstract class ArrowSchemaConverter extends ColumnVector {
method vector (line 48) | public abstract ValueVector vector();
method hasNull (line 49) | @Override
method numNulls (line 54) | @Override
method close (line 59) | @Override
method getBoolean (line 64) | @Override
method getByte (line 69) | @Override
method getShort (line 74) | @Override
method getInt (line 79) | @Override
method getLong (line 84) | @Override
method getFloat (line 89) | @Override
method getDouble (line 94) | @Override
method getDecimal (line 99) | @Override
method getUTF8String (line 104) | @Override
method getBinary (line 109) | @Override
method getArray (line 114) | @Override
method getMap (line 119) | @Override
method getChild (line 124) | @Override
method fromArrowType (line 127) | private static DataType fromArrowType(ArrowType arrowType)
method fromArrowField (line 145) | private static DataType fromArrowField(Field field)
method ArrowSchemaConverter (line 174) | ArrowSchemaConverter(ValueVector vector) {
method newArrowSchemaConverter (line 178) | public static ArrowSchemaConverter newArrowSchemaConverter(ValueVector...
class BooleanAccessor (line 212) | private static class BooleanAccessor extends ArrowSchemaConverter {
method BooleanAccessor (line 215) | BooleanAccessor(BitVector vector) {
method isNullAt (line 220) | @Override
method getBoolean (line 225) | @Override
method vector (line 230) | @Override
class LongAccessor (line 236) | private static class LongAccessor extends ArrowSchemaConverter {
method LongAccessor (line 239) | LongAccessor(BigIntVector vector) {
method isNullAt (line 244) | @Override
method getByte (line 249) | @Override
method getShort (line 254) | @Override
method getInt (line 259) | @Override
method getLong (line 264) | @Override
method vector (line 269) | @Override
class DoubleAccessor (line 276) | private static class DoubleAccessor extends ArrowSchemaConverter {
method DoubleAccessor (line 279) | DoubleAccessor(Float8Vector vector) {
method isNullAt (line 284) | @Override
method getDouble (line 289) | @Override
method vector (line 294) | @Override
class DecimalAccessor (line 301) | private static class DecimalAccessor extends ArrowSchemaConverter {
method DecimalAccessor (line 304) | DecimalAccessor(DecimalVector vector) {
method getByte (line 310) | @Override
method isNullAt (line 315) | @Override
method getDecimal (line 320) | @Override
method vector (line 326) | @Override
class Decimal256Accessor (line 333) | private static class Decimal256Accessor extends ArrowSchemaConverter {
method Decimal256Accessor (line 336) | Decimal256Accessor(Decimal256Vector vector) {
method isNullAt (line 341) | @Override
method getUTF8String (line 347) | public UTF8String getUTF8String(int rowId){
method getDecimal (line 357) | @Override
method vector (line 363) | @Override
class StringAccessor (line 369) | private static class StringAccessor extends ArrowSchemaConverter {
method StringAccessor (line 371) | StringAccessor(VarCharVector vector) {
method isNullAt (line 376) | @Override
method getUTF8String (line 381) | @Override
method vector (line 401) | @Override
class BinaryAccessor (line 407) | private static class BinaryAccessor extends ArrowSchemaConverter {
method BinaryAccessor (line 409) | BinaryAccessor(VarBinaryVector vector) {
method isNullAt (line 414) | @Override
method getBinary (line 419) | @Override
method vector (line 424) | @Override
class DateAccessor (line 431) | private static class DateAccessor extends ArrowSchemaConverter { ;
method DateAccessor (line 434) | DateAccessor(DateDayVector vector) {
method isNullAt (line 439) | @Override
method getInt (line 447) | @Override
method vector (line 452) | @Override
class TimeMicroVectorAccessor (line 459) | private static class TimeMicroVectorAccessor extends ArrowSchemaConver...
method TimeMicroVectorAccessor (line 462) | TimeMicroVectorAccessor(TimeMicroVector vector) {
method isNullAt (line 467) | @Override
method getLong (line 472) | @Override
method vector (line 477) | @Override
class TimestampMicroVectorAccessor (line 485) | private static class TimestampMicroVectorAccessor extends ArrowSchemaC...
method TimestampMicroVectorAccessor (line 492) | TimestampMicroVectorAccessor(TimeStampMicroVector vector) {
method getLong (line 497) | @Override
method isNullAt (line 502) | @Override
method getUTF8String (line 508) | @Override
method vector (line 532) | @Override
class TimestampMicroTZVectorAccessor (line 539) | private static class TimestampMicroTZVectorAccessor extends ArrowSchem...
method TimestampMicroTZVectorAccessor (line 544) | TimestampMicroTZVectorAccessor(TimeStampMicroTZVector vector) {
method isNullAt (line 557) | @Override
method getLong (line 563) | @Override
method vector (line 573) | @Override
class ArrayAccessor (line 579) | private static class ArrayAccessor extends ArrowSchemaConverter {
method ArrayAccessor (line 584) | ArrayAccessor(ListVector vector, StructField userProvidedField) {
method convertMapTypeToArrayType (line 605) | static ArrayType convertMapTypeToArrayType(MapType mapType) {
method isNullAt (line 612) | @Override
method getArray (line 618) | @Override
method getMap (line 627) | @Override
method vector (line 638) | @Override
class StructAccessor (line 648) | private static class StructAccessor extends ArrowSchemaConverter {
method StructAccessor (line 652) | StructAccessor(StructVector structVector, StructField userProvidedFi...
method isNullAt (line 682) | @Override
method getChild (line 688) | @Override
method close (line 692) | @Override
method vector (line 703) | @Override
FILE: spark-bigquery-connector-common/third_party/apache-spark/src/main/java/com/google/cloud/spark/bigquery/AvroSchemaConverter.java
class AvroSchemaConverter (line 64) | public class AvroSchemaConverter {
method sparkSchemaToAvroSchema (line 70) | public static Schema sparkSchemaToAvroSchema(StructType sparkSchema) {
method sparkTypeToRawAvroType (line 74) | static Schema sparkTypeToRawAvroType(DataType dataType, Metadata metad...
method sparkTypeToRawAvroType (line 85) | static Schema sparkTypeToRawAvroType(
method sparkRowToAvroGenericData (line 166) | public static GenericData.Record sparkRowToAvroGenericData(
method resolveNullableType (line 172) | static Schema resolveNullableType(Schema avroType, boolean nullable) {
method createConverterFor (line 187) | static Converter createConverterFor(DataType sparkType, Schema avroTyp...
type Converter (line 364) | @FunctionalInterface
method convert (line 366) | Object convert(SpecializedGetters getters, int ordinal);
class StructConverter (line 369) | static class StructConverter {
method StructConverter (line 373) | StructConverter(StructType sparkStruct, Schema avroStruct) {
method convert (line 384) | GenericData.Record convert(InternalRow row) {
FILE: spark-bigquery-dsv1/spark-bigquery-dsv1-spark2-support/src/main/java/com/google/cloud/spark/bigquery/spark2/Spark2DataFrameToRDDConverter.java
class Spark2DataFrameToRDDConverter (line 34) | public class Spark2DataFrameToRDDConverter implements DataFrameToRDDConv...
method convertToRDD (line 36) | @Override
method supports (line 62) | @Override
class EncodingIterator (line 67) | static class EncodingIterator implements Iterator<Row> {
method EncodingIterator (line 71) | public EncodingIterator(
method hasNext (line 77) | @Override
method next (line 82) | @Override
FILE: spark-bigquery-dsv1/spark-bigquery-dsv1-spark3-support/src/main/java/com/google/cloud/spark/bigquery/spark3/SerializableAbstractFunction1.java
class SerializableAbstractFunction1 (line 22) | public class SerializableAbstractFunction1<T, U> extends AbstractFunctio...
method SerializableAbstractFunction1 (line 26) | SerializableAbstractFunction1(Function<T, U> func) {
method apply (line 30) | @Override
FILE: spark-bigquery-dsv1/spark-bigquery-dsv1-spark3-support/src/main/java/com/google/cloud/spark/bigquery/spark3/Spark3DataFrameToRDDConverter.java
class Spark3DataFrameToRDDConverter (line 34) | public class Spark3DataFrameToRDDConverter implements DataFrameToRDDConv...
method convertToRDD (line 36) | @Override
method supports (line 52) | @Override
method getIteratorMapper (line 57) | private AbstractFunction1<Iterator<InternalRow>, Iterator<Row>> getIte...
FILE: spark-bigquery-dsv1/spark-bigquery-with-dependencies_2.11/src/test/java/com/google/cloud/spark/bigquery/acceptance/Scala211DataprocImage13AcceptanceTest.java
class Scala211DataprocImage13AcceptanceTest (line 23) | @Ignore
method Scala211DataprocImage13AcceptanceTest (line 28) | public Scala211DataprocImage13AcceptanceTest() {
method set
Condensed preview — 676 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (8,018K chars).
[
{
"path": ".github/workflows/codeql-analysis.yml",
"chars": 2326,
"preview": "# For most projects, this workflow file will not need changing; you simply need\n# to commit it to your repository.\n#\n# Y"
},
{
"path": ".github/workflows/cpd.yaml",
"chars": 506,
"preview": "name: Duplicate Code Detection\n\non:\n push:\n branches: [ master ]\n pull_request:\n # The branches below must be a "
},
{
"path": ".github/workflows/spotless.yaml",
"chars": 469,
"preview": "name: Spotless Check\n\non:\n push:\n branches: [ master ]\n pull_request:\n # The branches below must be a subset of "
},
{
"path": ".gitignore",
"chars": 506,
"preview": "*.class\n*.log\n\n# sbt/maven specific\n.cache/\n.history/\n.lib/\n.flattened-pom.xml\ndist/*\ntarget/\nlib_managed/\nsrc_managed/\n"
},
{
"path": ".mvn/wrapper/MavenWrapperDownloader.java",
"chars": 4941,
"preview": "/*\n * Copyright 2007-present the original author or authors.\n *\n * Licensed under the Apache License, Version 2.0 (the \""
},
{
"path": ".mvn/wrapper/maven-wrapper.properties",
"chars": 218,
"preview": "distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip\nwrap"
},
{
"path": "CHANGES.md",
"chars": 32888,
"preview": "# Release Notes\n\n## Next\n\n* Issue #1476: Fixed BigQuery MERGE statement ambiguity on overwrite when table columns are na"
},
{
"path": "CONTRIBUTING.md",
"chars": 5973,
"preview": "# How to Contribute\n\nWe'd love to accept your patches and contributions to this project. There are\njust a few small guid"
},
{
"path": "LICENSE",
"chars": 11357,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README-template.md",
"chars": 70502,
"preview": "# Apache Spark SQL connector for Google BigQuery\n\n<!--- TODO(#2): split out into more documents. -->\n\nThe connector supp"
},
{
"path": "README.md",
"chars": 69747,
"preview": "# Apache Spark SQL connector for Google BigQuery\n\n<!--- TODO(#2): split out into more documents. -->\n\nThe connector supp"
},
{
"path": "bigquery-connector-common/pom.xml",
"chars": 5667,
"preview": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:schemaL"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessToken.java",
"chars": 1175,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessTokenProvider.java",
"chars": 842,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/AccessTokenProviderCredentials.java",
"chars": 1363,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ArrowReaderIterator.java",
"chars": 1918,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ArrowUtil.java",
"chars": 1428,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClient.java",
"chars": 51046,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClientFactory.java",
"chars": 12418,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClientFactoryConfig.java",
"chars": 10840,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryClientModule.java",
"chars": 5716,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryConfig.java",
"chars": 4276,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryConfigurationUtil.java",
"chars": 7270,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryConnectorException.java",
"chars": 3812,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryCredentialsSupplier.java",
"chars": 9063,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryDirectDataWriterHelper.java",
"chars": 17143,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryErrorCode.java",
"chars": 1049,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryJobCompletionListener.java",
"chars": 803,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryMetrics.java",
"chars": 284,
"preview": "package com.google.cloud.bigquery.connector.common;\n\npublic interface BigQueryMetrics {\n void incrementBytesReadCounter"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryProxyConfig.java",
"chars": 1511,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryProxyTransporterBuilder.java",
"chars": 5146,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryPushdownException.java",
"chars": 1078,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryPushdownUnsupportedException.java",
"chars": 1137,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryStorageReadRowsTracer.java",
"chars": 2302,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryTracerFactory.java",
"chars": 1018,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/BigQueryUtil.java",
"chars": 46351,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ComparisonResult.java",
"chars": 1190,
"preview": "package com.google.cloud.bigquery.connector.common;\n\nimport com.google.common.collect.ImmutableList;\nimport java.util.Li"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/DecompressReadRowsResponse.java",
"chars": 2849,
"preview": "package com.google.cloud.bigquery.connector.common;\n\nimport com.google.cloud.bigquery.storage.v1.ReadRowsResponse;\nimpor"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/DurationTimer.java",
"chars": 1668,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/EnvironmentContext.java",
"chars": 1203,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/GcpUtil.java",
"chars": 2796,
"preview": "/*\n * Copyright 2025 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/HttpUtil.java",
"chars": 1223,
"preview": "/*\n * Copyright 2021 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/IdentityTokenSupplier.java",
"chars": 1512,
"preview": "package com.google.cloud.bigquery.connector.common;\n\nimport com.google.auth.oauth2.GoogleCredentials;\nimport com.google."
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/IteratorMultiplexer.java",
"chars": 4877,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/LazyInitializationSupplier.java",
"chars": 1845,
"preview": "/*\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/LoggingBigQueryStorageReadRowsTracer.java",
"chars": 6306,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/LoggingBigQueryTracerFactory.java",
"chars": 1393,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/MaterializationConfiguration.java",
"chars": 3197,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/NonInterruptibleBlockingBytesChannel.java",
"chars": 2423,
"preview": "/*\n * Copyright 2021 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ParallelArrowReader.java",
"chars": 10060,
"preview": "/*\n * Copyright 2021 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ParameterMode.java",
"chars": 785,
"preview": "/*\n * Copyright 2025 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/QueryParameterHelper.java",
"chars": 4230,
"preview": "/*\n * Copyright 2025 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadRowsHelper.java",
"chars": 4825,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadRowsResponseInputStreamEnumeration.java",
"chars": 2737,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionCreator.java",
"chars": 12751,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionCreatorConfig.java",
"chars": 6596,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionCreatorConfigBuilder.java",
"chars": 7871,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionMetrics.java",
"chars": 891,
"preview": "/*\n * Copyright 2023 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/ReadSessionResponse.java",
"chars": 1195,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/StreamCombiningIterator.java",
"chars": 14484,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/UserAgentProvider.java",
"chars": 766,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/VersionProvider.java",
"chars": 753,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/main/java/com/google/cloud/bigquery/connector/common/WriteStreamStatistics.java",
"chars": 418,
"preview": "package com.google.cloud.bigquery.connector.common;\n\npublic class WriteStreamStatistics {\n private final long rowCount;"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryClientFactoryTest.java",
"chars": 25604,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryConfigurationUtilTest.java",
"chars": 4750,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryConnectorExceptionTest.java",
"chars": 1830,
"preview": "/*\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryCredentialsSupplierTest.java",
"chars": 26030,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/BigQueryUtilTest.java",
"chars": 62489,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/DurationTimerTest.java",
"chars": 2139,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/HttpUtilTest.java",
"chars": 1940,
"preview": "/*\n * Copyright 2021 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/IteratorMultiplexerTest.java",
"chars": 6462,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/LoggingBigQueryStorageReadRowsTracerTest.java",
"chars": 4260,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/MockResponsesBatch.java",
"chars": 1463,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/ParallelArrowReaderTest.java",
"chars": 8069,
"preview": "/*\n * Copyright 2021 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/ReadRowsHelperTest.java",
"chars": 11785,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/ReadSessionCreatorTest.java",
"chars": 24041,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/integration/CustomCredentialsIntegrationTest.java",
"chars": 4447,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/java/com/google/cloud/bigquery/connector/common/integration/DefaultCredentialsDelegateAccessTokenProvider.java",
"chars": 2226,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "bigquery-connector-common/src/test/resources/external-account-credentials.json",
"chars": 798,
"preview": "{\n \"type\": \"external_account\",\n \"audience\": \"//iam.googleapis.com/projects/1234/locations/global/workloadIdentityPools"
},
{
"path": "cloudbuild/Dockerfile",
"chars": 1826,
"preview": "# This Dockerfile creates an image for running presubmit tests.\nFROM ubuntu:22.04\n\n# Set noninteractive frontend for apt"
},
{
"path": "cloudbuild/cloudbuild.yaml",
"chars": 6416,
"preview": "steps:\n # 1. Create a Docker image containing hadoop-connectors repo\n - name: 'gcr.io/cloud-builders/docker'\n id: '"
},
{
"path": "cloudbuild/gcp-settings.xml",
"chars": 513,
"preview": "<settings xmlns=\"http://maven.apache.org/SETTINGS/1.0.0\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:s"
},
{
"path": "cloudbuild/nightly.sh",
"chars": 6067,
"preview": "#!/bin/bash\n\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the '"
},
{
"path": "cloudbuild/nightly.yaml",
"chars": 1591,
"preview": "steps:\n # 1. Create a Docker image containing hadoop-connectors repo\n - name: 'gcr.io/cloud-builders/docker'\n id: '"
},
{
"path": "cloudbuild/presubmit.sh",
"chars": 3851,
"preview": "\n#!/bin/bash\n\n# Copyright 2019 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the "
},
{
"path": "coverage/pom.xml",
"chars": 10341,
"preview": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:sch"
},
{
"path": "examples/notebooks/Advanced ML Pipelines.ipynb",
"chars": 12475,
"preview": "{\"cells\": [{\"cell_type\": \"markdown\", \"metadata\": {}, \"source\": \"<h3>Problem: As a PM, I write lots of blogs. How do I kn"
},
{
"path": "examples/notebooks/Distribute_Generic_Functions.ipynb",
"chars": 14957,
"preview": "{\"cells\": [{\"cell_type\": \"markdown\", \"metadata\": {}, \"source\": \"<h1>Distribute functions across a BigQuery dataset using"
},
{
"path": "examples/notebooks/Top words in Shakespeare by work.ipynb",
"chars": 13645,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Using the Spark BigQuery connecto"
},
{
"path": "examples/notebooks/Transform_with_Python.ipynb",
"chars": 408261,
"preview": "{\"cells\": [{\"cell_type\": \"markdown\", \"metadata\": {}, \"source\": \"<table>\\n <tr><td><h2>A pythonic approach to BigQuery"
},
{
"path": "examples/python/query_results.py",
"chars": 1683,
"preview": "#!/usr/bin/env python\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2"
},
{
"path": "examples/python/shakespeare.py",
"chars": 1610,
"preview": "#!/usr/bin/env python\n# Copyright 2018 Google Inc. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2"
},
{
"path": "mvnw",
"chars": 10069,
"preview": "#!/bin/sh\n# ----------------------------------------------------------------------------\n# Licensed to the Apache Softwa"
},
{
"path": "mvnw.cmd",
"chars": 6607,
"preview": "@REM ----------------------------------------------------------------------------\n@REM Licensed to the Apache Software F"
},
{
"path": "pom.xml",
"chars": 10077,
"preview": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:sch"
},
{
"path": "scalastyle-config.xml",
"chars": 16232,
"preview": "<!--\n ~ Licensed to the Apache Software Foundation (ASF) under one or more\n ~ contributor license agreements. See the"
},
{
"path": "scripts/verify-shading.sh",
"chars": 1070,
"preview": "#!/bin/sh\n\n# Copyright 2024 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not"
},
{
"path": "spark-bigquery-connector-common/pom.xml",
"chars": 5147,
"preview": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n xsi:sc"
},
{
"path": "spark-bigquery-connector-common/src/build/resources/spark-bigquery-connector.properties",
"chars": 37,
"preview": "connector.version=${project.version}\n"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/ArrowBinaryIterator.java",
"chars": 5141,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/AvroBinaryIterator.java",
"chars": 4504,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryConnectorUtils.java",
"chars": 1240,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryRelation.java",
"chars": 2350,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryRelationProviderBase.java",
"chars": 5815,
"preview": "/*\n * Copyright 2025 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryStreamWriter.java",
"chars": 3912,
"preview": "/*\n * Copyright 2025 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryStreamingSink.java",
"chars": 5027,
"preview": "/*\n * Copyright 2025 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/BigQueryUtilScala.java",
"chars": 4941,
"preview": "/*\n * Copyright 2025 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/DataFrameToRDDConverter.java",
"chars": 895,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/DataSourceVersion.java",
"chars": 1389,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/GuiceInjectorCreator.java",
"chars": 1607,
"preview": "/*\n * Copyright 2025 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InjectorBuilder.java",
"chars": 2621,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InjectorFactory.java",
"chars": 1807,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/InternalRowIterator.java",
"chars": 3026,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/PartitionOverwriteMode.java",
"chars": 101,
"preview": "package com.google.cloud.spark.bigquery;\n\npublic enum PartitionOverwriteMode {\n STATIC,\n DYNAMIC\n}\n"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/ProtobufUtils.java",
"chars": 31954,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/ReadRowsResponseToInternalRowIteratorConverter.java",
"chars": 6610,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SchemaConverters.java",
"chars": 26151,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SchemaConvertersConfiguration.java",
"chars": 3529,
"preview": "/*\n * Copyright 2023 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java",
"chars": 56590,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorModule.java",
"chars": 4662,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorUserAgentProvider.java",
"chars": 3498,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConnectorVersionProvider.java",
"chars": 1379,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryLineageProvider.java",
"chars": 1172,
"preview": "/*\n * Copyright 2024 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryProxyAndHttpConfig.java",
"chars": 9491,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryUtil.java",
"chars": 12851,
"preview": "/*\n * Copyright 2020 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkFilterUtils.java",
"chars": 14233,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SupportedCustomDataType.java",
"chars": 2457,
"preview": "/*\n * Copyright 2020 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SupportsQueryPushdown.java",
"chars": 1033,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/TypeConverter.java",
"chars": 1195,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/BigQueryPartition.java",
"chars": 1040,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/BigQueryRDDContext.java",
"chars": 5544,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/BigQueryRDDFactory.java",
"chars": 10192,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/DirectBigQueryRelation.java",
"chars": 9256,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/Scala213BigQueryRDD.java",
"chars": 1888,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/direct/ScalaIterator.java",
"chars": 1098,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/events/BigQueryJobCompletedEvent.java",
"chars": 2500,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/events/LoadJobCompletedEvent.java",
"chars": 1234,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/events/QueryJobCompletedEvent.java",
"chars": 1242,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/examples/JavaShakespeare.java",
"chars": 2580,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/DataOrigin.java",
"chars": 727,
"preview": "/*\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/SparkBigQueryConnectorMetricsUtils.java",
"chars": 5311,
"preview": "/*\n * Copyright 2023 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/SparkBigQueryReadSessionMetrics.java",
"chars": 8715,
"preview": "/*\n * Copyright 2023 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/metrics/SparkMetricsSource.java",
"chars": 2199,
"preview": "package com.google.cloud.spark.bigquery.metrics;\n\nimport com.codahale.metrics.Counter;\nimport com.codahale.metrics.Metri"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/pushdowns/SparkBigQueryPushdown.java",
"chars": 883,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/util/HdfsUtils.java",
"chars": 1571,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryDataSourceWriterInsertableRelation.java",
"chars": 6145,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryDeprecatedIndirectInsertableRelation.java",
"chars": 1860,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryInsertableRelationBase.java",
"chars": 2794,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/BigQueryWriteHelper.java",
"chars": 11307,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/CreatableRelationProviderHelper.java",
"chars": 5574,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/DataSourceWriterContextPartitionHandler.java",
"chars": 3323,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/IntermediateDataCleaner.java",
"chars": 2910,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/AvroIntermediateRecordWriter.java",
"chars": 1748,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDataSourceWriterModule.java",
"chars": 4481,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataSourceWriterContext.java",
"chars": 14085,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataWriterContext.java",
"chars": 4918,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectDataWriterContextFactory.java",
"chars": 3079,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryDirectWriterCommitMessageContext.java",
"chars": 2269,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectDataSourceWriterContext.java",
"chars": 8828,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectDataWriterContext.java",
"chars": 2657,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectDataWriterContextFactory.java",
"chars": 2352,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/BigQueryIndirectWriterCommitMessageContext.java",
"chars": 1003,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/DataSourceWriterContext.java",
"chars": 5019,
"preview": "/*\n * Copyright 2021 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/DataWriterContext.java",
"chars": 992,
"preview": "/*\n * Copyright 2021 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/DataWriterContextFactory.java",
"chars": 846,
"preview": "/*\n * Copyright 2021 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/IntermediateRecordWriter.java",
"chars": 899,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/NoOpDataWriterContext.java",
"chars": 1138,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/write/context/WriterCommitMessageContext.java",
"chars": 856,
"preview": "/*\n * Copyright 2021 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/main/java/org/apache/spark/sql/Scala213SparkSqlUtils.java",
"chars": 3232,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/java/org/apache/spark/sql/SparkSqlUtils.java",
"chars": 1960,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/main/resources/META-INF/services/io.openlineage.spark.extension.OpenLineageExtensionProvider",
"chars": 61,
"preview": "com.google.cloud.spark.bigquery.SparkBigQueryLineageProvider\n"
},
{
"path": "spark-bigquery-connector-common/src/main/resources/META-INF/services/org.apache.spark.sql.SparkSqlUtils",
"chars": 43,
"preview": "org.apache.spark.sql.Scala213SparkSqlUtils\n"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/AvroSchemaConverterTest.java",
"chars": 12055,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/BigQueryRelationProviderTestBase.java",
"chars": 8474,
"preview": "/*\n * Copyright 2025 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/BigQueryRelationTest.java",
"chars": 2893,
"preview": "/*\n * Copyright 2023 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/DataSourceOptions.java",
"chars": 1194,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/InjectorBuilderTest.java",
"chars": 3535,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/MockResponsesBatch.java",
"chars": 1454,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/MockSparkBigQueryPushdown.java",
"chars": 1228,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/OptimizeLoadUriListTest.java",
"chars": 5062,
"preview": "/*\n * Copyright 2020 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/ProtobufUtilsTest.java",
"chars": 20192,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SchemaConverterTest.java",
"chars": 31686,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java",
"chars": 54510,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryProxyAndHttpConfigTest.java",
"chars": 21141,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryUtilTest.java",
"chars": 7121,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkFilterUtilsTest.java",
"chars": 15724,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SupportedCustomDataTypeTest.java",
"chars": 1289,
"preview": "/*\n * Copyright 2020 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/TestConstants.java",
"chars": 10773,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestConstants.java",
"chars": 2080,
"preview": "/*\n * Copyright 2021 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestContext.java",
"chars": 1635,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/AcceptanceTestUtils.java",
"chars": 6858,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/BigNumericDataprocServerlessAcceptanceTestBase.java",
"chars": 2853,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/DataprocAcceptanceTestBase.java",
"chars": 13759,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/DataprocServerlessAcceptanceTestBase.java",
"chars": 6589,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/ReadSheakspeareDataprocServerlessAcceptanceTestBase.java",
"chars": 1685,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/acceptance/WriteStreamDataprocServerlessAcceptanceTestBase.java",
"chars": 2240,
"preview": "/*\n * Copyright 2022 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/direct/Scala213BigQueryRDDTest.java",
"chars": 1818,
"preview": "/*\n * Copyright 2022 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/CatalogIntegrationTestBase.java",
"chars": 15018,
"preview": "/*\n * Copyright 2025 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/IntegrationTestUtils.java",
"chars": 7260,
"preview": "/*\n * Copyright 2018 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
},
{
"path": "spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/OpenLineageIntegrationTestBase.java",
"chars": 6223,
"preview": "/*\n * Copyright 2024 Google Inc. All Rights Reserved.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License"
}
]
// ... and 476 more files (download for full content)
About this extraction
This page contains the full source code of the GoogleCloudDataproc/spark-bigquery-connector GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 676 files (20.0 MB), approximately 2.0M tokens, and a symbol index with 3029 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.