Repository: snowflakedb/snowflake-kafka-connector Branch: master Commit: 1a8c6d75b9a3 Files: 399 Total size: 2.1 MB Directory structure: gitextract_xko0tdbw/ ├── .githooks/ │ └── pre-commit ├── .github/ │ ├── CODEOWNERS │ ├── actions/ │ │ ├── build-connector/ │ │ │ └── action.yml │ │ └── run-e2e-tests/ │ │ └── action.yml │ ├── dependabot.yaml │ ├── scripts/ │ │ ├── decrypt_secret.sh │ │ ├── parse_java_test_reports.py │ │ ├── profile.json.gpg │ │ ├── profile_azure.json.gpg │ │ ├── profile_gcs.json.gpg │ │ └── squid.conf │ └── workflows/ │ ├── IntegrationTest.yml │ ├── build-apache-kafka-images.yml │ ├── end-to-end-legacy.yml │ ├── end-to-end-stress.yml │ ├── end-to-end.yaml │ ├── formatting.yml │ └── semgrep.yml ├── .gitignore ├── .java-version ├── LICENSE ├── README.md ├── deploy.sh ├── format.sh ├── pom.xml ├── pom_confluent.xml ├── profile.json.enc ├── profile.json.example ├── scripts/ │ └── process_licenses.py ├── src/ │ ├── main/ │ │ ├── java/ │ │ │ └── com/ │ │ │ └── snowflake/ │ │ │ ├── ingest/ │ │ │ │ └── streaming/ │ │ │ │ └── internal/ │ │ │ │ └── TimestampWrapper.java │ │ │ └── kafka/ │ │ │ └── connector/ │ │ │ ├── ConnectorConfigTools.java │ │ │ ├── ConnectorConfigValidator.java │ │ │ ├── Constants.java │ │ │ ├── DefaultConnectorConfigValidator.java │ │ │ ├── SemanticVersion.java │ │ │ ├── SnowflakeSinkTask.java │ │ │ ├── SnowflakeSinkTaskAuthorizationExceptionTracker.java │ │ │ ├── SnowflakeStreamingSinkConnector.java │ │ │ ├── TopicToTableParser.java │ │ │ ├── Utils.java │ │ │ ├── config/ │ │ │ │ ├── AuthenticatorType.java │ │ │ │ ├── CommaSeparatedKeyValueValidator.java │ │ │ │ ├── ConnectorConfigDefinition.java │ │ │ │ ├── SinkTaskConfig.java │ │ │ │ ├── SnowflakeValidation.java │ │ │ │ └── TopicToTableValidator.java │ │ │ ├── dlq/ │ │ │ │ └── KafkaRecordErrorReporter.java │ │ │ ├── internal/ │ │ │ │ ├── CachingConfig.java │ │ │ │ ├── CachingSnowflakeConnectionService.java │ │ │ │ ├── DescribeTableRow.java │ │ │ │ ├── InternalUtils.java │ │ │ │ ├── 
JdbcProperties.java │ │ │ │ ├── JdbcPropertyKeys.java │ │ │ │ ├── KCLogger.java │ │ │ │ ├── PrivateKeyTool.java │ │ │ │ ├── SnowflakeConnectionService.java │ │ │ │ ├── SnowflakeConnectionServiceFactory.java │ │ │ │ ├── SnowflakeErrors.java │ │ │ │ ├── SnowflakeKafkaConnectorException.java │ │ │ │ ├── SnowflakeSinkService.java │ │ │ │ ├── SnowflakeURL.java │ │ │ │ ├── StandardSnowflakeConnectionService.java │ │ │ │ ├── URL.java │ │ │ │ ├── metrics/ │ │ │ │ │ ├── MetricsJmxReporter.java │ │ │ │ │ ├── MetricsUtil.java │ │ │ │ │ ├── NoopTaskMetrics.java │ │ │ │ │ ├── SnowflakeSinkTaskMetrics.java │ │ │ │ │ └── TaskMetrics.java │ │ │ │ ├── schemaevolution/ │ │ │ │ │ ├── ColumnInfos.java │ │ │ │ │ ├── ColumnTypeMapper.java │ │ │ │ │ ├── SchemaEvolutionTargetItems.java │ │ │ │ │ ├── SnowflakeColumnTypeMapper.java │ │ │ │ │ ├── SnowflakeSchemaEvolutionService.java │ │ │ │ │ ├── TableSchema.java │ │ │ │ │ ├── TableSchemaResolver.java │ │ │ │ │ └── ValidationResultMapper.java │ │ │ │ ├── streaming/ │ │ │ │ │ ├── DefaultStreamingConfigValidator.java │ │ │ │ │ ├── IngestionMethodConfig.java │ │ │ │ │ ├── LatestCommitedOffsetTokenExecutor.java │ │ │ │ │ ├── OpenChannelRetryPolicy.java │ │ │ │ │ ├── SnowflakeSinkServiceV2.java │ │ │ │ │ ├── StreamingClientProperties.java │ │ │ │ │ ├── StreamingConfigValidator.java │ │ │ │ │ ├── StreamingErrorHandler.java │ │ │ │ │ ├── TopicPartitionChannelInsertionException.java │ │ │ │ │ ├── channel/ │ │ │ │ │ │ └── TopicPartitionChannel.java │ │ │ │ │ ├── telemetry/ │ │ │ │ │ │ ├── PeriodicTelemetryReporter.java │ │ │ │ │ │ ├── SnowflakeTelemetryChannelCreation.java │ │ │ │ │ │ ├── SnowflakeTelemetryChannelStatus.java │ │ │ │ │ │ └── SnowflakeTelemetrySsv1Migration.java │ │ │ │ │ └── v2/ │ │ │ │ │ ├── AppendRowWithFallbackPolicy.java │ │ │ │ │ ├── BackpressureException.java │ │ │ │ │ ├── ClientRecreationException.java │ │ │ │ │ ├── ClientRecreator.java │ │ │ │ │ ├── PipeNameProvider.java │ │ │ │ │ ├── SnowpipeStreamingPartitionChannel.java │ │ 
│ │ │ ├── WaitForLastOffsetCommittedPolicy.java │ │ │ │ │ ├── channel/ │ │ │ │ │ │ └── PartitionOffsetTracker.java │ │ │ │ │ ├── client/ │ │ │ │ │ │ ├── StreamingClientFactory.java │ │ │ │ │ │ ├── StreamingClientPool.java │ │ │ │ │ │ ├── StreamingClientPools.java │ │ │ │ │ │ └── StreamingClientSupplier.java │ │ │ │ │ ├── migration/ │ │ │ │ │ │ ├── Ssv1MigrationMode.java │ │ │ │ │ │ └── Ssv1MigrationResponse.java │ │ │ │ │ └── service/ │ │ │ │ │ ├── BatchOffsetFetcher.java │ │ │ │ │ ├── PartitionChannelManager.java │ │ │ │ │ └── ThreadPools.java │ │ │ │ ├── telemetry/ │ │ │ │ │ ├── SnowflakeTelemetryBasicInfo.java │ │ │ │ │ ├── SnowflakeTelemetryService.java │ │ │ │ │ ├── SnowflakeTelemetryServiceFactory.java │ │ │ │ │ └── TelemetryConstants.java │ │ │ │ └── validation/ │ │ │ │ ├── BinaryStringUtils.java │ │ │ │ ├── ByteArraySerializer.java │ │ │ │ ├── ColumnLogicalType.java │ │ │ │ ├── ColumnPhysicalType.java │ │ │ │ ├── ColumnSchema.java │ │ │ │ ├── DataValidationUtil.java │ │ │ │ ├── DuplicateDetector.java │ │ │ │ ├── DuplicateKeyValidatedObject.java │ │ │ │ ├── DuplicateKeyValidatingSerializer.java │ │ │ │ ├── ErrorCode.java │ │ │ │ ├── Power10Util.java │ │ │ │ ├── RowValidator.java │ │ │ │ ├── SFExceptionValidation.java │ │ │ │ ├── SqlIdentifierNormalizer.java │ │ │ │ ├── TimestampWrapper.java │ │ │ │ ├── Utils.java │ │ │ │ ├── ValidationResult.java │ │ │ │ └── ZonedDateTimeSerializer.java │ │ │ ├── records/ │ │ │ │ ├── KafkaRecordConverter.java │ │ │ │ ├── SnowflakeMetadataConfig.java │ │ │ │ └── SnowflakeSinkRecord.java │ │ │ └── streaming/ │ │ │ └── iceberg/ │ │ │ └── IcebergDDLTypes.java │ │ └── resources/ │ │ └── com/ │ │ └── snowflake/ │ │ └── kafka/ │ │ └── connector/ │ │ ├── ingest_error_messages.properties │ │ └── internal/ │ │ └── validation/ │ │ └── ingest_error_messages.properties │ └── test/ │ ├── java/ │ │ └── com/ │ │ └── snowflake/ │ │ └── kafka/ │ │ └── connector/ │ │ ├── CachingConfigValidatorTest.java │ │ ├── ConnectClusterBaseIT.java │ │ ├── 
ConnectorConfigValidatorLogsTest.java │ │ ├── ConnectorConfigValidatorTest.java │ │ ├── ConnectorIT.java │ │ ├── InjectQueryRunner.java │ │ ├── InjectQueryRunnerExtension.java │ │ ├── InjectSnowflakeDataSource.java │ │ ├── InjectSnowflakeDataSourceExtension.java │ │ ├── LegacySchemaToggleIT.java │ │ ├── SchemaEvolutionAvroSrIT.java │ │ ├── SchemaEvolutionBase.java │ │ ├── SchemaEvolutionJsonIT.java │ │ ├── SinkTaskIT.java │ │ ├── SinkTaskProxyIT.java │ │ ├── SmtIT.java │ │ ├── SnowflakeSinkTaskAuthorizationExceptionTrackerTest.java │ │ ├── SnowflakeSinkTaskForStreamingIT.java │ │ ├── TopicToTableParserTest.java │ │ ├── UtilsTest.java │ │ ├── builder/ │ │ │ └── SinkRecordBuilder.java │ │ ├── config/ │ │ │ ├── ClientValidationConfigTest.java │ │ │ ├── SinkTaskConfigTest.java │ │ │ ├── SinkTaskConfigTestBuilder.java │ │ │ └── SnowflakeSinkConnectorConfigBuilder.java │ │ ├── dlq/ │ │ │ └── InMemoryKafkaRecordErrorReporter.java │ │ ├── internal/ │ │ │ ├── CachingSnowflakeConnectionServiceStatsTest.java │ │ │ ├── CachingSnowflakeConnectionServiceTest.java │ │ │ ├── ConnectionServiceIT.java │ │ │ ├── EmbeddedProxyServer.java │ │ │ ├── InternalUtilsTest.java │ │ │ ├── JdbcPropertiesTest.java │ │ │ ├── KCLoggerTest.java │ │ │ ├── NonEncryptedKeyTestSnowflakeConnection.java │ │ │ ├── ResetProxyConfigExec.java │ │ │ ├── SchematizationTestUtils.java │ │ │ ├── SnowflakeConnectionServiceCacheTest.java │ │ │ ├── SnowflakeDataSourceFactory.java │ │ │ ├── SnowflakeURLTest.java │ │ │ ├── StandardSnowflakeConnectionServiceDdlTest.java │ │ │ ├── TestUtils.java │ │ │ ├── TombstoneRecordIngestionIT.java │ │ │ ├── metrics/ │ │ │ │ ├── MetricsJmxReporterTest.java │ │ │ │ └── SnowflakeSinkTaskMetricsTest.java │ │ │ ├── schemaevolution/ │ │ │ │ ├── ColumnInfosTest.java │ │ │ │ ├── SchemaEvolutionTargetItemsTest.java │ │ │ │ ├── SnowflakeColumnTypeMapperTest.java │ │ │ │ ├── SnowflakeSchemaEvolutionServiceTest.java │ │ │ │ ├── TableSchemaResolverTest.java │ │ │ │ └── 
ValidationResultMapperTest.java │ │ │ ├── streaming/ │ │ │ │ ├── BatchOffsetFetcherTest.java │ │ │ │ ├── ChannelStatusCheckIT.java │ │ │ │ ├── CloseTopicPartitionChannelIT.java │ │ │ │ ├── DefaultStreamingConfigValidatorTest.java │ │ │ │ ├── FakeIngestClientSupplier.java │ │ │ │ ├── FakeSnowflakeStreamingIngestChannel.java │ │ │ │ ├── FakeSnowflakeStreamingIngestClient.java │ │ │ │ ├── InMemorySinkTaskContext.java │ │ │ │ ├── OpenChannelRetryPolicyTest.java │ │ │ │ ├── SnowflakeSinkServiceV2AvroSchematizationIT.java │ │ │ │ ├── SnowflakeSinkServiceV2BaseIT.java │ │ │ │ ├── SnowflakeSinkServiceV2IT.java │ │ │ │ ├── SnowflakeSinkServiceV2SchematizationIT.java │ │ │ │ ├── SnowflakeSinkServiceV2Test.java │ │ │ │ ├── SnowflakeSinkServiceV2ValidationLoggingTest.java │ │ │ │ ├── StreamingClientPropertiesTest.java │ │ │ │ ├── StreamingErrorHandlerIT.java │ │ │ │ ├── StreamingManualModeIT.java │ │ │ │ ├── StreamingSinkServiceBuilder.java │ │ │ │ ├── telemetry/ │ │ │ │ │ └── PeriodicTelemetryReporterTest.java │ │ │ │ └── v2/ │ │ │ │ ├── AppendRowWithFallbackPolicyTest.java │ │ │ │ ├── BackpressureExceptionTest.java │ │ │ │ ├── ClientRecreationExceptionTest.java │ │ │ │ ├── SnowpipeStreamingPartitionChannelTest.java │ │ │ │ ├── StreamingClientManagerIT.java │ │ │ │ ├── client/ │ │ │ │ │ ├── StreamingClientPoolTest.java │ │ │ │ │ └── StreamingClientPoolsTest.java │ │ │ │ └── service/ │ │ │ │ └── PartitionChannelManagerTest.java │ │ │ ├── telemetry/ │ │ │ │ ├── SnowflakeTelemetryChannelStatusTest.java │ │ │ │ └── SnowflakeTelemetryServiceTest.java │ │ │ └── validation/ │ │ │ ├── DataValidationUtilTest.java │ │ │ ├── RowValidatorTest.java │ │ │ └── SqlIdentifierNormalizerTest.java │ │ ├── mock/ │ │ │ └── MockResultSetForSizeTest.java │ │ ├── records/ │ │ │ ├── ConverterTest.java │ │ │ └── SnowflakeSinkRecordTest.java │ │ └── streaming/ │ │ └── iceberg/ │ │ ├── BaseIcebergIT.java │ │ ├── IcebergIngestionIT.java │ │ ├── IcebergIngestionIntoVariantIT.java │ │ ├── 
IcebergIngestionNoSchemaEvolutionIT.java │ │ ├── IcebergVersion.java │ │ └── sql/ │ │ ├── ComplexJsonRecord.java │ │ ├── MetadataRecord.java │ │ ├── PrimitiveJsonRecord.java │ │ └── RecordWithMetadata.java │ └── resources/ │ ├── com/ │ │ └── snowflake/ │ │ └── kafka/ │ │ └── connector/ │ │ ├── complexJsonPayload.json │ │ ├── complexJsonWithSchema.json │ │ └── records/ │ │ ├── test.avro │ │ ├── test_key.avro │ │ └── test_multi.avro │ ├── log4j.properties │ └── squid.conf ├── test/ │ ├── .gitignore │ ├── E2E_TEST_PLAN.md │ ├── README.md │ ├── __init__.py │ ├── apache_properties/ │ │ ├── connect-distributed.properties │ │ ├── file-secrets.txt │ │ ├── kraft-server.properties │ │ ├── schema-registry.properties │ │ ├── server.properties │ │ └── zookeeper.properties │ ├── build_image.sh │ ├── build_runtime_jar.sh │ ├── conftest.py │ ├── connect-log4j.properties │ ├── docker/ │ │ ├── .gitignore │ │ ├── Dockerfile.apache-kafka │ │ ├── Dockerfile.builder │ │ ├── Dockerfile.test-runner │ │ ├── docker-compose.amd64.yml │ │ ├── docker-compose.apache.yml │ │ ├── docker-compose.base.yml │ │ ├── docker-compose.confluent-kraft.yml │ │ ├── docker-compose.confluent.yml │ │ ├── docker-compose.profile-apache.yml │ │ ├── docker-compose.profile-confluent.yml │ │ └── scripts/ │ │ └── start-apache-kafka.sh │ ├── download_v3_jar.sh │ ├── lib/ │ │ ├── __init__.py │ │ ├── config.py │ │ ├── config_migration.py │ │ ├── crypto.py │ │ ├── driver.py │ │ ├── fixtures/ │ │ │ ├── __init__.py │ │ │ ├── connector.py │ │ │ ├── function.py │ │ │ ├── session.py │ │ │ └── table.py │ │ ├── matchers.py │ │ └── utils.py │ ├── pyproject.toml │ ├── rest_request_template/ │ │ ├── datagen_connector.json │ │ ├── datatype_ingestion.json │ │ ├── iceberg_avro_aws.json │ │ ├── iceberg_json_aws.json │ │ ├── iceberg_schema_evolution_avro_aws.json │ │ ├── iceberg_schema_evolution_json_aws.json │ │ ├── nullable_values_after_smt.json │ │ ├── snowpipe_streaming_legacy_avro_sr.json │ │ ├── 
snowpipe_streaming_legacy_byte_array_converter.json │ │ ├── snowpipe_streaming_legacy_string_converter.json │ │ ├── snowpipe_streaming_legacy_string_json.json │ │ ├── snowpipe_streaming_schema_evolution.json │ │ ├── snowpipe_streaming_schema_mapping_dlq.json │ │ ├── snowpipe_streaming_string_json_dlq.json │ │ ├── test_kc_delete_create.json │ │ ├── test_kc_delete_create_chaos.json │ │ ├── test_kc_delete_resume.json │ │ ├── test_kc_delete_resume_chaos.json │ │ ├── test_kc_pause_create.json │ │ ├── test_kc_pause_create_chaos.json │ │ ├── test_kc_pause_resume.json │ │ ├── test_kc_pause_resume_chaos.json │ │ ├── test_kc_recreate.json │ │ ├── test_kc_recreate_chaos.json │ │ ├── test_kc_resilience.json │ │ ├── test_kc_restart.json │ │ ├── test_snowpipe_streaming_string_json_ignore_tombstone.json │ │ ├── travis_correct_auto_table_creation.json │ │ ├── travis_correct_auto_table_creation_topic2table.json │ │ ├── travis_correct_avro_avro.json │ │ ├── travis_correct_avrosr_avrosr.json │ │ ├── travis_correct_confluent_protobuf_protobuf.json │ │ ├── travis_correct_json_json.json │ │ ├── travis_correct_multiple_topic_to_one_table_snowpipe_streaming.json │ │ ├── travis_correct_native_complex_smt.json │ │ ├── travis_correct_native_string_json_without_schema.json │ │ ├── travis_correct_native_string_protobuf.json │ │ ├── travis_correct_schema_mapping.json │ │ ├── travis_correct_schema_not_supported_converter.json │ │ ├── travis_correct_snowpipe_streaming_string_avro_sr.json │ │ ├── travis_correct_snowpipe_streaming_string_json.json │ │ ├── travis_correct_string_avro.json │ │ ├── travis_correct_string_avrosr.json │ │ ├── travis_correct_string_json.json │ │ └── travis_correct_string_proxy.json │ ├── run_tests.sh │ ├── scripts/ │ │ ├── analyze_metrics.sh │ │ ├── profile_connect.sh │ │ └── scrape_metrics.sh │ ├── test_data/ │ │ ├── .gitignore │ │ ├── protobuf/ │ │ │ └── pom.xml │ │ ├── sensor.proto │ │ └── twitter.avro │ └── tests/ │ ├── __init__.py │ ├── compatibility/ │ │ ├── 
__init__.py │ │ ├── conftest.py │ │ ├── test_compatibility_case_sensitivity.py │ │ ├── test_migration.py │ │ ├── test_schematization_disabled.py │ │ ├── test_type_compatibility.py │ │ ├── test_type_compatibility_avro.py │ │ └── test_unsupported_types.py │ ├── high_performance/ │ │ └── test_case_sensitivity.py │ ├── iceberg/ │ │ ├── __init__.py │ │ ├── test_iceberg_avro.py │ │ ├── test_iceberg_json.py │ │ ├── test_iceberg_se_avro.py │ │ └── test_iceberg_se_json.py │ ├── pressure/ │ │ ├── test_perf_backlog_drain.py │ │ ├── test_pressure_init.py │ │ └── test_pressure_restart.py │ ├── schema_evolution/ │ │ ├── __init__.py │ │ ├── test_se_auto_table_creation_avro_sr.py │ │ ├── test_se_auto_table_creation_json.py │ │ ├── test_se_avro_sr.py │ │ ├── test_se_json_ignore_tombstone.py │ │ ├── test_se_multi_topic_replace_table.py │ │ ├── test_se_nonnullable_json.py │ │ ├── test_se_nullable_values_after_smt.py │ │ ├── test_se_random_row_count.py │ │ └── test_se_replace_table.py │ ├── test_auto_table_creation.py │ ├── test_auto_table_creation_topic2table.py │ ├── test_avrosr_avrosr.py │ ├── test_channel_invalidation.py │ ├── test_channel_invalidation_recovery.py │ ├── test_column_identifier_normalization.py │ ├── test_confluent_protobuf_protobuf.py │ ├── test_default_pipe_features.py │ ├── test_error_table.py │ ├── test_json_json.py │ ├── test_kc_delete_create.py │ ├── test_kc_delete_create_chaos.py │ ├── test_kc_delete_resume.py │ ├── test_kc_delete_resume_chaos.py │ ├── test_kc_pause_create.py │ ├── test_kc_pause_create_chaos.py │ ├── test_kc_pause_resume.py │ ├── test_kc_pause_resume_chaos.py │ ├── test_kc_recreate.py │ ├── test_kc_recreate_chaos.py │ ├── test_kc_restart.py │ ├── test_multiple_topic_to_one_table_snowpipe_streaming.py │ ├── test_native_complex_smt.py │ ├── test_native_string_json_without_schema.py │ ├── test_native_string_protobuf.py │ ├── test_nullable_values_after_smt.py │ ├── test_schema_evolution_streaming.py │ ├── test_schema_mapping.py │ ├── 
#!/usr/bin/env bash
#
# Git pre-commit hook that enforces the same formatting checks as CI:
#   1. Java   – google-java-format via ./format.sh, run when files under src/
#               are staged
#   2. Python – `ruff check --fix` followed by `ruff format`, run when files
#               under test/ are staged and ruff is on PATH. Both commands are
#               asked to exit non-zero when they change a file, which (with
#               `set -e`) aborts the commit.
#
# Install with:
#   git config core.hooksPath .githooks
#
# If you need the corporate secret-scanner hook as well, this script
# delegates to .git/hooks/pre-commit after running its own checks.

set -euo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel)"

# Capture the staged file list once. Piping `git diff` straight into
# `grep -q` under `pipefail` is fragile: grep exits on the first match, git
# can then die from SIGPIPE, and the pipeline is reported as failed even
# though a match was found -- silently skipping the check.
STAGED_FILES="$(git diff --cached --name-only)"

# --- Java formatting (google-java-format) ---
if grep -q '^src/' <<<"$STAGED_FILES"; then
    "$REPO_ROOT/format.sh"
fi

# --- Python linting & formatting (ruff) ---
PYTHON_PATHS=(test/tests test/lib test/conftest.py)
if grep -q '^test/' <<<"$STAGED_FILES"; then
    # ruff is optional: skip quietly when it is not installed.
    if command -v ruff &>/dev/null; then
        ruff check --fix --exit-non-zero-on-fix "${PYTHON_PATHS[@]}"
        ruff format --exit-non-zero-on-format "${PYTHON_PATHS[@]}"
    fi
fi

# Chain to the default hooks directory so the secret-scanner (or any other
# hook installed into .git/hooks/) still runs. `exec` replaces this process,
# so the chained hook's exit status becomes the hook result.
if [ -x .git/hooks/pre-commit ]; then
    exec .git/hooks/pre-commit "$@"
fi
'compatibility' or 'not compatibility and not schema_evolution and not correctness and not pressure')" required: false default: 'not pressure' test-group: description: "Short label for this test group used in artifact names (e.g. 'core', 'compatibility', 'schema_and_correctness')" required: false default: 'default' pressure: description: "Run pressure/stress tests instead of regular tests" required: false default: 'false' runs: using: composite steps: - name: Log in to GHCR (for prebuilt Apache Kafka image) if: inputs.platform == 'apache' uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ github.token }} - name: Run end-to-end tests shell: bash working-directory: test env: SNOWFLAKE_CREDENTIAL_FILE: "${{ github.workspace }}/profile.json" PLATFORM: ${{ inputs.platform }} PLATFORM_VERSION: ${{ inputs['platform-version'] }} SNOWFLAKE_CLOUD: ${{ inputs['snowflake-cloud'] }} JAVA_VERSION: ${{ inputs['java-version'] }} LOGS_DIR: "${{ github.workspace }}/test-logs" MARKER_FILTER: ${{ inputs.pressure == 'true' && 'pressure' || inputs['marker-filter'] }} run: | ./run_tests.sh \ --platform="$PLATFORM" \ --platform-version="$PLATFORM_VERSION" \ --cloud="$SNOWFLAKE_CLOUD" \ --java-version="$JAVA_VERSION" \ --logs-dir="$LOGS_DIR" \ -- -m "$MARKER_FILTER" - name: Upload service logs on failure if: failure() uses: actions/upload-artifact@v4 with: name: logs-${{ inputs.platform }}-${{ inputs['platform-version'] }}-${{ inputs['snowflake-cloud'] }}-${{ inputs['test-group'] }} path: ${{ github.workspace }}/test-logs/ retention-days: 14 if-no-files-found: ignore ================================================ FILE: .github/dependabot.yaml ================================================ # To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. 
#!/bin/sh

# Decrypt the encrypted Snowflake test profile for one cloud into ./profile.json.
#
# Usage: decrypt_secret.sh <AWS|GCP|GCS|AZURE>
#
# The passphrase is read from $SNOWFLAKE_TEST_PROFILE_SECRET.
# gpg flags: --batch prevents interactive prompts, --yes assumes "yes" for
# questions (e.g. overwriting an existing profile.json).
#
# mkdir $HOME/secrets

# Default to empty so a missing argument is handled by the case below instead
# of breaking the comparison.
snowflake_deployment="${1:-}"

case "$snowflake_deployment" in
  AWS)
    encrypted_profile=.github/scripts/profile.json.gpg
    ;;
  # The workflows pass 'GCP'; 'GCS' is kept for backward compatibility.
  # Previously only 'GCS' matched, so 'GCP' fell through to the Azure profile.
  GCP|GCS)
    encrypted_profile=.github/scripts/profile_gcs.json.gpg
    ;;
  AZURE)
    encrypted_profile=.github/scripts/profile_azure.json.gpg
    ;;
  *)
    # Historical behaviour: anything unrecognised uses the Azure profile.
    # Keep that fallback, but make it visible in the job log.
    echo "decrypt_secret.sh: unrecognised deployment '$snowflake_deployment', falling back to the AZURE profile" >&2
    encrypted_profile=.github/scripts/profile_azure.json.gpg
    ;;
esac

gpg --quiet --batch --yes --decrypt --passphrase="$SNOWFLAKE_TEST_PROFILE_SECRET" \
  --output profile.json "$encrypted_profile"
or src/main/...).""" rel = classname.replace(".", "/") + ".java" for prefix in ("src/test/java/", "src/main/java/"): candidate = connector_root / prefix / rel if candidate.exists(): return prefix + rel return None def _line_in_test_class(stack_trace: str, classname: str): """First (File.java:line) in stack trace for the test class (not JUnit/framework).""" test_class_file = classname.split(".")[-1] + ".java" pattern = re.escape(test_class_file) + r":(\d+)\)" match = re.search(pattern, stack_trace) return int(match.group(1)) if match else None def parse_suite(path: Path) -> list[tuple[str, str, str, str, str]]: """Parse a TEST-*.xml file; return list of (classname, testname, exc_type, message, stack_trace).""" failures = [] try: root = ET.parse(path).getroot() except (ET.ParseError, OSError): return failures for testcase in root.findall(".//testcase"): for kind in ("failure", "error"): node = testcase.find(kind) if node is not None: classname = testcase.get("classname", "") name = testcase.get("name", "") exc_type = (node.get("type") or "").strip() message = (node.get("message") or "").strip() stack_trace = (node.text or "").strip() failures.append((classname, name, exc_type, message, stack_trace)) return failures def _first_line_for_annotation(exc_type: str, message: str, stack_trace: str) -> str: """First line of failure for ::error message (exception type + message or first stack line).""" if message: first = f"{exc_type}: {message}" if exc_type else message elif stack_trace: first = stack_trace.split("\n")[0].strip() else: first = exc_type or "Failure" return first[:500] def _emit_error_annotation( classname: str, name: str, exc_type: str, message: str, stack_trace: str, connector_root: Path, stderr: object, ) -> None: """Print one ::error workflow command to stderr for GitHub annotations.""" title = f"{classname}#{name}" first_line = _first_line_for_annotation(exc_type, message, stack_trace) first_line_escaped = ( first_line.replace("%", "%25").replace("\r", 
"%0D").replace("\n", "%0A") ) file_path = _classname_to_path(classname, connector_root) file_line = _line_in_test_class(stack_trace, classname) if stack_trace else None parts = [f"title={title}"] if file_path: parts.append(f"file={file_path}") if file_line is not None: parts.append(f"line={file_line}") opts = ",".join(parts) print(f"::error {opts}::{first_line_escaped}", file=stderr) def main() -> None: summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if not summary_path: sys.exit(0) connector_root = ( Path(sys.argv[1]) if len(sys.argv) >= 2 else Path(os.environ.get("GITHUB_WORKSPACE", ".")) ) surefire_dir = connector_root / "target" / "surefire-reports" failsafe_dir = connector_root / "target" / "failsafe-reports" all_failures = [] for report_dir in (surefire_dir, failsafe_dir): if not report_dir.is_dir(): continue for path in sorted(report_dir.glob("TEST-*.xml")): all_failures.extend(parse_suite(path)) if not all_failures: sys.exit(0) for classname, name, exc_type, message, stack_trace in all_failures: _emit_error_annotation( classname, name, exc_type, message, stack_trace, connector_root, sys.stderr ) # Group by class for headings: class -> [(name, exc_type, message, stack_trace), ...] 
by_class = {} for classname, name, exc_type, message, stack_trace in all_failures: by_class.setdefault(classname, []).append( (name, exc_type, message, stack_trace) ) lines = ["", "## Java test failures", ""] for classname in sorted(by_class.keys()): short_name = classname.split(".")[-1] if classname else classname lines.append(f"## {short_name}") lines.append("") lines.append(f"**Class:** `{classname}`") lines.append("") for name, exc_type, message, stack_trace in by_class[classname]: lines.append(f"### {name}") lines.append("") if exc_type: lines.append(f"**Exception type:** `{exc_type}`") lines.append("") if message: lines.append("**Message:**") lines.append("") lines.append(message) lines.append("") if stack_trace: lines.append("**Stack trace:**") lines.append("") lines.append("```") lines.append(stack_trace) lines.append("```") lines.append("") lines.append("") with open(summary_path, "a", encoding="utf-8") as summary_file: summary_file.write("\n".join(lines)) if __name__ == "__main__": main() ================================================ FILE: .github/scripts/squid.conf ================================================ acl SSL_ports port 443 acl Safe_ports port 80 # http acl Safe_ports port 21 # ftp acl Safe_ports port 443 # https acl Safe_ports port 70 # gopher acl Safe_ports port 210 # wais acl Safe_ports port 1025-65535 # unregistered ports acl Safe_ports port 280 # http-mgmt acl Safe_ports port 488 # gss-http acl Safe_ports port 591 # filemaker acl Safe_ports port 777 # multiling http acl CONNECT method CONNECT http_access deny !Safe_ports http_access deny CONNECT !SSL_ports http_port 3128 coredump_dir /var/spool/squid refresh_pattern ^ftp: 1440 20% 10080 refresh_pattern ^gopher: 1440 0% 1440 refresh_pattern -i (/cgi-bin/|\?) 0 0% 0 refresh_pattern (Release|Packages(.gz)*)$ 0 20% 2880 refresh_pattern . 
# Builds the connector and runs the Java unit and integration tests (Maven
# `verify`) against Snowflake, with a local Squid proxy available to the tests.
name: Kafka Connector Java Integration Tests

on:
  push:
    branches: [ master ]
  pull_request:
    branches: '**'

jobs:
  build_and_test:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstrategyfail-fast
      matrix:
        snowflake_cloud: ['AWS'] # for now only AWS has support for ssv2
        # snowflake_cloud: [ 'AWS', 'AZURE', 'GCP' ]
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4

      # Same action version and distribution as .github/actions/build-connector.
      - name: "Install Java 11"
        uses: actions/setup-java@v3
        with:
          distribution: 'zulu'
          java-version: 11

      - name: "Cache local Maven repository"
        uses: actions/cache@v4
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-

      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
          architecture: 'x64'

      - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }}
        run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }}
        env:
          SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }}

      - name: Install Dependency
        run: |
          pip3 install --upgrade setuptools
          pip3 install requests certifi "confluent-kafka[avro,json,protobuf]==1.9.2"
          pip3 install avro kafka-python
          pip3 install protobuf
          pip3 install --upgrade snowflake-connector-python==2.7.4
          curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash
          sudo apt update
          sudo apt-get -y install jq vim
          sudo apt-get -y install protobuf-compiler

      # -y on both installs: the job has no terminal to answer apt's prompt.
      - name: Install Squid as Proxy Server and Apache Utils for Password Authentication
        run: |
          sudo apt-get -y install squid
          sudo apt-get -y install apache2-utils

      # Creates the basic-auth password file referenced by squid.conf
      # (user "admin", password "test") and starts the proxy.
      - name: Change squid config and run Proxy Server
        run: |
          sudo touch /etc/squid/passwords
          sudo chmod 777 /etc/squid/passwords
          sudo htpasswd -db -c /etc/squid/passwords admin test
          sudo mv .github/scripts/squid.conf /etc/squid/squid.conf
          sudo service squid start

      - name: Unit and Integration Test - ${{ matrix.snowflake_cloud }}
        id: java_tests
        env:
          SNOWFLAKE_CREDENTIAL_FILE: "${{ github.workspace }}/profile.json"
          SHELL: "/bin/bash"
        run: |
          set -Eeuo pipefail
          # line-buffer stdout/stderr for all child procs
          export PYTHONUNBUFFERED=1
          export RUST_BACKTRACE=1
          cd test
          # On any failure, show the tail of the build log in a collapsed group.
          trap 'echo "::group::Last 1000 lines of logs"; tail -n 1000 build.log || true; echo "::endgroup::"' ERR
          stdbuf -oL -eL ./build_runtime_jar.sh ../../snowflake-kafka-connector verify apache ${{ matrix.snowflake_cloud }} 2>&1 | tee build.log

      # Runs only when the test step itself failed.
      - name: Report Java test failures to job summary
        if: failure() && steps.java_tests.outcome == 'failure'
        run: ./.github/scripts/parse_java_test_reports.py "${{ github.workspace }}"
}}-java${{ matrix.java_version }} uses: docker/build-push-action@v6 with: context: test file: test/docker/Dockerfile.apache-kafka build-args: | KAFKA_VERSION=${{ matrix.kafka_version }} SCALA_VERSION=${{ matrix.scala_version }} JAVA_VERSION=${{ matrix.java_version }} push: true tags: ghcr.io/snowflakedb/snowflake-kafka-connector/apache-kafka:${{ matrix.kafka_version }}-java${{ matrix.java_version }} platforms: linux/amd64,linux/arm64 cache-from: type=gha,scope=apache-kafka-${{ matrix.kafka_version }}-java${{ matrix.java_version }} cache-to: type=gha,mode=max,scope=apache-kafka-${{ matrix.kafka_version }}-java${{ matrix.java_version }} ================================================ FILE: .github/workflows/end-to-end-legacy.yml ================================================ name: Kafka Connector end-to-end tests (legacy platforms) # Runs the core e2e suite on older Kafka platform versions. # Intentionally limited to master push and workflow_dispatch — these jobs are # skipped on pull_requests to reduce concurrent load on the shared Snowflake # test account. Full PR coverage is provided by end-to-end.yaml on the new # platforms (apache 4.1.1 / confluent 8.2.0). on: push: branches: [ master ] workflow_dispatch: jobs: build_and_test: runs-on: ubuntu-22.04 name: > ${{ matrix.platform }} ${{ matrix.platform_version }}, ${{ matrix.java_test_version && format('Java {0},', matrix.java_test_version) || '' }} ${{ matrix.snowflake_cloud }} [${{ matrix.test_group }}] permissions: packages: read strategy: fail-fast: false matrix: include: # Compatibility, schema_evolution and correctness groups are intentionally # skipped here — full coverage runs on the new platforms in end-to-end.yaml. 
- platform: apache platform_version: '2.8.2' snowflake_cloud: 'AWS' java_test_version: '11' test_group: core marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure' - platform: apache platform_version: '3.9.2' snowflake_cloud: 'GCP' java_test_version: '11' test_group: core marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure' - platform: confluent platform_version: '6.2.15' snowflake_cloud: 'AZURE' test_group: core marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure' - platform: confluent platform_version: '7.9.3' snowflake_cloud: 'AWS' test_group: core marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure' steps: - uses: actions/checkout@v4 - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }} run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }} env: SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }} - uses: ./.github/actions/build-connector with: platform: ${{ matrix.platform }} - uses: ./.github/actions/run-e2e-tests with: platform: ${{ matrix.platform }} platform-version: ${{ matrix.platform_version }} snowflake-cloud: ${{ matrix.snowflake_cloud }} java-version: ${{ matrix.java_test_version || '11' }} marker-filter: ${{ matrix.marker_filter }} test-group: ${{ matrix.test_group }} ================================================ FILE: .github/workflows/end-to-end-stress.yml ================================================ name: Kafka Connector stress test on: push: branches: [ master ] workflow_dispatch: jobs: build_and_test: runs-on: ubuntu-22.04 name: ${{ matrix.platform }} ${{ matrix.platform_version }}, ${{ matrix.snowflake_cloud }} strategy: fail-fast: false matrix: include: - platform: confluent platform_version: '7.6.0' snowflake_cloud: 'AWS' steps: - uses: actions/checkout@v4 - name: Decrypt profile.json in Snowflake 
Cloud ${{ matrix.snowflake_cloud }} run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }} env: SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }} - uses: ./.github/actions/build-connector with: platform: ${{ matrix.platform }} - uses: ./.github/actions/run-e2e-tests env: # Less frequent preCommit than default E2E CONNECT_OFFSET_FLUSH_INTERVAL_MS: '10000' with: platform: ${{ matrix.platform }} platform-version: ${{ matrix.platform_version }} snowflake-cloud: ${{ matrix.snowflake_cloud }} pressure: 'true' ================================================ FILE: .github/workflows/end-to-end.yaml ================================================ name: Kafka Connector end-to-end tests on: push: branches: [ master ] pull_request: branches: ['**'] workflow_dispatch: jobs: build_and_test: runs-on: ubuntu-22.04 name: > ${{ matrix.platform }} ${{ matrix.platform_version }}, ${{ matrix.java_test_version && format('Java {0},', matrix.java_test_version) || '' }} ${{ matrix.snowflake_cloud }} [${{ matrix.test_group }}] permissions: packages: read strategy: fail-fast: false matrix: include: # ── New platforms: full suite split into 3 parallel groups ─────── - platform: apache platform_version: '4.1.1' snowflake_cloud: 'AWS' java_test_version: '17' test_group: compatibility marker_filter: 'compatibility and not schema_evolution' - platform: apache platform_version: '4.1.1' snowflake_cloud: 'AWS' java_test_version: '17' test_group: schema_and_correctness marker_filter: 'schema_evolution or correctness' - platform: apache platform_version: '4.1.1' snowflake_cloud: 'AWS' java_test_version: '17' test_group: core marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure' - platform: confluent platform_version: '8.2.0' snowflake_cloud: 'AWS' test_group: compatibility marker_filter: 'compatibility and not schema_evolution' - platform: confluent platform_version: '8.2.0' snowflake_cloud: 'AWS' test_group: 
schema_and_correctness marker_filter: 'schema_evolution or correctness' - platform: confluent platform_version: '8.2.0' snowflake_cloud: 'AWS' test_group: core marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure' steps: - uses: actions/checkout@v4 - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }} run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }} env: SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }} - uses: ./.github/actions/build-connector with: platform: ${{ matrix.platform }} - uses: ./.github/actions/run-e2e-tests with: platform: ${{ matrix.platform }} platform-version: ${{ matrix.platform_version }} snowflake-cloud: ${{ matrix.snowflake_cloud }} java-version: ${{ matrix.java_test_version || '11' }} marker-filter: ${{ matrix.marker_filter }} test-group: ${{ matrix.test_group }} ================================================ FILE: .github/workflows/formatting.yml ================================================ name: formatting on: pull_request: push: branches: - master jobs: java: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: axel-op/googlejavaformat-action@v3 with: skip-commit: true version: v1.24.0 args: "-n --set-exit-if-changed" python: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: "3.x" - run: pip install ruff - run: ruff check test/tests test/lib test/conftest.py - run: ruff format --check test/tests test/lib test/conftest.py formatting-done: needs: [java, python] runs-on: ubuntu-latest steps: - run: echo "All formatting checks passed" ================================================ FILE: .github/workflows/semgrep.yml ================================================ --- name: Run semgrep checks on: pull_request: branches: [master] permissions: contents: read jobs: run-semgrep-reusable-workflow: uses: 
snowflakedb/reusable-workflows/.github/workflows/semgrep-v2.yml@main secrets: token: ${{ secrets.SEMGREP_APP_TOKEN }} ================================================ FILE: .gitignore ================================================ .DS_Store .envrc # IDEs .idea/ .settings/ .claude/ .cursor/ .vscode/ .project/ # Python __pycache__ venv # Java .cache/ .classpath *.iml target docker-setup *.log *.log.* profile*.json licenses/ ai-docs/ docs/ CLAUDE.md .mcp.json profile.txt profile_qa3.json profiling-results*/ ================================================ FILE: .java-version ================================================ 11 ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2019 Snowflake Computing, Inc. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # `snowflake-kafka-connector` [![License](http://img.shields.io/:license-Apache%202-brightgreen.svg)](http://www.apache.org/licenses/LICENSE-2.0.txt) The Snowflake Kafka Connector is a plugin for Apache Kafka Connect. It ingests data from a Kafka Topic into a Snowflake Table. [Official documentation](https://docs.snowflake.com/en/user-guide/kafka-connector) for the Snowflake Kafka Connector ## Contributing ### Guidelines The following requirements must be met before you can merge your PR: - Tests: all test suites must pass, see the [test README](https://github.com/snowflakedb/snowflake-kafka-connector/blob/master/test/README.md) - Formatting: Java sources must pass [Google Java Format](https://github.com/google/google-java-format) (`./format.sh`) and Python test code must pass `ruff check` + `ruff format --check`. The [pre-commit hook](#pre-commit-hook) runs both automatically. - CLA: all contributors must sign the Snowflake CLA. This is a one-time signature; please provide your email so we can work with you to get this signed after you open a PR. Thank you for contributing! We will review and approve PRs as soon as we can. ### Pre-commit hook A pre-commit hook is provided in `.githooks/` that enforces the same formatting checks as CI. Python formatting is skipped when ruff is not available. 
To enable the hook: ```bash git config core.hooksPath .githooks ``` ### Unit tests ```bash mvn package -Dgpg.skip=true ``` Runs all test files in `src/test` that do not end with `IT`. Requires `SNOWFLAKE_CREDENTIAL_FILE` to be set. ### Integration tests ```bash mvn verify -Dgpg.skip=true ``` Runs all test files in `src/test`, including unit tests. ### End-to-end tests Refer to [test/README.md](test/README.md). ## Third party licenses A custom license handling process runs during the build to meet legal standards. - License files are copied directly from the JAR if present in one of the following locations: META-INF/LICENSE.txt, META-INF/LICENSE, META-INF/LICENSE.md - If no license file is found, the license must be added manually to the [`process_licenses.py`](https://github.com/snowflakedb/snowflake-kafka-connector/blob/master/scripts/process_licenses.py) script for the build to pass ## Test and Code Coverage Statuses [![Kafka Connector integration test](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/IntegrationTest.yml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/IntegrationTest.yml) [![Kafka Connector end-to-end test](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end.yaml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end.yaml) [![Kafka Connector end-to-end test (legacy platforms)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-legacy.yml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-legacy.yml) [![Kafka Connector stress test](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-stress.yml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-stress.yml) ================================================ 
FILE: deploy.sh ================================================ #!/bin/bash # exit on error set -e THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" if [ -z "$GPG_KEY_ID" ]; then echo "[ERROR] Key Id not specified!" exit 1 fi if [ -z "$GPG_KEY_PASSPHRASE" ]; then echo "[ERROR] GPG passphrase is not specified for $GPG_KEY_ID!" exit 1 fi if [ -z "$GPG_PRIVATE_KEY" ]; then echo "[ERROR] GPG private key file is not specified!" exit 1 fi echo "[INFO] Import PGP Key" if ! gpg --list-secret-key | grep "$GPG_KEY_ID"; then gpg --allow-secret-key-import --import "$GPG_PRIVATE_KEY" fi CENTRAL_DEPLOY_SETTINGS_XML="$THIS_DIR/mvn_settings_central_deploy.xml" cat > $CENTRAL_DEPLOY_SETTINGS_XML << SETTINGS.XML central $SONATYPE_USER $SONATYPE_PWD central true gpg2 $GPG_KEY_ID $GPG_KEY_PASSPHRASE SETTINGS.XML mvn --settings $CENTRAL_DEPLOY_SETTINGS_XML -DskipTests clean deploy #confluent release mvn -f pom_confluent.xml --settings $CENTRAL_DEPLOY_SETTINGS_XML -DskipTests clean package #white source # whitesource/run_whitesource.sh aws s3 cp target/components/packages/*.zip s3://sfc-eng-jenkins/repository/kafka/ ================================================ FILE: format.sh ================================================ #!/usr/bin/env bash set -euo pipefail SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd "$SCRIPT_DIR" DOWNLOAD_URL="https://github.com/google/google-java-format/releases/download/v1.24.0/google-java-format-1.24.0-all-deps.jar" JAR_FILE="./.cache/google-java-format-1.24.0-all-deps.jar" if [ ! -f "${JAR_FILE}" ]; then mkdir -p "$(dirname "${JAR_FILE}")" echo "Downloading Google Java format to ${JAR_FILE}" curl -# -L --fail "${DOWNLOAD_URL}" --output "${JAR_FILE}" fi if ! command -v java > /dev/null; then echo "Java not installed." 
exit 1 fi echo "Running Google Java Format" find ./src -type f -name "*.java" -print0 | xargs -0 java -jar "${JAR_FILE}" --replace --set-exit-if-changed && echo "OK" ================================================ FILE: pom.xml ================================================ 4.0.0 com.snowflake snowflake-kafka-connector 4.1.0 jar Snowflake Kafka Connector Snowflake Kafka Connect Sink Connector https://www.snowflake.com/ Snowflake Support Team snowflake-java@snowflake.com Snowflake Computing https://www.snowflake.com Apache License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt repo scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git https://github.com/snowflakedb/snowflake-kafka-connector 11 11 false ${skipTests} ${skipTests} 3.9.2 4.3.0 3.27.7 7.9.2 3.25.5 33.6.0-jre 2.21.2 1.28.0 3.5.5 4.2.0 2.0.17 3.20.0 1.11.1 confluent Confluent https://packages.confluent.io/maven/ cloudera-repo https://repository.cloudera.com/content/repositories/releases/ true true org.apache.maven.plugins maven-site-plugin 3.12.1 org.apache.maven.plugins maven-project-info-reports-plugin 3.3.0 org.sonatype.central central-publishing-maven-plugin 0.10.0 true true org.apache.maven.plugins maven-surefire-plugin ${maven-surefire-plugin.version} ${skipUnitTests} false --add-opens java.base/java.util=ALL-UNNAMED org.apache.maven.plugins maven-compiler-plugin 3.15.0 com.google.auto.value auto-value ${auto-value.version} maven-assembly-plugin 3.8.0 package single true jar-with-dependencies ${project.artifactId}-${project.version} false org.apache.maven.plugins maven-jar-plugin 3.5.0 default-jar none org.apache.maven.plugins maven-gpg-plugin 3.2.8 sign-artifacts install sign org.apache.maven.plugins maven-source-plugin 3.4.0 attach-sources jar-no-fork org.apache.maven.plugins maven-javadoc-plugin 3.12.0 attach-javadocs prepare-package jar false false none org.codehaus.mojo exec-maven-plugin 3.2.0 
com.snowflake.kafka.connector.internal.ResetProxyConfigExec test maven-deploy-plugin true org.bouncycastle bcpkix-fips 2.1.11 provided org.apache.kafka connect-api ${kafka.version} provided javax.ws.rs javax.ws.rs-api org.slf4j slf4j-api com.github.luben zstd-jni org.lz4 lz4-java org.xerial.snappy snappy-java org.apache.kafka kafka-clients ${kafka.version} org.lz4 lz4-java org.xerial.snappy snappy-java org.slf4j slf4j-api com.github.luben zstd-jni net.snowflake snowflake-jdbc ${snowflake-jdbc.version} com.google.protobuf protobuf-java ${protobuf.version} com.google.protobuf protobuf-java-util ${protobuf.version} org.apache.avro avro 1.12.1 com.fasterxml.jackson.core jackson-core com.fasterxml.jackson.core jackson-databind org.apache.commons commons-compress org.slf4j slf4j-api org.apache.commons commons-compress ${commons-compress.version} org.apache.commons commons-lang3 org.apache.commons commons-lang3 ${commons-lang3.version} com.fasterxml.jackson.core jackson-core ${jackson.version} com.fasterxml.jackson.core jackson-databind ${jackson.version} io.confluent kafka-schema-registry-client ${confluent.version} io.confluent common-utils io.confluent common-config io.swagger swagger-annotations io.swagger swagger-core io.confluent kafka-avro-serializer ${confluent.version} io.confluent common-utils io.confluent kafka-connect-avro-converter ${confluent.version} io.confluent kafka-schema-registry-client-encryption ${confluent.version} io.dropwizard.metrics metrics-core 4.2.33 io.dropwizard.metrics metrics-jmx 4.2.33 com.google.guava guava ${guava.version} com.google.auto.value auto-value-annotations ${auto-value.version} com.google.auto.value auto-value ${auto-value.version} provided com.github.ben-manes.caffeine caffeine 3.2.4 dev.failsafe failsafe 3.3.2 org.junit.jupiter junit-jupiter-engine test org.junit.jupiter junit-jupiter-params test org.junit.vintage junit-vintage-engine test junit junit 4.13.1 test org.mockito mockito-core 5.23.0 test org.slf4j slf4j-api 
${slf4j-api.version} org.slf4j slf4j-log4j12 ${slf4j-api.version} test org.apache.logging.log4j log4j-core 2.25.4 test com.github.stefanbirkner system-rules 1.19.0 test commons-dbutils commons-dbutils 1.8.1 test org.apache.commons commons-dbcp2 2.14.0 test org.apache.kafka connect-json 0.9.0.0 test org.apache.kafka kafka_2.13 ${kafka.version} test org.apache.kafka kafka_2.13 test-jar test ${kafka.version} test org.apache.kafka kafka-server-common test-jar test ${kafka.version} test org.apache.kafka connect-runtime ${kafka.version} test org.apache.kafka connect-runtime ${kafka.version} test test-jar test org.apache.kafka kafka-clients ${kafka.version} test test-jar test org.lz4 lz4-java org.slf4j slf4j-api org.awaitility awaitility ${awaitility.version} test org.assertj assertj-core ${assertj-core.version} test org.testcontainers testcontainers 2.0.5 test com.snowflake snowpipe-streaming 1.4.0 aws org.apache.maven.plugins maven-failsafe-plugin 3.5.5 false **/*IT.java none ${skipIntegrationTests} integration-test verify non-aws org.apache.maven.plugins maven-failsafe-plugin 3.5.5 false **/*IT.java **/*Iceberg*IT.java **/*RowSchemaProvider*IT.java **/*StreamingIngestClientV2Provider*IT.java **/*SnowflakeSinkServiceV2*IT.java ${skipIntegrationTests} integration-test verify org.junit junit-bom 5.14.4 pom import ================================================ FILE: pom_confluent.xml ================================================ 4.0.0 com.snowflake snowflake-kafka-connector 4.1.0 jar Snowflake Kafka Connector Snowflake Kafka Connect Sink Connector https://www.snowflake.com/ Snowflake Support Team snowflake-java@snowflake.com Snowflake Computing https://www.snowflake.com Apache License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt repo scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git https://github.com/snowflakedb/snowflake-kafka-connector 11 11 false ${skipTests} 
${skipTests} ${project.build.directory}/dependency-jars ${project.build.directory}/dependency-list.txt ${project.build.directory}/../licenses 3.9.1 4.2.2 3.26.3 7.9.2 3.25.5 33.4.0-jre 2.18.2 1.27.1 3.5.2 4.0.2 2.0.17 1.14.4 3.18.0 1.10.4 confluent Confluent https://packages.confluent.io/maven/ cloudera-repo https://repository.cloudera.com/content/repositories/releases/ true true org.apache.maven.plugins maven-dependency-plugin 3.6.0 copy-dependencies copy-dependencies generate-resources ${license.processing.dependencyJarsDir} false false true org.apache.maven.plugins maven-dependency-plugin 3.6.0 runtime ${license.processing.dependencyListFile} list generate-resources org.codehaus.mojo exec-maven-plugin 3.2.0 process-third-party-licenses exec generate-resources python3 ${project.basedir}/scripts/process_licenses.py ${license.processing.dependencyListFile} ${license.processing.dependencyJarsDir} ${license.processing.targetDir} io.confluent 0.11.1 kafka-connect-maven-plugin kafka-connect Snowflake Sink Connector https://docs.snowflake.com/en/connectors/kafkahp/about The Snowflake High Performance Kafka connector lets you quickly and easily move messages in formats such as Avro, JSON, and Protobuf from Kafka topics into Snowflake tables. It uses the Snowpipe Streaming High Performance architecture. logo/snowflake.png Snowflake Inc. Some features of the connector may be in preview as mentioned in the documentation, and provided primarily for evaluation and testing purposes. If you decide to use a preview feature in production, please contact Snowflake Support before doing so. snowflakeinc organization Snowflake Inc. 
https://snowflake.com/ logo/snowflake.png sink Snowflake jdbc database dbms rdbms sql data warehouse high performance true true any maven-clean-plugin 3.3.2 false ${project.basedir}/licenses ** org.apache.maven.plugins maven-site-plugin 3.12.1 org.apache.maven.plugins maven-project-info-reports-plugin 3.3.0 org.sonatype.central central-publishing-maven-plugin 0.8.0 true true org.apache.maven.plugins maven-surefire-plugin ${maven-surefire-plugin.version} ${skipUnitTests} org.apache.maven.plugins maven-compiler-plugin 3.11.0 com.google.auto.value auto-value ${auto-value.version} maven-assembly-plugin 3.4.2 package single true jar-with-dependencies ${project.artifactId}-${project.version} false org.apache.maven.plugins maven-jar-plugin 3.2.2 default-jar none org.apache.maven.plugins maven-gpg-plugin 3.0.1 sign-artifacts package sign org.apache.maven.plugins maven-source-plugin 3.3.1 attach-sources jar-no-fork org.apache.maven.plugins maven-javadoc-plugin 3.11.2 attach-javadocs jar false maven-deploy-plugin true org.bouncycastle bcpkix-fips 2.1.8 org.apache.kafka connect-api ${kafka.version} provided javax.ws.rs javax.ws.rs-api org.slf4j slf4j-api com.github.luben zstd-jni org.lz4 lz4-java org.xerial.snappy snappy-java org.apache.kafka kafka-clients ${kafka.version} org.lz4 lz4-java org.slf4j slf4j-api net.snowflake snowflake-jdbc ${snowflake-jdbc.version} com.google.protobuf protobuf-java ${protobuf.version} com.google.protobuf protobuf-java-util ${protobuf.version} org.apache.avro avro 1.11.4 com.fasterxml.jackson.core jackson-core com.fasterxml.jackson.core jackson-databind org.apache.commons commons-compress org.slf4j slf4j-api org.apache.commons commons-compress ${commons-compress.version} org.apache.commons commons-lang3 org.apache.commons commons-lang3 ${commons-lang3.version} com.fasterxml.jackson.core jackson-core ${jackson.version} com.fasterxml.jackson.core jackson-databind ${jackson.version} io.confluent kafka-schema-registry-client ${confluent.version} 
io.confluent common-utils io.confluent common-config io.swagger swagger-annotations io.swagger swagger-core io.confluent kafka-avro-serializer ${confluent.version} io.confluent common-utils io.confluent kafka-connect-avro-converter ${confluent.version} provided io.confluent kafka-schema-registry-client-encryption ${confluent.version} io.dropwizard.metrics metrics-core 4.2.26 io.dropwizard.metrics metrics-jmx 4.2.3 com.google.guava guava ${guava.version} com.google.auto.value auto-value-annotations ${auto-value.version} com.google.auto.value auto-value ${auto-value.version} provided com.github.ben-manes.caffeine caffeine 2.9.3 dev.failsafe failsafe 3.3.2 org.apache.parquet parquet-column ${parquet.version} org.junit.jupiter junit-jupiter-engine test org.junit.jupiter junit-jupiter-params test org.junit.vintage junit-vintage-engine test junit junit 4.13.1 test org.mockito mockito-core 2.20.1 test org.slf4j slf4j-api ${slf4j-api.version} org.slf4j slf4j-log4j12 ${slf4j-api.version} test org.apache.logging.log4j log4j-core 2.17.1 test com.github.stefanbirkner system-rules 1.19.0 test commons-dbutils commons-dbutils 1.8.1 test org.apache.commons commons-dbcp2 2.12.0 test org.apache.kafka connect-json 0.9.0.0 test org.apache.kafka kafka_2.13 ${kafka.version} test org.apache.kafka kafka_2.13 test-jar test ${kafka.version} test org.apache.kafka kafka-server-common test-jar test ${kafka.version} test org.apache.kafka connect-runtime ${kafka.version} test org.apache.kafka connect-runtime ${kafka.version} test test-jar test org.apache.kafka kafka-clients ${kafka.version} test test-jar test org.lz4 lz4-java org.xerial.snappy snappy-java org.slf4j slf4j-api org.awaitility awaitility ${awaitility.version} test org.assertj assertj-core ${assertj-core.version} test org.testcontainers testcontainers 1.19.3 test com.snowflake snowpipe-streaming 1.2.0 aws org.apache.maven.plugins maven-failsafe-plugin 2.22.2 **/*IT.java none ${skipIntegrationTests} integration-test verify non-aws 
org.apache.maven.plugins maven-failsafe-plugin 2.22.2 **/*IT.java **/*Iceberg*IT.java **/*RowSchemaProvider*IT.java **/*StreamingIngestClientV2Provider*IT.java **/*SnowflakeSinkServiceV2*IT.java ${skipIntegrationTests} integration-test verify org.junit junit-bom 5.11.2 pom import ================================================ FILE: profile.json.example ================================================ { "user": "user name", "private_key": "private key", "host": "acountname.snowflakecomputing.com:443", "schema": "schema name", "database": "database name", "warehouse": "warehouse name" } ================================================ FILE: scripts/process_licenses.py ================================================ #!/usr/bin/env python # This script processes licenses of 3rd party dependencies and stores them in the JAR. The rules are: # 1. Dependencies, which contains a license file should be put into the shaded JAR as-is. # 2. Dependencies, which do not contain a license file should be mentioned in the file ADDITIONAL_LICENCES, together with the name of its license. # # # The script accepts the following arguments: # * DEPENDENCY_LIST_FILE_PATH # * Can be obtained by running mvn dependency:list -DincludeScope=runtime -DoutputFile=target/dependency_list.txt # * DEPENDENCIES_DIR # * Directory containing the JAR files of all SDK dependencies. 
Automatically generated by `mvn clean package` in target/dependency-jars # * TARGET_DIR # * Where to save all output, should be target/generated-sources/META-INF/third-party-licenses # # # Useful mvn commands: # * mvn clean license:add-third-party # * Generate dependency report; useful to find out licenses for dependencies that don't ship with a license file # * mvn dependency:list -DincludeScope=runtime -DoutputFile=target/dependency_list.txt # * Used as input of this script (DEPENDENCY_LIST_FILE_PATH) import sys from pathlib import Path from zipfile import ZipFile # License name constants APACHE_LICENSE = "Apache License 2.0" BSD_2_CLAUSE_LICENSE = "2-Clause BSD License" BSD_3_CLAUSE_LICENSE = "3-Clause BSD License" EDL_10_LICENSE = "EDL 1.0" MIT_LICENSE = "The MIT License" GO_LICENSE = "The Go license" BOUNCY_CASTLE_LICENSE = "Bouncy Castle License" LGPL = "LGPL License" # The SDK does not need to include licenses of dependencies, which aren't shaded IGNORED_DEPENDENCIES = {"net.snowflake:snowflake-jdbc", "org.slf4j:slf4j-api"} # List of dependencies, which don't ship with a license file. # Only add a new record here after verifying that the dependency JAR does not contain a license! 
ADDITIONAL_LICENSES_MAP = { "com.eclipsesource.minimal-json:minimal-json": MIT_LICENSE, "com.fasterxml.jackson.dataformat:jackson-dataformat-protobuf": APACHE_LICENSE, "com.github.ben-manes.caffeine:caffeine": APACHE_LICENSE, "com.github.luben:zstd-jni": BSD_2_CLAUSE_LICENSE, "com.google.auto.value:auto-value-annotations": APACHE_LICENSE, "com.google.code.findbugs:jsr305": APACHE_LICENSE, "com.google.crypto.tink:tink": APACHE_LICENSE, "com.google.errorprone:error_prone_annotations": APACHE_LICENSE, "com.google.code.findbugs:annotations": LGPL, "com.google.code.gson:gson": APACHE_LICENSE, "com.google.guava:failureaccess": APACHE_LICENSE, "com.google.guava:listenablefuture": APACHE_LICENSE, "com.google.j2objc:j2objc-annotations": APACHE_LICENSE, "com.google.protobuf:protobuf-java": BSD_3_CLAUSE_LICENSE, "com.google.protobuf:protobuf-java-util": BSD_3_CLAUSE_LICENSE, "com.google.re2j:re2j": GO_LICENSE, "com.hubspot.jackson:jackson-datatype-protobuf": APACHE_LICENSE, "com.ibm.jsonata4java:JSONata4Java": APACHE_LICENSE, "com.snowflake:snowpipe-streaming": APACHE_LICENSE, "com.squareup:protoparser": APACHE_LICENSE, "dev.failsafe:failsafe": APACHE_LICENSE, "info.picocli:picocli": APACHE_LICENSE, "io.confluent:common-utils": APACHE_LICENSE, "io.confluent:dek-registry-client": APACHE_LICENSE, "io.confluent:kafka-avro-serializer": APACHE_LICENSE, "io.confluent:kafka-connect-avro-converter": APACHE_LICENSE, "io.confluent:kafka-connect-avro-data": APACHE_LICENSE, "io.confluent:kafka-schema-converter": APACHE_LICENSE, "io.confluent:kafka-schema-registry-client": APACHE_LICENSE, "io.confluent:kafka-schema-registry-client-encryption": APACHE_LICENSE, "io.confluent:kafka-schema-registry-client-encryption-tink": APACHE_LICENSE, "io.confluent:kafka-schema-rules": APACHE_LICENSE, "io.confluent:kafka-schema-serializer": APACHE_LICENSE, "io.confluent:logredactor": APACHE_LICENSE, "io.confluent:logredactor-metrics": APACHE_LICENSE, "io.dropwizard.metrics:metrics-core": APACHE_LICENSE, 
"io.dropwizard.metrics:metrics-jmx": APACHE_LICENSE, "io.dropwizard.metrics:metrics-jvm": APACHE_LICENSE, "io.swagger.core.v3:swagger-annotations": APACHE_LICENSE, "net.snowflake:snowflake-kafka-connector": APACHE_LICENSE, "net.snowflake:snowflake-ingest-sdk": APACHE_LICENSE, "org.agrona:agrona": APACHE_LICENSE, "org.antlr:antlr4-runtime": BSD_3_CLAUSE_LICENSE, "org.apache.kafka:kafka-clients": APACHE_LICENSE, "org.apache.parquet:parquet-common": APACHE_LICENSE, "org.apache.parquet:parquet-format-structures": APACHE_LICENSE, "org.bouncycastle:bc-fips": BOUNCY_CASTLE_LICENSE, "org.bouncycastle:bcpkix-fips": BOUNCY_CASTLE_LICENSE, "org.projectnessie.cel:cel-core": APACHE_LICENSE, "org.projectnessie.cel:cel-generated-antlr": APACHE_LICENSE, "org.projectnessie.cel:cel-generated-pb": APACHE_LICENSE, "org.projectnessie.cel:cel-jackson": APACHE_LICENSE, "org.projectnessie.cel:cel-tools": APACHE_LICENSE, "org.xerial.snappy:snappy-java": APACHE_LICENSE, "org.yaml:snakeyaml": APACHE_LICENSE, "org.roaringbitmap:RoaringBitmap": APACHE_LICENSE, "org.jspecify:jspecify": APACHE_LICENSE, } def parse_cmdline_args(): if len(sys.argv) != 4: raise Exception( "usage: process_licenses.py DEPENDENCY_LIST_FILE_PATH DEPENDENCIES_DIR TARGET_DIR" ) dependency_list_file_path = Path(sys.argv[1]).absolute() dependencies_dir_path = Path(sys.argv[2]).absolute() target_dir = Path(sys.argv[3]).absolute() if ( not dependency_list_file_path.exists() or not dependency_list_file_path.is_file() ): raise Exception(f"File {dependency_list_file_path} does not exist") if not dependencies_dir_path.exists() or not dependencies_dir_path.is_dir(): raise Exception(f"Directory {dependencies_dir_path} does not exist") return dependency_list_file_path, dependencies_dir_path, target_dir def main(): dependency_list_path, dependency_jars_path, target_dir = parse_cmdline_args() dependency_count = 0 dependency_with_license_count = 0 dependency_without_license_count = 0 dependency_ignored_count = 0 missing_licenses_str = 
"" target_dir.mkdir(parents=True, exist_ok=True) with open(dependency_list_path, "r") as dependency_file_handle: for line in dependency_file_handle.readlines(): line = line.strip() if line == "" or line == "The following files have been resolved:": continue dependency_count += 1 # Line is a string like: "commons-codec:commons-codec:jar:1.15:compile -- module org.apache.commons.codec [auto]" artifact_details = line.split()[0] group_id, artifact_id, _, version, scope = artifact_details.split(":") current_jar = Path(dependency_jars_path, f"{artifact_id}-{version}.jar") if not current_jar.exists() and current_jar.is_file(): raise Exception(f"Expected JAR file does not exist: {current_jar}") current_jar_as_zip = ZipFile(current_jar) dependency_lookup_key = f"{group_id}:{artifact_id}" if dependency_lookup_key in IGNORED_DEPENDENCIES: dependency_ignored_count += 1 continue license_found = False for zip_info in current_jar_as_zip.infolist(): if zip_info.is_dir(): continue if zip_info.filename in ( "META-INF/LICENSE.txt", "META-INF/LICENSE", "META-INF/LICENSE.md", ): license_found = True dependency_with_license_count += 1 # Extract license to the target directory zip_info.filename = f"LICENSE_{group_id}__{artifact_id}" current_jar_as_zip.extract(zip_info, target_dir) break if ( "license" in zip_info.filename.lower() ): # Log potential license matches print(f"Potential license match: {current_jar} {zip_info}") if not license_found: print( f"License not found {current_jar}; using value from ADDITIONAL_LICENSES_MAP" ) license_name = ADDITIONAL_LICENSES_MAP.get(dependency_lookup_key) if license_name: dependency_without_license_count += 1 missing_licenses_str += f"{dependency_lookup_key}: {license_name}\n" else: err_msg = f"The dependency {dependency_lookup_key} does not ship a license file, but neither is it not defined in ADDITIONAL_LICENSES_MAP" raise Exception(err_msg) with open( Path(target_dir, "ADDITIONAL_LICENCES"), "w" ) as additional_licenses_handle: 
additional_licenses_handle.write(missing_licenses_str) if dependency_count < 30: raise Exception( f"Suspiciously low number of dependency JARs detected in {dependency_jars_path}: {dependency_count}" ) print("License generation finished") print(f"\tTotal dependencies: {dependency_count}") print(f"\tTotal dependencies (with license): {dependency_with_license_count}") print(f"\tTotal dependencies (without license): {dependency_without_license_count}") print(f"\tIgnored dependencies: {dependency_ignored_count}") if __name__ == "__main__": main() ================================================ FILE: src/main/java/com/snowflake/ingest/streaming/internal/TimestampWrapper.java ================================================ package com.snowflake.ingest.streaming.internal; import com.snowflake.kafka.connector.internal.validation.Power10Util; import java.math.BigDecimal; import java.math.BigInteger; import java.math.RoundingMode; import java.time.OffsetDateTime; /** * Copy of {@code net.snowflake.ingest.streaming.internal.TimestampWrapper} from * snowflake-ingest-sdk used by {@code PkgDataValidationUtil} to serialize timestamp values without * depending on the legacy SDK artifact. */ public class TimestampWrapper { private final long epoch; private final int fraction; private final int timezoneOffsetSeconds; private final int scale; private static final int BITS_FOR_TIMEZONE = 14; private static final int MASK_OF_TIMEZONE = (1 << BITS_FOR_TIMEZONE) - 1; public TimestampWrapper(OffsetDateTime offsetDateTime, int scale) { if (scale < 0 || scale > 9) { throw new IllegalArgumentException( String.format("Scale must be between 0 and 9, actual: %d", scale)); } this.epoch = offsetDateTime.toEpochSecond(); this.fraction = offsetDateTime.getNano() / Power10Util.intTable[9 - scale] * Power10Util.intTable[9 - scale]; this.timezoneOffsetSeconds = offsetDateTime.getOffset().getTotalSeconds(); this.scale = scale; } /** Convert the timestamp to a binary representation. 
*/
  public BigInteger toBinary(boolean includeTimezone) {
    // Whole timestamp in nanoseconds: epoch seconds * 10^9 plus the stored fraction.
    BigDecimal timeInNs =
        BigDecimal.valueOf(epoch).scaleByPowerOfTen(9).add(new BigDecimal(fraction));
    // Re-express the value in units of 10^-scale seconds ...
    BigDecimal scaledTime = timeInNs.scaleByPowerOfTen(scale - 9);
    // ... and drop any remaining decimals by truncating toward zero.
    scaledTime = scaledTime.setScale(0, RoundingMode.DOWN);
    BigInteger fcpInt = scaledTime.unscaledValue();
    if (includeTimezone) {
      // The offset is stored in minutes, biased by 1440 (presumably so the stored
      // field is never negative; confirm against the upstream SDK).
      int offsetMin = timezoneOffsetSeconds / 60;
      offsetMin += 1440;
      // Make room for the timezone field in the low BITS_FOR_TIMEZONE bits, then add
      // the masked offset into that space.
      fcpInt = fcpInt.shiftLeft(BITS_FOR_TIMEZONE);
      fcpInt = fcpInt.add(BigInteger.valueOf(offsetMin & MASK_OF_TIMEZONE));
    }
    return fcpInt;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/ConnectorConfigTools.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
*/ package com.snowflake.kafka.connector; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.KCLogger; import java.util.Locale; import java.util.Map; import org.apache.kafka.common.config.ConfigDef; public class ConnectorConfigTools { private static final KCLogger LOGGER = new KCLogger(ConnectorConfigTools.class.getName()); public static final ConfigDef.Validator BOOLEAN_VALIDATOR = new ConfigDef.Validator() { private final ConfigDef.ValidString validator = ConfigDef.ValidString.in( Boolean.TRUE.toString().toLowerCase(Locale.ROOT), Boolean.FALSE.toString().toLowerCase(Locale.ROOT)); @Override public void ensureValid(String name, Object value) { if (value instanceof String) { value = ((String) value).toLowerCase(Locale.ROOT); } this.validator.ensureValid(name, value); } }; public static void setDefaultValues(Map config) { if (!config.containsKey(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS)) { config.put( KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, String.valueOf(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT)); LOGGER.info( "{} set to default {}", KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT); } if (!config.containsKey(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS)) { config.put( KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS, String.valueOf(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT)); LOGGER.info( "{} set to default {} ms", KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS, KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT); } if (!config.containsKey(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS)) { config.put( KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, String.valueOf(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT)); LOGGER.info( "{} set to default {}", KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT); } if 
(!config.containsKey(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS)) { config.put( KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS, String.valueOf(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT)); LOGGER.info( "{} set to default {} ms", KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS, KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT); } } /** * Get a property from the config map * * @param config connector configuration * @param key name of the key to be retrieved * @return property value or null */ public static String getProperty(final Map config, final String key) { if (config.containsKey(key) && !config.get(key).isEmpty()) { return config.get(key); } else { return null; } } /* The allowed values for tombstone records. */ public enum BehaviorOnNullValues { // Default as the name suggests, would be a default behavior which will not filter null values. // This will put an empty JSON string in corresponding snowflake table. // Using this means we will fall back to old behavior before introducing this config. DEFAULT, // Ignore would filter out records which has null value, but a valid key. IGNORE, ; /* Validator to validate behavior.on.null.values which says whether kafka should keep null value records or ignore them while ingesting into snowflake table. 
*/ public static final ConfigDef.Validator VALIDATOR = new ConfigDef.Validator() { private final ConfigDef.ValidString validator = ConfigDef.ValidString.in(names()); @Override public void ensureValid(String name, Object value) { if (value instanceof String) { value = ((String) value).toLowerCase(Locale.ROOT); } validator.ensureValid(name, value); } // Overridden here so that ConfigDef.toEnrichedRst shows possible values correctly @Override public String toString() { return validator.toString(); } }; // All valid enum values public static String[] names() { BehaviorOnNullValues[] behaviors = values(); String[] result = new String[behaviors.length]; for (int i = 0; i < behaviors.length; i++) { result[i] = behaviors[i].toString(); } return result; } @Override public String toString() { return name().toLowerCase(Locale.ROOT); } } /* https://www.confluent.io/blog/kafka-connect-deep-dive-error-handling-dead-letter-queues/ */ public enum ErrorTolerance { /** Tolerate no errors. */ NONE, /** Tolerate all errors. */ ALL; /** * Validator to validate behavior.on.null.values which says whether kafka should keep null value * records or ignore them while ingesting into snowflake table. 
*/ public static final ConfigDef.Validator VALIDATOR = new ConfigDef.Validator() { private final ConfigDef.ValidString validator = ConfigDef.ValidString.in(ErrorTolerance.names()); @Override public void ensureValid(String name, Object value) { if (value instanceof String) { value = ((String) value).toLowerCase(Locale.ROOT); } validator.ensureValid(name, value); } @Override public String toString() { return validator.toString(); } }; /** * @return All valid enum values */ public static String[] names() { ErrorTolerance[] errorTolerances = values(); String[] result = new String[errorTolerances.length]; for (int i = 0; i < errorTolerances.length; i++) { result[i] = errorTolerances[i].toString(); } return result; } @Override public String toString() { return name().toLowerCase(Locale.ROOT); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/ConnectorConfigValidator.java ================================================ package com.snowflake.kafka.connector; import java.util.Map; public interface ConnectorConfigValidator { /** * Validate input configuration * * @param config configuration Map */ void validateConfig(Map config); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/Constants.java ================================================ package com.snowflake.kafka.connector; public final class Constants { public static final String DEFAULT_PIPE_NAME_SUFFIX = "-STREAMING"; public static final class KafkaConnectorConfigParams { // connector parameter list public static final String NAME = "name"; public static final String TOPICS = "topics"; public static final String SNOWFLAKE_TOPICS2TABLE_MAP = "snowflake.topic2table.map"; public static final String SNOWFLAKE_URL_NAME = "snowflake.url.name"; public static final String SNOWFLAKE_USER_NAME = "snowflake.user.name"; public static final String SNOWFLAKE_PRIVATE_KEY = "snowflake.private.key"; public static final String 
SNOWFLAKE_DATABASE_NAME = "snowflake.database.name"; public static final String SNOWFLAKE_SCHEMA_NAME = "snowflake.schema.name"; public static final String SNOWFLAKE_PRIVATE_KEY_PASSPHRASE = "snowflake.private.key.passphrase"; public static final String SNOWFLAKE_ROLE_NAME = "snowflake.role.name"; public static final String SNOWFLAKE_AUTHENTICATOR = "snowflake.authenticator"; public static final String SNOWFLAKE_OAUTH_CLIENT_ID = "snowflake.oauth.client.id"; public static final String SNOWFLAKE_OAUTH_CLIENT_SECRET = "snowflake.oauth.client.secret"; public static final String SNOWFLAKE_OAUTH_REFRESH_TOKEN = "snowflake.oauth.refresh.token"; public static final String SNOWFLAKE_OAUTH_TOKEN_ENDPOINT = "snowflake.oauth.token.endpoint"; public static final String SNOWFLAKE_JDBC_MAP = "snowflake.jdbc.map"; public static final String SNOWFLAKE_METADATA_CREATETIME = "snowflake.metadata.createtime"; public static final String SNOWFLAKE_METADATA_TOPIC = "snowflake.metadata.topic"; public static final String SNOWFLAKE_METADATA_OFFSET_AND_PARTITION = "snowflake.metadata.offset.and.partition"; public static final String SNOWFLAKE_METADATA_ALL = "snowflake.metadata.all"; public static final String SNOWFLAKE_METADATA_ALL_DEFAULT = "true"; public static final String SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME = "snowflake.streaming.metadata.connectorPushTime"; public static final boolean SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT = true; public static final String SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP = "snowflake.streaming.client.provider.override.map"; public static final String SNOWFLAKE_OPEN_CHANNEL_IO_THREADS = "snowflake.open.channel.io.threads"; public static final int SNOWFLAKE_OPEN_CHANNEL_IO_THREADS_DEFAULT = 50; // Validation public static final String SNOWFLAKE_VALIDATION = "snowflake.validation"; public static final String SNOWFLAKE_VALIDATION_DEFAULT = "server_side"; // Snowpipe Streaming Classic (SSv1) offset migration public static 
final String SNOWFLAKE_SSV1_OFFSET_MIGRATION = "snowflake.streaming.classic.offset.migration"; public static final String SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT = "skip"; public static final String SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME = "snowflake.streaming.classic.offset.migration.include.connector.name"; public static final boolean SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME_DEFAULT = false; // Caching public static final String CACHE_TABLE_EXISTS = "snowflake.cache.table.exists"; public static final boolean CACHE_TABLE_EXISTS_DEFAULT = true; public static final String CACHE_TABLE_EXISTS_EXPIRE_MS = "snowflake.cache.table.exists.expire.ms"; public static final long CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT = 5 * 60 * 1000L; public static final long CACHE_TABLE_EXISTS_EXPIRE_MS_MIN = 1L; public static final String CACHE_PIPE_EXISTS = "snowflake.cache.pipe.exists"; public static final boolean CACHE_PIPE_EXISTS_DEFAULT = true; public static final String CACHE_PIPE_EXISTS_EXPIRE_MS = "snowflake.cache.pipe.exists.expire.ms"; public static final long CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT = 5 * 60 * 1000L; public static final long CACHE_PIPE_EXISTS_EXPIRE_MS_MIN = 1L; public static final String BEHAVIOR_ON_NULL_VALUES = "behavior.on.null.values"; public static final String VALUE_CONVERTER_SCHEMAS_ENABLE = "value.converter.schemas.enable"; // metrics public static final String JMX_OPT = "jmx"; public static final boolean JMX_OPT_DEFAULT = true; public static final String ERRORS_TOLERANCE_CONFIG = "errors.tolerance"; public static final String ERRORS_TOLERANCE_DEFAULT = ConnectorConfigTools.ErrorTolerance.NONE.toString(); public static final String ERRORS_LOG_ENABLE_CONFIG = "errors.log.enable"; public static final boolean ERRORS_LOG_ENABLE_DEFAULT = false; public static final String ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG = "errors.deadletterqueue.topic.name"; public static final String ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_DEFAULT = ""; public 
static final String ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS = "enable.task.fail.on.authorization.errors"; public static final boolean ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT = false; // Compatibility validation public static final String SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC = "snowflake.streaming.validate.compatibility.with.classic"; public static final boolean SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC_DEFAULT = true; public static final String SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION = "snowflake.compatibility.enable.autogenerated.table.name.sanitization"; public static final boolean SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT = false; public static final String SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION = "snowflake.compatibility.enable.column.identifier.normalization"; public static final boolean SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT = false; public static final String SNOWFLAKE_ENABLE_SCHEMATIZATION = "snowflake.enable.schematization"; public static final boolean SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT = true; // MDC logging header public static final String ENABLE_MDC_LOGGING_CONFIG = "enable.mdc.logging"; public static final String ENABLE_MDC_LOGGING_DEFAULT = "false"; public static final String KEY_CONVERTER = "key.converter"; public static final String VALUE_CONVERTER = "value.converter"; public static final String VALUE_CONVERTER_SCHEMA_REGISTRY_URL = "value.converter.schema.registry.url"; // Proxy Info public static final String JVM_PROXY_HOST = "jvm.proxy.host"; public static final String JVM_PROXY_PORT = "jvm.proxy.port"; public static final String JVM_NON_PROXY_HOSTS = "jvm.nonProxy.hosts"; public static final String JVM_PROXY_USERNAME = "jvm.proxy.username"; public static final String JVM_PROXY_PASSWORD = "jvm.proxy.password"; // jvm proxy public static final String HTTP_USE_PROXY = 
"http.useProxy"; public static final String HTTPS_PROXY_HOST = "https.proxyHost"; public static final String HTTPS_PROXY_PORT = "https.proxyPort"; public static final String HTTP_PROXY_HOST = "http.proxyHost"; public static final String HTTP_PROXY_PORT = "http.proxyPort"; public static final String HTTP_NON_PROXY_HOSTS = "http.nonProxyHosts"; public static final String HTTPS_PROXY_USER = "https.proxyUser"; public static final String HTTPS_PROXY_PASSWORD = "https.proxyPassword"; public static final String HTTP_PROXY_USER = "http.proxyUser"; public static final String HTTP_PROXY_PASSWORD = "http.proxyPassword"; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/DefaultConnectorConfigValidator.java ================================================ package com.snowflake.kafka.connector; import static com.snowflake.kafka.connector.ConnectorConfigTools.BehaviorOnNullValues.VALIDATOR; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JMX_OPT; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY; import static com.snowflake.kafka.connector.Utils.isValidSnowflakeApplicationName; import static com.snowflake.kafka.connector.Utils.validateProxySettings; import com.google.common.collect.ImmutableMap; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import 
com.snowflake.kafka.connector.config.AuthenticatorType; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.streaming.StreamingConfigValidator; import java.util.HashMap; import java.util.Map; import org.apache.kafka.common.config.ConfigException; public class DefaultConnectorConfigValidator implements ConnectorConfigValidator { private static final KCLogger LOGGER = new KCLogger(DefaultConnectorConfigValidator.class.getName()); private final StreamingConfigValidator streamingConfigValidator; public DefaultConnectorConfigValidator(StreamingConfigValidator streamingConfigValidator) { this.streamingConfigValidator = streamingConfigValidator; } public void validateConfig(Map config) { Map invalidConfigParams = new HashMap(); // define the input parameters / keys in one place as static constants, // instead of using them directly // define the thresholds statically in one place as static constants, // instead of using the values directly // unique name of this connector instance String connectorName = config.getOrDefault(KafkaConnectorConfigParams.NAME, ""); if (connectorName.isEmpty() || !isValidSnowflakeApplicationName(connectorName)) { invalidConfigParams.put( KafkaConnectorConfigParams.NAME, Utils.formatString( "{} is empty or invalid. It should match Snowflake object identifier syntax. 
Please" + " see the documentation.", KafkaConnectorConfigParams.NAME)); } if (config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP)) { try { TopicToTableParser.parse(config.get(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP)); } catch (IllegalArgumentException e) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, e.getMessage()); } } // sanity check if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME, Utils.formatString( "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)); } // sanity check if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, Utils.formatString( "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)); } AuthenticatorType authenticator; try { authenticator = AuthenticatorType.fromConfig( config.getOrDefault( KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR, AuthenticatorType.SNOWFLAKE_JWT.toConfigValue())); } catch (IllegalArgumentException e) { invalidConfigParams.put(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR, e.getMessage()); authenticator = null; } if (authenticator != null) { switch (authenticator) { case OAUTH: validateOAuthConfig(config, invalidConfigParams); break; case SNOWFLAKE_JWT: if (!config.containsKey(SNOWFLAKE_PRIVATE_KEY)) { invalidConfigParams.put( SNOWFLAKE_PRIVATE_KEY, Utils.formatString("{} cannot be empty", SNOWFLAKE_PRIVATE_KEY)); } break; default: throw new IllegalStateException("Unhandled authenticator type: " + authenticator); } } if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, Utils.formatString( "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME)); } if 
(!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, Utils.formatString("{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME)); } if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, Utils.formatString( "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME)); } // jvm proxy settings invalidConfigParams.putAll(validateProxySettings(config)); if (config.containsKey(BEHAVIOR_ON_NULL_VALUES)) { try { // This throws an exception if config value is invalid. VALIDATOR.ensureValid(BEHAVIOR_ON_NULL_VALUES, config.get(BEHAVIOR_ON_NULL_VALUES)); } catch (ConfigException exception) { invalidConfigParams.put( BEHAVIOR_ON_NULL_VALUES, Utils.formatString( "Kafka config: {} error: {}", BEHAVIOR_ON_NULL_VALUES, exception.getMessage())); } } if (config.containsKey(JMX_OPT)) { if (!(config.get(JMX_OPT).equalsIgnoreCase("true") || config.get(JMX_OPT).equalsIgnoreCase("false"))) { invalidConfigParams.put( JMX_OPT, Utils.formatString("Kafka config: {} should either be true or false", JMX_OPT)); } } validateCacheConfig(config, invalidConfigParams); validateCompatibilitySettings(config, invalidConfigParams); // Check all config values for ingestion method == IngestionMethodConfig.SNOWPIPE_STREAMING invalidConfigParams.putAll(streamingConfigValidator.validate(config)); // logs and throws exception if there are invalid params handleInvalidParameters(ImmutableMap.copyOf(invalidConfigParams)); } private void validateOAuthConfig( Map config, Map invalidConfigParams) { String clientId = config.getOrDefault(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID, ""); if (clientId.isEmpty()) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID, Utils.formatString( "{} must be non-empty when using oauth authenticator", 
KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID)); } String clientSecret = config.getOrDefault(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET, ""); if (clientSecret.isEmpty()) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET, Utils.formatString( "{} must be non-empty when using oauth authenticator", KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET)); } } private void validateCompatibilitySettings( Map config, Map invalidConfigParams) { boolean validateCompat = Boolean.parseBoolean( config.getOrDefault( KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC, String.valueOf( KafkaConnectorConfigParams .SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC_DEFAULT))); if (!validateCompat) { return; } String optOutHint = " To skip this check, set " + KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + "=false."; // snowflake.validation must be client_side String validation = config.getOrDefault( KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT); if (!"client_side".equals(validation)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + " is enabled but " + KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION + " is set to '" + validation + "'. For KC v3 compatibility, set " + KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION + "=client_side." 
+ optOutHint); } // snowflake.compatibility.enable.column.identifier.normalization must be true String columnNormalization = config.getOrDefault( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, String.valueOf( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT)); if (!"true".equalsIgnoreCase(columnNormalization)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + " is enabled but " + KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION + " is set to '" + columnNormalization + "'. For KC v3 compatibility, set " + KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION + "=true." + optOutHint); } // snowflake.compatibility.enable.autogenerated.table.name.sanitization must be true String tableSanitization = config.getOrDefault( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION, String.valueOf( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT)); if (!"true".equalsIgnoreCase(tableSanitization)) { invalidConfigParams.put( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + " is enabled but " + KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION + " is set to '" + tableSanitization + "'. For KC v3 compatibility, set " + KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION + "=true." 
+ optOutHint); } // snowflake.enable.schematization must be explicitly set (any value) if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + " is enabled but " + KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION + " is not explicitly set. The default changed from false (KC v3) to true (KC v4)." + " Please set " + KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION + " explicitly to confirm your intended behavior." + optOutHint); } // snowflake.streaming.classic.offset.migration must be explicitly set if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + " is enabled but " + KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION + " is not explicitly set. If migrating from KC v3, set it to 'strict' or" + " 'best_effort' so that committed offsets from the previous connector version are" + " carried over. If migrating from file-based Snowpipe, set it to 'skip'." + optOutHint); } // snowflake.streaming.classic.offset.migration.include.connector.name is only relevant // when offset migration is active (strict or best_effort), not when skipped. 
String offsetMigration = config.getOrDefault( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT); boolean offsetMigrationActive = "strict".equalsIgnoreCase(offsetMigration) || "best_effort".equalsIgnoreCase(offsetMigration); if (offsetMigrationActive && !config.containsKey( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME)) { invalidConfigParams.put( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC + " is enabled but " + KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME + " is not explicitly set. Whether the SSv1 channel name included the connector" + " name depends on the KC v3 configuration that was used. Please set " + KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME + " explicitly to match how the previous connector was configured." 
+ optOutHint); } } private void validateCacheConfig( Map config, Map invalidConfigParams) { // Validate table exists cache boolean flag if (config.containsKey(CACHE_TABLE_EXISTS)) { String value = config.get(CACHE_TABLE_EXISTS); if (!isValidBooleanString(value)) { invalidConfigParams.put( CACHE_TABLE_EXISTS, Utils.formatString( "{} must be either 'true' or 'false', got: {}", CACHE_TABLE_EXISTS, value)); } } // Validate table exists cache expiration if (config.containsKey(CACHE_TABLE_EXISTS_EXPIRE_MS)) { try { long value = Long.parseLong(config.get(CACHE_TABLE_EXISTS_EXPIRE_MS)); if (value <= 0) { invalidConfigParams.put( CACHE_TABLE_EXISTS_EXPIRE_MS, Utils.formatString( "{} must be a positive number, got: {}", CACHE_TABLE_EXISTS_EXPIRE_MS, value)); } } catch (NumberFormatException e) { invalidConfigParams.put( CACHE_TABLE_EXISTS_EXPIRE_MS, Utils.formatString( "{} must be a valid long number, got: {}", CACHE_TABLE_EXISTS_EXPIRE_MS, config.get(CACHE_TABLE_EXISTS_EXPIRE_MS))); } } // Validate pipe exists cache boolean flag if (config.containsKey(CACHE_PIPE_EXISTS)) { String value = config.get(CACHE_PIPE_EXISTS); if (!isValidBooleanString(value)) { invalidConfigParams.put( CACHE_PIPE_EXISTS, Utils.formatString( "{} must be either 'true' or 'false', got: {}", CACHE_PIPE_EXISTS, value)); } } // Validate pipe exists cache expiration if (config.containsKey(CACHE_PIPE_EXISTS_EXPIRE_MS)) { try { long value = Long.parseLong(config.get(CACHE_PIPE_EXISTS_EXPIRE_MS)); if (value <= 0) { invalidConfigParams.put( CACHE_PIPE_EXISTS_EXPIRE_MS, Utils.formatString( "{} must be a positive number, got: {}", CACHE_PIPE_EXISTS_EXPIRE_MS, value)); } } catch (NumberFormatException e) { invalidConfigParams.put( CACHE_PIPE_EXISTS_EXPIRE_MS, Utils.formatString( "{} must be a valid long number, got: {}", CACHE_PIPE_EXISTS_EXPIRE_MS, config.get(CACHE_PIPE_EXISTS_EXPIRE_MS))); } } } private static boolean isValidBooleanString(String value) { return "true".equalsIgnoreCase(value) || 
"false".equalsIgnoreCase(value); } private void handleInvalidParameters(ImmutableMap invalidConfigParams) { // log all invalid params and throw exception if (!invalidConfigParams.isEmpty()) { String invalidParamsMessage = ""; for (String invalidKey : invalidConfigParams.keySet()) { String invalidValue = invalidConfigParams.get(invalidKey); String errorMessage = Utils.formatString( "Config value '{}' is invalid. Error message: '{}'", invalidKey, invalidValue); invalidParamsMessage += errorMessage + "\n"; } LOGGER.error("Invalid config: " + invalidParamsMessage); throw SnowflakeErrors.ERROR_0001.getException(invalidParamsMessage); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/SemanticVersion.java ================================================ package com.snowflake.kafka.connector; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; /** Represents a parsed semantic version. */ public class SemanticVersion implements Comparable { private final int major; private final int minor; private final int patch; private final boolean isReleaseCandidate; private final String originalVersion; public SemanticVersion(String version) { this.originalVersion = version; // Pattern to match versions like "3.1.0" or "4.0.0-rc" or "4.0.0-RC1" Pattern pattern = Pattern.compile("(\\d+)\\.(\\d+)\\.(\\d+)(?:-[rR][cC]\\d*)?"); Matcher matcher = pattern.matcher(version); if (!matcher.find()) { throw new IllegalArgumentException("Invalid version format: " + version); } this.major = Integer.parseInt(matcher.group(1)); this.minor = Integer.parseInt(matcher.group(2)); this.patch = Integer.parseInt(matcher.group(3)); this.isReleaseCandidate = version.toLowerCase().contains("-rc"); } public String originalVersion() { return originalVersion; } public boolean isReleaseCandidate() { return isReleaseCandidate; } public int major() { return major; } public int minor() { return minor; } public int patch() { 
return patch; } @Override public int compareTo(SemanticVersion other) { if (this.major != other.major) { return Integer.compare(this.major, other.major); } if (this.minor != other.minor) { return Integer.compare(this.minor, other.minor); } return Integer.compare(this.patch, other.patch); } @Override public boolean equals(Object obj) { if (!(obj instanceof SemanticVersion)) { return false; } SemanticVersion other = (SemanticVersion) obj; return this.major == other.major && this.minor == other.minor && this.patch == other.patch; } @Override public int hashCode() { return Objects.hash(major, minor, patch); } @Override public String toString() { return originalVersion; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/SnowflakeSinkTask.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ package com.snowflake.kafka.connector; import com.google.common.annotations.VisibleForTesting; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.SnowflakeSinkService; import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter; import com.snowflake.kafka.connector.internal.metrics.SnowflakeSinkTaskMetrics; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.SnowflakeSinkServiceV2; import com.snowflake.kafka.connector.internal.streaming.telemetry.PeriodicTelemetryReporter; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeoutException; import java.util.function.Supplier; import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.RetriableException; import org.apache.kafka.connect.sink.ErrantRecordReporter; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.sink.SinkTask; /** * SnowflakeSinkTask implements SinkTask for Kafka Connect framework. * *

Expects configuration from SnowflakeStreamingSinkConnector * *

Creates sink service instance, takes records loaded from those Kafka partitions and ingests to * Snowflake via Sink service */ public class SnowflakeSinkTask extends SinkTask { private static final long WAIT_TIME = 5 * 1000; // 5 sec private static final int REPEAT_TIME = 12; // 60 sec // the dynamic logger is intended to be attached per task instance. the instance id will be set // during task start, however if it is not set, it falls back to the static logger private static final KCLogger STATIC_LOGGER = new KCLogger(SnowflakeSinkTask.class.getName() + "_STATIC"); private KCLogger DYNAMIC_LOGGER; private volatile SnowflakeSinkService sink = null; // snowflake JDBC connection provides methods to interact with user's // snowflake // account and execute queries private SnowflakeConnectionService conn = null; // tracks number of tasks the config wants to create private String taskConfigId = "-1"; private long taskStartTime; private final SnowflakeSinkTaskAuthorizationExceptionTracker authorizationExceptionTracker = new SnowflakeSinkTaskAuthorizationExceptionTracker(); // Stores channel error exception detected in preCommit to fail on next put() call private volatile SnowflakeKafkaConnectorException channelErrorToFailOn = null; // Periodic telemetry reporter for channel status private PeriodicTelemetryReporter telemetryReporter = null; // Task-level JMX metrics (lifecycle, throughput, duration) private TaskMetrics taskMetrics = TaskMetrics.noop(); /** default constructor, invoked by kafka connect framework */ public SnowflakeSinkTask() { DYNAMIC_LOGGER = new KCLogger(this.getClass().getName()); } @VisibleForTesting public SnowflakeSinkTask( SnowflakeSinkService service, SnowflakeConnectionService connectionService) { DYNAMIC_LOGGER = new KCLogger(this.getClass().getName()); this.sink = service; this.conn = connectionService; } private SnowflakeConnectionService getConnection() { try { waitFor(() -> conn != null); } catch (Exception e) { throw 
SnowflakeErrors.ERROR_5013.getException(); } return conn; } /** * Return an instance of SnowflakeConnection if it was set previously by calling Start(). Else, * return an empty * * @return Optional of SnowflakeConnectionService */ public Optional getSnowflakeConnection() { return Optional.ofNullable(getConnection()); } protected SnowflakeSinkService getSink() { try { waitFor(() -> sink != null && !sink.isClosed()); } catch (Exception e) { throw SnowflakeErrors.ERROR_5014.getException(); } return sink; } /** * start method handles configuration parsing and one-time setup of the task. loads configuration * * @param parsedConfig - has the configuration settings */ @Override public void start(final Map parsedConfig) { this.DYNAMIC_LOGGER.info("starting task..."); final long startNanos = System.nanoTime(); // Parse raw config once into typed structure; validates required fields and applies defaults final SinkTaskConfig config = SinkTaskConfig.from(parsedConfig); // get task id and start time this.taskStartTime = System.currentTimeMillis(); this.taskConfigId = config.getTaskId(); this.authorizationExceptionTracker.updateStateOnTaskStart(parsedConfig); // enable jvm proxy Utils.enableJVMProxy(parsedConfig); KafkaRecordErrorReporter kafkaRecordErrorReporter = createKafkaRecordErrorReporter(); conn = SnowflakeConnectionServiceFactory.builder() .setProperties(config) .setTaskID(this.taskConfigId) .build(); if (this.sink != null) { this.sink.closeAll(); } String connectorName = config.getConnectorName(); Optional metricsJmxReporter = config.isJmxEnabled() ? 
Optional.of( new MetricsJmxReporter(new com.codahale.metrics.MetricRegistry(), connectorName)) : Optional.empty(); // Initialize task-level metrics (real JMX or noop depending on config) this.taskMetrics = metricsJmxReporter .map( reporter -> new SnowflakeSinkTaskMetrics( connectorName, this.taskConfigId, reporter, () -> (int) StreamingClientPools.getClientCountForTask( connectorName, this.taskConfigId))) .orElse(TaskMetrics.noop()); this.taskMetrics.recordStartDuration(System.nanoTime() - startNanos); this.sink = new SnowflakeSinkServiceV2( conn, config, kafkaRecordErrorReporter, this.context, metricsJmxReporter, this.taskMetrics); // Initialize and start periodic telemetry reporter for channel status this.telemetryReporter = new PeriodicTelemetryReporter( conn.getTelemetryClient(), sink::getPartitionChannels, config); this.telemetryReporter.start(); DYNAMIC_LOGGER.info( "task started, execution time: {} milliseconds", this.taskConfigId, getDurationFromStartMs(this.taskStartTime)); } /** * stop method is invoked only once outstanding calls to other methods have completed. e.g. after * current put, and a final preCommit has completed. * *

Note that calling this method does not perform synchronous cleanup in Snowpipe based * implementation */ @Override public void stop() { this.DYNAMIC_LOGGER.info("stopping task {}", this.taskConfigId); // Stop telemetry reporter first if (this.telemetryReporter != null) { this.telemetryReporter.stop(); } this.taskMetrics.unregister(); if (this.sink != null) { this.sink.stop(); } this.DYNAMIC_LOGGER.info( "task stopped, total task runtime: {} milliseconds", getDurationFromStartMs(this.taskStartTime)); } /** * init ingestion task in Sink service * * @param partitions - The list of all partitions that are now assigned to the task */ @Override public void open(final Collection partitions) { long startTime = System.currentTimeMillis(); try (TaskMetrics.TimingContext ignored = taskMetrics.timeOpen()) { this.sink.startPartitions(partitions); taskMetrics.incOpenCount(); taskMetrics.setAssignedPartitions(partitions.size()); } this.DYNAMIC_LOGGER.info( "task opened with {} partitions, execution time: {} milliseconds", partitions.size(), getDurationFromStartMs(startTime)); } /** * Closes sink service * *

Closes all running task because the parameter of open function contains all partition info * but not only the new partition * * @param partitions - The list of all partitions that were assigned to the task */ @Override public void close(final Collection partitions) { long startTime = System.currentTimeMillis(); try (TaskMetrics.TimingContext ignored = taskMetrics.timeClose()) { this.DYNAMIC_LOGGER.info( "closing task {} with {} partitions", this.taskConfigId, partitions.size()); if (this.sink != null) { this.sink.close(partitions); } taskMetrics.incCloseCount(); taskMetrics.setAssignedPartitions(0); } this.DYNAMIC_LOGGER.info( "task closed, execution time: {} milliseconds", this.taskConfigId, getDurationFromStartMs(startTime)); } /** * ingest records to Snowflake * * @param records - collection of records from kafka topic/partitions for this connector */ @Override public void put(final Collection records) { this.authorizationExceptionTracker.throwExceptionIfAuthorizationFailed(); // Check for channel errors detected in preCommit and fail the task if (this.channelErrorToFailOn != null) { SnowflakeKafkaConnectorException error = this.channelErrorToFailOn; this.channelErrorToFailOn = null; // Clear so we don't throw again on restart throw new ConnectException(error.getMessage(), error); } final long recordSize = records.size(); DYNAMIC_LOGGER.debug("Calling PUT with {} records", recordSize); final long startTime = System.currentTimeMillis(); try (TaskMetrics.TimingContext ignored = taskMetrics.timePut()) { getSink().insert(records); taskMetrics.markPutRecords(recordSize); } logWarningForPutAndPrecommit( startTime, Utils.formatString("Executed PUT with {} records", recordSize), false); } /** * Sync committed offsets * * @param offsets - the current map of offsets as of the last call to put * @return an empty map if Connect-managed offset commit is not desired, otherwise a map of * offsets by topic-partition that are safe to commit. 
If we return the same offsets that was * passed in, Kafka Connect assumes that all offsets that are already passed to put() are safe * to commit. * @throws RetriableException when meet any issue during processing */ @Override public Map preCommit( Map offsets) throws RetriableException { DYNAMIC_LOGGER.info("Precommit started for {} partitions", offsets.size()); if (DYNAMIC_LOGGER.isDebugEnabled()) { DYNAMIC_LOGGER.debug( "Precommit partitions and offsets: {}", Arrays.toString(offsets.entrySet().toArray())); } long startTime = System.currentTimeMillis(); try (TaskMetrics.TimingContext ignored = taskMetrics.timePreCommit()) { // return an empty map means that offset commitment is not desired if (sink == null || sink.isClosed()) { this.DYNAMIC_LOGGER.warn( "sink not initialized or closed before preCommit", this.taskConfigId); return new HashMap<>(); } else if (sink.getPartitionCount() == 0) { this.DYNAMIC_LOGGER.warn("no partition is assigned", this.taskConfigId); return new HashMap<>(); } Map committedOffsets = new HashMap<>(); try { Map batchOffsets = sink.getCommittedOffsets(offsets.keySet()); batchOffsets.forEach( (topicPartition, offset) -> committedOffsets.put(topicPartition, new OffsetAndMetadata(offset))); } catch (SnowflakeKafkaConnectorException e) { // It's OK to just log the error since preCommit can retry. 
this.authorizationExceptionTracker.reportPrecommitException(e); this.DYNAMIC_LOGGER.error("PreCommit error: {} ", e.getMessage()); // Channel error count exceeded - store to fail on next put() call if (e.checkErrorCode(SnowflakeErrors.ERROR_5030)) { this.channelErrorToFailOn = e; } } catch (Exception e) { this.authorizationExceptionTracker.reportPrecommitException(e); this.DYNAMIC_LOGGER.error("PreCommit error: {} ", e.getMessage()); } logWarningForPutAndPrecommit( startTime, Utils.formatString( "Executed PRECOMMIT on all {} partitions, safe to commit {} partitions", offsets.size(), committedOffsets.size()), true); return committedOffsets; } } /** * @return connector version */ @Override public String version() { return Utils.VERSION; } /** * wait for specific status * * @param func status checker */ private static void waitFor(Supplier func) throws InterruptedException, TimeoutException { for (int i = 0; i < REPEAT_TIME; i++) { if (func.get()) { return; } Thread.sleep(WAIT_TIME); } throw new TimeoutException(); } private static long getDurationFromStartMs(long startTime) { final long currTime = System.currentTimeMillis(); return currTime - startTime; } void logWarningForPutAndPrecommit(long startTime, String logContent, boolean isPrecommit) { final long executionTimeMs = getDurationFromStartMs(startTime); String logExecutionContent = Utils.formatString("{}, executionTime: {} ms", logContent, executionTimeMs); if (executionTimeMs > 300000) { // This won't be frequently printed. It is vary rare to have execution greater than 300 // seconds. // But having this warning helps customer to debug their Kafka Connect config. this.DYNAMIC_LOGGER.warn( "{}. Expected call to be under {} ms. 
If there is CommitFailedException in the log or" + " there is duplicated records, refer to this link for solution: " + "https://docs.snowflake.com/en/user-guide/kafka-connector-ts.html#resolving-specific-issues", logExecutionContent, executionTimeMs); } else { if (isPrecommit) { this.DYNAMIC_LOGGER.info(logExecutionContent); } else { this.DYNAMIC_LOGGER.debug(logExecutionContent); } } } /* Used to report a record back to DLQ if error tolerance is specified */ private KafkaRecordErrorReporter createKafkaRecordErrorReporter() { KafkaRecordErrorReporter result = noOpKafkaRecordErrorReporter(); if (context != null) { try { ErrantRecordReporter errantRecordReporter = context.errantRecordReporter(); if (errantRecordReporter != null) { result = (record, error) -> { try { // Blocking this until record is delivered to DLQ DYNAMIC_LOGGER.debug( "Sending Sink Record to DLQ with recordOffset:{}, partition:{}", record.kafkaOffset(), record.kafkaPartition()); errantRecordReporter.report(record, error).get(); } catch (InterruptedException | ExecutionException e) { final String errMsg = "ERROR reporting records to ErrantRecordReporter"; this.DYNAMIC_LOGGER.error(errMsg, e); throw new ConnectException(errMsg, e); } }; } else { this.DYNAMIC_LOGGER.info("Errant record reporter is not configured."); } } catch (NoClassDefFoundError | NoSuchMethodError e) { // Will occur in Connect runtimes earlier than 2.6 this.DYNAMIC_LOGGER.info( "Kafka versions prior to 2.6 do not support the errant record reporter."); } } else { DYNAMIC_LOGGER.warn("SinkTaskContext is not set"); } return result; } /** Blocks until all partition channels have finished initialization. 
*/ @VisibleForTesting public void awaitInitialization() { this.getSink().awaitInitialization(); } /** * For versions older than 2.6 * * @see * link */ @VisibleForTesting static KafkaRecordErrorReporter noOpKafkaRecordErrorReporter() { return (record, e) -> { STATIC_LOGGER.warn( "DLQ Kafka Record Error Reporter is not set, requires Kafka Version to be >= 2.6"); }; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/SnowflakeSinkTaskAuthorizationExceptionTracker.java ================================================ package com.snowflake.kafka.connector; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT; import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_1005; import java.util.Map; /** * When the user rotates Snowflake key that is stored in an external file the Connector hangs and * does not mark its tasks as failed. To fix this corner case we need to track the authorization * exception thrown during preCommit() and stop tasks during put(). * *

Note that exceptions thrown during preCommit() are swallowed by Kafka Connect and will not
 * cause task failure.
 */
public class SnowflakeSinkTaskAuthorizationExceptionTracker {

  private static final String AUTHORIZATION_EXCEPTION_MESSAGE = "Authorization failed after retry";

  private boolean authorizationTaskFailureEnabled;
  private boolean authorizationErrorReported;

  /** Starts with task failure enabled and no authorization error recorded. */
  public SnowflakeSinkTaskAuthorizationExceptionTracker() {
    this.authorizationTaskFailureEnabled = true;
    this.authorizationErrorReported = false;
  }

  /**
   * Re-reads the fail-on-authorization-error switch from the task configuration, falling back to
   * its declared default when the key is absent.
   *
   * @param taskConfig raw task configuration
   */
  public void updateStateOnTaskStart(Map<String, String> taskConfig) {
    String configured =
        taskConfig.getOrDefault(
            ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS,
            Boolean.toString(ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT));
    authorizationTaskFailureEnabled = Boolean.parseBoolean(configured);
  }

  /**
   * Check if the thrown exception is related to authorization
   *
   * @param ex - any exception that occurred during preCommit
   */
  public void reportPrecommitException(Exception ex) {
    // getMessage() may legitimately be null (e.g. a bare NullPointerException); dereferencing it
    // here would throw from inside preCommit's own error handling.
    String message = ex == null ? null : ex.getMessage();
    if (message != null && message.contains(AUTHORIZATION_EXCEPTION_MESSAGE)) {
      authorizationErrorReported = true;
    }
  }

  /** Throw exception if authorization has failed before */
  public void throwExceptionIfAuthorizationFailed() {
    if (authorizationTaskFailureEnabled && authorizationErrorReported) {
      throw ERROR_1005.getException();
    }
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/SnowflakeStreamingSinkConnector.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.
See the License for the * specific language governing permissions and limitations * under the License. */ package com.snowflake.kafka.connector; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.config.ConnectorConfigDefinition; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.apache.kafka.common.config.Config; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.connect.connector.Task; import org.apache.kafka.connect.sink.SinkConnector; /** * SnowflakeStreamingSinkConnector implements SinkConnector for Kafka Connect framework. * *

Expected configuration: including topic names, partition numbers, snowflake connection info * and credentials info * *

Creates snowflake internal stages, snowflake tables provides configuration to SinkTasks * running on Kafka Connect Workers. */ public class SnowflakeStreamingSinkConnector extends SinkConnector { // create logger without correlationId for now private static final KCLogger LOGGER = new KCLogger(SnowflakeStreamingSinkConnector.class.getName()); private Map config; // connector configuration, provided by // user through kafka connect framework // SnowflakeJDBCWrapper provides methods to interact with user's snowflake // account and executes queries private SnowflakeConnectionService conn; // Snowflake Telemetry provides methods to report usage statistics private SnowflakeTelemetryService telemetryClient; private long connectorStartTime; // Kafka Connect starts sink tasks without waiting for setup in // SnowflakeStreamingSinkConnector to finish. // This causes race conditions for: config validation, tables and stages // creation, etc. // Using setupComplete to synchronize private boolean setupComplete; private final ConnectorConfigValidator connectorConfigValidator = new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator()); /** No-Arg constructor. Required by Kafka Connect framework */ public SnowflakeStreamingSinkConnector() { setupComplete = false; } /** * start method will only be called on a clean connector, i.e. it has either just been * instantiated and initialized or stop () has been invoked. loads configuration and validates. * *

Creates snowflake internal stages and snowflake tables * * @param parsedConfig has the configuration settings */ @Override public void start(final Map parsedConfig) { LOGGER.info("SnowflakeStreamingSinkConnector:starting..."); Utils.checkConnectorVersion(); setupComplete = false; connectorStartTime = System.currentTimeMillis(); config = new HashMap<>(parsedConfig); ConnectorConfigTools.setDefaultValues(config); // modify invalid connector name Utils.convertAppName(config); connectorConfigValidator.validateConfig(config); // enable mdc logging if needed KCLogger.toggleGlobalMdcLoggingContext( Boolean.parseBoolean( config.getOrDefault( KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_CONFIG, KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_DEFAULT))); // enable proxy Utils.enableJVMProxy(config); // create a persisted connection, and validate snowflake connection // config as a side effect conn = SnowflakeConnectionServiceFactory.builder().setProperties(config).build(); telemetryClient = conn.getTelemetryClient(); telemetryClient.reportKafkaConnectStart(connectorStartTime, this.config); setupComplete = true; LOGGER.info("SnowflakeStreamingSinkConnector:started"); } /** * Stop method will be called to stop a connector, cleans up snowflake internal stages, after * making sure that there are no pending files to ingest. * *

Cleans up pipes, after making sure there are no pending files to ingest. * *

Also ensures that there are no leaked stages, no leaked staged files, and no leaked pipes */ @Override public void stop() { LOGGER.info("SnowflakeStreamingSinkConnector connector stopping..."); setupComplete = false; if (telemetryClient != null) { telemetryClient.reportKafkaConnectStop(connectorStartTime); } } /** * @return Sink task class */ @Override public Class taskClass() { return SnowflakeSinkTask.class; } /** * taskConfigs method returns a set of configurations for SinkTasks based on the current * configuration, producing at most 'maxTasks' configurations * * @param maxTasks maximum number of SinkTasks for this instance of * SnowflakeStreamingSinkConnector * @return a list containing 'maxTasks' copies of the configuration */ @Override public List> taskConfigs(final int maxTasks) { LOGGER.info("taskConfigs called with maxTasks: {}", maxTasks); // wait for setup to complete int counter = 0; while (counter < 120) // poll for 120*5 seconds (10 mins) maximum { if (setupComplete) { break; } else { counter++; try { LOGGER.info("Sleeping 5000ms to allow setup to " + "complete."); Thread.sleep(5000); } catch (InterruptedException ex) { LOGGER.warn("Waiting for setup to complete got " + "interrupted"); } } } if (!setupComplete) { throw SnowflakeErrors.ERROR_5007.getException(telemetryClient); } List> taskConfigs = new ArrayList<>(maxTasks); for (int i = 0; i < maxTasks; i++) { Map conf = new HashMap<>(config); conf.put(Utils.TASK_ID, i + ""); taskConfigs.add(conf); } return taskConfigs; } /** * @return ConfigDef with original configuration properties */ @Override public ConfigDef config() { return ConnectorConfigDefinition.getConfig(); } @Override public Config validate(Map connectorConfigs) { LOGGER.debug("Validating connector Config: Start"); // cross-fields validation here Config result = super.validate(connectorConfigs); // Validate ensure that url, user, db, schema, private key exist in config and is not empty // and there is no single field validation error if 
(!Utils.isSingleFieldValid(result)) { return result; } // Verify proxy config is valid Map invalidProxyParams = Utils.validateProxySettings(connectorConfigs); for (String invalidKey : invalidProxyParams.keySet()) { Utils.updateConfigErrorMessage(result, invalidKey, invalidProxyParams.get(invalidKey)); } // If private key or private key passphrase is // provided through a config provider, skip validation if (isUsingConfigProviderForPrivateKey(connectorConfigs)) { return result; } // We don't validate name, since it is not included in the return value // so just put a test connector here connectorConfigs.put(KafkaConnectorConfigParams.NAME, "TEST_CONNECTOR"); SnowflakeConnectionService testConnection; try { testConnection = SnowflakeConnectionServiceFactory.builder().setProperties(connectorConfigs).build(); } catch (SnowflakeKafkaConnectorException e) { LOGGER.error( "Validate: Error connecting to snowflake:{}, errorCode:{}", e.getMessage(), e.getCode()); // Since url, user, db, schema, exist in config and is not empty, // the exceptions here would be invalid URL, and cannot connect, and no private key switch (e.getCode()) { case "1001": // Could be caused by invalid url, invalid user name, invalid password. 
Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, ": Cannot connect to Snowflake"); Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, ": Cannot connect to Snowflake"); Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, ": Cannot connect to Snowflake"); break; case "0007": Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, " is not a valid snowflake url"); break; case "0018": Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, " is not valid"); Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, " is not valid"); break; case "0013": Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, " must be non-empty"); break; case "0002": Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, " must be a valid PEM RSA private key"); break; default: throw e; // Shouldn't reach here, so crash. 
} return result; } try { testConnection.databaseExists( connectorConfigs.get(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)); } catch (SnowflakeKafkaConnectorException e) { LOGGER.error("Validate Error msg:{}, errorCode:{}", e.getMessage(), e.getCode()); if (e.getCode().equals("2001")) { Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME, " database does not exist"); } else { throw e; } return result; } try { testConnection.schemaExists( connectorConfigs.get(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)); } catch (SnowflakeKafkaConnectorException e) { LOGGER.error("Validate Error msg:{}, errorCode:{}", e.getMessage(), e.getCode()); if (e.getCode().equals("2001")) { Utils.updateConfigErrorMessage( result, KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, " schema does not exist"); } else { throw e; } return result; } LOGGER.info("Validated config with no error"); return result; } private static boolean isUsingConfigProviderForPrivateKey(Map connectorConfigs) { Pattern configProviderPrefix = Pattern.compile("[$][{][a-zA-Z]+:"); return configProviderPrefix .matcher( connectorConfigs.getOrDefault(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, "")) .find() || configProviderPrefix .matcher( connectorConfigs.getOrDefault( KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, "")) .find(); } /** * @return connector version */ @Override public String version() { return Utils.VERSION; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/TopicToTableParser.java ================================================ package com.snowflake.kafka.connector; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; public class TopicToTableParser { private final String input; private int index; TopicToTableParser(String input) { this.input = input; } public static Map parse(String input) { List entries = new 
TopicToTableParser(input).parseEntries(); Map result = new LinkedHashMap<>(); for (Entry entry : entries) { String newTopic = entry.getTopic(); if (result.containsKey(newTopic)) { throw new IllegalArgumentException("Duplicate topic: " + newTopic); } // Check that regexes don't overlap. for (String topic : result.keySet()) { if (topic.matches(newTopic) || newTopic.matches(topic)) { throw new IllegalArgumentException( "Topic regexes cannot overlap. Overlapping regexes: " + topic + ", " + newTopic); } } result.put(newTopic, entry.getTable()); } return result; } public List parseEntries() { List entries = new ArrayList<>(); while (true) { skipWhitespace(); if (isAtEnd()) { return entries; } String topic = parseToken(false); skipWhitespace(); expect(':'); skipWhitespace(); String table = parseToken(true); entries.add(new Entry(topic, table)); skipWhitespace(); if (isAtEnd()) { return entries; } expect(','); } } private String parseToken(boolean uppercaseIfUnquoted) { if (isAtEnd()) { throw error("Expected token, found end of input"); } if (input.charAt(index) == '"') { return parseQuotedToken(); } if (uppercaseIfUnquoted) { return parseUnquotedToken().toUpperCase(Locale.ROOT); } else { return parseUnquotedToken(); } } private String parseQuotedToken() { index++; // opening quote int textStart = index; while (!isAtEnd() && input.charAt(index) != '"') { index++; } if (isAtEnd()) { throw error("Unterminated quoted token"); } if (index == textStart) { throw error("Empty quoted token"); } String text = input.substring(textStart, index); index++; // closing quote return text; } private String parseUnquotedToken() { int start = index; while (!isAtEnd()) { char character = input.charAt(index); if (Character.isWhitespace(character) || character == ':' || character == ',' || character == '"') { break; } index++; } if (index == start) { throw error("Expected token"); } return input.substring(start, index); } private void skipWhitespace() { while (!isAtEnd() && 
Character.isWhitespace(input.charAt(index))) { index++; } } private void expect(char expectedCharacter) { if (isAtEnd() || input.charAt(index) != expectedCharacter) { throw error("Expected '" + expectedCharacter + "'"); } index++; } private boolean isAtEnd() { return index >= input.length(); } private IllegalArgumentException error(String message) { StringBuilder sb = new StringBuilder(); sb.append(message); sb.append(" at position "); sb.append(index); sb.append(": \""); sb.append(input); sb.append("\". Format: :,:\"\",..."); return new IllegalArgumentException(sb.toString()); } public static final class Entry { private final String topic; private final String table; private Entry(String topic, String table) { this.topic = topic; this.table = table; } public String getTopic() { return topic; } public String getTable() { return table; } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/Utils.java ================================================ /* * Copyright (c) 2024 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ package com.snowflake.kafka.connector; import com.google.common.collect.ImmutableMap; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.Authenticator; import java.net.PasswordAuthentication; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.kafka.common.config.Config; import org.apache.kafka.common.config.ConfigValue; /** Various arbitrary helper functions */ public class Utils { // Connector version, change every release public static final String VERSION = "4.1.0"; // task id public static final String TASK_ID = "task_id"; public static final String JDK_HTTP_AUTH_TUNNELING = "jdk.http.auth.tunneling.disabledSchemes"; // mvn repo private static final String MVN_REPO = "https://repo1.maven.org/maven2/com/snowflake/snowflake-kafka-connector/"; public static final String TABLE_COLUMN_CONTENT = "RECORD_CONTENT"; public static final String TABLE_COLUMN_METADATA = "RECORD_METADATA"; private static final KCLogger LOGGER = new KCLogger(Utils.class.getName()); /** * Check the connector version from Maven repo, report if any update version is available. * *

A URl connection timeout is added in case Maven repo is not reachable in a proxy'd * environment. Returning false from this method doesn't have any side effects to start the * connector. * *

Version upgrade logic: * *

    *
  • Suggest only version that is newer than current version. If many new versions available * suggest the most recent one. *
  • Never suggest RC (release candidate) versions *
*/ public static boolean checkConnectorVersion() { return checkConnectorVersion(VERSION, fetchAvailableVersionsFromMaven()); } /** * Check connector version with provided current version and available versions. * * @param currentVersionString current version string * @param availableVersions list of available version strings from Maven */ static boolean checkConnectorVersion( String currentVersionString, List availableVersions) { LOGGER.info("Current Snowflake Kafka Connector Version: {}", currentVersionString); try { SemanticVersion currentVersion = new SemanticVersion(currentVersionString); String recommendedVersion = findRecommendedVersion(currentVersion, availableVersions); if (recommendedVersion != null) { LOGGER.warn( "Connector update is available, please upgrade Snowflake Kafka Connector ({} -> {})." + " Please check release notes for breaking changes and upgrade procedures before" + " installing.", currentVersionString, recommendedVersion); } return true; } catch (Exception e) { LOGGER.warn("can't verify latest connector version\n{}", e.getMessage()); } return false; } /** * Fetch available versions from Maven repository. 
* * @return list of available version strings */ static List fetchAvailableVersionsFromMaven() { List versions = new ArrayList<>(); try { URLConnection urlConnection = new URL(MVN_REPO).openConnection(); urlConnection.setConnectTimeout(5000); urlConnection.setReadTimeout(5000); InputStream input = urlConnection.getInputStream(); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(input)); String line; Pattern pattern = Pattern.compile("(\\d+\\.\\d+\\.\\d+(?:-[rR][cC]\\d*)?)"); while ((line = bufferedReader.readLine()) != null) { Matcher matcher = pattern.matcher(line); if (matcher.find()) { versions.add(matcher.group(1)); } } } catch (Exception e) { LOGGER.warn("Failed to fetch versions from Maven: {}", e.getMessage()); } return versions; } /** * Find the recommended version to upgrade to based on current version and available versions. * Package-private for testing. * * @param currentVersion the current connector version * @param availableVersions list of available version strings * @return recommended version string, or null if no upgrade is recommended */ static String findRecommendedVersion( SemanticVersion currentVersion, List availableVersions) { SemanticVersion highestCompatibleVersion = null; for (String versionString : availableVersions) { try { SemanticVersion version = new SemanticVersion(versionString); // Skip RC versions if (version.isReleaseCandidate()) { continue; } // Skip versions that are not greater than current if (version.compareTo(currentVersion) <= 0) { continue; } // Track the highest compatible version if (highestCompatibleVersion == null || version.compareTo(highestCompatibleVersion) > 0) { highestCompatibleVersion = version; } } catch (IllegalArgumentException e) { LOGGER.warn("Could not parse version string {}", versionString, e); } } return highestCompatibleVersion != null ? 
highestCompatibleVersion.toString() : null; } /** * validate whether proxy settings in the config is valid * * @param config connector configuration */ public static ImmutableMap validateProxySettings(Map config) { Map invalidConfigParams = new HashMap(); String host = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_HOST); String port = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PORT); // either both host and port are provided or none of them are provided if (host != null ^ port != null) { invalidConfigParams.put( KafkaConnectorConfigParams.JVM_PROXY_HOST, "proxy host and port must be provided together"); invalidConfigParams.put( KafkaConnectorConfigParams.JVM_PROXY_PORT, "proxy host and port must be provided together"); } else if (host != null) { String username = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_USERNAME); String password = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PASSWORD); // either both username and password are provided or none of them are provided if (username != null ^ password != null) { invalidConfigParams.put( KafkaConnectorConfigParams.JVM_PROXY_USERNAME, "proxy username and password must be provided together"); invalidConfigParams.put( KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, "proxy username and password must be provided together"); } } return ImmutableMap.copyOf(invalidConfigParams); } /** * Enable JVM proxy * * @param config connector configuration */ public static void enableJVMProxy(Map config) { String host = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_HOST); String port = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PORT); String nonProxyHosts = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS); if (host != null && port != null) { LOGGER.info( "enable jvm proxy: {}:{} and bypass proxy for 
hosts: {}", host, port, nonProxyHosts); // enable https proxy System.setProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY, "true"); System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST, host); System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT, port); System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST, host); System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT, port); // If the user provided the jvm.nonProxy.hosts configuration then we // will append that to the list provided by the JVM argument // -Dhttp.nonProxyHosts and not override it altogether, if it exists. if (nonProxyHosts != null) { nonProxyHosts = (System.getProperty(KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS) != null) ? System.getProperty(KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS) + "|" + nonProxyHosts : nonProxyHosts; System.setProperty(KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS, nonProxyHosts); } // set username and password String username = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_USERNAME); String password = ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PASSWORD); if (username != null && password != null) { Authenticator.setDefault( new Authenticator() { @Override public PasswordAuthentication getPasswordAuthentication() { return new PasswordAuthentication(username, password.toCharArray()); } }); System.setProperty(JDK_HTTP_AUTH_TUNNELING, ""); System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER, username); System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD, password); System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER, username); System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD, password); } } } /** * validates that given name is a valid snowflake object identifier * * @param objName snowflake object name * @return true if given object name is valid */ static boolean isValidSnowflakeObjectIdentifier(String objName) { 
return objName.matches("^[_a-zA-Z]{1}[_$a-zA-Z0-9]+$"); } /** * validates that given name is a valid snowflake application name, support '-' * * @param appName snowflake application name * @return true if given application name is valid */ public static boolean isValidSnowflakeApplicationName(String appName) { return appName.matches("^[-_a-zA-Z]{1}[-_$a-zA-Z0-9]+$"); } /** * modify invalid application name in config and return the generated application name * * @param config input config object */ public static void convertAppName(Map config) { String appName = config.getOrDefault(KafkaConnectorConfigParams.NAME, ""); // If appName is empty the following call will throw error // Application names are always sanitized for backward compatibility String validAppName = generateValidNameFromMap(appName, new HashMap<>(), true); config.put(KafkaConnectorConfigParams.NAME, validAppName); } /** * verify topic name, and generate valid table name with optional sanitization * * @param topic input topic name * @param topic2table topic to table map * @param enableSanitization if true, sanitize invalid identifiers; if false, pass through * @return valid table name */ public static String getTableName( String topic, Map topic2table, boolean enableSanitization) { return generateValidNameFromMap(topic, topic2table, enableSanitization); } /** * verify topic name, and generate valid table/application name with optional sanitization * * @param topic input topic name * @param topic2table topic to table map * @param enableSanitization if true, sanitize invalid identifiers; if false, pass through * @return valid generated table/application name */ private static String generateValidNameFromMap( String topic, Map topic2table, boolean enableSanitization) { final String PLACE_HOLDER = "_"; if (topic == null || topic.isEmpty()) { throw SnowflakeErrors.ERROR_0020.getException("topic name: " + topic); } // Map entries always bypass sanitization if (topic2table.containsKey(topic)) { return 
topic2table.get(topic); } // try matching regex tables for (String regexTopic : topic2table.keySet()) { if (topic.matches(regexTopic)) { return topic2table.get(regexTopic); } } // If sanitization is disabled, pass through the topic name as is if (!enableSanitization) { return topic; } // When sanitization is enabled, check if the topic is a valid identifier if (Utils.isValidSnowflakeObjectIdentifier(topic)) { // Valid identifiers are uppercased when sanitization is enabled return topic.toUpperCase(Locale.ROOT); } // Invalid identifiers are sanitized and uppercased when sanitization is enabled int hash = Math.abs(topic.hashCode()); StringBuilder result = new StringBuilder(); // remove wildcard regex from topic name to generate table name topic = topic.replaceAll("\\.\\*", ""); int index = 0; // first char if (topic.substring(index, index + 1).matches("[_a-zA-Z]")) { result.append(topic.charAt(index)); index++; } else { result.append(PLACE_HOLDER); } while (index < topic.length()) { if (topic.substring(index, index + 1).matches("[_$a-zA-Z0-9]")) { result.append(topic.charAt(index)); } else { result.append(PLACE_HOLDER); } index++; } result.append(PLACE_HOLDER); result.append(hash); // Uppercase the sanitized result when sanitization is enabled return result.toString().toUpperCase(Locale.ROOT); } /** * Convert a Comma separated key value pairs into a Map * * @param input Provided in KC config * @return Map */ public static Map parseCommaSeparatedKeyValuePairs(String input) { Map pairs = new HashMap<>(); for (String str : input.split(",")) { String[] tt = str.split(":"); if (tt.length != 2 || tt[0].trim().isEmpty() || tt[1].trim().isEmpty()) { LOGGER.error( "Invalid {} config format: {}", KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, input); throw SnowflakeErrors.ERROR_0030.getException(); } pairs.put(tt[0].trim(), tt[1].trim()); } return pairs; } static final String[] loginPropList = { KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, 
KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME }; public static boolean isSingleFieldValid(Config result) { // if any single field validation failed for (ConfigValue v : result.configValues()) { if (!v.errorMessages().isEmpty()) { return false; } } // if any of url, user, schema, database or password is empty // update error message and return false boolean isValidate = true; final String errorMsg = " must be provided"; Map validateMap = validateConfigToMap(result); // for (String prop : loginPropList) { if (validateMap.get(prop).value() == null) { updateConfigErrorMessage(result, prop, errorMsg); isValidate = false; } } return isValidate; } public static Map validateConfigToMap(final Config result) { Map validateMap = new HashMap<>(); for (ConfigValue v : result.configValues()) { validateMap.put(v.name(), v); } return validateMap; } public static void updateConfigErrorMessage(Config result, String key, String msg) { for (ConfigValue v : result.configValues()) { if (v.name().equals(key)) { v.addErrorMessage(key + msg); } } } // static elements // log message tag static final String SF_LOG_TAG = "[SF_KAFKA_CONNECTOR]"; /** * the following method wraps log messages with Snowflake tag. For example, * *

[SF_KAFKA_CONNECTOR] this is a log message * *

[SF_KAFKA_CONNECTOR] this is the second line * *

All log messages should be wrapped by Snowflake tag. Then user can filter out log messages * output from Snowflake Kafka connector by these tags. * * @param format log message format string * @param vars variable list * @return log message wrapped by snowflake tag */ public static String formatLogMessage(String format, Object... vars) { return SF_LOG_TAG + " " + formatString(format, vars); } public static String formatString(String format, Object... vars) { for (int i = 0; i < vars.length; i++) { format = format.replaceFirst("\\{}", Objects.toString(vars[i]).replaceAll("\\$", "\\\\\\$")); } return format; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/config/AuthenticatorType.java ================================================ package com.snowflake.kafka.connector.config; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR; import java.util.Arrays; import java.util.Locale; import java.util.stream.Collectors; /** Authentication method for Snowflake connections. */ public enum AuthenticatorType { /** Key-pair (JWT) authentication. This is the default. */ SNOWFLAKE_JWT, /** External OAuth authentication. */ OAUTH; /** The config string value, matching the v3 connector convention (lowercase with underscores). */ public String toConfigValue() { return name().toLowerCase(Locale.ROOT); } /** * Parses a config string into an authenticator type (case-insensitive). Returns {@link * #SNOWFLAKE_JWT} for null or empty input. 
* * @throws IllegalArgumentException for unrecognized values */ public static AuthenticatorType fromConfig(String value) { if (value == null || value.trim().isEmpty()) { return SNOWFLAKE_JWT; } String normalized = value.trim().toUpperCase(Locale.ROOT); try { return valueOf(normalized); } catch (IllegalArgumentException e) { String validValues = Arrays.stream(values()) .map(AuthenticatorType::toConfigValue) .collect(Collectors.joining(", ")); throw new IllegalArgumentException( "Invalid value '" + value.trim() + "' for config '" + SNOWFLAKE_AUTHENTICATOR + "'. Valid values are: " + validValues, e); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/config/CommaSeparatedKeyValueValidator.java ================================================ package com.snowflake.kafka.connector.config; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.common.config.ConfigException; /** * Class which validates key value pairs in the format :,: * *

It doesn't validate the type of values, only making sure the format is correct. */ class CommaSeparatedKeyValueValidator implements ConfigDef.Validator { public CommaSeparatedKeyValueValidator() {} public void ensureValid(String name, Object value) { String s = (String) value; // Validate the comma-separated key-value pairs string if (s != null && !s.isEmpty() && !isValidCommaSeparatedKeyValueString(s)) { throw new ConfigException(name, value, "Format: :,:,..."); } } private boolean isValidCommaSeparatedKeyValueString(String input) { // Split the input string by commas String[] pairs = input.split(","); for (String pair : pairs) { // Trim the pair to remove leading and trailing whitespaces pair = pair.trim(); // Split each pair by colon String[] keyValue = pair.split(":"); // Check if the pair has exactly two elements after trimming if (keyValue.length != 2) { return false; } // Check if the key or value is empty after trimming if (keyValue[0].trim().isEmpty() || keyValue[1].trim().isEmpty()) { return false; } } return true; } public String toString() { return "Comma-separated key-value pairs format: :,:,..."; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/config/ConnectorConfigDefinition.java ================================================ package com.snowflake.kafka.connector.config; import static org.apache.kafka.common.config.ConfigDef.Importance.*; import static org.apache.kafka.common.config.ConfigDef.Range.*; import static org.apache.kafka.common.config.ConfigDef.Type.*; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.common.config.ConfigDef.Width; /** This class is a placeholder for config definition in Apache Kafka specific format */ public class ConnectorConfigDefinition { private static final String SNOWFLAKE_LOGIN_INFO_DOC = "Snowflake Login 
Info"; private static final String PROXY_INFO_DOC = "Proxy Info"; private static final String CONNECTOR_CONFIG_DOC = "Connector Config"; private static final String SNOWFLAKE_METADATA_FLAGS_DOC = "Snowflake Metadata Flags"; private static final String ERRORS = "ERRORS"; private static final ConfigDef.Validator NON_EMPTY_STRING_VALIDATOR = new ConfigDef.NonEmptyString(); private static final ConfigDef.Validator TOPIC_TO_TABLE_VALIDATOR = new TopicToTableValidator(); private static final ConfigDef.Validator STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP_VALIDATOR = new CommaSeparatedKeyValueValidator(); public static ConfigDef getConfig() { return new ConfigDef() // snowflake login info .define( KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, STRING, null, NON_EMPTY_STRING_VALIDATOR, HIGH, "Snowflake account url", SNOWFLAKE_LOGIN_INFO_DOC, 0, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME) .define( KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, STRING, null, NON_EMPTY_STRING_VALIDATOR, HIGH, "Snowflake user name", SNOWFLAKE_LOGIN_INFO_DOC, 1, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME) .define( KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, PASSWORD, "", HIGH, "Private key for Snowflake user", SNOWFLAKE_LOGIN_INFO_DOC, 2, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY) .define( KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, PASSWORD, "", LOW, "Passphrase of private key if encrypted", SNOWFLAKE_LOGIN_INFO_DOC, 3, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE) .define( KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME, STRING, null, NON_EMPTY_STRING_VALIDATOR, HIGH, "Snowflake database name", SNOWFLAKE_LOGIN_INFO_DOC, 4, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME) .define( KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, STRING, null, NON_EMPTY_STRING_VALIDATOR, HIGH, "Snowflake database schema name", SNOWFLAKE_LOGIN_INFO_DOC, 5, Width.NONE, 
KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME) .define( KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, STRING, null, NON_EMPTY_STRING_VALIDATOR, HIGH, "Snowflake role: snowflake.role.name", SNOWFLAKE_LOGIN_INFO_DOC, 6, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME) // OAuth .define( KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR, STRING, AuthenticatorType.SNOWFLAKE_JWT.toConfigValue(), LOW, "Authenticator for JDBC and streaming ingest SDK." + " Valid values: snowflake_jwt, oauth.", SNOWFLAKE_LOGIN_INFO_DOC, 7, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR) .define( KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID, STRING, "", HIGH, "Client id of target OAuth integration", SNOWFLAKE_LOGIN_INFO_DOC, 8, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID) .define( KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET, PASSWORD, "", HIGH, "Client secret of target OAuth integration", SNOWFLAKE_LOGIN_INFO_DOC, 9, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET) .define( KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN, PASSWORD, "", HIGH, "Refresh token for OAuth. If empty, client_credentials grant is used.", SNOWFLAKE_LOGIN_INFO_DOC, 10, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN) .define( KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT, STRING, null, HIGH, "OAuth token endpoint URL. 
If not set, defaults to the Snowflake account URL.", SNOWFLAKE_LOGIN_INFO_DOC, 11, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT) // proxy .define( KafkaConnectorConfigParams.JVM_PROXY_HOST, STRING, "", LOW, "JVM option: https.proxyHost", PROXY_INFO_DOC, 0, Width.NONE, KafkaConnectorConfigParams.JVM_PROXY_HOST) .define( KafkaConnectorConfigParams.JVM_PROXY_PORT, STRING, "", LOW, "JVM option: https.proxyPort", PROXY_INFO_DOC, 1, Width.NONE, KafkaConnectorConfigParams.JVM_PROXY_PORT) .define( KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS, STRING, "", LOW, "JVM option: http.nonProxyHosts", PROXY_INFO_DOC, 2, Width.NONE, KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS) .define( KafkaConnectorConfigParams.JVM_PROXY_USERNAME, STRING, "", LOW, "JVM proxy username", PROXY_INFO_DOC, 3, Width.NONE, KafkaConnectorConfigParams.JVM_PROXY_USERNAME) .define( KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, PASSWORD, "", LOW, "JVM proxy password", PROXY_INFO_DOC, 4, Width.NONE, KafkaConnectorConfigParams.JVM_PROXY_PASSWORD) // Metadata .define( KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL, BOOLEAN, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT, LOW, "Flag to control whether there is metadata collected. 
If set to false, all metadata" + " will be dropped", SNOWFLAKE_METADATA_FLAGS_DOC, 0, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL) .define( KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME, BOOLEAN, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT, LOW, "Flag to control whether createtime is collected in snowflake metadata", SNOWFLAKE_METADATA_FLAGS_DOC, 1, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME) .define( KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC, BOOLEAN, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT, LOW, "Flag to control whether kafka topic name is collected in snowflake metadata", SNOWFLAKE_METADATA_FLAGS_DOC, 2, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC) .define( KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION, BOOLEAN, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT, LOW, "Flag to control whether kafka partition and offset are collected in snowflake" + " metadata", SNOWFLAKE_METADATA_FLAGS_DOC, 3, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION) .define( KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME, BOOLEAN, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT, LOW, "Flag to control whether ConnectorPushTime is collected in snowflake metadata for" + " Snowpipe Streaming", SNOWFLAKE_METADATA_FLAGS_DOC, 4, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME) // Connector Config .define( KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC, BOOLEAN, KafkaConnectorConfigParams .SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC_DEFAULT, HIGH, "When true (default), the connector validates that all settings required for KC v3" + " backward compatibility are configured. 
Set to false to use v4-optimized" + " defaults without compatibility checks.", CONNECTOR_CONFIG_DOC, 0, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC) .define( KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, STRING, "", TOPIC_TO_TABLE_VALIDATOR, LOW, "Map of topics to tables (optional). Format : comma-separated tuples, e.g." + " :,:,... ", CONNECTOR_CONFIG_DOC, 1, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP) .define( KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, STRING, KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT, ConfigDef.ValidString.in("client_side", "server_side"), HIGH, "Data validation mode. 'client_side' enables client-side data validation and schema" + " evolution before sending to Snowflake. 'server_side' defers validation and" + " schema evolution to the backend for maximum throughput; requires that error" + " logging is enabled on the target table.", CONNECTOR_CONFIG_DOC, 2, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION) .define( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, STRING, KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT, ConfigDef.ValidString.in("skip", "best_effort", "strict"), HIGH, "Controls offset migration from KC v3 (Snowpipe Streaming Classic) channels. 'skip'" + " (default): do not consult Classic channels. 'best_effort': migrate the offset" + " if the Classic channel exists, otherwise fall through to the Kafka consumer" + " group offset. 'strict': migrate the offset if the Classic channel exists, fail" + " if it does not.", CONNECTOR_CONFIG_DOC, 3, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION) .define( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME, BOOLEAN, KafkaConnectorConfigParams .SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME_DEFAULT, HIGH, "Whether the KC v3 connector included the connector name in its channel names." 
+ " Set to true if the v3 connector had" + " 'snowflake.streaming.channel.name.include.connector.name=true'." + " Only relevant when offset migration is not 'skip'.", CONNECTOR_CONFIG_DOC, 4, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME) .define( KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES, STRING, ConnectorConfigTools.BehaviorOnNullValues.DEFAULT.toString(), ConnectorConfigTools.BehaviorOnNullValues.VALIDATOR, LOW, "How to handle records with a null value (i.e. Kafka tombstone records)." + " Valid options are 'DEFAULT' and 'IGNORE'.", CONNECTOR_CONFIG_DOC, 5, Width.NONE, KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES) .define( KafkaConnectorConfigParams.JMX_OPT, BOOLEAN, KafkaConnectorConfigParams.JMX_OPT_DEFAULT, HIGH, "Whether to enable JMX MBeans for custom SF metrics") .define( KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, STRING, "", STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP_VALIDATOR, LOW, "Map of Key value pairs representing Streaming Client Properties to Override. These are" + " optional and recommended to use ONLY after consulting Snowflake Support. Format" + " : comma-separated tuples, e.g.: key1:value1,key2:value2", CONNECTOR_CONFIG_DOC, 6, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP) .define( KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, STRING, KafkaConnectorConfigParams.ERRORS_TOLERANCE_DEFAULT, ConnectorConfigTools.ErrorTolerance.VALIDATOR, LOW, "Behavior for tolerating errors during Sink connector's operation. 'NONE' is set as" + " default and denotes that it will be fail fast. i.e any error will result in an" + " immediate task failure. 
'ALL' skips over problematic records.", ERRORS, 0, Width.NONE, "Error Tolerance") .define( KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG, BOOLEAN, KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_DEFAULT, LOW, "If true, write/log each error along with details of the failed operation and record" + " properties to the Connect log. Default is 'false', so that only errors that are" + " not tolerated are reported.", ERRORS, 1, Width.NONE, "Log Errors") .define( KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG, STRING, KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_DEFAULT, LOW, "Whether to output conversion errors to the dead letter queue " + "By default messages are not sent to the dead letter queue. " + "Requires property `errors.tolerance=all`.", ERRORS, 2, Width.NONE, "Send error records to the Dead Letter Queue (DLQ)") .define( KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_CONFIG, BOOLEAN, KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_DEFAULT, LOW, "Enable MDC context to prepend log messages. Note that this is only available after" + " Apache Kafka 2.3", CONNECTOR_CONFIG_DOC, 7, Width.NONE, "Enable MDC logging") .define( KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS, BOOLEAN, KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT, LOW, "If set to true the Connector will fail its tasks when authorization error from" + " Snowflake occurred") .define( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION, BOOLEAN, KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT, LOW, "When enabled, auto-generated table names are sanitized (special characters replaced)" + " and uppercased for v3 compatibility. When disabled, topic names are passed" + " through as-is. 
Use topic2table.map with quoted identifiers for special" + " characters when disabled.", CONNECTOR_CONFIG_DOC, 8, Width.NONE, KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION) .define( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, BOOLEAN, KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT, LOW, "When enabled, column identifiers are normalized to uppercase for v3 compatibility.", CONNECTOR_CONFIG_DOC, 9, Width.NONE, KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION) .define( KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, BOOLEAN, KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT, MEDIUM, "When true (default), records are schematized into individual columns. When false," + " records are wrapped into legacy RECORD_CONTENT and RECORD_METADATA VARIANT" + " columns for backward compatibility with KC v3.", CONNECTOR_CONFIG_DOC, 10, Width.NONE, KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION) .define( KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, BOOLEAN, KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT, LOW, "Enable caching for Snowflake table existence checks to reduce database queries", CONNECTOR_CONFIG_DOC, 11, Width.NONE, KafkaConnectorConfigParams.CACHE_TABLE_EXISTS) .define( KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS, LONG, KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT, atLeast(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_MIN), LOW, "Cache expiration time in milliseconds for table existence checks. 
Must be a positive" + " number.", CONNECTOR_CONFIG_DOC, 12, Width.NONE, KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS) .define( KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, BOOLEAN, KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT, LOW, "Enable caching for pipe existence checks to reduce database queries", CONNECTOR_CONFIG_DOC, 13, Width.NONE, KafkaConnectorConfigParams.CACHE_PIPE_EXISTS) .define( KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS, LONG, KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT, atLeast(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_MIN), LOW, "Cache expiration time in milliseconds for pipe existence checks. Must be a positive" + " number.", CONNECTOR_CONFIG_DOC, 14, Width.NONE, KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/config/SinkTaskConfig.java ================================================ package com.snowflake.kafka.connector.config; import com.google.auto.value.AutoValue; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.TopicToTableParser; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.internal.CachingConfig; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode; import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig; import java.util.HashMap; import java.util.Map; import java.util.Optional; import javax.annotation.Nullable; import org.apache.kafka.common.config.types.Password; /** * Parsed, typed configuration for the sink task. 
Built once from the raw connector config map in
 * {@link com.snowflake.kafka.connector.SnowflakeSinkTask#start(Map)} and passed through the task
 * and streaming layer so call sites use accessors instead of string keys and repeated defaults.
 */
@AutoValue
public abstract class SinkTaskConfig {

  public abstract String getConnectorName();

  public abstract String getTaskId();

  /** Returns an unmodifiable view of the topic-to-table mapping. */
  public abstract Map<String, String> getTopicToTableMap();

  public abstract ConnectorConfigTools.BehaviorOnNullValues getBehaviorOnNullValues();

  public abstract boolean isJmxEnabled();

  public abstract boolean isTolerateErrors();

  public abstract boolean isErrorsLogEnable();

  @Nullable
  public abstract String getDlqTopicName();

  public abstract boolean isEnableSanitization();

  public abstract boolean isEnableSchematization();

  public abstract boolean isEnableColumnIdentifierNormalization();

  public abstract SnowflakeValidation getValidation();

  public abstract int getOpenChannelIoThreads();

  @Nullable
  public abstract String getStreamingClientProviderOverrideMap();

  public abstract CachingConfig getCachingConfig();

  public abstract SnowflakeMetadataConfig getMetadataConfig();

  @Nullable
  public abstract String getSnowflakeUrl();

  @Nullable
  public abstract String getSnowflakeUser();

  @Nullable
  public abstract String getSnowflakeRole();

  @Nullable
  public abstract Password getSnowflakePrivateKey();

  @Nullable
  public abstract Password getSnowflakePrivateKeyPassphrase();

  public abstract AuthenticatorType getAuthenticator();

  @Nullable
  public abstract String getOauthClientId();

  @Nullable
  public abstract Password getOauthClientSecret();

  @Nullable
  public abstract Password getOauthRefreshToken();

  @Nullable
  public abstract String getOauthTokenEndpoint();

  @Nullable
  public abstract String getSnowflakeDatabase();

  @Nullable
  public abstract String getSnowflakeSchema();

  @Nullable
  public abstract String getProxyHost();

  @Nullable
  public abstract String getProxyPort();

  @Nullable
  public abstract String getNonProxyHosts();

  @Nullable
  public abstract String getProxyUsername();

  @Nullable
  public abstract String getProxyPassword();

  @Nullable
  public abstract String getJdbcMap();

  public abstract Ssv1MigrationMode getSsv1MigrationMode();

  public abstract boolean isSsv1MigrationIncludeConnectorName();

  /** Convenience overload that calls {@link #from(Map, boolean)} with {@code false}. */
  public static SinkTaskConfig from(Map<String, String> raw) {
    return from(raw, false);
  }

  /**
   * Parses the raw connector config map into an immutable SinkTaskConfig. Applies defaults for
   * missing optional keys.
   *
   * @param raw raw config from the connector (typically after setDefaultValues)
   * @param skipTaskSpecificConfig if true, task ID and connector name default to "" when absent
   *     instead of throwing. Use this when building a config outside of task startup -- e.g. in
   *     {@code validate()} or connection factory setup -- where task ID is not yet assigned.
   * @return parsed config
   * @throws IllegalArgumentException if required fields are missing or invalid
   */
  public static SinkTaskConfig from(Map<String, String> raw, boolean skipTaskSpecificConfig) {
    return builderFrom(raw, skipTaskSpecificConfig).build();
  }

  /** Same as {@link #builderFrom(Map, boolean)} with {@code skipTaskSpecificConfig = false}. */
  @VisibleForTesting
  public static Builder builderFrom(Map<String, String> raw) {
    return builderFrom(raw, false);
  }

  /**
   * Reads every setting from {@code raw} into a pre-populated {@link Builder} without calling
   * {@code build()}, so callers can override individual values first.
   *
   * @param raw raw connector config; {@code null} is treated as an empty map
   * @param skipTaskSpecificConfig if false, a blank connector name or task ID is rejected
   * @return builder holding the parsed values
   * @throws IllegalArgumentException if the connector name or task ID is blank (and not skipped),
   *     or if an enum-valued setting does not name a known constant
   */
  @VisibleForTesting
  public static Builder builderFrom(Map<String, String> raw, boolean skipTaskSpecificConfig) {
    if (raw == null) {
      raw = new HashMap<>();
    }
    // Work on a private copy so the caller's map is never touched.
    Map<String, String> config = new HashMap<>(raw);

    String connectorName = config.getOrDefault(KafkaConnectorConfigParams.NAME, "");
    String taskId = config.getOrDefault(Utils.TASK_ID, "");
    if (!skipTaskSpecificConfig) {
      if (connectorName == null || connectorName.trim().isEmpty()) {
        throw new IllegalArgumentException(
            "Connector name ('"
                + KafkaConnectorConfigParams.NAME
                + "') must be set and cannot be empty");
      }
      if (taskId == null || taskId.trim().isEmpty()) {
        throw new IllegalArgumentException(
            "Task ID ('" + Utils.TASK_ID + "') must be set and cannot be null or empty");
      }
    }

    ImmutableMap<String, String> topicToTableMap = ImmutableMap.of();
    if (config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP)) {
      try {
        Map<String, String> parsed =
            TopicToTableParser.parse(
                config.get(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP));
        if (parsed != null) {
          topicToTableMap = ImmutableMap.copyOf(parsed);
        }
      } catch (IllegalArgumentException e) {
        // Parser failures are re-thrown as the connector's ERROR_0021 exception.
        throw SnowflakeErrors.ERROR_0021.getException(e.getMessage());
      }
    }

    ConnectorConfigTools.BehaviorOnNullValues behaviorOnNullValues =
        ConnectorConfigTools.BehaviorOnNullValues.DEFAULT;
    if (config.containsKey(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES)) {
      behaviorOnNullValues =
          ConnectorConfigTools.BehaviorOnNullValues.valueOf(
              config
                  .get(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES)
                  .toUpperCase(java.util.Locale.ROOT));
    }

    boolean jmxEnabled =
        Optional.ofNullable(config.get(KafkaConnectorConfigParams.JMX_OPT))
            .map(Boolean::parseBoolean)
            .orElse(KafkaConnectorConfigParams.JMX_OPT_DEFAULT);

    String errorsTolerance =
        config.getOrDefault(
            KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG,
            KafkaConnectorConfigParams.ERRORS_TOLERANCE_DEFAULT);
    boolean tolerateErrors =
        ConnectorConfigTools.ErrorTolerance.valueOf(
                errorsTolerance.toUpperCase(java.util.Locale.ROOT))
            .equals(ConnectorConfigTools.ErrorTolerance.ALL);

    boolean errorsLogEnable =
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG,
                String.valueOf(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_DEFAULT)));

    String dlqTopicName =
        config.get(KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG);

    boolean enableSanitization =
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams
                    .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
                String.valueOf(
                    KafkaConnectorConfigParams
                        .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT)));

    boolean enableSchematization =
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION,
                String.valueOf(
                    KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT)));

    boolean enableColumnIdentifierNormalization =
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams
                    .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
                String.valueOf(
                    KafkaConnectorConfigParams
                        .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT)));

    SnowflakeValidation validation =
        SnowflakeValidation.fromConfig(
            config.getOrDefault(
                KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION,
                KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT));

    int openChannelIoThreads =
        Optional.ofNullable(
                config.get(KafkaConnectorConfigParams.SNOWFLAKE_OPEN_CHANNEL_IO_THREADS))
            .map(Integer::parseInt)
            .orElse(KafkaConnectorConfigParams.SNOWFLAKE_OPEN_CHANNEL_IO_THREADS_DEFAULT);

    String streamingClientProviderOverrideMap =
        config.get(KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP);

    CachingConfig cachingConfig = CachingConfig.fromConfig(config);
    SnowflakeMetadataConfig metadataConfig = new SnowflakeMetadataConfig(config);

    Ssv1MigrationMode ssv1MigrationMode =
        Ssv1MigrationMode.fromConfig(
            config.getOrDefault(
                KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION,
                KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT));
    boolean ssv1MigrationIncludeConnectorName =
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME,
                String.valueOf(
                    KafkaConnectorConfigParams
                        .SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME_DEFAULT)));

    // Connection-related values are passed through as-is; absent keys stay null.
    String snowflakeUrl = config.get(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME);
    String snowflakeUser = config.get(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME);
    String snowflakeRole = config.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
    Password snowflakePrivateKey =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY));
    Password snowflakePrivateKeyPassphrase =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE));
    String snowflakeDatabase = config.get(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME);
    String snowflakeSchema = config.get(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME);

    AuthenticatorType authenticator =
        AuthenticatorType.fromConfig(
            config.get(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR));
    String oauthClientId = config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID);
    Password oauthClientSecret =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET));
    Password oauthRefreshToken =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN));
    String oauthTokenEndpoint =
        config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT);

    String proxyHost = config.get(KafkaConnectorConfigParams.JVM_PROXY_HOST);
    String proxyPort = config.get(KafkaConnectorConfigParams.JVM_PROXY_PORT);
    String nonProxyHosts = config.get(KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS);
    String proxyUsername = config.get(KafkaConnectorConfigParams.JVM_PROXY_USERNAME);
    String proxyPassword = config.get(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD);
    String jdbcMap = config.get(KafkaConnectorConfigParams.SNOWFLAKE_JDBC_MAP);

    return builder()
        .connectorName(connectorName)
        .taskId(taskId)
        .topicToTableMap(topicToTableMap)
        .behaviorOnNullValues(behaviorOnNullValues)
        .jmxEnabled(jmxEnabled)
        .tolerateErrors(tolerateErrors)
        .errorsLogEnable(errorsLogEnable)
        .dlqTopicName(dlqTopicName)
        .enableSanitization(enableSanitization)
        .enableSchematization(enableSchematization)
        .enableColumnIdentifierNormalization(enableColumnIdentifierNormalization)
        .validation(validation)
        .openChannelIoThreads(openChannelIoThreads)
        .streamingClientProviderOverrideMap(streamingClientProviderOverrideMap)
        .cachingConfig(cachingConfig)
        .metadataConfig(metadataConfig)
        .snowflakeUrl(snowflakeUrl)
        .snowflakeUser(snowflakeUser)
        .snowflakeRole(snowflakeRole)
        .snowflakePrivateKey(snowflakePrivateKey)
        .snowflakePrivateKeyPassphrase(snowflakePrivateKeyPassphrase)
        .authenticator(authenticator)
        .oauthClientId(oauthClientId)
        .oauthClientSecret(oauthClientSecret)
        .oauthRefreshToken(oauthRefreshToken)
        .oauthTokenEndpoint(oauthTokenEndpoint)
        .snowflakeDatabase(snowflakeDatabase)
        .snowflakeSchema(snowflakeSchema)
        .proxyHost(proxyHost)
        .proxyPort(proxyPort)
        .nonProxyHosts(nonProxyHosts)
        .proxyUsername(proxyUsername)
        .proxyPassword(proxyPassword)
        .jdbcMap(jdbcMap)
        .ssv1MigrationMode(ssv1MigrationMode)
        .ssv1MigrationIncludeConnectorName(ssv1MigrationIncludeConnectorName);
  }

  /** Wraps {@code value} in a {@link Password}, or returns {@code null} when it is {@code null}. */
  private static Password passwordOrNull(String value) {
    return value == null ? null : new Password(value);
  }

  /** Creates a new builder. Used by {@link #from(Map)} and by tests. */
  public static Builder builder() {
    return new AutoValue_SinkTaskConfig.Builder();
  }

  /**
   * AutoValue-generated builder. When using directly (e.g. in tests), set connectorName and taskId.
   */
  @AutoValue.Builder
  public abstract static class Builder {
    public abstract Builder connectorName(String connectorName);

    public abstract Builder taskId(String taskId);

    public abstract Builder topicToTableMap(Map<String, String> topicToTableMap);

    public abstract Builder behaviorOnNullValues(
        ConnectorConfigTools.BehaviorOnNullValues behaviorOnNullValues);

    public abstract Builder jmxEnabled(boolean jmxEnabled);

    public abstract Builder tolerateErrors(boolean tolerateErrors);

    public abstract Builder errorsLogEnable(boolean errorsLogEnable);

    public abstract Builder dlqTopicName(String dlqTopicName);

    public abstract Builder enableSanitization(boolean enableSanitization);

    public abstract Builder enableSchematization(boolean enableSchematization);

    public abstract Builder enableColumnIdentifierNormalization(
        boolean enableColumnIdentifierNormalization);

    public abstract Builder validation(SnowflakeValidation validation);

    public abstract Builder openChannelIoThreads(int openChannelIoThreads);

    public abstract Builder streamingClientProviderOverrideMap(
        String streamingClientProviderOverrideMap);

    public abstract Builder cachingConfig(CachingConfig cachingConfig);

    public abstract Builder metadataConfig(SnowflakeMetadataConfig metadataConfig);

    public abstract Builder snowflakeUrl(String snowflakeUrl);

    public abstract Builder snowflakeUser(String snowflakeUser);

    public abstract Builder snowflakeRole(String snowflakeRole);

    public abstract Builder snowflakePrivateKey(Password snowflakePrivateKey);

    public abstract Builder snowflakePrivateKeyPassphrase(Password snowflakePrivateKeyPassphrase);

    public abstract Builder authenticator(AuthenticatorType authenticator);

    public abstract Builder oauthClientId(String oauthClientId);

    public abstract Builder oauthClientSecret(Password oauthClientSecret);

    public abstract Builder oauthRefreshToken(Password oauthRefreshToken);

    public abstract Builder oauthTokenEndpoint(String oauthTokenEndpoint);

    public abstract Builder snowflakeDatabase(String snowflakeDatabase);
public abstract Builder snowflakeSchema(String snowflakeSchema); public abstract Builder proxyHost(String proxyHost); public abstract Builder proxyPort(String proxyPort); public abstract Builder nonProxyHosts(String nonProxyHosts); public abstract Builder proxyUsername(String proxyUsername); public abstract Builder proxyPassword(String proxyPassword); public abstract Builder jdbcMap(String jdbcMap); public abstract Builder ssv1MigrationMode(Ssv1MigrationMode ssv1MigrationMode); public abstract Builder ssv1MigrationIncludeConnectorName( boolean ssv1MigrationIncludeConnectorName); public abstract SinkTaskConfig build(); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/config/SnowflakeValidation.java ================================================ package com.snowflake.kafka.connector.config; import java.util.Locale; /** * Determines the connector validation mode for data ingestion. Controls whether the connector * performs client-side validation before sending data to Snowflake. */ public enum SnowflakeValidation { /** * Client-side validation is enabled. The connector validates data types and schema compatibility * before sending to Snowflake. Validation errors can be routed to a DLQ or abort the task. */ CLIENT_SIDE, /** * Server-side validation. Client-side validation is disabled. Invalid records are handled by the * SSv2 Error Table. Use when throughput is critical and an Error Table is configured. */ SERVER_SIDE; /** Parses a config string into a validation mode, case-insensitive. 
*/ public static SnowflakeValidation fromConfig(String value) { if (value == null || value.trim().isEmpty()) { return CLIENT_SIDE; } return valueOf(value.trim().toUpperCase(Locale.ROOT)); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/config/TopicToTableValidator.java ================================================ package com.snowflake.kafka.connector.config; import com.snowflake.kafka.connector.TopicToTableParser; import org.apache.kafka.common.config.ConfigDef; import org.apache.kafka.common.config.ConfigException; class TopicToTableValidator implements ConfigDef.Validator { public TopicToTableValidator() {} public void ensureValid(String name, Object value) { String s = (String) value; if (s != null && !s.isEmpty()) // this value is optional and can be empty { try { TopicToTableParser.parse(s); } catch (IllegalArgumentException e) { throw new ConfigException(name, value, e.getMessage()); } } } public String toString() { return "Topic to table map format : comma-separated tuples, e.g." + " :,:,... "; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/dlq/KafkaRecordErrorReporter.java ================================================ package com.snowflake.kafka.connector.dlq; import org.apache.kafka.connect.sink.ErrantRecordReporter; import org.apache.kafka.connect.sink.SinkRecord; /** * This interface is a wrapper on top of {@link ErrantRecordReporter}. This allows tolerating * situations when the class {@link ErrantRecordReporter} is not available because it was recently * added and backported to older versions. 
* * @see * Documentation */ public interface KafkaRecordErrorReporter { void reportError(SinkRecord record, Exception e); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/CachingConfig.java ================================================ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import java.util.Map; import java.util.Optional; /** * Configuration class for table and pipe existence caching. Contains all cache-related settings * with proper types. The values are coming from the connector config map. If you have any cache * related configuration parameters add them here. */ public final class CachingConfig { private final boolean tableExistsCacheEnabled; private final long tableExistsCacheExpireMs; private final boolean pipeExistsCacheEnabled; private final long pipeExistsCacheExpireMs; private CachingConfig( boolean tableExistsCacheEnabled, long tableExistsCacheExpireMs, boolean pipeExistsCacheEnabled, long pipeExistsCacheExpireMs) { this.tableExistsCacheEnabled = tableExistsCacheEnabled; this.tableExistsCacheExpireMs = tableExistsCacheExpireMs; this.pipeExistsCacheEnabled = pipeExistsCacheEnabled; this.pipeExistsCacheExpireMs = pipeExistsCacheExpireMs; } public boolean isTableExistsCacheEnabled() { return tableExistsCacheEnabled; } public long getTableExistsCacheExpireMs() { return tableExistsCacheExpireMs; } public boolean isPipeExistsCacheEnabled() { return pipeExistsCacheEnabled; } public long getPipeExistsCacheExpireMs() { return pipeExistsCacheExpireMs; } public static CachingConfig fromConfig(final Map config) { boolean tableExistsCacheEnabled = Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS)) .map(Boolean::parseBoolean) .orElse(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT); long tableExistsCacheExpireMs = 
Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS)) .map(Long::parseLong) .orElse(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT); boolean pipeExistsCacheEnabled = Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS)) .map(Boolean::parseBoolean) .orElse(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT); long pipeExistsCacheExpireMs = Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS)) .map(Long::parseLong) .orElse(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT); // Validate expiration times are positive if (tableExistsCacheExpireMs <= 0) { throw new IllegalArgumentException( "Cache expiration for table existence must be positive, got: " + tableExistsCacheExpireMs); } if (pipeExistsCacheExpireMs <= 0) { throw new IllegalArgumentException( "Cache expiration for pipe existence must be positive, got: " + pipeExistsCacheExpireMs); } return new CachingConfig( tableExistsCacheEnabled, tableExistsCacheExpireMs, pipeExistsCacheEnabled, pipeExistsCacheExpireMs); } @Override public String toString() { return "CacheConfig{" + "tableExistsCacheEnabled=" + tableExistsCacheEnabled + ", tableExistsCacheExpireMs=" + tableExistsCacheExpireMs + ", pipeExistsCacheEnabled=" + pipeExistsCacheEnabled + ", pipeExistsCacheExpireMs=" + pipeExistsCacheExpireMs + '}'; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/CachingSnowflakeConnectionService.java ================================================ package com.snowflake.kafka.connector.internal; import static java.util.concurrent.TimeUnit.MINUTES; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheStats; import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse; import 
com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import java.sql.Connection; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; /** * Decorator implementation of SnowflakeConnectionService that adds caching for table and pipe * existence checks. This class wraps an existing SnowflakeConnectionService and intercepts calls to * tableExist() and pipeExist() to provide caching. */ public class CachingSnowflakeConnectionService implements SnowflakeConnectionService { private static final KCLogger LOGGER = new KCLogger(CachingSnowflakeConnectionService.class.getName()); private static final long CACHE_STATS_LOG_INTERVAL_MS = MINUTES.toMillis(5); private static final int CACHE_SIZE = 100; private final SnowflakeConnectionService delegate; private final Cache tableExistsCache; private final Cache pipeExistsCache; private final Cache errorLoggingCache; private final boolean tableExistsCacheEnabled; private final boolean pipeExistsCacheEnabled; private final AtomicLong lastStatsLogTimestamp = new AtomicLong(System.currentTimeMillis()); /** * Creates a cached wrapper around an existing SnowflakeConnectionService. 
* * @param delegate the underlying connection service to wrap * @param cachingConfig cache configuration settings */ public CachingSnowflakeConnectionService( SnowflakeConnectionService delegate, CachingConfig cachingConfig) { this.delegate = delegate; this.tableExistsCacheEnabled = cachingConfig.isTableExistsCacheEnabled(); this.pipeExistsCacheEnabled = cachingConfig.isPipeExistsCacheEnabled(); this.tableExistsCache = CacheBuilder.newBuilder() .expireAfterWrite(cachingConfig.getTableExistsCacheExpireMs(), TimeUnit.MILLISECONDS) .recordStats() .maximumSize(CACHE_SIZE) .build(); this.pipeExistsCache = CacheBuilder.newBuilder() .expireAfterWrite(cachingConfig.getPipeExistsCacheExpireMs(), TimeUnit.MILLISECONDS) .maximumSize(CACHE_SIZE) .recordStats() .build(); // Reuses the table-exists TTL since error_logging is also a per-table property. this.errorLoggingCache = CacheBuilder.newBuilder() .expireAfterWrite(cachingConfig.getTableExistsCacheExpireMs(), TimeUnit.MILLISECONDS) .maximumSize(CACHE_SIZE) .recordStats() .build(); LOGGER.info( "Initialized cached connection service - tableExists: {} ({}ms), pipeExists: {} ({}ms)", tableExistsCacheEnabled, cachingConfig.getTableExistsCacheExpireMs(), pipeExistsCacheEnabled, cachingConfig.getPipeExistsCacheExpireMs()); } @Override public boolean tableExist(final String tableName) { if (!tableExistsCacheEnabled) { return delegate.tableExist(tableName); } try { boolean result = tableExistsCache.get(tableName, () -> delegate.tableExist(tableName)); logStatsIfNeeded(); return result; } catch (Exception e) { throw new RuntimeException("Error accessing table exists cache for table: " + tableName, e); } } @Override public boolean pipeExist(final String pipeName) { if (!pipeExistsCacheEnabled) { return delegate.pipeExist(pipeName); } try { boolean result = pipeExistsCache.get(pipeName, () -> delegate.pipeExist(pipeName)); logStatsIfNeeded(); return result; } catch (Exception e) { throw new RuntimeException("Error accessing pipe exists 
cache for pipe: " + pipeName, e); } } /** Logs detailed cache statistics for both table and pipe caches. */ public void logCacheStatistics() { if (tableExistsCacheEnabled) { CacheStats tableStats = tableExistsCache.stats(); LOGGER.info( "Table cache stats - Requests: {}, Hits: {}, Misses: {}, Hit Rate: {}%, " + "Evictions: {}, Load Success: {}, Load Failures: {}, Avg Load Time: {}ms, Size: {}", tableStats.requestCount(), tableStats.hitCount(), tableStats.missCount(), String.format("%.2f", tableStats.hitRate() * 100), tableStats.evictionCount(), tableStats.loadSuccessCount(), tableStats.loadExceptionCount(), String.format( "%.2f", tableStats.averageLoadPenalty() / 1_000_000.0), // Convert nanoseconds to milliseconds tableExistsCache.size()); } if (pipeExistsCacheEnabled) { CacheStats pipeStats = pipeExistsCache.stats(); LOGGER.info( "Pipe cache stats - Requests: {}, Hits: {}, Misses: {}, Hit Rate: {}%, " + "Evictions: {}, Load Success: {}, Load Failures: {}, Avg Load Time: {}ms, Size: {}", pipeStats.requestCount(), pipeStats.hitCount(), pipeStats.missCount(), String.format("%.2f", pipeStats.hitRate() * 100), pipeStats.evictionCount(), pipeStats.loadSuccessCount(), pipeStats.loadExceptionCount(), String.format( "%.2f", pipeStats.averageLoadPenalty() / 1_000_000.0), // Convert nanoseconds to milliseconds pipeExistsCache.size()); } if (tableExistsCacheEnabled) { CacheStats errorLoggingStats = errorLoggingCache.stats(); LOGGER.info( "Error logging cache stats - Requests: {}, Hits: {}, Misses: {}, Hit Rate: {}%," + " Size: {}", errorLoggingStats.requestCount(), errorLoggingStats.hitCount(), errorLoggingStats.missCount(), String.format("%.2f", errorLoggingStats.hitRate() * 100), errorLoggingCache.size()); } } // All other methods delegate directly without caching @Override public void createTableWithOnlyMetadataColumn(String tableName) { delegate.createTableWithOnlyMetadataColumn(tableName); tableExistsCache.invalidate(tableName); errorLoggingCache.invalidate(tableName); 
} @Override public boolean isTableCompatible(String tableName) { return delegate.isTableCompatible(tableName); } @Override public void databaseExists(String databaseName) { delegate.databaseExists(databaseName); } @Override public void schemaExists(String schemaName) { delegate.schemaExists(schemaName); } @Override public SnowflakeTelemetryService getTelemetryClient() { return delegate.getTelemetryClient(); } @Override public void close() { LOGGER.info("Closing CachedSnowflakeConnectionService, final cache statistics:"); logCacheStatistics(); delegate.close(); } @Override public boolean isClosed() { return delegate.isClosed(); } @Override public String getConnectorName() { return delegate.getConnectorName(); } @Override public Connection getConnection() { return delegate.getConnection(); } @Override public Optional> describeTable(String tableName) { return delegate.describeTable(tableName); } @Override public void executeQueryWithParameters(String query, String... parameters) { delegate.executeQueryWithParameters(query, parameters); pipeExistsCache.invalidateAll(); tableExistsCache.invalidateAll(); errorLoggingCache.invalidateAll(); } @Override public void appendColumnsToTable(String tableName, Map columnInfosMap) { delegate.appendColumnsToTable(tableName, columnInfosMap); } @Override public void alterNonNullableColumns(String tableName, List columnNames) { delegate.alterNonNullableColumns(tableName, columnNames); } @Override public boolean shouldEvolveSchema(String tableName, String role) { return delegate.shouldEvolveSchema(tableName, role); } @Override public boolean isIcebergTable(String tableName) { return delegate.isIcebergTable(tableName); } @Override public boolean hasErrorLoggingEnabled(String tableName) { if (!tableExistsCacheEnabled) { return delegate.hasErrorLoggingEnabled(tableName); } try { boolean result = errorLoggingCache.get(tableName, () -> delegate.hasErrorLoggingEnabled(tableName)); logStatsIfNeeded(); return result; } catch (Exception e) { 
throw new RuntimeException("Error accessing error logging cache for table: " + tableName, e); } } @Override public Ssv1MigrationResponse migrateSsv1ChannelOffset( String tableName, String ssv1ChannelName, String ssv2ChannelName, String pipeName) { return delegate.migrateSsv1ChannelOffset(tableName, ssv1ChannelName, ssv2ChannelName, pipeName); } private void logStatsIfNeeded() { final long now = System.currentTimeMillis(); final long lastLogged = lastStatsLogTimestamp.get(); if (now - lastLogged >= CACHE_STATS_LOG_INTERVAL_MS && lastStatsLogTimestamp.compareAndSet(lastLogged, now)) { logCacheStatistics(); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/DescribeTableRow.java ================================================ package com.snowflake.kafka.connector.internal; import java.util.Objects; /** Class representing a single row returned by describe table statement. */ public class DescribeTableRow { private final String column; private final String type; private final String comment; private final String nullable; private final String defaultValue; private final String autoincrement; /** Full constructor with default and autoincrement metadata. */ public DescribeTableRow( String column, String type, String comment, String nullable, String defaultValue, String autoincrement) { this.column = column; this.type = type; this.comment = comment; this.nullable = nullable; this.defaultValue = defaultValue; this.autoincrement = autoincrement; } /** Backward-compatible constructor (no default/autoincrement metadata). 
*/
  public DescribeTableRow(String column, String type, String comment, String nullable) {
    this(column, type, comment, nullable, null, null);
  }

  /** Column name as passed to the constructor. */
  public String getColumn() {
    return column;
  }

  /** Column type as passed to the constructor. */
  public String getType() {
    return type;
  }

  /** Column comment as passed to the constructor. */
  public String getComment() {
    return comment;
  }

  /** Nullability marker as passed to the constructor. */
  public String getNullable() {
    return nullable;
  }

  /** Default-value text, or {@code null} when built through the four-argument constructor. */
  public String getDefaultValue() {
    return defaultValue;
  }

  /** Autoincrement text, or {@code null} when built through the four-argument constructor. */
  public String getAutoincrement() {
    return autoincrement;
  }

  /** True when the column has a server-assigned default value. */
  public boolean hasDefault() {
    return !(defaultValue == null || defaultValue.isEmpty());
  }

  /** True when the column is an autoincrement/identity column. */
  public boolean isAutoincrement() {
    return !(autoincrement == null || autoincrement.isEmpty());
  }

  /** Two rows are equal when their column name and type are equal; other fields are ignored. */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    DescribeTableRow other = (DescribeTableRow) o;
    if (!Objects.equals(column, other.column)) {
      return false;
    }
    return Objects.equals(type, other.type);
  }

  /** Hash over the same two fields that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(column, type);
  }

  /** Renders " column type", followed by the default and autoincrement text when present. */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(" ").append(column).append(" ").append(type);
    if (hasDefault()) {
      text.append(" DEFAULT=");
      text.append(defaultValue);
    }
    if (isAutoincrement()) {
      text.append(" ");
      text.append(autoincrement);
    }
    return text.toString();
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/InternalUtils.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.apache.commons.lang3.StringUtils.isBlank;

import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Optional;
import java.util.Properties;
import org.apache.kafka.common.config.types.Password;
public class InternalUtils { // authenticator type public static final String SNOWFLAKE_JWT = "snowflake_jwt"; // JDBC parameter list static final String JDBC_DATABASE = "db"; static final String JDBC_SCHEMA = "schema"; static final String JDBC_USER = "user"; static final String JDBC_PRIVATE_KEY = "privateKey"; static final String JDBC_SSL = "ssl"; static final String JDBC_SESSION_KEEP_ALIVE = "client_session_keep_alive"; static final String JDBC_WAREHOUSE = "warehouse"; // for test only static final String JDBC_TOKEN = JdbcPropertyKeys.TOKEN; static final String JDBC_QUERY_RESULT_FORMAT = "JDBC_QUERY_RESULT_FORMAT"; // internal parameters private static final KCLogger LOGGER = new KCLogger(InternalUtils.class.getName()); private static final DateTimeFormatter ISO_DATE_TIME_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").withZone(ZoneOffset.UTC); /** * count the size of result set * * @param resultSet sql result set * @return size * @throws SQLException when failed to read result set */ static int resultSize(ResultSet resultSet) throws SQLException { int size = 0; while (resultSet.next()) { size++; } return size; } static void assertNotEmpty(String name, Object value) { if (value == null || (value instanceof String && value.toString().isEmpty())) { switch (name.toLowerCase()) { case "tablename": throw SnowflakeErrors.ERROR_0005.getException(); case "pipename": throw SnowflakeErrors.ERROR_0006.getException(); case "conf": throw SnowflakeErrors.ERROR_0001.getException(); default: throw SnowflakeErrors.ERROR_0003.getException("parameter name: " + name); } } } /** * convert a timestamp to Date String * * @param time a long integer representing timestamp * @return date string */ static String timestampToDate(long time) { String date = ISO_DATE_TIME_FORMAT.format(Instant.ofEpochMilli(time)); LOGGER.debug("converted date: {}", date); return date; } /** * Build JDBC driver properties from a parsed {@link SinkTaskConfig}. 
* * @param config parsed sink task configuration * @param url target server url * @return a Properties instance ready for JDBC */ static Properties makeJdbcDriverProperties(SinkTaskConfig config, SnowflakeURL url) { Properties properties = new Properties(); putIfNotBlank(properties, JDBC_DATABASE, config.getSnowflakeDatabase()); putIfNotBlank(properties, JDBC_SCHEMA, config.getSnowflakeSchema()); putIfNotBlank(properties, JDBC_USER, config.getSnowflakeUser()); putIfNotBlank(properties, JdbcPropertyKeys.ROLE, config.getSnowflakeRole()); properties.put(JdbcPropertyKeys.AUTHENTICATOR, SNOWFLAKE_JWT); String privateKey = Optional.ofNullable(config.getSnowflakePrivateKey()).map(Password::value).orElse(null); if (isBlank(privateKey)) { throw SnowflakeErrors.ERROR_0013.getException(); } String privateKeyPassphrase = Optional.ofNullable(config.getSnowflakePrivateKeyPassphrase()) .map(Password::value) .orElse(null); properties.put( JDBC_PRIVATE_KEY, PrivateKeyTool.parsePrivateKey(privateKey, privateKeyPassphrase)); properties.put(JDBC_SSL, url.sslEnabled() ? "on" : "off"); // put values for optional parameters properties.put(JDBC_SESSION_KEEP_ALIVE, "true"); // SNOW-989387 - Set query resultset format to JSON as a workaround properties.put(JDBC_QUERY_RESULT_FORMAT, "json"); properties.put(JdbcPropertyKeys.ALLOW_UNDERSCORES_IN_HOST, "true"); if (!properties.containsKey(JDBC_SCHEMA)) { throw SnowflakeErrors.ERROR_0014.getException(); } if (!properties.containsKey(JDBC_DATABASE)) { throw SnowflakeErrors.ERROR_0015.getException(); } if (!properties.containsKey(JDBC_USER)) { throw SnowflakeErrors.ERROR_0016.getException(); } return properties; } private static void putIfNotBlank(Properties properties, String key, String value) { if (!isBlank(value)) { properties.put(key, value); } } /** * Helper method to decide whether to add any properties related to proxy server. 
These property
   * is passed on to snowflake JDBC while calling put API, which requires proxyProperties
   *
   * @param config parsed connector configuration
   * @return proxy parameters if needed
   */
  protected static Properties generateProxyParametersIfRequired(SinkTaskConfig config) {
    // Stays empty unless both proxy host and proxy port are configured.
    Properties properties = new Properties();
    // Set proxyHost and proxyPort only if both of them are present and are non null
    if (config.getProxyHost() != null && config.getProxyPort() != null) {
      properties.put(JdbcPropertyKeys.USE_PROXY, "true");
      properties.put(JdbcPropertyKeys.PROXY_HOST, config.getProxyHost());
      properties.put(JdbcPropertyKeys.PROXY_PORT, config.getProxyPort());

      // nonProxyHosts parameter is not required. Check if it was set or not.
      if (config.getNonProxyHosts() != null) {
        properties.put(JdbcPropertyKeys.NON_PROXY_HOSTS, config.getNonProxyHosts());
      }
      // For username and password, check if host and port are given.
      // If they are given, check if username and password are non null
      // (credentials are only added as a pair; a lone username or password is ignored)
      if (config.getProxyUsername() != null && config.getProxyPassword() != null) {
        properties.put(JdbcPropertyKeys.PROXY_USER, config.getProxyUsername());
        properties.put(JdbcPropertyKeys.PROXY_PASSWORD, config.getProxyPassword());
      }
    }
    return properties;
  }

  /**
   * Converts the connector's JDBC map setting into {@link Properties}.
   *
   * @param config parsed connector configuration
   * @return the key/value pairs produced by {@code Utils.parseCommaSeparatedKeyValuePairs}, or
   *     empty properties when no JDBC map is configured
   */
  protected static Properties parseJdbcPropertiesMap(SinkTaskConfig config) {
    // No JDBC map configured: nothing to parse.
    if (config.getJdbcMap() == null) {
      return new Properties();
    }
    Properties properties = new Properties();
    properties.putAll(Utils.parseCommaSeparatedKeyValuePairs(config.getJdbcMap()));
    return properties;
  }

  /** Interfaces to define the lambda function to be used by backoffAndRetry */
  public interface backoffFunction {
    // May throw any checked exception.
    Object apply() throws Exception;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/JdbcProperties.java
================================================
package com.snowflake.kafka.connector.internal;

import java.util.Properties;

/** Wrapper class for all snowflake jdbc properties */
public class JdbcProperties
{ /** All jdbc properties including proxyProperties */ private final Properties properties; /** Proxy related properties */ private final Properties proxyProperties; private JdbcProperties(Properties combinedProperties, Properties proxyProperties) { this.properties = combinedProperties; this.proxyProperties = proxyProperties; } public Properties getProperties() { return properties; } public Properties getProxyProperties() { return proxyProperties; } /** * Combine all jdbc related properties. Throws error if jdbcPropertiesMap overrides any property * defined in connectionProperties or proxyProperties. * * @param connectionProperties snowflake.database.name, snowflake.schema,name, * snowflake.private.key etc. * @param proxyProperties jvm.proxy.xxx * @param jdbcPropertiesMap snowflake.jdbc.map */ static JdbcProperties create( Properties connectionProperties, Properties proxyProperties, Properties jdbcPropertiesMap) { InternalUtils.assertNotEmpty("connectionProperties", connectionProperties); proxyProperties = setEmptyIfNull(proxyProperties); jdbcPropertiesMap = setEmptyIfNull(jdbcPropertiesMap); Properties proxyAndConnection = mergeProperties(connectionProperties, proxyProperties); detectOverrides(proxyAndConnection, jdbcPropertiesMap); Properties combinedProperties = mergeProperties(proxyAndConnection, jdbcPropertiesMap); return new JdbcProperties(combinedProperties, proxyProperties); } private static void detectOverrides(Properties proxyAndConnection, Properties jdbcPropertiesMap) { jdbcPropertiesMap.forEach( (k, v) -> { if (proxyAndConnection.containsKey(k)) { throw SnowflakeErrors.ERROR_0031.getException("Duplicated property: " + k); } }); } private static Properties mergeProperties( Properties connectionProperties, Properties proxyProperties) { Properties mergedProperties = new Properties(); mergedProperties.putAll(connectionProperties); mergedProperties.putAll(proxyProperties); return mergedProperties; } /** Parsing methods does not return null. 
However, It's better to be perfectly sure. */ private static Properties setEmptyIfNull(Properties properties) { if (properties != null) { return properties; } return new Properties(); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/JdbcPropertyKeys.java ================================================ package com.snowflake.kafka.connector.internal; /** * Snowflake JDBC connection property key names. These match the official JDBC driver connection * parameters (see Snowflake JDBC documentation). Used instead of internal SFSessionProperty to * remain compatible with JDBC 4.x public API. */ public final class JdbcPropertyKeys { private JdbcPropertyKeys() {} public static final String AUTHENTICATOR = "authenticator"; public static final String TOKEN = "token"; public static final String ROLE = "role"; public static final String ALLOW_UNDERSCORES_IN_HOST = "allowUnderscoresInHost"; public static final String USE_PROXY = "useProxy"; public static final String PROXY_HOST = "proxyHost"; public static final String PROXY_PORT = "proxyPort"; public static final String PROXY_USER = "proxyUser"; public static final String PROXY_PASSWORD = "proxyPassword"; public static final String NON_PROXY_HOSTS = "nonProxyHosts"; } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/KCLogger.java ================================================ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; /** Logger for Snowflake Sink Connector. 
Attaches MDC's connector context if available */ public class KCLogger { public static final String MDC_CONN_CTX_KEY = "connector.context"; private static boolean prependMdcContext; private static final Logger META_LOGGER = LoggerFactory.getLogger(KCLogger.class.getName()); private Logger logger; /** * Enable or disables the MDC context. Only available for apache kafka versions after 2.3.0. * https://cwiki.apache.org/confluence/display/KAFKA/KIP-449%3A+Add+connector+contexts+to+Connect+worker+logs * * @param shouldPrependMdcContext If all KC loggers should enable or disable MDC context */ public static void toggleGlobalMdcLoggingContext(boolean shouldPrependMdcContext) { prependMdcContext = shouldPrependMdcContext; META_LOGGER.debug( "Setting MDC context enablement to: {}. MDC context is only available for Apache Kafka" + " versions after 2.3.0", shouldPrependMdcContext); } /** * Create and return a new logging handler * * @param name The class name passed for initializing the logger */ public KCLogger(String name) { this.logger = LoggerFactory.getLogger(name); } /** * Logs an info level message * * @param format The message format without variables * @param vars The variables to insert into the format. These variables will be toString()'ed */ public void info(String format, Object... vars) { if (this.logger.isInfoEnabled()) { this.logger.info(this.getFormattedLogMessage(format, vars)); } } public boolean isInfoEnabled() { return logger.isInfoEnabled(); } /** * Logs an trace level message * * @param format The message format without variables * @param vars The variables to insert into the format. These variables will be toString()'ed */ public void trace(String format, Object... vars) { if (this.logger.isTraceEnabled()) { this.logger.trace(this.getFormattedLogMessage(format, vars)); } } /** * Logs an debug level message * * @param format The message format without variables * @param vars The variables to insert into the format. 
These variables will be toString()'ed */ public void debug(String format, Object... vars) { if (this.logger.isDebugEnabled()) { this.logger.debug(this.getFormattedLogMessage(format, vars)); } } /** * Logs an warn level message * * @param format The message format without variables * @param vars The variables to insert into the format. These variables will be toString()'ed */ public void warn(String format, Object... vars) { if (this.logger.isWarnEnabled()) { this.logger.warn(this.getFormattedLogMessage(format, vars)); } } /** * Logs an error level message * * @param format The message format without variables * @param vars The variables to insert into the format. These variables will be toString()'ed */ public void error(String format, Object... vars) { if (this.logger.isErrorEnabled()) { this.logger.error(this.getFormattedLogMessage(format, vars)); } } public void error(String s, Throwable throwable) { if (this.logger.isErrorEnabled()) { logger.error(s, throwable); } } public boolean isDebugEnabled() { return logger.isDebugEnabled(); } public boolean isTraceEnabled() { return logger.isTraceEnabled(); } private String getFormattedLogMessage(String format, Object... 
vars) { if (prependMdcContext) { String connCtx = MDC.get(MDC_CONN_CTX_KEY); return Utils.formatLogMessage(connCtx + format, vars); } return Utils.formatLogMessage(format, vars); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/PrivateKeyTool.java ================================================ package com.snowflake.kafka.connector.internal; import static org.apache.commons.lang3.StringUtils.isBlank; import java.io.StringReader; import java.security.KeyFactory; import java.security.PrivateKey; import java.security.Security; import java.security.spec.PKCS8EncodedKeySpec; import java.util.Base64; import org.bouncycastle.asn1.pkcs.PrivateKeyInfo; import org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider; import org.bouncycastle.openssl.PEMParser; import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter; import org.bouncycastle.openssl.jcajce.JceOpenSSLPKCS8DecryptorProviderBuilder; import org.bouncycastle.operator.InputDecryptorProvider; import org.bouncycastle.pkcs.PKCS8EncryptedPrivateKeyInfo; public final class PrivateKeyTool { private static final KCLogger LOGGER = new KCLogger(PrivateKeyTool.class.getName()); public static PrivateKey parsePrivateKey(String privateKeyStr, String privateKeyPassword) { if (isBlank(privateKeyPassword)) { return parseNonEncryptedPrivateKey(privateKeyStr); } else { return parseEncryptedPrivateKey(privateKeyStr, privateKeyPassword); } } private static PrivateKey parseNonEncryptedPrivateKey(String key) { LOGGER.info("Not using passphrase for private key, not specified"); // remove header, footer, and line breaks key = key.replaceAll("-+[A-Za-z ]+-+", ""); key = key.replaceAll("\\s", ""); byte[] encoded; try { encoded = Base64.getDecoder().decode(key); } catch (IllegalArgumentException e) { throw SnowflakeErrors.ERROR_0002.getException(e); } try { KeyFactory kf = KeyFactory.getInstance("RSA"); PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); return 
kf.generatePrivate(keySpec); } catch (Exception e) { throw SnowflakeErrors.ERROR_0002.getException(e); } } private static PrivateKey parseEncryptedPrivateKey(String key, String passphrase) { LOGGER.info("Using passphrase for private key"); // remove header, footer, and line breaks key = key.replaceAll("-+[A-Za-z ]+-+", ""); key = key.replaceAll("\\s", ""); StringBuilder builder = new StringBuilder(); builder.append("-----BEGIN ENCRYPTED PRIVATE KEY-----"); for (int i = 0; i < key.length(); i++) { if (i % 64 == 0) { builder.append("\n"); } builder.append(key.charAt(i)); } builder.append("\n-----END ENCRYPTED PRIVATE KEY-----"); key = builder.toString(); Security.addProvider(new BouncyCastleFipsProvider()); try { PEMParser pemParser = new PEMParser(new StringReader(key)); PKCS8EncryptedPrivateKeyInfo encryptedPrivateKeyInfo = (PKCS8EncryptedPrivateKeyInfo) pemParser.readObject(); pemParser.close(); InputDecryptorProvider pkcs8Prov = new JceOpenSSLPKCS8DecryptorProviderBuilder().build(passphrase.toCharArray()); JcaPEMKeyConverter converter = new JcaPEMKeyConverter().setProvider(BouncyCastleFipsProvider.PROVIDER_NAME); PrivateKeyInfo decryptedPrivateKeyInfo = encryptedPrivateKeyInfo.decryptPrivateKeyInfo(pkcs8Prov); return converter.getPrivateKey(decryptedPrivateKeyInfo); } catch (Exception e) { throw SnowflakeErrors.ERROR_0018.getException(e); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeConnectionService.java ================================================ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import java.sql.Connection; import java.util.List; import java.util.Map; import java.util.Optional; public interface SnowflakeConnectionService 
{ /** * check table existence * * @param tableName table name * @return true if table exists, false otherwise */ boolean tableExist(String tableName); /** * check pipe existence * * @param pipeName pipe name * @return true if pipe exists, false otherwise */ boolean pipeExist(String pipeName); /** * Check the given table has correct schema correct schema: (record_metadata variant) * * @param tableName table name * @return true if schema is correct, false is schema is incorrect or table does not exist */ boolean isTableCompatible(String tableName); /** * check if a given database exists * * @param databaseName database name */ void databaseExists(String databaseName); /** * check if a given schema exists * * @param schemaName schema name */ void schemaExists(String schemaName); /** * @return telemetry client */ SnowflakeTelemetryService getTelemetryClient(); /** Close Connection */ void close(); /** * @return true is connection is closed */ boolean isClosed(); /** * @return name of Kafka Connector instance */ String getConnectorName(); /** * @return the raw jdbc connection */ Connection getConnection(); /** * Create a table with only the RECORD_METADATA column. The rest of the columns might be added * through schema evolution * *

In the beginning of the function we will check if we have the permission to do schema * evolution, and we will error out if we don't * * @param tableName table name */ void createTableWithOnlyMetadataColumn(String tableName); /** * Calls describe table statement and returns all columns and corresponding types. * * @param tableName - table name * @return Optional.empty() if table does not exist. List of all table columns and their types * otherwise. */ Optional> describeTable(String tableName); /** * execute sql query * * @param query sql query string * @param parameters query parameters */ void executeQueryWithParameters(String query, String... parameters); /** * Add columns to an existing table via ALTER TABLE ... ADD COLUMN IF NOT EXISTS. * * @param tableName table name * @param columnInfosMap map of column name to ColumnInfos (type + comment) */ void appendColumnsToTable(String tableName, Map columnInfosMap); /** * Drop NOT NULL constraints on columns via ALTER TABLE ... ALTER ... DROP NOT NULL. * * @param tableName table name * @param columnNames list of column names to make nullable */ void alterNonNullableColumns(String tableName, List columnNames); /** * Check whether the user has the role privilege to do schema evolution and whether the schema * evolution option is enabled on the table. * * @param tableName table name * @param role the role of the user * @return whether schema evolution has the required permission to be performed */ boolean shouldEvolveSchema(String tableName, String role); /** * Check whether the given table is an iceberg table. * * @param tableName table name * @return true if the table is an iceberg table, false otherwise */ boolean isIcebergTable(String tableName); /** * Check whether the given table has ERROR_LOGGING enabled via SHOW TABLES. 
* * @param tableName table name * @return true if error_logging is "Y", false otherwise or if the column is not present */ boolean hasErrorLoggingEnabled(String tableName); /** * Calls SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET to migrate the committed offset from an SSv1 channel * to an SSv2 channel. The system function reads the SSv1 offset and writes it directly to the * SSv2 channel in FDB. * * @param tableName unqualified table name (the JDBC session's database/schema are used) * @param ssv1ChannelName SSv1 channel name ({topic}_{partition} or * {connectorName}_{topic}_{partition}) * @param ssv2ChannelName SSv2 channel name ({connectorName}_{topic}_{partition}) * @param pipeName SSv2 pipe name * @return the parsed {@link Ssv1MigrationResponse} indicating whether the channel was found and * (if so) the migrated offset value * @throws RuntimeException if the system function call fails (SQL error, unexpected response) */ Ssv1MigrationResponse migrateSsv1ChannelOffset( String tableName, String ssv1ChannelName, String ssv2ChannelName, String pipeName); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeConnectionServiceFactory.java ================================================ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.config.SinkTaskConfig; import java.util.Map; import java.util.Properties; public class SnowflakeConnectionServiceFactory { public static SnowflakeConnectionServiceBuilder builder() { return new SnowflakeConnectionServiceBuilder(); } public static class SnowflakeConnectionServiceBuilder { private JdbcProperties jdbcProperties; private SnowflakeURL url; private String connectorName; private String taskID = "-1"; private CachingConfig cachingConfig; // For testing only public Properties getProperties() { return this.jdbcProperties.getProperties(); } public SnowflakeConnectionServiceBuilder setTaskID(String taskID) { this.taskID = taskID; return this; 
} public SnowflakeConnectionServiceBuilder setProperties(Map conf) { return setProperties(SinkTaskConfig.from(conf, true)); } public SnowflakeConnectionServiceBuilder setProperties(SinkTaskConfig parsedConfig) { if (parsedConfig.getSnowflakeUrl() == null || parsedConfig.getSnowflakeUrl().isEmpty()) { throw SnowflakeErrors.ERROR_0017.getException(); } this.url = new SnowflakeURL(parsedConfig.getSnowflakeUrl()); this.connectorName = parsedConfig.getConnectorName(); this.cachingConfig = parsedConfig.getCachingConfig(); Properties connectionProperties = InternalUtils.makeJdbcDriverProperties(parsedConfig, this.url); Properties proxyProperties = InternalUtils.generateProxyParametersIfRequired(parsedConfig); Properties jdbcPropertiesMap = InternalUtils.parseJdbcPropertiesMap(parsedConfig); this.jdbcProperties = JdbcProperties.create(connectionProperties, proxyProperties, jdbcPropertiesMap); return this; } public SnowflakeConnectionService build() { InternalUtils.assertNotEmpty("jdbcProperties", jdbcProperties); InternalUtils.assertNotEmpty("url", url); InternalUtils.assertNotEmpty("connectorName", connectorName); SnowflakeConnectionService baseService = new StandardSnowflakeConnectionService(jdbcProperties, url, connectorName, taskID); return new CachingSnowflakeConnectionService(baseService, cachingConfig); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeErrors.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; public enum SnowflakeErrors { // connector configuration issues 0--- ERROR_0001( "0001", "Invalid input connector configuration", "input kafka connector configuration is null, missing required values, " + "or is invalid. Check logs for list of invalid parameters."), ERROR_0002("0002", "Invalid private key", "private key should be a valid PEM RSA private key"), ERROR_0003( "0003", "Missing required parameter", "one or multiple required parameters haven't be provided"), ERROR_0005("0005", "Empty Table name", "Input Table name is empty string or null"), ERROR_0006("0006", "Empty Pipe name", "Input Pipe name is empty String or null"), ERROR_0007( "0007", "Invalid Snowflake URL", "Snowflake URL format: 'https://." 
+ ".snowflakecomputing.com:443', 'https://' and ':443' are optional."), ERROR_0013( "0013", "Missed private key in connector config", "private key must be provided with " + KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY + " parameter"), ERROR_0014( "0014", "Missed snowflake schema name in connector config", "snowflake schema name must be provided with " + KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME + " " + "parameter"), ERROR_0015( "0015", "Missed snowflake database name in connector config ", "snowflake database name must be provided with " + KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME + " " + "parameter"), ERROR_0016( "0016", "Missed snowflake user name in connector config ", "snowflake user name must be provided with " + KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME + " parameter"), ERROR_0017( "0017", "Missed snowflake url in connector config ", "snowflake URL must be provided with " + KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME + " parameter, e.g. 'accountname.snoflakecomputing.com'"), ERROR_0018( "0018", "Invalid encrypted private key or passphrase", "failed to decrypt private key. Please verify input private key and passphrase. 
Snowflake" + " Kafka Connector only supports encryption algorithms in FIPS 140-2"), ERROR_0020("0020", "Invalid topic name", "Topic name is empty String or null"), ERROR_0021("0021", "Invalid topic2table map", "Failed to parse topic2table map"), ERROR_0022( "0022", "Invalid proxy host or port", "Both host and port need to be provided if one of them is provided"), ERROR_0023( "0023", "Invalid proxy username or password", "Both username and password need to be provided if one of them is provided"), ERROR_0030( "0030", String.format( "Invalid %s map", KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP), String.format( "Failed to parse %s map", KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP)), ERROR_0031( "0031", "Failed to combine JDBC properties", "One of snowflake.jdbc.map property overrides other jdbc property"), ERROR_1001( "1001", "Failed to connect to Snowflake Server", "Snowflake connection issue, reported by Snowflake JDBC"), ERROR_1003( "1003", "Snowflake connection is closed", "Either the current connection is closed or hasn't connect to snowflake" + " server"), ERROR_1005( "1005", "Task failed due to authorization error", "Set `enable.task.fail.on.authorization.errors=false` to avoid this behavior"), // SQL issues 2--- ERROR_2001( "2001", "Failed to prepare SQL statement", "SQL Exception, reported by Snowflake JDBC"), ERROR_2005("2005", "Failed to close connection", "Failed to close snowflake JDBC connection"), ERROR_2006( "2006", "Failed to connection status", "Failed to retrieve Snowflake JDBC connection Status"), ERROR_2007( "2007", "Failed to create table", "Failed to create table on Snowflake, please check that you have permission to do so."), ERROR_2015( "2015", "Failed to append columns", "Failed to append columns during schema evolution"), ERROR_2016("2016", "Failed to drop NOT NULL", "Failed to drop NOT NULL during schema evolution"), ERROR_5007( "5007", "SnowflakeStreamingSinkConnector 
timeout", "SnowflakeStreamingSinkConnector timed out. Tables or stages are not yet " + "available for data ingestion to start. If this persists, please " + "contact Snowflake support."), ERROR_5010( "5010", "Connection is null or closed", "Connection is closed or null when starting sink service"), ERROR_5013( "5013", "Failed to initialize SinkTask", "SinkTask hasn't been started before calling OPEN function"), ERROR_5014( "5014", "Failed to put records", "SinkTask hasn't been initialized before calling PUT function"), ERROR_5015( "5015", "Invalid SinkRecord received", "Error parsing SinkRecord value or SinkRecord header"), ERROR_5020("5020", "Failed to register MBean in MbeanServer", "Object Name is invalid"), ERROR_5021( "5021", "Failed to get data schema", "Failed to get data schema. Unrecognizable data type in JSON object"), ERROR_5022("5022", "Invalid column name", "Failed to find column in the schema"), ERROR_5027( "5027", "Data verification failed", "Connector couldn't verify that all data was committed to Snowflake. Stopping to avoid data" + " loss."), ERROR_5028( "5028", "Failed to open Snowpipe Streaming v2 channel", "Failed to open Snowpipe Streaming v2 channel"), ERROR_5030( "5030", "Channel error count threshold exceeded", "Channel has reported errors during data ingestion. Check the channel history for details."), ERROR_0032( "0032", "Non-default pipe not supported with client-side validation", "Client-side validation only supports default pipes ({table}-STREAMING). 
Either disable" + " client-side validation (snowflake.validation=server_side) or drop the" + " existing pipe so the connector uses the default pipe."); // properties private final String name; private final String detail; private final String code; SnowflakeErrors(String code, String name, String detail) { this.code = code; this.name = name; this.detail = detail; } public SnowflakeKafkaConnectorException getException() { return getException("", null); } public SnowflakeKafkaConnectorException getException(String msg) { return getException(msg, null); } public SnowflakeKafkaConnectorException getException(Exception e) { return getException(e, null); } public SnowflakeKafkaConnectorException getException( Exception e, SnowflakeTelemetryService telemetryService) { StringBuilder str = new StringBuilder(); str.append(e.getMessage()); for (StackTraceElement element : e.getStackTrace()) { str.append("\n").append(element.toString()); } return getException(str.toString(), telemetryService); } public SnowflakeKafkaConnectorException getException(SnowflakeTelemetryService telemetryService) { return getException("", telemetryService); } /** * Convert a given message into SnowflakeKafkaConnectorException. * *

If message is null, we use Enum's toString() method to wrap inside * SnowflakeKafkaConnectorException * * @param msg Message to send to Telemetry Service. Remember, we Strip the message * @param telemetryService can be null * @return Exception wrapped in Snowflake Connector Exception */ public SnowflakeKafkaConnectorException getException( String msg, SnowflakeTelemetryService telemetryService) { if (telemetryService != null) { telemetryService.reportKafkaConnectFatalError( getCode() + msg.substring(0, Math.min(msg.length(), 500))); } if (msg == null || msg.isEmpty()) { return new SnowflakeKafkaConnectorException(toString(), code); } else { return new SnowflakeKafkaConnectorException( Utils.formatLogMessage( "Exception: {}\nError Code: {}\nDetail: {}\nMessage: {}", name, code, detail, msg), code); } } public String getCode() { return code; } public String getDetail() { return this.detail; } @Override public String toString() { return Utils.formatLogMessage("Exception: {}\nError Code: {}\nDetail: {}", name, code, detail); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeKafkaConnectorException.java ================================================ package com.snowflake.kafka.connector.internal; public class SnowflakeKafkaConnectorException extends RuntimeException { private final String code; public SnowflakeKafkaConnectorException(String msg, String code) { super(msg); this.code = code; } public String getCode() { return code; } public boolean checkErrorCode(SnowflakeErrors error) { return this.code.equals(error.getCode()); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeSinkService.java ================================================ package com.snowflake.kafka.connector.internal; import com.codahale.metrics.MetricRegistry; import com.google.common.annotations.VisibleForTesting; import 
com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel; import java.util.Collection; import java.util.Map; import java.util.Optional; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.SinkRecord; /** Background service of data sink, responsible to create/drop pipe and ingest/purge files */ public interface SnowflakeSinkService { /** * Start the Task. This should handle any configuration parsing and one-time setup of the task. * * @param topicPartition TopicPartition passed from Kafka */ void startPartition(TopicPartition topicPartition); /** * Start a collection of TopicPartition. This should handle any configuration parsing and one-time * setup of the task. * * @param partitions collection of topic partitions */ void startPartitions(Collection partitions); /** * call pipe to insert a collections of JSON records will trigger time based flush * * @param records record content */ void insert(final Collection records); /** * call pipe to insert a JSON record will not trigger time based flush * * @param record record content * @return true if the record was processed successfully, false if recovery was triggered and the * caller should stop feeding records to this partition for the remainder of the batch */ boolean insert(final SinkRecord record); /** * retrieve offset of last loaded record for given pipe name * * @param topicPartition topic and partition * @return offset, or -1 for empty */ long getOffset(TopicPartition topicPartition); /** * Fetches committed offsets for all given partitions using the SDK's batch channel-status API. * Makes at most one network call per SDK client (i.e. per topic/pipe), regardless of the number * of partitions. 
* * @param partitions the partitions to query * @return map of TopicPartition to the offset safe to commit to Kafka (committed + 1), only * containing entries where a valid offset was found */ Map getCommittedOffsets(Collection partitions); /** * get the number of partitions assigned to this sink service * * @return number of partitions */ int getPartitionCount(); /** terminate all tasks and close this service instance */ void closeAll(); /** * terminate given topic partitions * * @param partitions a list of topic partition */ void close(Collection partitions); /** * close all cleaner thread but have no effect on sink service context * *

Note that calling this method does not perform synchronous cleanup in Snowpipe based * implementation */ void stop(); /** * retrieve sink service status * * @return true is closed */ boolean isClosed(); Map getPartitionChannels(); /** Blocks until all partition channels have finished initialization. No-op by default. */ default void awaitInitialization() {} /* Get metric registry of an associated partition */ @VisibleForTesting Optional getMetricRegistry(final String partitionIdentifier); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeURL.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.snowflake.kafka.connector.internal; import java.util.regex.Matcher; import java.util.regex.Pattern; /** Snowflake URL Object https://account.region.snowflakecomputing.com:443 */ public class SnowflakeURL implements URL { private final KCLogger LOGGER = new KCLogger(SnowflakeURL.class.getName()); private String jdbcUrl; private final String url; private final boolean ssl; private final String account; private final int port; /** * There are several matching groups here. Matching groups numbers are identified as the opening * braces start and are indexed from number 1. * *

Group 1: If https is present or not. (Not required) * *

Group 2: Is the entire URL including the port number * *

Group 3: URL until .com * *

Group 4: Account name (may include org-account/alias) * *

Group 5: (Everything after accountname or org-accountname until .com) * *

Group 7: port number */ private static final String SNOWFLAKE_URL_REGEX_PATTERN = "^(https?://)?((([\\w\\d-]+)(\\.[\\w\\d-]+){2,})(:(\\d+))?)/?$"; public SnowflakeURL(String urlStr) { Pattern pattern = Pattern.compile(SNOWFLAKE_URL_REGEX_PATTERN); Matcher matcher = pattern.matcher(urlStr.trim().toLowerCase()); if (!matcher.find()) { throw SnowflakeErrors.ERROR_0007.getException("input url: " + urlStr); } ssl = !"http://".equals(matcher.group(1)); url = matcher.group(3); account = matcher.group(4); if (matcher.group(7) != null) { port = Integer.parseInt(matcher.group(7)); } else if (ssl) { port = 443; } else { port = 80; } jdbcUrl = "jdbc:snowflake://" + url + ":" + port; LOGGER.debug("parsed Snowflake URL: {}", urlStr); } String getJdbcUrl() { return jdbcUrl; } public String getAccount() { return account; } public boolean sslEnabled() { return ssl; } public String getScheme() { if (ssl) { return "https"; } else { return "http"; } } String getFullUrl() { return url + ":" + port; } public String getUrlWithoutPort() { return url; } int getPort() { return port; } @Override public String toString() { return getFullUrl(); } @Override public String hostWithPort() { return getFullUrl(); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/StandardSnowflakeConnectionService.java ================================================ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.Utils.TABLE_COLUMN_METADATA; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryServiceFactory; import java.sql.Connection; import java.sql.PreparedStatement; import 
java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Properties; import net.snowflake.client.api.driver.SnowflakeDriver; /** * Implementation of Snowflake Connection Service interface which includes all handshake between KC * and SF through JDBC connection. */ public class StandardSnowflakeConnectionService implements SnowflakeConnectionService { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final String COLUMN_COMMENT = "created by automatic table creation from Snowflake Kafka Connector High Performance"; private static final String SHOW_ICEBERG_TABLES_QUERY = "show iceberg tables like ? limit 1"; private final KCLogger LOGGER = new KCLogger(StandardSnowflakeConnectionService.class.getName()); private final Connection conn; private final SnowflakeTelemetryService telemetry; private final String connectorName; private final String taskID; StandardSnowflakeConnectionService( JdbcProperties jdbcProperties, SnowflakeURL url, String connectorName, String taskID) { this.connectorName = connectorName; this.taskID = taskID; Properties proxyProperties = jdbcProperties.getProxyProperties(); Properties combinedProperties = jdbcProperties.getProperties(); try { if (!proxyProperties.isEmpty()) { LOGGER.debug("Proxy properties are set, passing in JDBC while creating the connection"); } else { LOGGER.info("Establishing a JDBC connection with url:{}", url.getJdbcUrl()); } this.conn = new SnowflakeDriver().connect(url.getJdbcUrl(), combinedProperties); } catch (SQLException e) { throw SnowflakeErrors.ERROR_1001.getException(e); } this.telemetry = SnowflakeTelemetryServiceFactory.builder(conn) .setAppName(this.connectorName) .setTaskID(this.taskID) .build(); LOGGER.info("initialized the snowflake connection"); } @Override public void createTableWithOnlyMetadataColumn(final String 
tableName) { checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); String createTableQuery = "create table if not exists identifier(?) (record_metadata variant comment '" + COLUMN_COMMENT + "') enable_schema_evolution = true error_logging = true"; try { PreparedStatement stmt = conn.prepareStatement(createTableQuery); stmt.setString(1, quoteIdentifier(tableName)); stmt.execute(); stmt.close(); } catch (SQLException e) { // Snowflake rejects CREATE TABLE IF NOT EXISTS when the name is already taken by an // ICEBERG TABLE (cross-type conflict is not suppressed by IF NOT EXISTS). KCv4 only // supports pre-created Iceberg tables; error_logging is not available for them. // We match on the error message text because Snowflake does not provide a stable SQL // error code that distinguishes this cross-type conflict from other CREATE TABLE errors. if (e.getMessage() != null && e.getMessage().contains("already exists as ICEBERG_TABLE")) { LOGGER.warn( "Table '{}' is a pre-created Iceberg table. Skipping auto-creation." 
+ " Error table functionality is not available for Iceberg tables.", tableName); return; } throw SnowflakeErrors.ERROR_2007.getException(e); } LOGGER.info( "Created table {} with RECORD_METADATA column and ERROR_LOGGING enabled", tableName); } @Override public boolean tableExist(final String tableName) { return describeTable(tableName).isPresent(); } @Override public boolean pipeExist(final String pipeName) { LOGGER.info("Calling DESCRIBE PIPE {}", pipeName); checkConnection(); InternalUtils.assertNotEmpty("pipeName", pipeName); String query = "desc pipe identifier(?)"; PreparedStatement stmt = null; boolean exist; try { stmt = conn.prepareStatement(query); stmt.setString(1, pipeName); stmt.execute(); exist = true; } catch (SQLException e) { LOGGER.debug("pipe {} doesn't exist", pipeName); exist = false; } finally { if (stmt != null) { try { stmt.close(); } catch (SQLException e) { e.printStackTrace(); } } } return exist; } @Override public boolean isTableCompatible(final String tableName) { checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); String query = "desc table identifier(?)"; PreparedStatement stmt = null; ResultSet result = null; boolean compatible; try { stmt = conn.prepareStatement(query); stmt.setString(1, quoteIdentifier(tableName)); result = stmt.executeQuery(); boolean hasMeta = false; boolean allNullable = true; while (result.next()) { switch (result.getString(1)) { case TABLE_COLUMN_METADATA: if (result.getString(2).equals("VARIANT")) { hasMeta = true; } break; default: if (result.getString(4).equals("N")) { allNullable = false; } } } compatible = hasMeta && allNullable; } catch (SQLException e) { LOGGER.debug("Table {} doesn't exist. 
Exception {}", tableName, e.getStackTrace()); compatible = false; } finally { try { if (result != null) { result.close(); } } catch (Exception e) { e.printStackTrace(); } try { if (stmt != null) { stmt.close(); } } catch (Exception e) { e.printStackTrace(); } } LOGGER.info("Table {} compatibility is {}", tableName, compatible); return compatible; } @Override public void databaseExists(String databaseName) { checkConnection(); String query = "use database identifier(?)"; try { PreparedStatement stmt = conn.prepareStatement(query); stmt.setString(1, databaseName); stmt.execute(); stmt.close(); } catch (SQLException e) { throw SnowflakeErrors.ERROR_2001.getException(e); } LOGGER.info("database {} exists", databaseName); } @Override public void schemaExists(String schemaName) { checkConnection(); String query = "use schema identifier(?)"; boolean foundSchema = false; try { PreparedStatement stmt = conn.prepareStatement(query); stmt.setString(1, schemaName); stmt.execute(); stmt.close(); } catch (SQLException e) { throw SnowflakeErrors.ERROR_2001.getException(e); } LOGGER.info("schema {} exists", schemaName); } @Override public SnowflakeTelemetryService getTelemetryClient() { return this.telemetry; } @Override public void close() { try { conn.close(); } catch (SQLException e) { throw SnowflakeErrors.ERROR_2005.getException(e, this.telemetry); } LOGGER.info("snowflake connection closed"); } @Override public boolean isClosed() { try { return conn.isClosed(); } catch (SQLException e) { throw SnowflakeErrors.ERROR_2006.getException(e, this.telemetry); } } @Override public String getConnectorName() { return this.connectorName; } /** make sure connection is not closed */ private void checkConnection() { try { if (conn.isClosed()) { throw SnowflakeErrors.ERROR_1003.getException(); } } catch (SQLException e) { throw SnowflakeErrors.ERROR_1003.getException(e, this.telemetry); } } /** * generate pipe definition * * @param tableName table name * @param stageName stage name * 
@return pipe definition string */ private String pipeDefinition(String tableName, String stageName) { return "copy into " + tableName + "(RECORD_METADATA, RECORD_CONTENT) from (select $1:meta, $1:content from" + " @" + stageName + " t) file_format = (type = 'json')"; } @Override public Connection getConnection() { return this.conn; } @Override public Optional> describeTable(String tableName) { LOGGER.info("Calling DESCRIBE TABLE {}", tableName); checkConnection(); String query = "desc table identifier(?)"; PreparedStatement stmt = null; List rows = new ArrayList<>(); try { stmt = conn.prepareStatement(query); stmt.setString(1, quoteIdentifier(tableName)); ResultSet result = stmt.executeQuery(); while (result.next()) { String columnName = result.getString("name"); String type = result.getString("type"); String comment = result.getString("comment"); String nullable = result.getString("null?"); String defaultValue = null; String autoincrement = null; try { defaultValue = result.getString("default"); autoincrement = result.getString("autoincrement"); } catch (SQLException e) { LOGGER.debug( "default/autoincrement columns not available in DESCRIBE TABLE for {}", tableName); } rows.add( new DescribeTableRow(columnName, type, comment, nullable, defaultValue, autoincrement)); } return Optional.of(rows); } catch (Exception e) { LOGGER.debug("table {} doesn't exist", tableName); return Optional.empty(); } finally { if (stmt != null) { try { stmt.close(); } catch (SQLException e) { e.printStackTrace(); } } } } @Override public boolean shouldEvolveSchema(String tableName, String role) { LOGGER.info("Checking schema evolution permission for table {}", tableName); checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); InternalUtils.assertNotEmpty("role", role); String query = "show grants on table identifier(?)"; List schemaEvolutionAllowedPrivilegeList = Arrays.asList("EVOLVE SCHEMA", "ALL", "OWNERSHIP"); boolean hasRolePrivilege = false; String myRole = 
(role.charAt(0) == '"' && role.charAt(role.length() - 1) == '"') ? role.substring(1, role.length() - 1) : role.toUpperCase(); try { PreparedStatement stmt = conn.prepareStatement(query); stmt.setString(1, quoteIdentifier(tableName)); ResultSet result = stmt.executeQuery(); while (result.next()) { if (!result.getString("grantee_name").equals(myRole)) { continue; } if (schemaEvolutionAllowedPrivilegeList.contains( result.getString("privilege").toUpperCase())) { hasRolePrivilege = true; } } stmt.close(); } catch (SQLException e) { throw SnowflakeErrors.ERROR_2001.getException(e); } boolean hasTableOptionEnabled = false; String escapedTableName = tableName.replace("\\", "\\\\").replace("_", "\\_").replace("%", "\\%"); for (String showQuery : new String[] {"show tables like ? limit 1", SHOW_ICEBERG_TABLES_QUERY}) { if (hasTableOptionEnabled) break; try (PreparedStatement stmt = conn.prepareStatement(showQuery)) { stmt.setString(1, escapedTableName); try (ResultSet result = stmt.executeQuery()) { while (result.next()) { String enableSchemaEvolution = "N"; try { enableSchemaEvolution = result.getString("enable_schema_evolution"); } catch (SQLException e) { LOGGER.warn( "enable_schema_evolution column not found in SHOW output for table {}: {}", tableName, e.getMessage()); } if (enableSchemaEvolution.equals("Y")) { hasTableOptionEnabled = true; } } } } catch (SQLException e) { throw SnowflakeErrors.ERROR_2001.getException(e); } } boolean hasPermission = hasRolePrivilege && hasTableOptionEnabled; LOGGER.info( "Table: {} has schema evolution permission: {} (hasRolePrivilege={}," + " hasTableOptionEnabled={})", tableName, hasPermission, hasRolePrivilege, hasTableOptionEnabled); return hasPermission; } @Override public boolean isIcebergTable(String tableName) { checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); try (PreparedStatement stmt = conn.prepareStatement(SHOW_ICEBERG_TABLES_QUERY)) { String escapedTableName = tableName.replace("\\", 
"\\\\").replace("_", "\\_").replace("%", "\\%"); stmt.setString(1, escapedTableName); try (ResultSet result = stmt.executeQuery()) { boolean iceberg = result.next(); LOGGER.info("Table {} isIcebergTable={}", tableName, iceberg); return iceberg; } } catch (SQLException e) { throw SnowflakeErrors.ERROR_2001.getException(e); } } @Override public boolean hasErrorLoggingEnabled(String tableName) { checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); try (PreparedStatement stmt = conn.prepareStatement("show tables like ? limit 1")) { String escapedTableName = tableName.replace("\\", "\\\\").replace("_", "\\_").replace("%", "\\%"); stmt.setString(1, escapedTableName); try (ResultSet result = stmt.executeQuery()) { if (result.next()) { try { if ("Y".equals(result.getString("error_logging"))) { LOGGER.debug("Table {} has ERROR_LOGGING enabled", tableName); return true; } } catch (SQLException e) { // error_logging column absent in result set — treat as disabled to surface a warning LOGGER.warn( "error_logging column not found in SHOW TABLES output for table {} —" + " treating as disabled", tableName); return false; } } } } catch (SQLException e) { throw SnowflakeErrors.ERROR_2001.getException(e); } LOGGER.debug("Table {} does not have ERROR_LOGGING enabled", tableName); return false; } @Override public void executeQueryWithParameters(String query, String... parameters) { try { PreparedStatement stmt = conn.prepareStatement(query); for (int i = 0; i < parameters.length; i++) { stmt.setString(i + 1, parameters[i]); } stmt.execute(); stmt.close(); } catch (Exception e) { throw new RuntimeException("Error executing query: " + query, e); } } @Override public void appendColumnsToTable(String tableName, Map columnInfosMap) { if (columnInfosMap == null || columnInfosMap.isEmpty()) { return; } checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); // identifier(?) works for the table name but NOT for column names in ADD COLUMN. 
// Column names are quoted inline to preserve case (e.g. "age" vs "AGE"). // Iceberg tables require ALTER ICEBERG TABLE instead of ALTER TABLE. String alterKeyword = isIcebergTable(tableName) ? "alter iceberg table" : "alter table"; StringBuilder query = new StringBuilder(alterKeyword + " identifier(?) add column if not exists "); boolean first = true; for (Map.Entry entry : columnInfosMap.entrySet()) { if (!first) { query.append(", if not exists "); } query.append(quoteIdentifier(entry.getKey())); query.append(" "); query.append(entry.getValue().getColumnType()); query.append(entry.getValue().getDdlComments()); first = false; } try (PreparedStatement stmt = conn.prepareStatement(query.toString())) { stmt.setString(1, quoteIdentifier(tableName)); stmt.execute(); LOGGER.info("Added columns to table {}: {}", tableName, columnInfosMap.keySet()); } catch (SQLException e) { LOGGER.warn( "ALTER TABLE/ICEBERG TABLE ADD COLUMN failed for table {} (may be concurrent race" + " condition): {}", tableName, e.getMessage()); throw SnowflakeErrors.ERROR_2015.getException(e); } } @Override public Ssv1MigrationResponse migrateSsv1ChannelOffset( String tableName, String ssv1ChannelName, String ssv2ChannelName, String pipeName) { checkConnection(); LOGGER.info( "Calling SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET for table={}, ssv1Channel={}, " + "ssv2Channel={}, pipe={}", tableName, ssv1ChannelName, ssv2ChannelName, pipeName); String query = "SELECT SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET(?, ?, ?, ?)"; try (PreparedStatement stmt = conn.prepareStatement(query)) { stmt.setString(1, quoteIdentifier(tableName)); // The backend should unquote/uppercase the channel name, but that fix is not yet rolled out. // Uppercase here as a workaround // TODO(SNOW-3360048): Remove once the backend fix is rolled out. 
stmt.setString(2, ssv1ChannelName.toUpperCase(Locale.ROOT)); stmt.setString(3, ssv2ChannelName); stmt.setString(4, pipeName); try (ResultSet rs = stmt.executeQuery()) { if (!rs.next()) { throw new RuntimeException( "SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET returned no result for table " + tableName); } String jsonResponse = rs.getString(1); try { return OBJECT_MAPPER.readValue(jsonResponse, Ssv1MigrationResponse.class); } catch (Exception e) { throw new RuntimeException( "Failed to parse SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET response for channel " + ssv1ChannelName, e); } } } catch (SQLException e) { throw new RuntimeException( "SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET failed for ssv1Channel=" + ssv1ChannelName + ", ssv2Channel=" + ssv2ChannelName + ": " + e.getMessage(), e); } } @Override public void alterNonNullableColumns(String tableName, List columnNames) { if (columnNames == null || columnNames.isEmpty()) { return; } checkConnection(); InternalUtils.assertNotEmpty("tableName", tableName); // identifier(?) works for the table name but NOT for column names in ALTER ... DROP NOT NULL. // Column names are quoted inline to preserve case. // Iceberg tables require ALTER ICEBERG TABLE instead of ALTER TABLE. String alterKeyword = isIcebergTable(tableName) ? "alter iceberg table" : "alter table"; StringBuilder query = new StringBuilder(alterKeyword + " identifier(?) 
alter "); boolean first = true; for (String colName : columnNames) { if (!first) { query.append(", "); } String quoted = quoteIdentifier(colName); query .append(quoted) .append(" drop not null, ") .append(quoted) .append( " comment 'column altered to be nullable by schema evolution from" + " Snowflake Kafka Connector'"); first = false; } try (PreparedStatement stmt = conn.prepareStatement(query.toString())) { stmt.setString(1, quoteIdentifier(tableName)); stmt.execute(); LOGGER.info("Dropped NOT NULL constraints on table {}: {}", tableName, columnNames); } catch (SQLException e) { LOGGER.warn( "ALTER TABLE/ICEBERG TABLE DROP NOT NULL failed for table {} (may be concurrent race" + " condition): {}", tableName, e.getMessage()); throw SnowflakeErrors.ERROR_2016.getException(e); } } /** * Wraps a raw column name in double quotes to preserve case in DDL statements. Snowflake treats * unquoted identifiers as case-insensitive (uppercased), so quoting is required for * case-sensitive column names like {@code "age"} vs {@code "AGE"}. Internal double quotes are * escaped per SQL standard. 
*/ private static String quoteIdentifier(String name) { return "\"" + name.replace("\"", "\"\"") + "\""; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/URL.java ================================================ package com.snowflake.kafka.connector.internal; public interface URL { String hostWithPort(); String getScheme(); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/MetricsJmxReporter.java ================================================ package com.snowflake.kafka.connector.internal.metrics; import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.JMX_METRIC_PREFIX; import com.codahale.metrics.MetricFilter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.jmx.JmxReporter; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Splitter; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import java.util.Hashtable; import java.util.Iterator; import java.util.concurrent.TimeUnit; import javax.management.MalformedObjectNameException; import javax.management.ObjectName; /** * Helper class for creation of JMX Metrics from metrics registry, also includes a definition to * create an ObjectName used to register a {@link com.codahale.metrics.Metric} * *

<p>This instance is separate for all pipes and hence registration and unregistration of metrics
 * is handled per pipe level.
 */
public class MetricsJmxReporter {
  static final KCLogger LOGGER = new KCLogger(MetricsJmxReporter.class.getName());

  // The registry which will hold pool of all metrics for this instance
  private final MetricRegistry metricRegistry;

  /**
   * Wrapper on top of listeners and metricRegistry for codahale. This will be useful to start the
   * jmx metrics when time is appropriate. (Check {@link MetricsJmxReporter#start()})
   */
  private final JmxReporter jmxReporter;

  public MetricsJmxReporter(MetricRegistry metricRegistry, final String connectorName) {
    this.metricRegistry = metricRegistry;
    this.jmxReporter = createJMXReporter(connectorName);
  }

  public MetricRegistry getMetricRegistry() {
    return metricRegistry;
  }

  /**
   * This function will internally register all metrics present inside metric registry and will
   * register mbeans to the mbeanserver
   */
  public void start() {
    jmxReporter.start();
  }

  /**
   * This method is called to fetch an object name for all registered metrics. It can be called
   * during registration or unregistration. (Internal implementation of codahale)
   *
   * <p>A metric name that has no {@code scope:} prefix or fewer than three {@code /}-separated
   * segments is reported through the same ERROR_5020 path as a name JMX rejects.
   *
   * @param connectorName name of the connector. (From Config)
   * @param jmxDomain JMX Domain
   * @param metricName metric name used while registering the metric. (Check {@link
   *     MetricsUtil#channelMetricName(String, String, String)})
   * @return Object Name constructed from above three args
   */
  @VisibleForTesting
  static ObjectName getObjectName(String connectorName, String jmxDomain, String metricName) {
    try {
      // each metric name is scope:scopeValue/subDomain/metricName
      // e.g. "task:task-0/lifecycle/open-count" or "channel:conn_topic_0/offsets/processed-offset"
      Iterator<String> tokens = Splitter.on("/").split(metricName).iterator();

      // First token is always scope:value -- split on colon to get the MBean key and value
      String firstToken = tokens.next();
      int colonIndex = firstToken.indexOf(':');
      if (colonIndex < 0 || !tokens.hasNext()) {
        throw new MalformedObjectNameException("Unexpected metric name format: " + metricName);
      }
      String category = tokens.next();
      if (!tokens.hasNext()) {
        throw new MalformedObjectNameException("Unexpected metric name format: " + metricName);
      }
      String name = tokens.next();

      Hashtable<String, String> keys = new Hashtable<>();
      keys.put("connector", connectorName);
      keys.put(firstToken.substring(0, colonIndex), firstToken.substring(colonIndex + 1));
      keys.put("category", category);
      keys.put("name", name);
      return new ObjectName(jmxDomain, keys);
    } catch (MalformedObjectNameException e) {
      LOGGER.warn("Could not create Object name for MetricName:{}", metricName);
      throw SnowflakeErrors.ERROR_5020.getException();
    }
  }

  /**
   * Unregister all snowflake KC related metrics from registry
   *
   * <p>Matching is a plain {@code startsWith} on the registered name, so a prefix without a
   * trailing separator also matches longer identifiers that begin with it.
   *
   * @param prefixFilter prefix for removing the filter.
   */
  public void removeMetricsFromRegistry(final String prefixFilter) {
    if (metricRegistry.getMetrics().size() != 0) {
      LOGGER.debug("Unregistering all metrics matching prefix '{}'", prefixFilter);
      metricRegistry.removeMatching(MetricFilter.startsWith(prefixFilter));
      LOGGER.debug(
          "Metric registry size after removing '{}' is:{}, names:{}",
          prefixFilter,
          metricRegistry.getMetrics().size(),
          metricRegistry.getMetrics().keySet().toString());
    }
  }

  /**
   * Remove a single metric by its exact registered name. O(1) lookup vs the O(N) scan of {@link
   * #removeMetricsFromRegistry}.
   */
  public boolean removeMetric(final String exactName) {
    return metricRegistry.remove(exactName);
  }

  /**
   * Create JMXReporter Instance, which internally handles the mbean server fetching and
   * registration of Mbeans. We use codahale metrics library to achieve this. More details
   * here: @see DropWizard
   *
   * <p>We will convert all duration to SECONDS and prefix our metrics with {@link
   * MetricsUtil#JMX_METRIC_PREFIX}
   *
   * @param connectorName connectorName passed inside configuration
   * @return JMXReporter instance.
   */
  private JmxReporter createJMXReporter(final String connectorName) {
    return JmxReporter.forRegistry(this.metricRegistry)
        .inDomain(JMX_METRIC_PREFIX)
        .convertDurationsTo(TimeUnit.SECONDS)
        .createsObjectNamesWith(
            (ignoreMeterType, jmxDomain, metricName) ->
                getObjectName(connectorName, jmxDomain, metricName))
        .build();
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/MetricsUtil.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

import java.util.Collection;

/** All metrics related constants. Mainly for JMX */
public class MetricsUtil {
  public static final String JMX_METRIC_PREFIX = "snowflake.kafka.connector";

  // file count related constants
  public static final String OFFSET_SUB_DOMAIN = "offsets";

  /**
   * Offset number that is most recent inside the buffer (In memory buffer)
   *
   * <p>This is updated every time an offset is sent as put API of SinkTask {@link
   * org.apache.kafka.connect.sink.SinkTask#put(Collection)}
   */
  public static final String PROCESSED_OFFSET = "processed-offset";

  public static final String OFFSET_PERSISTED_IN_SNOWFLAKE = "persisted-in-snowflake-offset";

  public static final String LATEST_CONSUMER_OFFSET = "latest-consumer-offset";

  /**
   * Returns the metric-registry key prefix for a given channel, e.g. {@code "channel:myConn_t_0"}.
   * Use this when removing all metrics for a channel via {@link
   * MetricsJmxReporter#removeMetricsFromRegistry}.
   *
   * <p>The prefix carries no trailing {@code /}, so with prefix matching {@code
   * "channel:myConn_t_1"} is also a prefix of {@code "channel:myConn_t_10/..."}.
   */
  public static String channelMetricPrefix(final String channelName) {
    return "channel:" + channelName;
  }

  /**
   * Construct a channel-level metric name. The resulting MBean will use {@code channel=} as the
   * first key property.
   *
   * <p>Will be of form <b>channel:channelName/subDomain/metricName</b>. The {@code channel:} prefix
   * is parsed by {@link MetricsJmxReporter#getObjectName} to produce the MBean key.
   *
   * @param channelName channel or partition identifier
   * @param subDomain categorize this metric (e.g. "offsets")
   * @param metricName actual Metric name for which we will use Gauge, Meter, Histogram
   * @return concatenated String
   */
  public static String channelMetricName(
      final String channelName, final String subDomain, final String metricName) {
    return channelMetricPrefix(channelName) + "/" + subDomain + "/" + metricName;
  }

  /**
   * Returns the metric-registry key prefix for a given task, e.g. {@code "task:task-0"}. Use this
   * when removing all metrics for a task via {@link MetricsJmxReporter#removeMetricsFromRegistry}.
   *
   * <p>The prefix carries no trailing {@code /}, so with prefix matching {@code "task:task-1"} is
   * also a prefix of {@code "task:task-10/..."}.
   */
  public static String taskMetricPrefix(final String taskPrefix) {
    return "task:" + taskPrefix;
  }

  /**
   * Construct a task-level metric name. The resulting MBean will use {@code task=} as the first key
   * property.
   *
   * <p>Will be of form <b>task:taskPrefix/subDomain/metricName</b>
   *
   * @param taskPrefix task identifier (e.g. "task-0")
   * @param subDomain categorize this metric (e.g. "task", "lifecycle")
   * @param metricName actual Metric name
   * @return concatenated String with scope prefix
   */
  public static String taskMetricName(
      final String taskPrefix, final String subDomain, final String metricName) {
    return taskMetricPrefix(taskPrefix) + "/" + subDomain + "/" + metricName;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/NoopTaskMetrics.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

/** Null-object implementation of {@link TaskMetrics}. Every method is a no-op. */
enum NoopTaskMetrics implements TaskMetrics {
  INSTANCE;

  @Override
  public TimingContext timePut() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timePreCommit() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeOpen() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeClose() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeSdkClientCreate() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeChannelOpen() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeOffsetFetch() {
    return TimingContext.NOOP;
  }

  @Override
  public void recordStartDuration(long nanos) {}

  @Override
  public void incOpenCount() {}

  @Override
  public void incCloseCount() {}

  @Override
  public void incChannelOpenCount() {}

  @Override
  public void incPreCommitPartitionsSkipped() {}

  @Override
  public void incBackpressureRewindCount() {}

  @Override
  public void markPutRecords(long count) {}

  @Override
  public void setAssignedPartitions(int count) {}

  @Override
  public void unregister() {}
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/SnowflakeSinkTaskMetrics.java
================================================ package com.snowflake.kafka.connector.internal.metrics; import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.taskMetricName; import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.taskMetricPrefix; import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; import com.snowflake.kafka.connector.internal.KCLogger; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; /** * Manages task-level JMX metrics for a single {@link * com.snowflake.kafka.connector.SnowflakeSinkTask} instance. Metrics are registered when the task * starts and unregistered when it stops. * *

MBean ObjectNames follow the pattern: * *

snowflake.kafka.connector:connector=X,task=task-N,category=task|lifecycle,name=metric
*/ public class SnowflakeSinkTaskMetrics implements TaskMetrics { private static final KCLogger LOGGER = new KCLogger(SnowflakeSinkTaskMetrics.class.getName()); static final String TASK_SUB_DOMAIN = "task"; static final String LIFECYCLE_SUB_DOMAIN = "lifecycle"; // Method duration timers static final String PUT_DURATION = "put-duration"; static final String PRECOMMIT_DURATION = "precommit-duration"; static final String OPEN_DURATION = "open-duration"; static final String CLOSE_DURATION = "close-duration"; static final String START_DURATION = "start-duration"; // Channel and SDK timers static final String CHANNEL_OPEN_DURATION = "channel-open-duration"; static final String SDK_CLIENT_CREATE_DURATION = "sdk-client-create-duration"; static final String PRECOMMIT_OFFSET_FETCH_DURATION = "precommit-offset-fetch-duration"; // Throughput static final String PUT_RECORDS = "put-records"; // Counters static final String PRECOMMIT_PARTITIONS_SKIPPED = "precommit-partitions-skipped"; static final String OPEN_COUNT = "open-count"; static final String CLOSE_COUNT = "close-count"; static final String CHANNEL_OPEN_COUNT = "channel-open-count"; static final String BACKPRESSURE_REWIND_COUNT = "backpressure-rewind-count"; // Gauges static final String ASSIGNED_PARTITIONS = "assigned-partitions"; static final String SDK_CLIENT_COUNT = "sdk-client-count"; private final String taskMetricPrefix; private final MetricsJmxReporter metricsJmxReporter; // Method duration timers private final Timer putDuration; private final Timer preCommitDuration; private final Timer openDuration; private final Timer closeDuration; private final Timer startDuration; // Channel/SDK timers (aggregated across all channels in this task) private final Timer channelOpenDuration; private final Timer sdkClientCreateDuration; private final Timer preCommitOffsetFetchDuration; // Throughput private final Meter putRecords; // Counters private final Counter preCommitPartitionsSkipped; private final Counter openCount; 
private final Counter closeCount; private final Counter channelOpenCount; private final Counter backpressureRewindCount; // Gauges (backed by atomics) private final AtomicInteger assignedPartitions; public SnowflakeSinkTaskMetrics( String connectorName, String taskId, MetricsJmxReporter metricsJmxReporter) { this(connectorName, taskId, metricsJmxReporter, null); } public SnowflakeSinkTaskMetrics( String connectorName, String taskId, MetricsJmxReporter metricsJmxReporter, Supplier sdkClientCountSupplier) { this.taskMetricPrefix = "task-" + taskId; this.metricsJmxReporter = metricsJmxReporter; this.assignedPartitions = new AtomicInteger(0); MetricRegistry registry = metricsJmxReporter.getMetricRegistry(); // Method duration timers this.putDuration = registry.timer(taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PUT_DURATION)); this.preCommitDuration = registry.timer(taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PRECOMMIT_DURATION)); this.openDuration = registry.timer(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, OPEN_DURATION)); this.closeDuration = registry.timer(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CLOSE_DURATION)); this.startDuration = registry.timer(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, START_DURATION)); // Channel/SDK timers this.channelOpenDuration = registry.timer( taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_DURATION)); this.sdkClientCreateDuration = registry.timer( taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_CREATE_DURATION)); this.preCommitOffsetFetchDuration = registry.timer( taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PRECOMMIT_OFFSET_FETCH_DURATION)); // Throughput this.putRecords = registry.meter(taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PUT_RECORDS)); // Counters this.preCommitPartitionsSkipped = registry.counter( taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PRECOMMIT_PARTITIONS_SKIPPED)); this.openCount = 
registry.counter(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, OPEN_COUNT)); this.closeCount = registry.counter(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CLOSE_COUNT)); this.channelOpenCount = registry.counter( taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_COUNT)); this.backpressureRewindCount = registry.counter( taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, BACKPRESSURE_REWIND_COUNT)); // Gauges registry.register( taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, ASSIGNED_PARTITIONS), (Gauge) assignedPartitions::get); if (sdkClientCountSupplier != null) { registry.register( taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_COUNT), (Gauge) sdkClientCountSupplier::get); } metricsJmxReporter.start(); LOGGER.info( "Registered task-level JMX metrics for connector: {}, task: {}", connectorName, taskId); } // ---- TaskMetrics interface (timing) ---- @Override public TimingContext timePut() { return wrap(putDuration); } @Override public TimingContext timePreCommit() { return wrap(preCommitDuration); } @Override public TimingContext timeOpen() { return wrap(openDuration); } @Override public TimingContext timeClose() { return wrap(closeDuration); } @Override public TimingContext timeSdkClientCreate() { return wrap(sdkClientCreateDuration); } @Override public TimingContext timeChannelOpen() { return wrap(channelOpenDuration); } @Override public TimingContext timeOffsetFetch() { return wrap(preCommitOffsetFetchDuration); } @Override public void recordStartDuration(long nanos) { startDuration.update(nanos, TimeUnit.NANOSECONDS); } // ---- TaskMetrics interface (counters) ---- @Override public void incOpenCount() { openCount.inc(); } @Override public void incCloseCount() { closeCount.inc(); } @Override public void incChannelOpenCount() { channelOpenCount.inc(); } @Override public void incPreCommitPartitionsSkipped() { preCommitPartitionsSkipped.inc(); } @Override public void incBackpressureRewindCount() { 
backpressureRewindCount.inc(); } // ---- TaskMetrics interface (throughput) ---- @Override public void markPutRecords(long count) { putRecords.mark(count); } // ---- TaskMetrics interface (gauges) ---- @Override public void setAssignedPartitions(int count) { assignedPartitions.set(count); } // ---- TaskMetrics interface (lifecycle) ---- @Override public void unregister() { metricsJmxReporter.removeMetricsFromRegistry(taskMetricPrefix(taskMetricPrefix)); LOGGER.info("Unregistered task-level JMX metrics for prefix: {}", taskMetricPrefix); } // ---- raw accessors (package-private, for tests in the same package) ---- Timer putDuration() { return putDuration; } Timer preCommitDuration() { return preCommitDuration; } Timer openDuration() { return openDuration; } Timer closeDuration() { return closeDuration; } Timer startDuration() { return startDuration; } Timer channelOpenDuration() { return channelOpenDuration; } Timer sdkClientCreateDuration() { return sdkClientCreateDuration; } Timer preCommitOffsetFetchDuration() { return preCommitOffsetFetchDuration; } Meter putRecords() { return putRecords; } Counter preCommitPartitionsSkipped() { return preCommitPartitionsSkipped; } Counter openCount() { return openCount; } Counter closeCount() { return closeCount; } Counter channelOpenCount() { return channelOpenCount; } Counter backpressureRewindCount() { return backpressureRewindCount; } int getAssignedPartitions() { return assignedPartitions.get(); } // ---- internal ---- private static TimingContext wrap(Timer timer) { Timer.Context ctx = timer.time(); return ctx::stop; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/TaskMetrics.java ================================================ package com.snowflake.kafka.connector.internal.metrics; /** * Task-level metrics facade. 
Callers program against this interface; the connector wires in either
 * {@link SnowflakeSinkTaskMetrics} (real JMX) or the singleton returned by {@link #noop()} when
 * monitoring is disabled.
 *
 * <p>All methods are safe to call unconditionally -- the noop implementation is a no-op.
 */
public interface TaskMetrics {

  // ---- timing (try-with-resources) ----
  // Each timeXxx() method returns a TimingContext meant to be closed by the caller, typically
  // via try-with-resources around the section being measured.

  /** Timing context for a {@code put} call. */
  TimingContext timePut();

  /** Timing context for a {@code preCommit} call. */
  TimingContext timePreCommit();

  /** Timing context for an {@code open} call. */
  TimingContext timeOpen();

  /** Timing context for a {@code close} call. */
  TimingContext timeClose();

  /** Timing context for creating an SDK client. */
  TimingContext timeSdkClientCreate();

  /** Timing context for opening a channel. */
  TimingContext timeChannelOpen();

  /** Timing context for an offset fetch. */
  TimingContext timeOffsetFetch();

  /**
   * Records an already-measured start duration.
   *
   * @param nanos duration in nanoseconds
   */
  void recordStartDuration(long nanos);

  // ---- counters ----

  /** Increments the open counter. */
  void incOpenCount();

  /** Increments the close counter. */
  void incCloseCount();

  /** Increments the channel-open counter. */
  void incChannelOpenCount();

  /** Increments the counter of partitions skipped during pre-commit. */
  void incPreCommitPartitionsSkipped();

  /** Increments the backpressure rewind counter. */
  void incBackpressureRewindCount();

  // ---- throughput ----

  /**
   * Marks records as having been put.
   *
   * @param count number of records
   */
  void markPutRecords(long count);

  // ---- gauges ----

  /**
   * Sets the value reported by the assigned-partitions gauge.
   *
   * @param count number of assigned partitions
   */
  void setAssignedPartitions(int count);

  // ---- lifecycle ----

  /** Removes the metrics owned by this instance. */
  void unregister();

  // ---- timing context ----

  /** An {@link AutoCloseable} whose {@code close()} declares no checked exception. */
  @FunctionalInterface
  interface TimingContext extends AutoCloseable {
    /** Context whose {@code close()} does nothing. */
    TimingContext NOOP = () -> {};

    @Override
    void close();
  }

  // ---- factory ----

  /** Returns the shared no-op implementation. */
  static TaskMetrics noop() {
    return NoopTaskMetrics.INSTANCE;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/ColumnInfos.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import java.util.Objects;
import java.util.Optional;

/** Represents column type and DDL comment for schema evolution.
*/ public class ColumnInfos { private final String columnType; private final String comments; public ColumnInfos(String columnType, String comments) { this.columnType = Objects.requireNonNull(columnType, "columnType cannot be null"); this.comments = comments; } public ColumnInfos(String columnType) { this.columnType = Objects.requireNonNull(columnType, "columnType cannot be null"); this.comments = null; } public String getColumnType() { return columnType; } public String getComments() { return comments; } public String getDdlComments() { return Optional.ofNullable(comments) .map(comment -> String.format(" comment '%s' ", comment.replace("'", "''"))) .orElse(" comment 'column created by schema evolution from Snowflake Kafka Connector' "); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ColumnInfos that = (ColumnInfos) o; return Objects.equals(columnType, that.columnType) && Objects.equals(comments, that.comments); } @Override public int hashCode() { return Objects.hash(columnType, comments); } @Override public String toString() { return "ColumnInfos{" + "columnType='" + columnType + '\'' + ", comments='" + comments + '\'' + '}'; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/ColumnTypeMapper.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.schemaevolution; import com.fasterxml.jackson.databind.JsonNode; import org.apache.kafka.connect.data.Schema; /** Abstract base for mapping Kafka Connect types to Snowflake DDL types. 
*/ public abstract class ColumnTypeMapper { public String mapToColumnType(Schema.Type kafkaType) { return mapToColumnType(kafkaType, null); } public abstract String mapToColumnType(Schema.Type kafkaType, String schemaName); /** * Map the JSON node type to Kafka type * * @param value JSON node * @return Kafka type */ public abstract Schema.Type mapJsonNodeTypeToKafkaType(JsonNode value); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/SchemaEvolutionTargetItems.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.schemaevolution; import java.util.Collections; import java.util.HashSet; import java.util.Objects; import java.util.Set; import javax.annotation.Nonnull; /** * Contains target items for schema evolution: table name, columns to drop non-nullability, and * columns to add to the table. */ public class SchemaEvolutionTargetItems { private final String tableName; @Nonnull private final Set columnsToDropNonNullability; @Nonnull private final Set columnsToAdd; public SchemaEvolutionTargetItems( String tableName, Set columnsToDropNonNullability, Set columnsToAdd) { this.tableName = tableName; this.columnsToDropNonNullability = columnsToDropNonNullability != null ? new HashSet<>(columnsToDropNonNullability) : Collections.emptySet(); this.columnsToAdd = columnsToAdd != null ? 
new HashSet<>(columnsToAdd) : Collections.emptySet(); } public boolean hasDataForSchemaEvolution() { return !columnsToDropNonNullability.isEmpty() || !columnsToAdd.isEmpty(); } public SchemaEvolutionTargetItems(String tableName, Set columnsToAdd) { this(tableName, null, columnsToAdd); } public String getTableName() { return tableName; } @Nonnull public Set getColumnsToDropNonNullability() { return Collections.unmodifiableSet(columnsToDropNonNullability); } @Nonnull public Set getColumnsToAdd() { return Collections.unmodifiableSet(columnsToAdd); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; SchemaEvolutionTargetItems that = (SchemaEvolutionTargetItems) o; return Objects.equals(tableName, that.tableName) && Objects.equals(columnsToDropNonNullability, that.columnsToDropNonNullability) && Objects.equals(columnsToAdd, that.columnsToAdd); } @Override public int hashCode() { return Objects.hash(tableName, columnsToDropNonNullability, columnsToAdd); } @Override public String toString() { return "SchemaEvolutionTargetItems{" + "tableName='" + tableName + '\'' + ", nonNullableColumns=" + columnsToDropNonNullability + ", extraColNames=" + columnsToAdd + '}'; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeColumnTypeMapper.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. 
*/ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.apache.kafka.connect.data.Schema.Type.ARRAY; import static org.apache.kafka.connect.data.Schema.Type.BOOLEAN; import static org.apache.kafka.connect.data.Schema.Type.BYTES; import static org.apache.kafka.connect.data.Schema.Type.FLOAT32; import static org.apache.kafka.connect.data.Schema.Type.FLOAT64; import static org.apache.kafka.connect.data.Schema.Type.INT16; import static org.apache.kafka.connect.data.Schema.Type.INT32; import static org.apache.kafka.connect.data.Schema.Type.INT64; import static org.apache.kafka.connect.data.Schema.Type.STRING; import static org.apache.kafka.connect.data.Schema.Type.STRUCT; import com.fasterxml.jackson.databind.JsonNode; import org.apache.kafka.connect.data.Date; import org.apache.kafka.connect.data.Decimal; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Time; import org.apache.kafka.connect.data.Timestamp; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** Maps Kafka Connect types to Snowflake DDL types. 
*/ public class SnowflakeColumnTypeMapper extends ColumnTypeMapper { private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeColumnTypeMapper.class); @Override public String mapToColumnType(Schema.Type kafkaType, String schemaName) { switch (kafkaType) { case INT8: return "BYTEINT"; case INT16: return "SMALLINT"; case INT32: if (Date.LOGICAL_NAME.equals(schemaName)) { return "DATE"; } else if (Time.LOGICAL_NAME.equals(schemaName)) { return "TIME(6)"; } else { return "INT"; } case INT64: if (Timestamp.LOGICAL_NAME.equals(schemaName)) { return "TIMESTAMP(6)"; } else { return "BIGINT"; } case FLOAT32: return "FLOAT"; case FLOAT64: return "DOUBLE"; case BOOLEAN: return "BOOLEAN"; case STRING: return "VARCHAR"; case BYTES: if (Decimal.LOGICAL_NAME.equals(schemaName)) { return "VARCHAR"; } else { return "BINARY"; } case ARRAY: return "ARRAY"; default: // MAP and STRUCT will go here LOGGER.debug( "The corresponding kafka type is {}, so infer to VARIANT type", kafkaType.getName()); return "VARIANT"; } } @Override public Schema.Type mapJsonNodeTypeToKafkaType(JsonNode value) { if (value == null || value.isNull()) { return STRING; } else if (value.isNumber()) { if (value.isShort()) { return INT16; } else if (value.isInt()) { return INT32; } else if (value.isFloat()) { return FLOAT32; } else if (value.isDouble()) { return FLOAT64; } return INT64; } else if (value.isTextual()) { return STRING; } else if (value.isBoolean()) { return BOOLEAN; } else if (value.isBinary()) { return BYTES; } else if (value.isArray()) { return ARRAY; } else if (value.isObject()) { return STRUCT; } else { return null; } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeSchemaEvolutionService.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. 
*/ package com.snowflake.kafka.connector.internal.schemaevolution; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.records.SnowflakeSinkRecord; import java.util.ArrayList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Executes schema evolution DDL (ALTER TABLE) based on validation results. Handles adding columns * and dropping NOT NULL constraints. */ public class SnowflakeSchemaEvolutionService { private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeSchemaEvolutionService.class); private final SnowflakeConnectionService conn; private final TableSchemaResolver tableSchemaResolver; public SnowflakeSchemaEvolutionService(SnowflakeConnectionService conn) { this(conn, new TableSchemaResolver()); } SnowflakeSchemaEvolutionService( SnowflakeConnectionService conn, TableSchemaResolver tableSchemaResolver) { this.conn = conn; this.tableSchemaResolver = tableSchemaResolver; } /** * Execute ALTER TABLE commands if there are columns to add or NOT NULL constraints to drop. * *

Note: Columns must be added BEFORE dropping NOT NULL constraints, otherwise the constraint * modification will fail if the column doesn't exist yet. * * @param targetItems target items for schema evolution * @param record the sink record that contains the schema and content */ public void evolveSchemaIfNeeded( SchemaEvolutionTargetItems targetItems, SnowflakeSinkRecord record) { if (!targetItems.hasDataForSchemaEvolution()) { return; } String tableName = targetItems.getTableName(); // Add new columns FIRST (must exist before we can modify constraints) if (!targetItems.getColumnsToAdd().isEmpty()) { LOGGER.debug( "Adding columns to table: {} columns: {}", tableName, targetItems.getColumnsToAdd()); TableSchema tableSchema = tableSchemaResolver.resolveTableSchemaFromSnowflakeRecord( record, new ArrayList<>(targetItems.getColumnsToAdd())); try { conn.appendColumnsToTable(tableName, tableSchema.getColumnInfos()); } catch (SnowflakeKafkaConnectorException e) { LOGGER.warn( "Failure altering table to add column: {}, this could happen when multiple" + " partitions try to alter the table at the same time and the warning could be" + " ignored", tableName, e); } } // Drop NOT NULL constraints AFTER columns exist if (!targetItems.getColumnsToDropNonNullability().isEmpty()) { LOGGER.debug( "Dropping nonNullability for table: {} columns: {}", tableName, targetItems.getColumnsToDropNonNullability()); try { conn.alterNonNullableColumns( tableName, new ArrayList<>(targetItems.getColumnsToDropNonNullability())); } catch (SnowflakeKafkaConnectorException e) { LOGGER.warn( "Failure altering table to update nullability: {}, this could happen when multiple" + " partitions try to alter the table at the same time and the warning could be" + " ignored", tableName, e); } } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/TableSchema.java ================================================ /* * Copyright (c) 2026 
Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.schemaevolution; import java.util.Collections; import java.util.Map; import java.util.Objects; /** Wrapper around Map of column name to ColumnInfos. */ public class TableSchema { private final Map columnInfos; public TableSchema(Map columnInfos) { this.columnInfos = columnInfos; } public Map getColumnInfos() { return Collections.unmodifiableMap(columnInfos); } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; TableSchema that = (TableSchema) o; return Objects.equals(columnInfos, that.columnInfos); } @Override public int hashCode() { return Objects.hash(columnInfos); } @Override public String toString() { return "TableSchema{" + "columnInfos=" + columnInfos + '}'; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/TableSchemaResolver.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.schemaevolution; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import com.google.common.collect.Streams; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.records.SnowflakeSinkRecord; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import org.apache.kafka.connect.data.Field; import org.apache.kafka.connect.data.Schema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Resolves table schema from Kafka SinkRecord. Supports both schema-ful (Avro/Protobuf) and * schema-less (JSON) records. 
*/ public class TableSchemaResolver { private static final Logger LOGGER = LoggerFactory.getLogger(TableSchemaResolver.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ColumnTypeMapper columnTypeMapper; public TableSchemaResolver(ColumnTypeMapper columnTypeMapper) { this.columnTypeMapper = columnTypeMapper; } public TableSchemaResolver() { this(new SnowflakeColumnTypeMapper()); } /** * Collect column data types from either the Kafka Connect schema or the content values. Column * names in {@code columnsToInclude} and the returned TableSchema keys are raw internal names (as * returned by DESCRIBE TABLE or as normalized at record creation time). * * @param record the SnowflakeSinkRecord containing schema and content * @param columnsToInclude the names of the columns to include in the schema * @return a Map object where the key is column name and value is ColumnInfos */ public TableSchema resolveTableSchemaFromSnowflakeRecord( SnowflakeSinkRecord record, List columnsToInclude) { if (columnsToInclude == null || columnsToInclude.isEmpty()) { return new TableSchema(ImmutableMap.of()); } Set columnNamesSet = new HashSet<>(columnsToInclude); if (hasSchema(record)) { return getTableSchemaFromRecordSchema(record, columnNamesSet); } else { return getTableSchemaFromJson(record, columnNamesSet); } } private boolean hasSchema(SnowflakeSinkRecord record) { Schema schema = record.getSchema(); return schema != null && schema.type() == Schema.Type.STRUCT && schema.fields() != null && !schema.fields().isEmpty(); } private TableSchema getTableSchemaFromRecordSchema( SnowflakeSinkRecord record, Set columnNamesSet) { JsonNode recordNode = OBJECT_MAPPER.valueToTree(record.getContent()); Map schemaMap = getFullSchemaMapFromRecord(record); Map> columnsWithValue = Streams.stream(recordNode.fields()) .map(ColumnValuePair::from) .filter(pair -> columnNamesSet.contains(pair.getColumnName())) .collect( Collectors.partitioningBy(pair -> 
schemaMap.containsKey(pair.getColumnName()))); List notFoundFieldsInSchema = columnsWithValue.get(false); List foundFieldsInSchema = columnsWithValue.get(true); if (!notFoundFieldsInSchema.isEmpty()) { throw SnowflakeErrors.ERROR_5022.getException( "Columns not found in schema: " + notFoundFieldsInSchema.stream() .map(ColumnValuePair::getColumnName) .collect(Collectors.toList()) + ", schemaMap: " + schemaMap); } Map columnsInferredFromSchema = foundFieldsInSchema.stream() .map( pair -> Maps.immutableEntry(pair.getColumnName(), schemaMap.get(pair.getColumnName()))) .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, (oldValue, newValue) -> newValue)); return new TableSchema(columnsInferredFromSchema); } private TableSchema getTableSchemaFromJson( SnowflakeSinkRecord record, Set columnNamesSet) { JsonNode recordNode = OBJECT_MAPPER.valueToTree(record.getContent()); Map columnsInferredFromJson = Streams.stream(recordNode.fields()) .map(ColumnValuePair::from) .filter(pair -> columnNamesSet.contains(pair.getColumnName())) .map( pair -> Maps.immutableEntry( pair.getColumnName(), new ColumnInfos(inferDataTypeFromJsonObject(pair.getJsonNode())))) .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, (oldValue, newValue) -> newValue)); return new TableSchema(columnsInferredFromJson); } /** * Build column type information from a Kafka Connect schema. 
* * @param record the SnowflakeSinkRecord containing the schema * @return a Map where the key is the field name and value is ColumnInfos */ private Map getFullSchemaMapFromRecord(SnowflakeSinkRecord record) { Map schemaMap = new HashMap<>(); Schema schema = record.getSchema(); if (schema != null && schema.fields() != null) { for (Field field : schema.fields()) { String columnType = columnTypeMapper.mapToColumnType(field.schema().type(), field.schema().name()); LOGGER.info( "Got the data type for field:{}, schemaName:{}, schemaDoc: {} kafkaType:{}," + " columnType:{}", field.name(), field.schema().name(), field.schema().doc(), field.schema().type(), columnType); schemaMap.put(field.name(), new ColumnInfos(columnType, field.schema().doc())); } } return schemaMap; } /** Try to infer the data type from the data */ private String inferDataTypeFromJsonObject(JsonNode value) { Schema.Type schemaType = columnTypeMapper.mapJsonNodeTypeToKafkaType(value); if (schemaType == null) { // only when the type of the value is unrecognizable for JAVA throw SnowflakeErrors.ERROR_5021.getException("class: " + value.getClass()); } // Passing null to schemaName when there is no schema information return columnTypeMapper.mapToColumnType(schemaType); } // ---- ColumnValuePair ---- private static class ColumnValuePair { private final String columnName; private final JsonNode jsonNode; public static ColumnValuePair from(Map.Entry field) { return new ColumnValuePair(field.getKey(), field.getValue()); } private ColumnValuePair(String columnName, JsonNode jsonNode) { this.columnName = columnName; this.jsonNode = jsonNode; } public String getColumnName() { return columnName; } public JsonNode getJsonNode() { return jsonNode; } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/ValidationResultMapper.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. 
All rights reserved.
 */
package com.snowflake.kafka.connector.internal.schemaevolution;

import com.snowflake.kafka.connector.internal.validation.ValidationResult;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Maps {@link ValidationResult} to {@link SchemaEvolutionTargetItems}. Column names are raw
 * internal names (as returned by DESCRIBE TABLE / as normalized at record creation time). Quoting
 * for DDL is handled downstream in {@link
 * com.snowflake.kafka.connector.internal.StandardSnowflakeConnectionService}.
 */
public class ValidationResultMapper {

  /**
   * Convert ValidationResult to SchemaEvolutionTargetItems.
   *
   * <p>The extra column names become the columns to add. The columns to drop NOT NULL on are the
   * union of the missing NOT NULL columns and the NOT NULL columns that received a null value.
   *
   * @param result ValidationResult with structural error details (raw column names)
   * @param tableName Target table name
   * @return SchemaEvolutionTargetItems with raw column names to add and columns to drop NOT NULL
   */
  public static SchemaEvolutionTargetItems mapToSchemaEvolutionItems(
      ValidationResult result, String tableName) {
    Set dropNotNullColumns =
        Stream.concat(
                result.getMissingNotNullColNames().stream(),
                result.getNullValueForNotNullColNames().stream())
            .collect(Collectors.toSet());
    Set columnsToAdd = result.getExtraColNames();

    return new SchemaEvolutionTargetItems(tableName, dropNotNullColumns, columnsToAdd);
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/DefaultStreamingConfigValidator.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.ConnectorConfigTools.BOOLEAN_VALIDATOR;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.VALUE_CONVERTER;

import
com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.Utils; import java.util.HashMap; import java.util.Map; import java.util.Optional; import org.apache.kafka.common.config.ConfigException; public class DefaultStreamingConfigValidator implements StreamingConfigValidator { private static final String STRING_CONVERTER_KEYWORD = "StringConverter"; private static final String BYTE_ARRAY_CONVERTER_KEYWORD = "ByteArrayConverter"; @Override public ImmutableMap validate(Map inputConfig) { Map invalidParams = new HashMap<>(); validateRole(inputConfig) .ifPresent(errorEntry -> invalidParams.put(errorEntry.getKey(), errorEntry.getValue())); // Validate error handling configs if (inputConfig.containsKey(ERRORS_TOLERANCE_CONFIG)) { try { ConnectorConfigTools.ErrorTolerance.VALIDATOR.ensureValid( ERRORS_TOLERANCE_CONFIG, inputConfig.get(ERRORS_TOLERANCE_CONFIG)); } catch (ConfigException e) { invalidParams.put( ERRORS_TOLERANCE_CONFIG, Utils.formatString( "{} configuration error: {}", ERRORS_TOLERANCE_CONFIG, e.getMessage())); } } if (inputConfig.containsKey(ERRORS_LOG_ENABLE_CONFIG)) { try { BOOLEAN_VALIDATOR.ensureValid( ERRORS_LOG_ENABLE_CONFIG, inputConfig.get(ERRORS_LOG_ENABLE_CONFIG)); } catch (ConfigException e) { invalidParams.put(ERRORS_LOG_ENABLE_CONFIG, e.getMessage()); } } // Validate schematization config invalidParams.putAll(validateSchematizationConfig(inputConfig)); return ImmutableMap.copyOf(invalidParams); } private static Optional> validateRole(Map inputConfig) { if (!inputConfig.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME) || Strings.isNullOrEmpty(inputConfig.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME))) { String missingRole = String.format( "Config: %s should be present for Snowpipe Streaming", KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME); 
return Optional.of(Map.entry(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, missingRole)); } return Optional.empty(); } private static void ensureValidLong( Map inputConfig, String param, Map invalidParams) { try { Long.parseLong(inputConfig.get(param)); } catch (NumberFormatException exception) { invalidParams.put( param, Utils.formatString( param + " configuration must be a parsable long. Given configuration" + " was: {}", inputConfig.get(param))); } } /** * Validates if the configs are allowed values when schematization is enabled. * *

return a map of invalid params */ private static Map validateSchematizationConfig(Map inputConfig) { Map invalidParams = new HashMap<>(); boolean schematizationEnabled = Boolean.parseBoolean( inputConfig.getOrDefault( KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, String.valueOf( KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT))); if (schematizationEnabled && inputConfig.get(VALUE_CONVERTER) != null && (inputConfig.get(VALUE_CONVERTER).contains(STRING_CONVERTER_KEYWORD) || inputConfig.get(VALUE_CONVERTER).contains(BYTE_ARRAY_CONVERTER_KEYWORD))) { invalidParams.put( inputConfig.get(VALUE_CONVERTER), Utils.formatString( "The value converter:{} is not supported when schematization is enabled.", inputConfig.get(VALUE_CONVERTER))); } return invalidParams; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/IngestionMethodConfig.java ================================================ package com.snowflake.kafka.connector.internal.streaming; import java.util.Locale; /** * Enum representing the ingestion method for Snowflake Kafka Connector. * *

Only SNOWPIPE_STREAMING is supported (SSv2). Legacy SNOWPIPE and SSv1 have been removed.
 */
public enum IngestionMethodConfig {
  /* Snowpipe streaming (SSv2) - the only supported ingestion method */
  SNOWPIPE_STREAMING;

  /** Returns the constant name in lower case (locale-independent), e.g. "snowpipe_streaming". */
  @Override
  public String toString() {
    return name().toLowerCase(Locale.ROOT);
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/LatestCommitedOffsetTokenExecutor.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static java.time.temporal.ChronoUnit.SECONDS;

import com.snowflake.kafka.connector.internal.KCLogger;
import dev.failsafe.Failsafe;
import dev.failsafe.FailsafeExecutor;
import dev.failsafe.Fallback;
import dev.failsafe.RetryPolicy;
import dev.failsafe.function.CheckedSupplier;
import java.time.Duration;

/**
 * Class that separates Failsafe specific logic (retries and fallback) from the actual channel logic
 */
// NOTE(review): the Failsafe types below (FailsafeExecutor, RetryPolicy, Fallback,
// CheckedSupplier, Class) appear without type arguments. This looks like an artifact of how this
// copy of the file was produced rather than the real declarations -- the event lambdas rely on
// typed events -- so confirm the generic signatures against the repository before compiling.
public class LatestCommitedOffsetTokenExecutor {

  private static final KCLogger LOGGER =
      new KCLogger(LatestCommitedOffsetTokenExecutor.class.getName());

  // Fixed pause between two attempts.
  private static final Duration DURATION_BETWEEN_GET_OFFSET_TOKEN_RETRY = Duration.ofSeconds(1);

  // Passed to withMaxAttempts(). NOTE(review): per the Failsafe documentation this bounds the
  // total number of attempts (first call included), not the number of retries -- confirm that
  // matches the intent behind the constant's name.
  protected static final int MAX_GET_OFFSET_TOKEN_RETRIES = 3;

  /**
   * Builds an executor that combines a retry policy with a fallback for the given exception type.
   *
   * <p>The fallback is the outer policy and the retry policy is composed into it, so the fallback
   * supplier is presumably only consulted once the retries are used up (see Failsafe policy
   * composition -- TODO confirm). A failure of the whole execution is logged at error level
   * together with the elapsed time in seconds.
   *
   * @param channelName channel name, used in log messages only
   * @param exceptionClass exception type that triggers both the retry and the fallback
   * @param fallbackSupplier supplier invoked by the fallback policy
   * @return the configured executor
   */
  public static FailsafeExecutor getExecutor(
      String channelName, Class exceptionClass, CheckedSupplier fallbackSupplier) {
    RetryPolicy retryPolicy = createRetryPolicy(exceptionClass);
    Fallback fallback = createFallback(channelName, exceptionClass, fallbackSupplier);

    return Failsafe.with(fallback)
        .onFailure(
            event ->
                LOGGER.error(
                    "[OFFSET_TOKEN_RETRY_FAILSAFE] Failure to fetch offsetToken even after retry"
                        + " and fallback from snowflake for channel:{}, elapsedTimeSeconds:{}",
                    channelName,
                    event.getElapsedTime().get(SECONDS),
                    event.getException()))
        .compose(retryPolicy);
  }

  /** Retry policy with a fixed delay that only handles {@code retryExceptionClass}. */
  private static RetryPolicy createRetryPolicy(
      Class retryExceptionClass) {
    return RetryPolicy.builder()
        .handle(retryExceptionClass)
        .withDelay(DURATION_BETWEEN_GET_OFFSET_TOKEN_RETRY)
        .withMaxAttempts(MAX_GET_OFFSET_TOKEN_RETRIES)
        .onRetry(
            event ->
                LOGGER.warn(
                    "[OFFSET_TOKEN_RETRY_POLICY] retry for getLatestCommittedOffsetToken. Retry"
                        + " no:{}, message:{}",
                    event.getAttemptCount(),
                    event.getLastException().getMessage()))
        .build();
  }

  /**
   * Fallback policy that runs {@code fallbackSupplier} for {@code exceptionClass}; a failure of
   * the fallback itself is logged at error level.
   */
  private static Fallback createFallback(
      String channelName, Class exceptionClass, CheckedSupplier fallbackSupplier) {
    return Fallback.builder(fallbackSupplier)
        .handle(exceptionClass)
        .onFailure(
            event ->
                LOGGER.error(
                    "[OFFSET_TOKEN_FALLBACK] Failed to open Channel/fetch offsetToken for"
                        + " channel:{}, exception:{}",
                    channelName,
                    event.getException().toString()))
        .build();
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/OpenChannelRetryPolicy.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import com.snowflake.kafka.connector.internal.KCLogger;
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import dev.failsafe.function.CheckedSupplier;
import java.time.Duration;

/**
 * Policy class that encapsulates retry logic for opening streaming channels with exponential
 * backoff and jitter.
 *
 *

This class provides a clean interface to execute channel opening operations with automatic
 * retry on HTTP 429 (rate limiting) errors from Snowflake streaming service.
 */
class OpenChannelRetryPolicy {
  private static final KCLogger LOGGER = new KCLogger(OpenChannelRetryPolicy.class.getName());

  /** Text an {@link SFException} message must contain to be treated as rate limiting. */
  private static final String RATE_LIMIT_MESSAGE_PART = "HTTP Status: 429";

  // Retry policy constants

  /** Initial delay before the first retry attempt. */
  private static final Duration INITIAL_DELAY = Duration.ofSeconds(2);

  /** Maximum delay between retry attempts. */
  private static final Duration MAX_DELAY = Duration.ofSeconds(8);

  /** Exponential backoff multiplier (retry delays: 2s, 4s, 8s max). */
  private static final double BACKOFF_MULTIPLIER = 2.0;

  /** Random jitter added to retry delays to prevent thundering herd. */
  private static final Duration JITTER_DURATION = Duration.ofMillis(200);

  /**
   * Executes the provided channel opening action with retry handling.
   *
   * <p>On an SFException whose message contains "HTTP Status: 429" (HTTP rate limiting), it will
   * retry with exponential backoff and jitter with unlimited retry attempts. Other exceptions are
   * not retried.
   *
   * @param channelOpenAction the action to execute (typically openChannelForTable call)
   * @param channelName the channel name for logging purposes
   * @return the result of the channel opening operation
   */
  static SnowflakeStreamingIngestChannel executeWithRetry(
      CheckedSupplier<SnowflakeStreamingIngestChannel> channelOpenAction, String channelName) {
    // The explicit type argument is required: the result type cannot be inferred through the
    // builder call chain.
    RetryPolicy<SnowflakeStreamingIngestChannel> retryPolicy =
        RetryPolicy.<SnowflakeStreamingIngestChannel>builder()
            .handleIf(OpenChannelRetryPolicy::isRetryableError)
            .withBackoff(INITIAL_DELAY, MAX_DELAY, BACKOFF_MULTIPLIER)
            .withJitter(JITTER_DURATION)
            .withMaxAttempts(-1) // -1 means no limit on the number of attempts
            .onRetry(
                event ->
                    LOGGER.warn(
                        "Open channel {} retry attempt #{} due to: {}",
                        channelName,
                        event.getAttemptCount(),
                        event.getLastException().getMessage()))
            .build();

    return Failsafe.with(retryPolicy).get(channelOpenAction);
  }

  /**
   * Decides whether a failure should be retried.
   *
   * @param e failure raised by the channel opening action
   * @return true only for an {@link SFException} whose message mentions the rate-limit status; an
   *     exception without a message is never retried
   */
  private static boolean isRetryableError(Throwable e) {
    if (!(e instanceof SFException)) {
      return false;
    }
    // getMessage() may legitimately be null; treat that as "not a rate-limit error" instead of
    // failing inside the retry predicate.
    String message = e.getMessage();
    return message != null && message.contains(RATE_LIMIT_MESSAGE_PART);
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.Utils.getTableName;
import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static com.snowflake.kafka.connector.internal.streaming.v2.PipeNameProvider.buildDefaultPipeName;

import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import
com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.SnowflakeSinkService; import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel; import com.snowflake.kafka.connector.internal.streaming.v2.BackpressureException; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools; import com.snowflake.kafka.connector.internal.streaming.v2.service.BatchOffsetFetcher; import com.snowflake.kafka.connector.internal.streaming.v2.service.PartitionChannelManager; import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools; import java.time.Duration; import java.time.Instant; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.sink.SinkTaskContext; /** * This is per task configuration. A task can be assigned multiple partitions. Major methods are * startTask, insert, getOffset and close methods. * *

StartTask: Called when partitions are assigned. Responsible for generating the POJOs. * *

Insert and getOffset are called when {@link * com.snowflake.kafka.connector.SnowflakeSinkTask#put(Collection)} and {@link * com.snowflake.kafka.connector.SnowflakeSinkTask#preCommit(Map)} APIs are called. * *

This implementation of SinkService uses Streaming Snowpipe (Streaming Ingestion) * *

Hence this initializes the channel, opens, closes. The StreamingIngestChannel resides inside * {@link TopicPartitionChannel} which is per partition. */ public class SnowflakeSinkServiceV2 implements SnowflakeSinkService { private static final KCLogger LOGGER = new KCLogger(SnowflakeSinkServiceV2.class.getName()); private final SnowflakeConnectionService conn; private final Optional metricsJmxReporter; private final String connectorName; private final SinkTaskConfig taskConfig; private final SinkTaskContext sinkTaskContext; // Set that keeps track of the channels that have been seen per input batch private final Set channelsVisitedPerBatch = new HashSet<>(); private final BatchOffsetFetcher batchOffsetFetcher; private final PartitionChannelManager channelManager; private final TaskMetrics taskMetrics; /** Cooldown duration after a backpressure event before retrying inserts. */ static final Duration BACKPRESSURE_COOLDOWN = Duration.ofSeconds(1); /** Timestamp until which all inserts are skipped due to backpressure. 
*/ @VisibleForTesting Instant backpressureUntil = Instant.MIN; public SnowflakeSinkServiceV2( SnowflakeConnectionService conn, SinkTaskConfig taskConfig, KafkaRecordErrorReporter recordErrorReporter, SinkTaskContext sinkTaskContext, Optional metricsJmxReporter, TaskMetrics taskMetrics) { this( conn, taskConfig, sinkTaskContext, metricsJmxReporter, () -> new BatchOffsetFetcher( taskConfig.getConnectorName(), taskConfig.getTaskId(), taskConfig, ThreadPools.getIoExecutor(taskConfig.getConnectorName()), taskMetrics), () -> new PartitionChannelManager( conn.getTelemetryClient(), taskConfig, recordErrorReporter, sinkTaskContext, metricsJmxReporter, taskMetrics, conn), taskMetrics); } SnowflakeSinkServiceV2( SnowflakeConnectionService conn, SinkTaskConfig taskConfig, SinkTaskContext sinkTaskContext, Optional metricsJmxReporter, Supplier batchOffsetFetcherFactory, Supplier channelManagerFactory, TaskMetrics taskMetrics) { if (conn == null || conn.isClosed()) { throw SnowflakeErrors.ERROR_5010.getException(); } this.conn = conn; this.taskConfig = taskConfig; this.sinkTaskContext = sinkTaskContext; this.metricsJmxReporter = metricsJmxReporter; this.connectorName = taskConfig.getConnectorName(); ThreadPools.registerTask(this.connectorName, taskConfig); this.taskMetrics = taskMetrics; this.batchOffsetFetcher = batchOffsetFetcherFactory.get(); this.channelManager = channelManagerFactory.get(); // Log validation configuration for operator visibility logValidationConfiguration(); LOGGER.info( "SnowflakeSinkServiceV2 initialized for connector: {}, task: {}, tolerateErrors: {}," + " enableSanitization: {}", this.connectorName, taskConfig.getTaskId(), taskConfig.isTolerateErrors(), taskConfig.isEnableSanitization()); } /** * Perform pre-flight safety checks on validation configuration. Verifies that error handling is * properly configured to prevent silent data loss or task crashes. * *

Safety checks: - If validation disabled: Warn that SSv2 Error Table is required to prevent * task crashes - If validation enabled: Verify DLQ or tolerance=none for safe error handling * * @throws IllegalStateException if configuration is unsafe and would cause data loss */ private void logValidationConfiguration() { String errorsTolerance = taskConfig.isTolerateErrors() ? ConnectorConfigTools.ErrorTolerance.ALL.toString() : ConnectorConfigTools.ErrorTolerance.NONE.toString(); String dlqTopic = taskConfig.getDlqTopicName(); boolean dlqConfigured = dlqTopic != null && !dlqTopic.trim().isEmpty(); boolean tolerateAll = "all".equalsIgnoreCase(errorsTolerance); // Check for legacy KC v3 config and warn if present (schematization enabled via task config) if (taskConfig.isEnableSchematization()) { LOGGER.warn( "Config 'snowflake.enable.schematization' is not supported in KC v4. " + "Schema evolution is now handled server-side via table property " + "'ENABLE_SCHEMA_EVOLUTION'. For pre-created tables, run: " + "ALTER TABLE ... SET ENABLE_SCHEMA_EVOLUTION = TRUE"); } else { LOGGER.info( "Schematization is disabled — the connector wraps payloads into" + " RECORD_CONTENT/RECORD_METADATA."); } if (taskConfig.getValidation() != SnowflakeValidation.CLIENT_SIDE) { // Check each target table for ERROR_LOGGING. // Note: makes up to 3 network calls per table (tableExist + isIcebergTable + // hasErrorLoggingEnabled). Acceptable at startup; only runs once per task constructor. Set uniqueTables = new HashSet<>(taskConfig.getTopicToTableMap().values()); for (String tableName : uniqueTables) { if (!conn.tableExist(tableName)) { // Table doesn't exist yet — will be auto-created with ERROR_LOGGING = TRUE continue; } if (conn.isIcebergTable(tableName)) { LOGGER.warn( "Table '{}' is an Iceberg table. Iceberg tables do not support ERROR_LOGGING." + " In v4 high-throughput mode, invalid records targeting this table will be" + " silently dropped. 
Error table functionality is not available for Iceberg" + " tables.", tableName); continue; } if (!conn.hasErrorLoggingEnabled(tableName)) { LOGGER.warn( "Table '{}' does not have ERROR_LOGGING enabled. In v4 high-throughput mode," + " invalid records will be silently dropped. Run: ALTER TABLE \"{}\" SET" + " ERROR_LOGGING = TRUE", tableName, tableName); } else { LOGGER.info("Table '{}' has ERROR_LOGGING enabled — error table is active.", tableName); } } return; } // VALIDATION ENABLED // Verify safe error handling configuration if (tolerateAll) { if (dlqConfigured) { // SAFE: Validation errors route to DLQ LOGGER.info( "Client-side validation enabled with errors.tolerance=all. " + "Validation failures will route to DLQ topic: {}", dlqTopic); } else { // UNSAFE: Validation errors are silently dropped LOGGER.error( "UNSAFE CONFIGURATION: Client-side validation enabled with errors.tolerance=all but NO" + " DLQ configured. " + "Invalid records will be SILENTLY DROPPED causing data loss. " + "Configure '{}' to preserve failed records, or set errors.tolerance=none to abort" + " on errors.", KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG); // Note: Not throwing exception to allow connector to start, but logging ERROR // Operators can decide if they want to fail fast by checking logs } } else { // SAFE: Task aborts on validation failure (errors.tolerance=none) LOGGER.info( "Client-side validation enabled with errors.tolerance=none. " + "Validation failures will abort the task (safe - prevents data loss){}.", dlqConfigured ? " DLQ configured but only used when errors.tolerance=all" : ""); } } /** * Creates a table if it doesnt exist in Snowflake. * *

Initializes the Channel and partitionsToChannel map with new instance of {@link * TopicPartitionChannel} * * @param topicPartition TopicPartition passed from Kafka */ @Override public void startPartition(TopicPartition topicPartition) { startPartitions(Set.of(topicPartition)); } /** * Ensures tables and pipes exist in Snowflake, then delegates channel creation to the {@link * PartitionChannelManager}. * * @param partitions collection of topic partition */ @Override public void startPartitions(Collection partitions) { final Map tableToPipeMapping = new HashMap<>(); final Collection uniqueTopics = partitions.stream().map(TopicPartition::topic).collect(Collectors.toSet()); for (String topic : uniqueTopics) { final String tableName = getTableName(topic, taskConfig.getTopicToTableMap(), taskConfig.isEnableSanitization()); createTableIfNotExists(tableName); // Client-side validation only supports default pipes. // When validation is enabled, reject non-default pipes (pipes whose name equals the table // name) because validation assumptions may not hold for user-created pipes. final String targetPipeName; if (taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE) { if (this.conn.pipeExist(tableName)) { throw SnowflakeErrors.ERROR_0032.getException("table: " + tableName); } targetPipeName = buildDefaultPipeName(tableName); } else { // When validation is disabled (high-performance mode), allow non-default pipes. final boolean pipeExists = this.conn.pipeExist(tableName); targetPipeName = pipeExists ? 
tableName : buildDefaultPipeName(tableName); } tableToPipeMapping.put(tableName, targetPipeName); LOGGER.info("Table: {}, using pipe: {}", tableName, targetPipeName); } channelManager.startPartitions(partitions, tableToPipeMapping); } private void createTableIfNotExists(final String tableName) { if (this.conn.tableExist(tableName)) { LOGGER.info("Using existing table {}.", tableName); } else { LOGGER.info("Creating new table {}.", tableName); this.conn.createTableWithOnlyMetadataColumn(tableName); } } private Set currentlyInitializing(Collection partitions) { return partitions.stream() .filter( tp -> { return channelManager .getChannel(tp) .map(TopicPartitionChannel::isInitializing) .orElse(false); }) .collect(Collectors.toSet()); } /** * @param records records coming from Kafka. Please note, they are not just from single topic and * partition. It depends on the kafka connect worker node which can consume from multiple * Topic and multiple Partitions */ @Override public void insert(final Collection records) { channelsVisitedPerBatch.clear(); // Skip partitions for which the partition-channel bridge is currently being initialized. Set partitions = records.stream() .map(record -> new TopicPartition(record.topic(), record.kafkaPartition())) .collect(Collectors.toSet()); Set initializingPartitions = currentlyInitializing(partitions); if (!initializingPartitions.isEmpty()) { LOGGER.debug( "Skipping put for {}/{} partitions that are currently being initialized: {}", initializingPartitions.size(), partitions.size(), initializingPartitions); } // If still in cooldown from a recent backpressure event, treat all partitions as // backpressured so we skip the entire batch and give the SDK time to drain. boolean skipAllPartitions = false; if (Instant.now().isBefore(backpressureUntil)) { LOGGER.debug( "Backpressure cooldown active until {}. 
Skipping entire batch.", backpressureUntil); skipAllPartitions = true; } Map offsetsOfFirstSkippedRecord = new HashMap<>(); boolean newBackpressure = false; for (SinkRecord record : records) { // check if it needs to handle null value records if (shouldSkipNullValue(record)) { continue; } TopicPartition tp = new TopicPartition(record.topic(), record.kafkaPartition()); if (offsetsOfFirstSkippedRecord.containsKey(tp)) { // We've already skipped a record in this partition, so should also skip the remaining // records in this partition. continue; } if (skipAllPartitions || initializingPartitions.contains(tp)) { // Make sure we store the first record in each partition that we skipped so we can correctly // rewind the offset. offsetsOfFirstSkippedRecord.putIfAbsent(tp, record.kafkaOffset()); continue; } try { if (!insert(record)) { offsetsOfFirstSkippedRecord.putIfAbsent(tp, record.kafkaOffset()); } } catch (BackpressureException e) { LOGGER.warn( "Backpressure on partition {}. Skipping remaining records for this partition." + " Exception: {}", tp, e.getMessage()); taskMetrics.incBackpressureRewindCount(); offsetsOfFirstSkippedRecord.putIfAbsent(tp, record.kafkaOffset()); skipAllPartitions = true; newBackpressure = true; } } if (newBackpressure) { backpressureUntil = Instant.now().plus(BACKPRESSURE_COOLDOWN); LOGGER.info("Backpressure cooldown set until {}", backpressureUntil); } if (!offsetsOfFirstSkippedRecord.isEmpty()) { LOGGER.info("Rewinding offsets for skipped partitions: {}", offsetsOfFirstSkippedRecord); offsetsOfFirstSkippedRecord.forEach(sinkTaskContext::offset); } } /** * Inserts individual records into buffer. 
It fetches the TopicPartitionChannel from the map and * then each partition(Streaming channel) calls its respective appendRows API */ @Override public boolean insert(SinkRecord record) { LOGGER.trace("Inserting record: {}", record); TopicPartition topicPartition = new TopicPartition(record.topic(), record.kafkaPartition()); // Initialize a new topic partition if it's not in the cache or if the channel is closed. if (channelManager .getChannel(topicPartition) .map(TopicPartitionChannel::isChannelClosed) .orElse(true)) { LOGGER.warn("Streaming channel doesn't exist or is closed for {}", topicPartition); startPartition(topicPartition); } TopicPartitionChannel channel = channelManager .getChannel(topicPartition) .orElseThrow( () -> new IllegalStateException( "Channel for " + topicPartition + " not found after startPartition")); boolean isFirstRowPerPartitionInBatch = channelsVisitedPerBatch.add(channel.getChannelName()); return channel.insertRecord(record, isFirstRowPerPartitionInBatch); } private boolean shouldSkipNullValue(SinkRecord record) { if (taskConfig.getBehaviorOnNullValues() == ConnectorConfigTools.BehaviorOnNullValues.DEFAULT) { return false; } if (record.value() == null) { LOGGER.debug( "Null valued record from topic '{}', partition {} and offset {} was skipped.", record.topic(), record.kafkaPartition(), record.kafkaOffset()); return true; } return false; } @Override public long getOffset(TopicPartition topicPartition) { return getCommittedOffsets(Collections.singleton(topicPartition)) .getOrDefault(topicPartition, NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE); } @Override public Map getCommittedOffsets( final Collection partitions) { // Skip partitions for which the partition-channel bridge is currently being initialized. 
Set initializingPartitions = currentlyInitializing(partitions); if (!initializingPartitions.isEmpty()) { LOGGER.info( "Skipping preCommit for {}/{} partitions that are currently being initialized: {}", initializingPartitions.size(), partitions.size(), initializingPartitions); } Set partitionsToFetchOffsetsFor = partitions.stream() .filter(tp -> !initializingPartitions.contains(tp)) .collect(Collectors.toSet()); return batchOffsetFetcher.getCommittedOffsets( partitionsToFetchOffsetsFor, channelManager::getChannel); } @Override public int getPartitionCount() { return channelManager.getPartitionChannels().size(); } @Override public void closeAll() { channelManager.closeAll(); } /** * This function is called during rebalance. * *

All the channels are closed. The client is still active. Upon rebalance (inside {@link * com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)}), we will reopen the channel. * *

We will wipe the cache partitionsToChannel so that in {@link * com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)} we reinstantiate and fetch * offsetToken * * @param partitions a list of topic partition */ @Override public void close(Collection partitions) { channelManager.close(partitions); } @Override public void stop() { LOGGER.info( "Stopping SnowflakeSinkServiceV2 for connector: {}, task: {}", this.connectorName, taskConfig.getTaskId()); channelManager.waitForAllChannelsToCommitData(); // Release all streaming clients used by this service. // Clients will only be closed if no other tasks are using them. StreamingClientPools.closeTaskClients(connectorName, taskConfig.getTaskId()); // Release this task's claim on the shared thread pool. // The pool is shut down when the last task for this connector unregisters. ThreadPools.closeForTask(connectorName); } /* Undefined */ @Override public boolean isClosed() { return false; } @Override public Map getPartitionChannels() { return channelManager.getPartitionChannels(); } @Override public Optional getMetricRegistry(String partitionChannelKey) { if (channelManager.getChannel(partitionChannelKey).isEmpty()) { return Optional.empty(); } return metricsJmxReporter.map(MetricsJmxReporter::getMetricRegistry); } /** Blocks until all partition channels have finished initialization. */ @Override public void awaitInitialization() { channelManager.awaitAllPartitions(); } @VisibleForTesting PartitionChannelManager getChannelManager() { return channelManager; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/StreamingClientProperties.java ================================================ /* * Copyright (c) 2023 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.snowflake.kafka.connector.internal.streaming; import com.google.common.base.Strings; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.PrivateKeyTool; import com.snowflake.kafka.connector.internal.SnowflakeURL; import java.security.PrivateKey; import java.util.Base64; import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Properties; import org.apache.kafka.common.config.types.Password; /** * Object to convert and store properties for {@code * net.snowflake.ingest.streaming.SnowflakeStreamingIngestClient}. This object is used to compare * equality between clients in {@code StreamingClientProvider}. */ public class StreamingClientProperties { public static final String STREAMING_CLIENT_V2_PREFIX_NAME = "KC_CLIENT_V2_"; public static final String DEFAULT_CLIENT_NAME = "DEFAULT_CLIENT"; private static final KCLogger LOGGER = new KCLogger(StreamingClientProperties.class.getName()); public final Properties clientProperties; public final String clientNamePrefix; public final Map parameterOverrides; /** Constructor used by {@link #from(SinkTaskConfig)}. 
*/ private StreamingClientProperties( Properties clientProperties, String clientNamePrefix, Map parameterOverrides) { this.clientProperties = clientProperties; this.clientNamePrefix = clientNamePrefix; this.parameterOverrides = parameterOverrides; } /** Creates streaming client properties from parsed {@link SinkTaskConfig}. */ public static StreamingClientProperties from(SinkTaskConfig config) { final Properties clientProperties = new Properties(); if (!Strings.isNullOrEmpty(config.getSnowflakeUrl())) { SnowflakeURL url = new SnowflakeURL(config.getSnowflakeUrl()); final String privateKeyStr = Optional.ofNullable(config.getSnowflakePrivateKey()).map(Password::value).orElse(null); final String privateKeyPassphrase = Optional.ofNullable(config.getSnowflakePrivateKeyPassphrase()) .map(Password::value) .orElse(null); final PrivateKey privateKey = PrivateKeyTool.parsePrivateKey(privateKeyStr, privateKeyPassphrase); final String privateKeyEncoded = Base64.getEncoder().encodeToString(privateKey.getEncoded()); clientProperties.put("private_key", privateKeyEncoded); clientProperties.put("user", config.getSnowflakeUser()); clientProperties.put("role", config.getSnowflakeRole()); clientProperties.put("account", url.getAccount()); clientProperties.put("host", url.getUrlWithoutPort()); } String clientNamePrefix = STREAMING_CLIENT_V2_PREFIX_NAME + (config.getConnectorName() != null ? 
config.getConnectorName() : DEFAULT_CLIENT_NAME); Map parameterOverrides = new HashMap<>(); String overrideMap = config.getStreamingClientProviderOverrideMap(); if (overrideMap != null && !overrideMap.isEmpty()) { Utils.parseCommaSeparatedKeyValuePairs(overrideMap) .forEach((key, value) -> parameterOverrides.put(key.toLowerCase(), value)); LOGGER.info("Streaming Client config overrides: {}", parameterOverrides); } return new StreamingClientProperties(clientProperties, clientNamePrefix, parameterOverrides); } /** * Determines equality between StreamingClientProperties by only looking at the parsed * clientProperties. This is used in {@code StreamingClientProvider} to determine equality in * registered clients * * @param other other object to determine equality * @return if the given object's clientProperties exists and is equal */ @Override public boolean equals(Object other) { return other.getClass().equals(StreamingClientProperties.class) && ((StreamingClientProperties) other).clientProperties.equals(this.clientProperties) && ((StreamingClientProperties) other).parameterOverrides.equals(this.parameterOverrides); } /** * Creates the hashcode for this object from the clientProperties. 
This is used in {@code
   * StreamingClientProvider} to determine equality in registered clients
   *
   * <p>The hash combines both clientProperties and parameterOverrides.
   *
   * @return hash code computed from clientProperties and parameterOverrides
   */
  @Override
  public int hashCode() {
    return Objects.hash(this.clientProperties, this.parameterOverrides);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/StreamingConfigValidator.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.google.common.collect.ImmutableMap;
import java.util.Map;

/** Validates connector config for Snowpipe Streaming */
// TODO (separate PR) - rename to ConfigValidator and return an ordinary Map
public interface StreamingConfigValidator {

  /**
   * Validates the given connector configuration.
   *
   * @param inputConfig connector config provided by the user
   * @return map of invalid parameters
   */
  ImmutableMap validate(final Map inputConfig);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/StreamingErrorHandler.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG;

import com.google.common.base.Strings;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.sink.SinkRecord;

/** Class encapsulating logic related to error handling e.g. DLQ.
*/ public class StreamingErrorHandler { private static final KCLogger LOGGER = new KCLogger(StreamingErrorHandler.class.getName()); private final boolean logErrors; private final boolean isDLQTopicSet; private final boolean errorTolerance; private final KafkaRecordErrorReporter kafkaRecordErrorReporter; private final SnowflakeTelemetryService telemetryServiceV2; public StreamingErrorHandler( SinkTaskConfig config, KafkaRecordErrorReporter kafkaRecordErrorReporter, SnowflakeTelemetryService telemetryServiceV2) { this.logErrors = config.isErrorsLogEnable(); this.isDLQTopicSet = !Strings.isNullOrEmpty(config.getDlqTopicName()); this.errorTolerance = config.isTolerateErrors(); this.kafkaRecordErrorReporter = kafkaRecordErrorReporter; this.telemetryServiceV2 = telemetryServiceV2; } public boolean isLogErrors() { return logErrors; } public void handleError(Exception error, SinkRecord originalRecordForReporting) { if (logErrors) { LOGGER.error("Insert Row Error message:{}", error.getMessage()); } if (errorTolerance) { if (!isDLQTopicSet) { LOGGER.warn( "{} is set, however {} is not. 
The message will not be added to the Dead Letter Queue" + " topic.", ERRORS_TOLERANCE_CONFIG, ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG); } else { LOGGER.warn( "Adding the message to Dead Letter Queue topic: {}", ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG); // Wrap in DataException for KCv3 compatibility while preserving original exception DataException wrappedException = new DataException("Error converting record: " + error.getMessage(), error); this.kafkaRecordErrorReporter.reportError(originalRecordForReporting, wrappedException); } } else { // Preserve the record in DLQ before failing the task if (isDLQTopicSet && kafkaRecordErrorReporter != null) { LOGGER.warn( "Routing failed record to DLQ topic before aborting task (errors.tolerance=none)"); DataException wrappedException = new DataException("Error converting record: " + error.getMessage(), error); this.kafkaRecordErrorReporter.reportError(originalRecordForReporting, wrappedException); } final String errMsg = String.format( "Error inserting Records using Streaming API with msg:%s", error.getMessage()); this.telemetryServiceV2.reportKafkaConnectFatalError(errMsg); throw new DataException(errMsg, error); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/TopicPartitionChannelInsertionException.java ================================================ package com.snowflake.kafka.connector.internal.streaming; /** * Class for exceptions that occur while interacting with Snowflake through Snowpipe Streaming. * *

Please note: This exception is translated from SFException when the Client SDK determines that this is * an invalid insert operation (for instance, the clientSequencer has been bumped up, but we are still * calling with an older clientSequencer number). * *

Use this exception when a particular channel (Topic Partition) fails to insert rows into the * Snowflake table; in this case we will reopen the channel and try to insert the same rows again. * *

(Note: This exception is not used when the Streaming Snowpipe API returns an error in its
 * response)
 */
public class TopicPartitionChannelInsertionException extends RuntimeException {
  /**
   * @param msg description of the failed insert
   * @param t underlying cause
   */
  public TopicPartitionChannelInsertionException(String msg, Throwable t) {
    super(msg, t);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/channel/TopicPartitionChannel.java
================================================
package com.snowflake.kafka.connector.internal.streaming.channel;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import java.util.concurrent.CompletableFuture;
import org.apache.kafka.connect.sink.SinkRecord;

/** Contract for the per-partition channel through which records are inserted. */
public interface TopicPartitionChannel {
  // Sentinel offset value used when there is no committed offset to report.
  long NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE = -1L;

  /**
   * Inserts the record into buffer
   *
   *

Step 1: Initializes this channel by fetching the offsetToken from Snowflake for the first * time this channel/partition has received offset after start/restart. * *

Step 2: Decides whether given offset from Kafka needs to be processed and whether it
   * qualifies for being added into buffer.
   *
   * @param kafkaSinkRecord input record from Kafka
   * @param isFirstRowPerPartitionInBatch indicates whether the given record is the first record per
   *     partition in a batch
   * @return true if the record was processed (or legitimately skipped as a duplicate), false if
   *     recovery was triggered and the caller should stop feeding records to this partition for the
   *     remainder of the batch
   */
  boolean insertRecord(SinkRecord kafkaSinkRecord, boolean isFirstRowPerPartitionInBatch);

  /**
   * Asynchronously closes a channel associated to this partition. Any {@link SFException} occurred
   * is swallowed and a successful {@link CompletableFuture} is returned instead.
   */
  CompletableFuture closeChannelAsync();

  /**
   * A channel which is initializing will be skipped in put and preCommit. The default
   * implementation reports that the channel is never initializing.
   */
  default boolean isInitializing() {
    return false;
  }

  /** Blocks until channel initialization is complete. The default implementation is a no-op. */
  default void awaitInitialization() {}

  /* Returns true if the channel is closed. The caller should handle the logic for reopening the channel if it is closed. */
  boolean isChannelClosed();

  /** Returns the fully qualified channel name in the format of "db.schema.channel". */
  String getChannelNameFormatV1();

  /** Returns the simple (unqualified) channel name, as expected by the SDK batch status API. */
  String getChannelName();

  // NOTE(review): presumably records the latest offset of the Kafka consumer group for this
  // partition - confirm against the implementations.
  void setLatestConsumerGroupOffset(long consumerOffset);

  /**
   * Processes a channel status: logs it, checks for ingestion errors, updates offset tracking, and
   * returns the offset safe to commit to Kafka.
   *
   *

If the committed offset token is null (no data committed yet), returns {@link * #NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE}. Otherwise returns (committedOffset + 1) so that * Kafka resumes from the next record after a restart. * *

When {@code tolerateErrors} is false and new ingestion errors are detected, throws a
   * connector exception to fail the task.
   *
   * @param status the channel status, typically from a batch status call
   * @param tolerateErrors whether to tolerate ingestion errors (maps to {@code errors.tolerance})
   * @return the offset safe to commit to Kafka, or {@link #NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE}
   */
  long processChannelStatus(ChannelStatus status, boolean tolerateErrors);

  /** Returns the pipe name associated with this channel's SDK client. */
  String getPipeName();

  /**
   * Returns a future for the last processed record being committed. The default implementation
   * returns an already-completed future, i.e. there is nothing to wait for.
   */
  default CompletableFuture waitForLastProcessedRecordCommitted() {
    return CompletableFuture.completedFuture(null);
  }

  /** Exposes this channel's telemetry status object; intended for tests. */
  @VisibleForTesting
  SnowflakeTelemetryChannelStatus getSnowflakeTelemetryChannelStatus();
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/PeriodicTelemetryReporter.java
================================================
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

/**
 * Handles periodic reporting of channel status telemetry to Snowflake. This class manages a
 * background daemon thread that reports telemetry at regular intervals.
*/ public final class PeriodicTelemetryReporter { private static final KCLogger LOGGER = new KCLogger(PeriodicTelemetryReporter.class.getName()); public static final long DEFAULT_REPORT_INTERVAL_MS = 120 * 1000L; public static final long MAX_INITIAL_JITTER_MS = 10 * 1000L; private final SnowflakeTelemetryService telemetryService; private final Supplier> channelsSupplier; private final String connectorName; private final String taskId; private final long reportIntervalMs; private final ScheduledExecutorService executor; public PeriodicTelemetryReporter( SnowflakeTelemetryService telemetryService, Supplier> channelsSupplier, SinkTaskConfig taskConfig) { this( telemetryService, channelsSupplier, taskConfig.getConnectorName(), taskConfig.getTaskId(), DEFAULT_REPORT_INTERVAL_MS); } @VisibleForTesting PeriodicTelemetryReporter( SnowflakeTelemetryService telemetryService, Supplier> channelsSupplier, String connectorName, String taskId, long reportIntervalMs) { this.telemetryService = telemetryService; this.channelsSupplier = channelsSupplier; this.connectorName = connectorName; this.taskId = taskId; this.reportIntervalMs = reportIntervalMs; this.executor = createExecutor(); } private ScheduledExecutorService createExecutor() { return Executors.newSingleThreadScheduledExecutor( r -> { Thread t = new Thread(r); t.setName("snowflake-telemetry-reporter-" + connectorName + "-" + taskId); t.setDaemon(true); return t; }); } /** Starts the periodic telemetry reporting with jitter to prevent thundering herd. 
*/ public void start() { long jitter = ThreadLocalRandom.current().nextLong(0, MAX_INITIAL_JITTER_MS); long initialDelay = reportIntervalMs + jitter; executor.scheduleAtFixedRate( this::reportChannelStatusTelemetry, initialDelay, reportIntervalMs, TimeUnit.MILLISECONDS); LOGGER.info( "Started periodic telemetry reporter with interval {} ms (initial delay {} ms including {}" + " ms jitter) for connector: {}, task: {}", reportIntervalMs, initialDelay, jitter, connectorName, taskId); } public void stop() { if (!executor.isShutdown()) { LOGGER.info("Stopping telemetry reporter for connector: {}, task: {}", connectorName, taskId); executor.shutdown(); try { if (!executor.awaitTermination(5, TimeUnit.SECONDS)) { LOGGER.warn("Telemetry reporter did not terminate gracefully, forcing shutdown"); executor.shutdownNow(); } } catch (InterruptedException e) { LOGGER.warn("Interrupted while waiting for telemetry reporter to terminate"); executor.shutdownNow(); Thread.currentThread().interrupt(); } } } /** * Reports telemetry for all active channels. This method is called periodically by the scheduled * executor. 
*/ private void reportChannelStatusTelemetry() { try { Map channels = channelsSupplier.get(); if (channels == null || channels.isEmpty()) { LOGGER.info("No active channels to report telemetry for"); return; } LOGGER.debug( "Reporting telemetry for {} active channels for connector: {}, task: {}", channels.size(), connectorName, taskId); for (Map.Entry entry : channels.entrySet()) { reportChannelTelemetry(entry.getKey(), entry.getValue()); } } catch (Exception e) { LOGGER.error("Error during periodic telemetry reporting: {}", e.getMessage()); } } private void reportChannelTelemetry(String channelKey, TopicPartitionChannel channel) { try { final SnowflakeTelemetryChannelStatus channelStatus = channel.getSnowflakeTelemetryChannelStatus(); if (channelStatus != null && !channelStatus.isEmpty()) { telemetryService.reportKafkaPartitionUsage(channelStatus, false); LOGGER.trace("Reported telemetry for channel: {}", channelKey); } } catch (Exception e) { LOGGER.warn( "Failed to report telemetry for channel: {}, error: {}", channelKey, e.getMessage()); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/SnowflakeTelemetryChannelCreation.java ================================================ /* * Copyright (c) 2023 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ package com.snowflake.kafka.connector.internal.streaming.telemetry; import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TABLE_NAME; import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME; import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryBasicInfo; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode; /** * This object is sent only once when a channel starts. No concurrent modification is made on this * object, thus no lock is required. */ public class SnowflakeTelemetryChannelCreation extends SnowflakeTelemetryBasicInfo { private final long tpChannelCreationTime; // start time of the channel private final String tpChannelName; private boolean isReuseTable = false; // is the channel reusing existing table public SnowflakeTelemetryChannelCreation( final String tableName, final String channelName, final long startTime) { super(tableName, SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_START); this.tpChannelName = channelName; this.tpChannelCreationTime = startTime; } @Override public void dumpTo(ObjectNode msg) { msg.put(TABLE_NAME, this.tableName); msg.put(TOPIC_PARTITION_CHANNEL_NAME, this.tpChannelName); msg.put(TOPIC_PARTITION_CHANNEL_CREATION_TIME, tpChannelCreationTime); } @Override public boolean isEmpty() { throw new IllegalStateException( "Empty function doesnt apply to:" + this.getClass().getSimpleName()); } public void setReuseTable(boolean reuseTable) { isReuseTable = reuseTable; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/SnowflakeTelemetryChannelStatus.java ================================================ /* * 
Copyright (c) 2023 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.channelMetricName;
import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.MetricsUtil;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryBasicInfo;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;

/**
 * Extension of {@link SnowflakeTelemetryBasicInfo} class used to send data to snowflake when the
 * TopicPartitionChannel closes. Also creates and registers various metrics with JMX
 *
 * <p>Most of the data sent to Snowflake is aggregated data.
 */
public class SnowflakeTelemetryChannelStatus extends SnowflakeTelemetryBasicInfo {
  public static final long NUM_METRICS = 4; // update when new metrics are added
  static final String CHANNEL_RECOVERY_COUNT = "channel-recovery-count";

  // channel properties
  private final String connectorName;
  private final String channelName;
  // Type argument is required: the lambdas passed to ifPresent() call MetricsJmxReporter methods.
  private final Optional<MetricsJmxReporter> metricsJmxReporter;
  private final long channelCreationTime;

  // offsets (shared AtomicLong instances handed in by the owner of the channel)
  private final AtomicLong offsetPersistedInSnowflake;
  private final AtomicLong processedOffset;
  private final AtomicLong latestConsumerOffset;

  // channel recovery counter (always tracked; also registered as JMX gauge if enabled)
  private final AtomicLong recoveryCount = new AtomicLong(0);

  // Aggregated count of client-side validation failures for this channel.
  // Reported in channel status telemetry on close, avoiding per-record telemetry overhead.
  private final AtomicLong validationFailureCount = new AtomicLong(0);

  // Count of records where errors were tolerated (errors.tolerance=all) instead of failing the
  // task.
  private final AtomicLong errorToleratedCount = new AtomicLong(0);

  // Whether client-side validation was silently disabled due to initialization failure.
  private volatile boolean validationDisabled = false;

  // Latest SDK-reported metrics, overwritten by each updateFromChannelStatus call.
  // Using volatile (not AtomicLong) since these are set, never atomically incremented.
  private volatile long rowsInsertedCount;
  private volatile long rowsParsedCount;
  private volatile long rowsErrorCount;
  private volatile long serverAvgProcessingLatencyMs = -1; // -1 = no latency reported

  // SDK ChannelStatus identity and error fields, overwritten by each updateFromChannelStatus
  // call. They stay null until the first update and are then omitted from dumpTo().
  private volatile String databaseName;
  private volatile String schemaName;
  private volatile String pipeName;
  private volatile String statusCode;
  private volatile String lastErrorTimestamp;
  private volatile String lastErrorOffsetTokenUpperBound;

  // Counts of SDK backpressure retries and channel-reopen fallbacks during appendRow.
  private final AtomicLong backpressureRetryCount = new AtomicLong(0);
  private final AtomicLong appendRowFallbackCount = new AtomicLong(0);
  private final AtomicLong schemaEvolutionFailureCount = new AtomicLong(0);

  // Names of the gauges registered by registerChannelJMXMetrics; null when JMX is disabled.
  private volatile String[] registeredMetricNames;

  /**
   * Creates a new object tracking {@link
   * com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel} metrics with
   * JMX and send telemetry data to snowflake
   *
   * @param tableName the table the channel is ingesting to
   * @param connectorName the connector name written into the telemetry payload
   * @param channelName the name of the TopicPartitionChannel to track
   * @param startTime the channel creation time written into the telemetry payload
   * @param metricsJmxReporter JMX reporter; present enables channel-level metrics, empty disables
   * @param offsetPersistedInSnowflake live counter read for telemetry and the JMX gauge
   * @param processedOffset live counter read for telemetry and the JMX gauge
   * @param latestConsumerOffset live counter read for telemetry and the JMX gauge
   */
  public SnowflakeTelemetryChannelStatus(
      final String tableName,
      final String connectorName,
      final String channelName,
      final long startTime,
      final Optional<MetricsJmxReporter> metricsJmxReporter,
      final AtomicLong offsetPersistedInSnowflake,
      final AtomicLong processedOffset,
      final AtomicLong latestConsumerOffset) {
    super(tableName, SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_USAGE);

    this.channelCreationTime = startTime;
    this.connectorName = connectorName;
    this.channelName = channelName;
    this.metricsJmxReporter = metricsJmxReporter;

    this.offsetPersistedInSnowflake = offsetPersistedInSnowflake;
    this.processedOffset = processedOffset;
    this.latestConsumerOffset = latestConsumerOffset;

    // Registration reads the fields assigned above, so it must stay the last statement.
    metricsJmxReporter.ifPresent(this::registerChannelJMXMetrics);
  }

  /**
   * @return {@code true} while all three offsets still hold {@code
   *     NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE}; the counters are not considered
   */
  @Override
  public boolean isEmpty() {
    // Check that all properties are still at the default value.
    return this.offsetPersistedInSnowflake.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
        && this.processedOffset.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
        && this.latestConsumerOffset.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
  }

  /**
   * Writes the current values of all tracked properties into the given node. The close-time field
   * is filled with the wall-clock time of this call. The String fields taken from the SDK channel
   * status are only written when they are non-null.
   *
   * @param msg node that receives the fields
   */
  @Override
  public void dumpTo(ObjectNode msg) {
    msg.put(TelemetryConstants.TABLE_NAME, this.tableName);
    msg.put(TelemetryConstants.CONNECTOR_NAME, this.connectorName);
    msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME, this.channelName);

    msg.put(
        TelemetryConstants.OFFSET_PERSISTED_IN_SNOWFLAKE, this.offsetPersistedInSnowflake.get());
    msg.put(TelemetryConstants.PROCESSED_OFFSET, this.processedOffset.get());
    msg.put(TelemetryConstants.LATEST_CONSUMER_OFFSET, this.latestConsumerOffset.get());

    msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME, this.channelCreationTime);
    msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CLOSE_TIME, System.currentTimeMillis());

    msg.put(TelemetryConstants.VALIDATION_FAILURE_COUNT, this.validationFailureCount.get());
    msg.put(TelemetryConstants.ERROR_TOLERATED_COUNT, this.errorToleratedCount.get());
    msg.put(TelemetryConstants.CHANNEL_RECOVERY_COUNT, this.recoveryCount.get());
    msg.put(TelemetryConstants.VALIDATION_DISABLED, this.validationDisabled);

    msg.put(TelemetryConstants.ROWS_INSERTED_COUNT, this.rowsInsertedCount);
    msg.put(TelemetryConstants.ROWS_PARSED_COUNT, this.rowsParsedCount);
    msg.put(TelemetryConstants.ROWS_ERROR_COUNT, this.rowsErrorCount);
    msg.put(TelemetryConstants.SERVER_AVG_PROCESSING_LATENCY_MS, this.serverAvgProcessingLatencyMs);

    putIfNotNull(msg, TelemetryConstants.DATABASE_NAME, this.databaseName);
    putIfNotNull(msg, TelemetryConstants.SCHEMA_NAME, this.schemaName);
    putIfNotNull(msg, TelemetryConstants.PIPE_NAME, this.pipeName);
    putIfNotNull(msg, TelemetryConstants.STATUS_CODE, this.statusCode);
    putIfNotNull(msg, TelemetryConstants.LAST_ERROR_TIMESTAMP, this.lastErrorTimestamp);
    putIfNotNull(
        msg,
        TelemetryConstants.LAST_ERROR_OFFSET_TOKEN_UPPER_BOUND,
        this.lastErrorOffsetTokenUpperBound);

    msg.put(TelemetryConstants.BACKPRESSURE_RETRY_COUNT, this.backpressureRetryCount.get());
    msg.put(TelemetryConstants.APPEND_ROW_FALLBACK_COUNT, this.appendRowFallbackCount.get());
    msg.put(
        TelemetryConstants.SCHEMA_EVOLUTION_FAILURE_COUNT, this.schemaEvolutionFailureCount.get());
  }

  /**
   * Registers one gauge per tracked offset plus the recovery counter in the reporter's metric
   * registry and remembers the metric names for later removal. A name that is already registered
   * is removed and registered again with the new gauge.
   *
   * @param reporter reporter whose registry receives the gauges
   */
  private void registerChannelJMXMetrics(MetricsJmxReporter reporter) {
    MetricRegistry currentMetricRegistry = reporter.getMetricRegistry();

    registeredMetricNames =
        new String[] {
          channelMetricName(
              this.channelName,
              MetricsUtil.OFFSET_SUB_DOMAIN,
              MetricsUtil.OFFSET_PERSISTED_IN_SNOWFLAKE),
          channelMetricName(
              this.channelName, MetricsUtil.OFFSET_SUB_DOMAIN, MetricsUtil.PROCESSED_OFFSET),
          channelMetricName(
              this.channelName, MetricsUtil.OFFSET_SUB_DOMAIN, MetricsUtil.LATEST_CONSUMER_OFFSET),
          channelMetricName(
              this.channelName, MetricsUtil.OFFSET_SUB_DOMAIN, CHANNEL_RECOVERY_COUNT),
        };

    // Same order as registeredMetricNames: gauges[i] is registered under registeredMetricNames[i].
    // Generic array creation is not allowed in Java, hence the raw array and the suppression.
    @SuppressWarnings("unchecked")
    Gauge<Long>[] gauges =
        new Gauge[] {
          (Gauge<Long>) this.offsetPersistedInSnowflake::get,
          (Gauge<Long>) this.processedOffset::get,
          (Gauge<Long>) this.latestConsumerOffset::get,
          (Gauge<Long>) this.recoveryCount::get,
        };

    for (int i = 0; i < registeredMetricNames.length; i++) {
      try {
        currentMetricRegistry.register(registeredMetricNames[i], gauges[i]);
      } catch (IllegalArgumentException ex) {
        // Safe: channel registration is serialized per task within open()
        LOGGER.warn(
            "Metric already present for channel {}, replacing: {}",
            this.channelName,
            registeredMetricNames[i]);
        reporter.removeMetric(registeredMetricNames[i]);
        currentMetricRegistry.register(registeredMetricNames[i], gauges[i]);
      }
    }

    // JmxReporter is started once at task level (SnowflakeSinkTaskMetrics constructor).
    // Its MetricRegistryListener auto-registers new MBeans as metrics are added.
    // Calling start() per-channel would re-process ALL metrics: O(N) unregister + register.
  }

  /** Unregisters the JMX metrics if possible */
  public void tryUnregisterChannelJMXMetrics() {
    metricsJmxReporter.ifPresent(
        reporter -> {
          if (registeredMetricNames != null) {
            for (String name : registeredMetricNames) {
              reporter.removeMetric(name);
            }
          }
        });
  }

  /** Increments the channel recovery counter. Thread-safe. */
  public void incRecoveryCount() {
    this.recoveryCount.incrementAndGet();
  }

  /** Increments the validation failure counter. Thread-safe. */
  public void incValidationFailureCount() {
    this.validationFailureCount.incrementAndGet();
  }

  /** Increments the error-tolerated counter. Thread-safe. */
  public void incErrorToleratedCount() {
    this.errorToleratedCount.incrementAndGet();
  }

  /** Marks that client-side validation was silently disabled due to initialization failure. */
  public void setValidationDisabled() {
    this.validationDisabled = true;
  }

  /** Increments the backpressure retry counter. Thread-safe. */
  public void incBackpressureRetryCount() {
    this.backpressureRetryCount.incrementAndGet();
  }

  /** Increments the append-row fallback counter. Thread-safe. */
  public void incAppendRowFallbackCount() {
    this.appendRowFallbackCount.incrementAndGet();
  }

  /** Increments the schema evolution failure counter. Thread-safe. */
  public void incSchemaEvolutionFailureCount() {
    this.schemaEvolutionFailureCount.incrementAndGet();
  }

  /**
   * Updates SDK-reported metrics from a ChannelStatus response. Each field is written
   * individually, so a concurrent {@link #dumpTo(ObjectNode)} may observe a mix of old and new
   * values.
   *
   * @param status status to copy from; must not be null
   */
  public void updateFromChannelStatus(ChannelStatus status) {
    this.rowsInsertedCount = status.getRowsInsertedCount();
    this.rowsParsedCount = status.getRowsParsedCount();
    this.rowsErrorCount = status.getRowsErrorCount();
    // -1 marks "no latency reported", matching the field's initial value.
    this.serverAvgProcessingLatencyMs =
        status.getServerAvgProcessingLatency() != null
            ? status.getServerAvgProcessingLatency().toMillis()
            : -1;
    this.databaseName = status.getDatabaseName();
    this.schemaName = status.getSchemaName();
    this.pipeName = status.getPipeName();
    this.statusCode = status.getStatusCode() != null ? status.getStatusCode().toString() : null;
    this.lastErrorTimestamp =
        status.getLastErrorTimestamp() != null ? status.getLastErrorTimestamp().toString() : null;
    this.lastErrorOffsetTokenUpperBound = status.getLastErrorOffsetTokenUpperBound();
  }

  /** Writes {@code key -> value} into {@code msg} unless {@code value} is null. */
  private static void putIfNotNull(ObjectNode msg, String key, String value) {
    if (value != null) {
      msg.put(key, value);
    }
  }

  @VisibleForTesting
  public long getOffsetPersistedInSnowflake() {
    return this.offsetPersistedInSnowflake.get();
  }

  @VisibleForTesting
  public long getProcessedOffset() {
    return this.processedOffset.get();
  }

  @VisibleForTesting
  public long getLatestConsumerOffset() {
    return this.latestConsumerOffset.get();
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/SnowflakeTelemetrySsv1Migration.java
================================================
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_CHANNEL_NAME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_MIGRATED_OFFSET;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_MIGRATION_MODE;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_MIGRATION_OUTCOME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TABLE_NAME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME;

import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryBasicInfo;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Locale;
import
net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode; /** * One-shot telemetry event sent when SSv1 offset migration is attempted for a channel. Only emitted * when the migration mode is not SKIP and the SSv2 channel has no committed offset yet. */ public class SnowflakeTelemetrySsv1Migration extends SnowflakeTelemetryBasicInfo { private final String channelName; private final String ssv1ChannelName; private final Ssv1MigrationMode migrationMode; private final Ssv1MigrationResponse response; public SnowflakeTelemetrySsv1Migration( String tableName, String channelName, String ssv1ChannelName, Ssv1MigrationMode migrationMode, Ssv1MigrationResponse response) { super(tableName, SnowflakeTelemetryService.TelemetryType.KAFKA_SSV1_MIGRATION); this.channelName = channelName; this.ssv1ChannelName = ssv1ChannelName; this.migrationMode = migrationMode; this.response = response; } @Override public void dumpTo(ObjectNode msg) { msg.put(TABLE_NAME, this.tableName); msg.put(TOPIC_PARTITION_CHANNEL_NAME, this.channelName); msg.put(SSV1_CHANNEL_NAME, this.ssv1ChannelName); msg.put(SSV1_MIGRATION_MODE, this.migrationMode.name().toLowerCase(Locale.ROOT)); msg.put(SSV1_MIGRATION_OUTCOME, deriveOutcome()); Long offset = this.response.getMigratedOffset(); if (offset != null) { msg.put(SSV1_MIGRATED_OFFSET, offset); } } private String deriveOutcome() { if (response.getMigratedOffset() != null) { return "migrated"; } else if (!response.isSsv1ChannelFound()) { return migrationMode == Ssv1MigrationMode.STRICT ? 
"ssv1_not_found_strict" : "ssv1_not_found"; } else { return "ssv1_no_offset"; } } @Override public boolean isEmpty() { throw new IllegalStateException("isEmpty does not apply to " + this.getClass().getSimpleName()); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/AppendRowWithFallbackPolicy.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import com.snowflake.ingest.streaming.SFException; import com.snowflake.kafka.connector.internal.KCLogger; import dev.failsafe.Failsafe; import dev.failsafe.Fallback; import dev.failsafe.function.CheckedRunnable; import java.time.Duration; /** * Policy class that encapsulates Failsafe logic for insert row operations with channel reopening * fallback functionality. * *

This class provides a clean interface to execute append row operations with automatic channel * recovery on non-retryable {@link SFException}. For retryable backpressure errors, it throws * {@link BackpressureException} to signal the batch-level insert loop to abandon the batch and * rewind offsets. */ class AppendRowWithFallbackPolicy { private static final KCLogger LOGGER = new KCLogger(AppendRowWithFallbackPolicy.class.getName()); /** Delay before fallback attempt (channel reopening). */ private static final Duration FALLBACK_DELAY = Duration.ofMillis(500); /** Random jitter added to fallback delays to prevent retry storms. */ private static final Duration JITTER_DURATION = Duration.ofMillis(200); /** * Executes the given action after a delay with jitter to prevent retry storms. * * @param action the action to execute after the delay * @param channelName the channel name for logging purposes */ private static void withDelay(CheckedRunnable action, String channelName) throws Throwable { try { long delayMs = FALLBACK_DELAY.toMillis() + (long) (Math.random() * JITTER_DURATION.toMillis()); LOGGER.info("Delaying channel recovery by {}ms for channel: {}", delayMs, channelName); Thread.sleep(delayMs); LOGGER.info("Executing channel recovery for channel: {}", channelName); action.run(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } catch (SFException e) { // Re-throw SFException unchanged so Fallback can handle it properly throw e; } catch (Exception e) { throw new RuntimeException(e); } } /** * Executes the provided append row action with fallback handling. * *

On retryable {@link SFException} (backpressure errors), throws {@link BackpressureException} * to signal the batch-level insert loop that the batch should be abandoned and offsets should be * rewound. The channel remains valid. * *

On non-retryable {@link SFException}, it will execute the fallback supplier to reopen the * channel and reset offsets after a simple blocking delay with jitter to prevent retry storms. * * @param appendRowAction the action to execute (typically channel.appendRow call) * @param fallbackSupplier the fallback action to execute on non-retryable failure (channel * reopening logic) * @param channelName the channel name for logging purposes */ /** * @return true if the append row action succeeded normally, false if the fallback was executed * (meaning the record was NOT inserted). When this returns false, callers must NOT advance * processedOffset — the fallback's recovery logic has already reset offset state. */ static boolean executeWithFallback( CheckedRunnable appendRowAction, FallbackSupplierWithException fallbackSupplier, String channelName) { boolean[] succeeded = {true}; Fallback reopenChannelFallbackExecutor = Fallback.builder( executionAttemptedEvent -> { Throwable lastException = executionAttemptedEvent.getLastException(); // Check if this is a retryable backpressure error if (BackpressureException.isRetryableError(lastException)) { // The channel is still valid; throw BackpressureException to signal // the batch-level insert loop to abandon the batch and rewind offsets throw new BackpressureException((SFException) lastException); } // Non-retryable error: proceed with channel reopening succeeded[0] = false; withDelay(() -> fallbackSupplier.execute(lastException), channelName); }) .handle(SFException.class) .onFailedAttempt( event -> LOGGER.warn( "Failed Attempt to invoke the appendRow API for channel: {}. Exception: {}", channelName, event.getLastException())) .onFailure( event -> { if (event.getException() instanceof BackpressureException) { LOGGER.warn( "Backpressure on channel {}: {}", channelName, event.getException().getMessage()); } else { LOGGER.error( "{} Failed to open Channel or fetching offsetToken for channel:{}." 
+ " Exception: {}", "APPEND_ROW_FALLBACK", channelName, event.getException()); } }) .build(); Failsafe.with(reopenChannelFallbackExecutor).run(appendRowAction); return succeeded[0]; } /** * Functional interface for fallback supplier that can throw exceptions. * *

This is used to encapsulate the channel reopening logic that needs to be executed when the * primary append row operation fails. */ @FunctionalInterface interface FallbackSupplierWithException { /** * Executes the fallback logic. * * @param exception the original exception that caused the fallback to be triggered * @throws Exception if the fallback operation fails */ void execute(Throwable exception) throws Exception; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/BackpressureException.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import com.google.common.base.Preconditions; import com.snowflake.ingest.streaming.SFException; import java.util.Set; /** * Unchecked exception thrown when the Snowflake SDK signals backpressure due to memory saturation * or receiver overload. * *

This exception wraps {@link SFException} instances with specific error codes indicating * transient memory pressure. It signals to the batch-level insert loop that the current batch * should be abandoned and offsets should be rewound, but the channel remains valid and does not * need to be reopened. * *

Retryable error codes: * *

    *
  • {@code ReceiverSaturated} - 429 Too Many Requests *
  • {@code MemoryThresholdExceeded} - 429 Too Many Requests *
  • {@code MemoryThresholdExceededInContainer} - 429 Too Many Requests *
  • {@code HttpRetryableClientError} - 503 Service Unavailable *
*/ public class BackpressureException extends RuntimeException { private static final Set RETRYABLE_ERROR_CODE_NAMES = Set.of( // 429 Too Many Requests "ReceiverSaturated", "MemoryThresholdExceeded", "MemoryThresholdExceededInContainer", // 503 Service Unavailable "HttpRetryableClientError"); /** * Constructs a new {@code BackpressureException} wrapping the given {@link SFException}. * * @param cause the SDK exception indicating backpressure */ public BackpressureException(SFException cause) { super( "SDK backpressure: " + Preconditions.checkNotNull(cause, "cause").getErrorCodeName(), cause); Preconditions.checkArgument( isRetryableError(cause), "BackpressureException requires a retryable SFException, got: %s", cause.getErrorCodeName()); } /** * Checks if the given throwable represents a retryable backpressure error. * * @param e the exception to check (may be null) * @return {@code true} if {@code e} is an {@link SFException} with a retryable error code name; * {@code false} otherwise */ public static boolean isRetryableError(Throwable e) { if (!(e instanceof SFException)) { return false; } return RETRYABLE_ERROR_CODE_NAMES.contains(((SFException) e).getErrorCodeName()); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/ClientRecreationException.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import com.google.common.base.Preconditions; import com.snowflake.ingest.streaming.SFException; import java.util.Set; /** * Unchecked exception thrown when the Snowflake SDK signals that the streaming client is in an * invalid state and must be recreated. * *

This exception wraps {@link SFException} instances with specific error codes indicating the * client itself is no longer usable. Unlike {@link BackpressureException} (where the channel * remains valid), this signals that the client and all its channels must be replaced. * *

Client-invalid error codes: * *

    *
  • {@code InvalidClientError} - client marked invalid after a fatal internal error or pipe * failover (409 Conflict) *
  • {@code SfApiPipeFailedOverError} - HTTP 410 on any API call triggers client invalidation *
  • {@code ClosedClientError} - client has been closed and cannot be reused (409 Conflict) *
*/ public class ClientRecreationException extends RuntimeException { private static final Set CLIENT_INVALID_ERROR_CODE_NAMES = Set.of( // Client invalidated by SDK (pipe failover, auth refresh failure, etc.) "InvalidClientError", // HTTP 410 on open_channel, insert_rows, get_channel_status, or pipe refresh "SfApiPipeFailedOverError", // Client was closed "ClosedClientError"); /** * Constructs a new {@code ClientRecreationException} wrapping the given {@link SFException}. * * @param cause the SDK exception indicating the client is invalid */ public ClientRecreationException(SFException cause) { super( "SDK client invalid: " + Preconditions.checkNotNull(cause, "cause").getErrorCodeName(), cause); Preconditions.checkArgument( isClientInvalidError(cause), "ClientRecreationException requires a client-invalid SFException, got: %s", cause.getErrorCodeName()); } /** * Wraps the given throwable as a {@code ClientRecreationException} if it is a client-invalid * {@link SFException}. Avoids the need for callers to cast to {@code SFException} manually. * * @param e the exception to wrap * @return a new {@code ClientRecreationException} wrapping the cause * @throws IllegalArgumentException if {@code e} is not a client-invalid {@link SFException} */ public static ClientRecreationException wrap(Throwable e) { Preconditions.checkArgument( isClientInvalidError(e), "Cannot wrap non-client-invalid exception: %s", e.getClass().getName()); return new ClientRecreationException((SFException) e); } /** * Checks if the given throwable represents a client-level invalidation error that requires client * recreation. 
* * @param e the exception to check (may be null) * @return {@code true} if {@code e} is an {@link SFException} with a client-invalid error code * name; {@code false} otherwise */ public static boolean isClientInvalidError(Throwable e) { if (!(e instanceof SFException)) { return false; } return CLIENT_INVALID_ERROR_CODE_NAMES.contains(((SFException) e).getErrorCodeName()); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/ClientRecreator.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; /** * Strategy for replacing an invalid {@link SnowflakeStreamingIngestClient} with a new one. * *

<p>Implementations are expected to use compare-and-swap semantics: if the client has already been
 * replaced by another caller, the existing replacement should be returned without creating a second
 * one.
 */
@FunctionalInterface
public interface ClientRecreator {
  /**
   * Swaps out a client that can no longer be used.
   *
   * @param invalidClient the client instance that is no longer valid (identity-compared in the
   *     pool)
   * @return the new client, or the already-replaced client if another caller got there first
   */
  SnowflakeStreamingIngestClient recreate(SnowflakeStreamingIngestClient invalidClient);
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/PipeNameProvider.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.Constants.DEFAULT_PIPE_NAME_SUFFIX;

/** Builds pipe names for Snowpipe Streaming v2. */
public final class PipeNameProvider {

  /**
   * Derives the default pipe name of a table by appending {@code DEFAULT_PIPE_NAME_SUFFIX} to the
   * table name.
   *
   * @param table table name used as the prefix
   * @return {@code table} followed by the default suffix
   */
  public static String buildDefaultPipeName(String table) {
    final String defaultPipeName = table + DEFAULT_PIPE_NAME_SUFFIX;
    return defaultPipeName;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/SnowpipeStreamingPartitionChannel.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5027;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5028;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5030;

import com.google.common.annotations.VisibleForTesting;
import
com.snowflake.ingest.streaming.ChannelStatus; import com.snowflake.ingest.streaming.OpenChannelResult; import com.snowflake.ingest.streaming.SFException; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.config.SnowflakeValidation; import com.snowflake.kafka.connector.internal.DescribeTableRow; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.schemaevolution.SchemaEvolutionTargetItems; import com.snowflake.kafka.connector.internal.schemaevolution.SnowflakeSchemaEvolutionService; import com.snowflake.kafka.connector.internal.schemaevolution.ValidationResultMapper; import com.snowflake.kafka.connector.internal.streaming.StreamingErrorHandler; import com.snowflake.kafka.connector.internal.streaming.TopicPartitionChannelInsertionException; import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetrySsv1Migration; import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import 
com.snowflake.kafka.connector.internal.validation.ColumnSchema; import com.snowflake.kafka.connector.internal.validation.RowValidator; import com.snowflake.kafka.connector.internal.validation.ValidationResult; import com.snowflake.kafka.connector.records.SnowflakeSinkRecord; import java.time.Duration; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.DataException; import org.apache.kafka.connect.sink.SinkRecord; public class SnowpipeStreamingPartitionChannel implements TopicPartitionChannel { private static final KCLogger LOGGER = new KCLogger(SnowpipeStreamingPartitionChannel.class.getName()); private volatile CompletableFuture channel; private final AtomicBoolean cancelled = new AtomicBoolean(false); private final PartitionOffsetTracker offsetTracker; // Tracks the initial error count when the channel was opened. // Used to detect NEW errors (current error count > initial error count) since error counts // are cumulative and don't reset when a channel is reopened. private long initialErrorCount = 0; /** Max consecutive channel recoveries before giving up and letting the task fail. */ private static final int MAX_CONSECUTIVE_RECOVERIES = 5; /** * Consecutive recovery counter. Incremented each time the fallback reopens the channel, reset to * zero on every successful appendRow. If this reaches {@link #MAX_CONSECUTIVE_RECOVERIES} the * fallback re-throws to let the KC framework kill the task. 
*/ private int consecutiveRecoveryCount = 0; private final String channelName; private final SnowflakeTelemetryChannelStatus snowflakeTelemetryChannelStatus; private final SinkTaskConfig taskConfig; /** * Used to send telemetry to Snowflake. Currently, TelemetryClient created from a Snowflake * Connection Object, i.e. not a session-less Client */ private final SnowflakeTelemetryService telemetryService; private final String pipeName; private final SnowflakeStreamingIngestClient streamingClient; private final ExecutorService openChannelIoExecutor; private final StreamingErrorHandler streamingErrorHandler; private final TaskMetrics taskMetrics; // SSv1 offset migration private final Optional ssv1ChannelName; // Client-side validation fields private final SnowflakeConnectionService conn; private final String tableName; private volatile RowValidator rowValidator; private volatile SnowflakeSchemaEvolutionService schemaEvolutionService; private volatile Map tableSchema; private final boolean shouldEvolveSchema; public SnowpipeStreamingPartitionChannel( String tableName, String channelName, String pipeName, SnowflakeStreamingIngestClient streamingClient, ExecutorService openChannelIoExecutor, SnowflakeTelemetryService telemetryService, SnowflakeTelemetryChannelStatus snowflakeTelemetryChannelStatus, PartitionOffsetTracker offsetTracker, SinkTaskConfig taskConfig, StreamingErrorHandler streamingErrorHandler, TaskMetrics taskMetrics, boolean shouldEvolveSchema, SnowflakeConnectionService conn, Optional ssv1ChannelName) { this.channelName = channelName; this.pipeName = pipeName; this.streamingClient = streamingClient; this.openChannelIoExecutor = openChannelIoExecutor; this.taskConfig = taskConfig; this.streamingErrorHandler = streamingErrorHandler; this.taskMetrics = taskMetrics; this.telemetryService = telemetryService; this.snowflakeTelemetryChannelStatus = snowflakeTelemetryChannelStatus; this.offsetTracker = offsetTracker; this.shouldEvolveSchema = shouldEvolveSchema; 
this.conn = conn; this.tableName = tableName; this.ssv1ChannelName = ssv1ChannelName; LOGGER.info( "Initializing SnowpipeStreamingPartitionChannel channel: {}, pipe: {}", channelName, pipeName); this.channel = CompletableFuture.supplyAsync( () -> { OpenChannelResult openChannelResult = openChannelForTable(channelName); long offsetRecoveredFromSnowflake = parseOrMigrateOffsetToken(openChannelResult); offsetTracker.initializeFromSnowflake(offsetRecoveredFromSnowflake); return openChannelResult.getChannel(); }, openChannelIoExecutor); if (taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE) { initializeValidation(); } else { LOGGER.info("Client-side validation disabled for channel {}", channelName); } this.telemetryService.reportKafkaPartitionStart( new SnowflakeTelemetryChannelCreation(tableName, channelName, System.currentTimeMillis())); } @Override public boolean insertRecord(SinkRecord kafkaSinkRecord, boolean isFirstRowPerPartitionInBatch) { if (offsetTracker.shouldProcess(kafkaSinkRecord.kafkaOffset(), isFirstRowPerPartitionInBatch)) { return transformAndSend(kafkaSinkRecord); } return true; } private boolean transformAndSend(SinkRecord kafkaSinkRecord) { try { final long kafkaOffset = kafkaSinkRecord.kafkaOffset(); final SnowflakeSinkRecord record = SnowflakeSinkRecord.from( kafkaSinkRecord, taskConfig.getMetadataConfig(), taskConfig.isEnableSchematization(), taskConfig.isEnableColumnIdentifierNormalization()); if (record.isBroken()) { LOGGER.debug("Broken record offset:{}, topic:{}", kafkaOffset, kafkaSinkRecord.topic()); streamingErrorHandler.handleError(record.getBrokenReason(), kafkaSinkRecord); // If we reach here, the error was tolerated (errors.tolerance=all) snowflakeTelemetryChannelStatus.incErrorToleratedCount(); } else { // If we reach here, it means we should ingest a record (possibly empty for tombstones) final Map row = record.getContentWithMetadata( taskConfig.getMetadataConfig().shouldIncludeAllMetadata()); if (!row.isEmpty()) { if 
(taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE && rowValidator != null) { ValidationResult validationResult = rowValidator.validateRow(row); if (!validationResult.isValid()) { if (validationResult.hasStructuralError()) { handleStructuralError(validationResult, kafkaSinkRecord, record, row); } else { handleValidationError(validationResult, kafkaSinkRecord); } offsetTracker.recordProcessed(kafkaOffset); return true; } } if (!insertRowWithFallback(row, kafkaOffset)) { // Fallback fired: the record was NOT inserted, and the fallback's recovery // logic already reset processedOffset + rewound Kafka. Do NOT call // recordProcessed() here — that would advance processedOffset past the // recovery point and cause replayed offsets to be skipped. See SNOW-3344243. return false; } } } // Always update processedOffset after processing, even for broken records offsetTracker.recordProcessed(kafkaOffset); return true; } catch (BackpressureException ex) { snowflakeTelemetryChannelStatus.incBackpressureRetryCount(); throw ex; } catch (TopicPartitionChannelInsertionException ex) { // Suppressing the exception because other channels might still continue to ingest LOGGER.warn( "Failed to insert row for channel:{}. Will be retried by Kafka. 
Exception: {}", this.channelName, ex); return true; } } @Override public CompletableFuture waitForLastProcessedRecordCommitted() { if (offsetTracker.getLastAppendRowsOffset() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) { return CompletableFuture.completedFuture(null); } return CompletableFuture.runAsync( () -> { LOGGER.info("Starting flush for channel: {}", this.channelName); streamingClient.initiateFlush(); final long targetOffset = offsetTracker.getLastAppendRowsOffset(); WaitForLastOffsetCommittedPolicy.getPolicy( () -> { long offsetCommittedToBackend = fetchLatestCommittedOffsetFromSnowflake(); if (offsetCommittedToBackend == targetOffset) { return true; } throw ERROR_5027.getException(); }); LOGGER.info("Completed flush for channel: {}", this.channelName); }); } /** * Uses {@link AppendRowWithFallbackPolicy} to reopen the channel if insertRows throws {@link * SFException}. * *

We have deliberately not performed retries on insertRows because it might slow down overall * ingestion and introduce lags in committing offsets to Kafka. * *

Note that insertRows API does perform channel validation which might throw SFException if * channel is invalidated. */ /** * @return true if the record was inserted successfully, false if the fallback fired (record was * NOT inserted) */ private boolean insertRowWithFallback(Map row, long offset) { return AppendRowWithFallbackPolicy.executeWithFallback( () -> { LOGGER.trace("Inserting transformed record: {}, offset: {}", row, offset); getChannel().appendRow(row, Long.toString(offset)); offsetTracker.recordAppended(offset); consecutiveRecoveryCount = 0; }, (Throwable ex) -> { consecutiveRecoveryCount++; if (consecutiveRecoveryCount > MAX_CONSECUTIVE_RECOVERIES) { LOGGER.error( "Channel {} exceeded max consecutive recoveries ({}), giving up", this.channelName, MAX_CONSECUTIVE_RECOVERIES); throw new TopicPartitionChannelInsertionException( String.format( "Channel %s failed after %d consecutive recovery attempts", this.channelName, MAX_CONSECUTIVE_RECOVERIES), ex); } LOGGER.warn( "Channel {} recovery attempt {}/{}", this.channelName, consecutiveRecoveryCount, MAX_CONSECUTIVE_RECOVERIES); reopenChannel("APPEND_ROW_FALLBACK"); snowflakeTelemetryChannelStatus.incAppendRowFallbackCount(); }, this.channelName); } private static void closeChannelWithoutFlushing(SnowflakeStreamingIngestChannel channel) { try { channel.close(false /* waitForFlush */, Duration.ZERO); } catch (TimeoutException e) { // This should never happen since we are not waiting for the channel to flush. throw new RuntimeException( String.format("Error closing channel %s: %s", channel.getChannelName(), e.getMessage())); } } /** * Fallback function to be executed when either of insertRows API or getOffsetToken sends * SFException. * *

<p>Or, in other words, if streaming channel is invalidated, we will reopen the channel and
   * reset the kafka offset to last committed offset in Snowflake.
   *
   * <p>If a valid offset is found from snowflake, we will reset the topicPartition with
   * (offsetReturnedFromSnowflake + 1).
   *
   * <p>This method returns nothing: it replaces {@code this.channel} with a new future that closes
   * the old channel (if any), opens a new one, and resets the offset tracker from the recovered
   * offset. Callers obtain the reopened channel through {@link #getChannel()}.
   *
   * @param reason Reason for the channel recovery. Used for logging.
   */
  private void reopenChannel(final String reason) {
    LOGGER.warn("{} Channel {} recovery initiated", reason, this.channelName);
    if (this.snowflakeTelemetryChannelStatus != null) {
      this.snowflakeTelemetryChannelStatus.incRecoveryCount();
    }

    // The stages below are chained onto the current channel future; the order matters:
    // close old channel -> swallow any failure so far -> open the new channel.
    this.channel =
        this.channel
            // Close old channel before reopening a new one. We don't want to wait for the channel
            // to flush since it will be reopened right away and the in-progress data will be lost.
            .thenAccept(
                oldChannel -> {
                  if (!oldChannel.isClosed()) {
                    LOGGER.info(
                        "{} Channel {} is not closed before reopening", reason, this.channelName);
                    closeChannelWithoutFlushing(oldChannel);
                  }
                })
            // If the previous init failed, there is no old channel to close.
            // Because this stage follows thenAccept, it also receives (and swallows) an
            // exception thrown by the close step above.
            .exceptionally(
                initFailure -> {
                  LOGGER.warn(
                      "{} Channel {} had a failed initialization, skipping close: {}",
                      reason,
                      this.channelName,
                      initFailure.getMessage());
                  return null;
                })
            // Open the replacement channel and realign offsets with what Snowflake reports.
            .thenApply(
                ignored -> {
                  OpenChannelResult openChannelResult = openChannelForTable(channelName);
                  final long offsetRecoveredFromSnowflake =
                      parseOrMigrateOffsetToken(openChannelResult);
                  if (offsetRecoveredFromSnowflake == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
                    LOGGER.info(
                        "{} Channel {} has no offset token. Will use consumer group offset,"
                            + " currently {}",
                        reason,
                        this.channelName,
                        offsetTracker.consumerGroupOffsetRef().get());
                  }
                  offsetTracker.resetAfterRecovery(offsetRecoveredFromSnowflake);
                  LOGGER.info(
                      "{} Channel {} recovery complete, offsetRecoveredFromSnowflake={}",
                      reason,
                      this.channelName,
                      offsetRecoveredFromSnowflake);
                  return openChannelResult.getChannel();
                });
  }

  /**
   * Parses the SSv2 offset from the open-channel result, and if SSv2 has no committed offset yet,
   * attempts SSv1 offset migration based on the configured {@link Ssv1MigrationMode}.
   *
   *

Used by both the initial channel open (constructor) and {@link #reopenChannel} so that * migration behavior is consistent regardless of whether the first open succeeded or failed. */ private long parseOrMigrateOffsetToken(OpenChannelResult openChannelResult) { final long ssv2Offset = parseOffsetToken( openChannelResult.getChannelStatus().getLatestCommittedOffsetToken(), channelName); LOGGER.info("Channel {} has SSv2 offset token {}", channelName, ssv2Offset); long effectiveOffset = ssv2Offset; // Only consult SSv1 when SSv2 has no committed offset yet (first-time migration). // Once SSv2 has its own offset, it is authoritative. if (ssv2Offset == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE && taskConfig.getSsv1MigrationMode() != Ssv1MigrationMode.SKIP) { // migrateSsv1ChannelOffset calls SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET which: // - returns ssv1ChannelFound=false if the SSv1 channel doesn't exist // - returns ssv1ChannelFound=true, migratedOffset=null if found but no committed offset // - returns ssv1ChannelFound=true, migratedOffset=N on success (also writes to SSv2 in FDB) // - THROWS for SQL/network errors (must not silently proceed -- // falling through to consumer group offset could cause duplicates) String ssv1Channel = ssv1ChannelName.orElseThrow( () -> new IllegalStateException( "ssv1ChannelName must be present when migration mode is " + taskConfig.getSsv1MigrationMode())); Ssv1MigrationResponse response = conn.migrateSsv1ChannelOffset(tableName, ssv1Channel, channelName, pipeName); Long migrated = response.getMigratedOffset(); if (migrated != null) { effectiveOffset = migrated; LOGGER.info( "SSv2 channel {} has no offset yet, migrating SSv1 offset for {}: {}", channelName, ssv1Channel, effectiveOffset); } else if (!response.isSsv1ChannelFound()) { LOGGER.info("SSv1 channel {} not found for SSv2 channel {}", ssv1Channel, channelName); } else { LOGGER.info( "SSv1 channel {} exists but has no committed offset for SSv2 channel {}", ssv1Channel, channelName); } 
telemetryService.reportSsv1Migration( new SnowflakeTelemetrySsv1Migration( tableName, channelName, ssv1Channel, taskConfig.getSsv1MigrationMode(), response)); if (!response.isSsv1ChannelFound() && taskConfig.getSsv1MigrationMode() == Ssv1MigrationMode.STRICT) { throw new ConnectException( "Snowpipe Streaming Classic channel " + ssv1Channel + " not found but the offset token migration mode is set to 'strict'. This can" + " happen if new topics are added after migrating from version 3 of the" + " connector or if an incorrect value is provided for " + SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME + " or the connector name. Validate your settings or set " + SNOWFLAKE_SSV1_OFFSET_MIGRATION + " to 'best_effort' or 'skip' to fall through to the Kafka consumer group" + " offset."); } } return effectiveOffset; } /** * Parses an offset token string into a long value. * * @param offsetToken the offset token string (may be null) * @param channelNameForLogging used in error messages * @return the parsed long, or {@link * TopicPartitionChannel#NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} if null * @throws ConnectException if the token is non-null but not parsable as long */ @VisibleForTesting static long parseOffsetToken(String offsetToken, String channelNameForLogging) { if (offsetToken == null) { return NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE; } try { return Long.parseLong(offsetToken); } catch (NumberFormatException ex) { LOGGER.error( "The offsetToken string does not contain a parsable long:{} for channel:{}", offsetToken, channelNameForLogging); throw new ConnectException(ex); } } /** * Returns the offset Token persisted into snowflake. * *

OffsetToken from Snowflake returns a String and we will convert it into long. * *

If it is not long parsable, we will throw {@link ConnectException} * * @return -1 if no offset is found in snowflake, else the long value of committedOffset in * snowflake. */ private long fetchLatestCommittedOffsetFromSnowflake() { return fetchLatestOffsetFromChannel(this.getChannel()); } private static long fetchLatestOffsetFromChannel(SnowflakeStreamingIngestChannel channel) { String offsetToken = channel.getLatestCommittedOffsetToken(); LOGGER.info( "Fetched offsetToken for channelName:{}, offset:{}", channel.getChannelName(), offsetToken); return parseOffsetToken(offsetToken, channel.getChannelName()); } private void initializeValidation() { try { Optional> describeResult = conn.describeTable(tableName); if (!describeResult.isPresent()) { LOGGER.warn( "Table {} not found during validation initialization. " + "Client-side validation will be disabled for channel {}", tableName, channelName); this.snowflakeTelemetryChannelStatus.setValidationDisabled(); return; } this.tableSchema = new HashMap<>(); for (DescribeTableRow row : describeResult.get()) { ColumnSchema colSchema = ColumnSchema.fromDescribeTableFields( row.getColumn(), row.getType(), row.getNullable(), row.hasDefault(), row.isAutoincrement()); this.tableSchema.put(row.getColumn(), colSchema); } RowValidator.validateSchema(this.tableSchema); this.rowValidator = new RowValidator(this.tableSchema); this.schemaEvolutionService = new SnowflakeSchemaEvolutionService(conn); LOGGER.info( "Client-side validation enabled for channel {}. Table {} has {} columns," + " enableSchematization={}", channelName, tableName, this.tableSchema.size(), taskConfig.isEnableSchematization()); } catch (Exception e) { LOGGER.warn( "Failed to initialize client-side validation for channel {}. " + "Validation will be disabled. 
Error: {}", channelName, e.getMessage()); this.snowflakeTelemetryChannelStatus.setValidationDisabled(); this.rowValidator = null; } } private void refreshTableSchema() { initializeValidation(); } private void handleValidationError( ValidationResult result, SinkRecord originalRecordForReporting) { if (streamingErrorHandler.isLogErrors()) { LOGGER.warn( "Client-side validation failure [{}] channel={}, column={}, error={}, offset={}", result.getErrorType(), channelName, result.getColumnName(), result.getValueError(), originalRecordForReporting.kafkaOffset()); } snowflakeTelemetryChannelStatus.incValidationFailureCount(); String errorMsg = String.format( "Validation failed for column %s: %s", result.getColumnName(), result.getValueError()); streamingErrorHandler.handleError(new DataException(errorMsg), originalRecordForReporting); snowflakeTelemetryChannelStatus.incErrorToleratedCount(); } private void handleStructuralError( ValidationResult result, SinkRecord originalRecordForReporting, SnowflakeSinkRecord snowflakeRecord, Map row) { if (streamingErrorHandler.isLogErrors()) { LOGGER.warn( "Client-side structural validation failure [{}] channel={}, " + "hasSchemaEvolutionPermission={}, extraCols={}, missingNotNull={}, " + "nullNotNull={}, offset={}", result.getErrorType(), channelName, shouldEvolveSchema, result.getExtraColNames(), result.getMissingNotNullColNames(), result.getNullValueForNotNullColNames(), originalRecordForReporting.kafkaOffset()); } if (!shouldEvolveSchema) { snowflakeTelemetryChannelStatus.incValidationFailureCount(); String errorMsg = String.format( "Structural validation error (schema evolution disabled): extraCols=%s," + " missingNotNull=%s", result.getExtraColNames(), result.getMissingNotNullColNames()); LOGGER.info("Routing to DLQ for channel {}: {}", channelName, errorMsg); streamingErrorHandler.handleError(new DataException(errorMsg), originalRecordForReporting); snowflakeTelemetryChannelStatus.incErrorToleratedCount(); return; } try { 
LOGGER.info("Attempting schema evolution for channel {}, table {}", channelName, tableName); SchemaEvolutionTargetItems items = ValidationResultMapper.mapToSchemaEvolutionItems(result, tableName); schemaEvolutionService.evolveSchemaIfNeeded(items, snowflakeRecord); refreshTableSchema(); ValidationResult retryResult = result; if (rowValidator != null) { retryResult = rowValidator.validateRow(row); if (retryResult.isValid()) { insertRowWithFallback(row, originalRecordForReporting.kafkaOffset()); return; } } snowflakeTelemetryChannelStatus.incValidationFailureCount(); snowflakeTelemetryChannelStatus.incSchemaEvolutionFailureCount(); String errorMsg = String.format( "Schema mismatch after evolution attempt: extraCols=%s, missingNotNull=%s", retryResult.getExtraColNames(), retryResult.getMissingNotNullColNames()); streamingErrorHandler.handleError(new DataException(errorMsg), originalRecordForReporting); snowflakeTelemetryChannelStatus.incErrorToleratedCount(); } catch (SnowflakeKafkaConnectorException e) { LOGGER.error("Schema evolution failed for table {}", tableName, e); throw e; } } /** * Open a channel for Table with given channel name and tableName. * *

Open channels happens at: * *

Constructor of TopicPartitionChannel -> which means we will wipe of all states and it will * call precomputeOffsetTokenForChannel * *

Failure handling which will call reopen, replace instance variable with new channel and call * offsetToken/insertRows. * * @return new channel which was fetched after open/reopen */ private OpenChannelResult openChannelForTable(final String channelName) { if (cancelled.get()) { throw new CancellationException("Channel " + channelName + " was cancelled before opening"); } final OpenChannelResult result; try (TaskMetrics.TimingContext ignored = taskMetrics.timeChannelOpen()) { result = streamingClient.openChannel(channelName, null); } taskMetrics.incChannelOpenCount(); final ChannelStatus channelStatus = result.getChannelStatus(); if (channelStatus.getStatusCode().equals("SUCCESS")) { // Capture the initial error count - errors are cumulative and don't reset on channel reopen. // We only want to fail on NEW errors that occur after the channel was opened. this.initialErrorCount = channelStatus.getRowsErrorCount(); LOGGER.info( "Successfully opened streaming channel: {}, initialErrorCount: {}", channelName, this.initialErrorCount); return result; } else { LOGGER.error( "Failed to open channel: {}, error code: {}", channelName, channelStatus.getStatusCode()); throw ERROR_5028.getException( String.format( "Failed to open channel %s. Error code %s", channelName, channelStatus.getStatusCode())); } } @Override public CompletableFuture closeChannelAsync() { LOGGER.info("Closing streaming channel {}", this.channelName); cancelled.set(true); return channel .thenAccept( c -> { try { if (!c.isClosed()) { closeChannelWithoutFlushing(c); } LOGGER.info("Successfully closed streaming channel {}", this.channelName); } catch (RuntimeException e) { tryRecoverFromCloseChannelError(e); } finally { this.telemetryService.reportKafkaPartitionUsage( this.snowflakeTelemetryChannelStatus, true); this.snowflakeTelemetryChannelStatus.tryUnregisterChannelJMXMetrics(); } }) .exceptionally( e -> { Throwable cause = e.getCause() != null ? 
e.getCause() : e; if (cause instanceof java.util.concurrent.CancellationException) { LOGGER.info( "Channel {} was cancelled before opening, nothing to close", this.channelName); } else { LOGGER.warn( "Channel {} failed during initialization, skipping close: {}", this.channelName, cause.getMessage()); } this.snowflakeTelemetryChannelStatus.tryUnregisterChannelJMXMetrics(); return null; }); } private void tryRecoverFromCloseChannelError(RuntimeException e) { String errMsg = String.format( "Failure closing streaming channel %s, error: %s", this.channelName, e.getMessage()); this.telemetryService.reportKafkaConnectFatalError( errMsg, this.channelName, this.tableName, this.pipeName); // Only SFExceptions are swallowed. // If a channel-related error occurs, it shouldn't fail a connector task. // The channel is going to be reopened after a rebalance, so the failed channel // will be invalidated anyway. if (e instanceof SFException) { LOGGER.warn( "Encountered {} when closing streaming channel {}: {}. Stack trace: {}", e.getClass(), this.channelName, e.getMessage(), Arrays.toString(e.getStackTrace())); } else { throw e; } } @Override public boolean isInitializing() { return !channel.isDone(); } @Override public void awaitInitialization() { channel.join(); } @Override public boolean isChannelClosed() { try { return this.getChannel().isClosed(); } catch (RuntimeException e) { // If the channel failed to initialize, we consider it closed. LOGGER.warn( "Channel {} failed to initialize, treating as closed: {}", channelName, e.getMessage()); return true; } } @Override public String getChannelNameFormatV1() { return getChannel().getFullyQualifiedChannelName(); } @Override public String getChannelName() { return channelName; } /** * Blocks until the channel initialization future completes and returns the underlying SDK * channel. * *

Warning: Do not call this from the channel construction future body (the lambda * passed to {@code CompletableFuture.supplyAsync} in the constructor). That future is what * populates {@code this.channel}; calling {@code join()} on it from within itself will deadlock. */ @VisibleForTesting public SnowflakeStreamingIngestChannel getChannel() { try { return this.channel.join(); } catch (CompletionException e) { if (e.getCause() instanceof RuntimeException) { throw (RuntimeException) e.getCause(); } throw new RuntimeException(e.getCause()); } } @Override @VisibleForTesting public SnowflakeTelemetryChannelStatus getSnowflakeTelemetryChannelStatus() { return this.snowflakeTelemetryChannelStatus; } @Override public void setLatestConsumerGroupOffset(long consumerOffset) { offsetTracker.setLatestConsumerGroupOffset(consumerOffset); } @Override public long processChannelStatus(final ChannelStatus status, final boolean tolerateErrors) { logChannelStatus(status); handleChannelErrors(status, tolerateErrors); this.snowflakeTelemetryChannelStatus.updateFromChannelStatus(status); long committedOffset = parseOffsetToken(status.getLatestCommittedOffsetToken(), this.channelName); offsetTracker.updatePersistedOffset(committedOffset); if (committedOffset == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) { return NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE; } long offsetSafeToCommit = committedOffset + 1; setLatestConsumerGroupOffset(offsetSafeToCommit); return offsetSafeToCommit; } @Override public String getPipeName() { return pipeName; } private void logChannelStatus(final ChannelStatus status) { LOGGER.info( "Channel status for channel=[{}]: databaseName=[{}], schemaName=[{}], pipeName=[{}]," + " channelName=[{}], statusCode=[{}], latestCommittedOffsetToken=[{}]," + " createdOn=[{}], rowsInsertedCount=[{}], rowsParsedCount=[{}]," + " rowsErrorCount=[{}], lastErrorOffsetTokenUpperBound=[{}]," + " lastErrorMessage=[{}], lastErrorTimestamp=[{}]," + " serverAvgProcessingLatency=[{}], 
lastRefreshedOn=[{}]", this.channelName, status.getDatabaseName(), status.getSchemaName(), status.getPipeName(), status.getChannelName(), status.getStatusCode(), status.getLatestCommittedOffsetToken(), status.getCreatedOn(), status.getRowsInsertedCount(), status.getRowsParsedCount(), status.getRowsErrorCount(), status.getLastErrorOffsetTokenUpperBound(), status.getLastErrorMessage(), status.getLastErrorTimestamp(), status.getServerAvgProcessingLatency(), status.getLastRefreshedOn()); } private void handleChannelErrors(final ChannelStatus status, final boolean tolerateErrors) { final long currentErrorCount = status.getRowsErrorCount(); // Error counts are cumulative and don't reset when a channel is reopened. // Only fail if there are NEW errors that occurred after the channel was opened. final long newErrorCount = currentErrorCount - this.initialErrorCount; if (newErrorCount > 0) { final String errorMessage = String.format( "Channel [%s] has %d new errors (total: %d, initial: %d). Last error message: %s," + " last error timestamp: %s, last error offset token upper bound: %s", this.channelName, newErrorCount, currentErrorCount, this.initialErrorCount, status.getLastErrorMessage(), status.getLastErrorTimestamp(), status.getLastErrorOffsetTokenUpperBound()); this.initialErrorCount = currentErrorCount; if (tolerateErrors) { LOGGER.warn(errorMessage); } else { this.telemetryService.reportKafkaConnectFatalError( errorMessage, this.channelName, this.tableName, this.pipeName); throw ERROR_5030.getException(errorMessage); } } else if (currentErrorCount > 0) { LOGGER.debug( "Channel [{}] has {} pre-existing errors from before connector startup (no new errors)", this.channelName, currentErrorCount); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/WaitForLastOffsetCommittedPolicy.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import 
static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5027; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import dev.failsafe.Failsafe; import dev.failsafe.Fallback; import dev.failsafe.RetryPolicy; import dev.failsafe.function.CheckedSupplier; import java.time.Duration; class WaitForLastOffsetCommittedPolicy { private static final KCLogger LOGGER = new KCLogger(WaitForLastOffsetCommittedPolicy.class.getName()); static void getPolicy(CheckedSupplier action) { Fallback fallback = Fallback.ofException( e -> { LOGGER.error("Wait for the last offset to be commited - max retry attempts", e); throw ERROR_5027.getException(); }); RetryPolicy retryPolicy = RetryPolicy.builder() .handle(SnowflakeKafkaConnectorException.class) .withDelay(Duration.ofSeconds(1)) .withBackoff(Duration.ofSeconds(1), Duration.ofSeconds(30), 1.5) .withJitter(Duration.ofMillis(100)) .withMaxAttempts(10) // for some reason it has to be set as well .onRetry( event -> LOGGER.info( "Wait for the last offset to be commited retry no:{}, message:{}", event.getAttemptCount(), event.getLastException().getMessage())) .build(); Failsafe.with(fallback).compose(retryPolicy).get(action); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/channel/PartitionOffsetTracker.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.channel; import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE; import com.snowflake.kafka.connector.internal.KCLogger; import java.util.concurrent.atomic.AtomicLong; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.SinkTaskContext; /** * Tracks all offset state for a single partition channel. 
This is a passive state holder -- it * makes no network calls. Offsets are updated during channel init/recovery, record processing in * `put`, and when processing channel statuses in `preCommit`. * *

Threading model

* * Most methods are called from the Kafka Connect task thread, which is single-threaded per * partition ({@link #shouldProcess}, {@link #recordProcessed}, {@link #recordAppended}, {@link * #initializeFromSnowflake}, {@link #resetAfterRecovery}). * *

{@link #setLatestConsumerGroupOffset} may be called from a different thread, so its * set-if-greater logic uses a CAS loop for atomicity. The three AtomicLong fields use atomic types * for two reasons: (1) their refs are exposed for telemetry reads from other threads, and (2) * {@code currentConsumerGroupOffset} is written by both the task thread and {@link * #setLatestConsumerGroupOffset}. The remaining fields ({@code lastAppendRowsOffset}, {@code * needToSkipCurrentBatch}) are only accessed from the task thread and need no synchronization. */ public class PartitionOffsetTracker { private static final KCLogger LOGGER = new KCLogger(PartitionOffsetTracker.class.getName()); private final TopicPartition topicPartition; private final SinkTaskContext sinkTaskContext; private final String channelName; // Offset persisted in Snowflake, determined from the insertRows API / fetchOffsetToken calls. private final AtomicLong offsetPersistedInSnowflake = new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE); // KC-side processed offset. On creation set to Snowflake's committed offset, then updated on // each new row from KC. Ensures exactly-once semantics. private final AtomicLong processedOffset = new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE); // Consumer group offset -- used for telemetry and as a fallback during recovery when Snowflake // has no committed offset. private final AtomicLong currentConsumerGroupOffset = new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE); // Last offset passed to appendRow -- used by flush to know when all data is committed. private long lastAppendRowsOffset = NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE; // When true, leftover rows in the current batch are skipped because the channel was // invalidated and offsets were reset in Kafka. 
private boolean needToSkipCurrentBatch = false; public PartitionOffsetTracker( TopicPartition topicPartition, SinkTaskContext sinkTaskContext, String channelName) { this.topicPartition = topicPartition; this.sinkTaskContext = sinkTaskContext; this.channelName = channelName; } /** Sets both persisted and processed offsets, and resets the Kafka consumer position. */ public void initializeFromSnowflake(long committedOffset) { LOGGER.info( "Initializing offsetPersistedInSnowflake=[{}], channel=[{}]", committedOffset, channelName); this.offsetPersistedInSnowflake.set(committedOffset); LOGGER.info("Initializing processedOffset=[{}], channel=[{}]", committedOffset, channelName); this.processedOffset.set(committedOffset); resetKafkaOffset(committedOffset); } /** * Determines whether the given kafka offset should be processed, and manages batch-skip state. * * @return true if the record should be ingested, false if it should be skipped */ public boolean shouldProcess(long kafkaOffset, boolean isFirstRowInBatch) { if (currentConsumerGroupOffset.compareAndSet( NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE, kafkaOffset)) { LOGGER.trace( "Setting currentConsumerGroupOffset=[{}], channel=[{}]", kafkaOffset, channelName); } if (isFirstRowInBatch) { needToSkipCurrentBatch = false; } if (needToSkipCurrentBatch) { LOGGER.info( "Ignore inserting offset:{} for channel:{} because we recently reset offset in" + " Kafka. currentProcessedOffset:{}", kafkaOffset, channelName, processedOffset.get()); return false; } long currentProcessedOffset = this.processedOffset.get(); if (currentProcessedOffset == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE || kafkaOffset >= currentProcessedOffset + 1) { return true; } LOGGER.warn( "Channel {} - skipping current record - expected offset {} but received {}. 
The" + " current offset stored in Snowflake: {}", channelName, currentProcessedOffset, kafkaOffset, offsetPersistedInSnowflake.get()); return false; } /** Called after a record has been fully processed (inserted or reported as broken). */ public void recordProcessed(long kafkaOffset) { this.processedOffset.set(kafkaOffset); LOGGER.trace("Setting processedOffset=[{}], channel=[{}]", kafkaOffset, channelName); } /** Called after a row has been successfully passed to appendRow. */ public void recordAppended(long kafkaOffset) { this.lastAppendRowsOffset = kafkaOffset; } /** * Resets offset state after a channel recovery (reopen). Resets the Kafka consumer position and * marks the current batch for skipping so leftover rows are discarded. * *

<p>If we don't get a valid offset token (because of a table recreation or channel inactivity),
   * we will rely on Kafka to send us the correct offset.
   *
   * <p>The offset reset in Kafka is set to (offsetRecoveredFromSnowflake + 1) so that Kafka sends
   * offsets starting from the next unprocessed record, avoiding data loss.
   *
   * <p>When Snowflake has no offset and no consumer group offset is known either, nothing is reset
   * and the batch-skip flag is left untouched.
   *
   * @param offsetRecoveredFromSnowflake the offset recovered from Snowflake after reopening
   */
  public void resetAfterRecovery(long offsetRecoveredFromSnowflake) {
    final long consumerGroupOffset = currentConsumerGroupOffset.get();

    final long offsetToResetInKafka;
    if (offsetRecoveredFromSnowflake == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
      // No offset in Snowflake: fall back to the consumer group offset.
      offsetToResetInKafka = consumerGroupOffset;
    } else {
      offsetToResetInKafka = offsetRecoveredFromSnowflake + 1L;
    }

    if (offsetToResetInKafka == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
      return;
    }

    sinkTaskContext.offset(topicPartition, offsetToResetInKafka);

    offsetPersistedInSnowflake.set(offsetRecoveredFromSnowflake);
    LOGGER.info(
        "Reset channel metadata after recovery offsetPersistedInSnowflake=[{}], channel=[{}]",
        offsetRecoveredFromSnowflake,
        channelName);
    processedOffset.set(offsetRecoveredFromSnowflake);

    needToSkipCurrentBatch = true;
  }

  /**
   * Raises the consumer group offset to {@code consumerOffset} if it is greater than the current
   * value; otherwise leaves it unchanged. Implemented as a compare-and-set retry loop.
   *
   * @param consumerOffset candidate consumer group offset
   */
  public void setLatestConsumerGroupOffset(long consumerOffset) {
    while (true) {
      final long current = currentConsumerGroupOffset.get();
      if (consumerOffset <= current) {
        LOGGER.trace(
            "Not setting currentConsumerGroupOffset because consumerOffset=[{}] is <="
                + " currentConsumerGroupOffset=[{}] for channel=[{}]",
            consumerOffset,
            current,
            channelName);
        return;
      }
      if (currentConsumerGroupOffset.compareAndSet(current, consumerOffset)) {
        break;
      }
      // Lost a race with another writer: re-read and try again.
    }
    LOGGER.trace(
        "Setting currentConsumerGroupOffset=[{}], channel=[{}]", consumerOffset, channelName);
  }

  /** For future: allows an external batch service to push a committed offset.
*/
  public void updatePersistedOffset(long offset) {
    this.offsetPersistedInSnowflake.set(offset);
  }

  /** Returns the current value of {@code offsetPersistedInSnowflake}. */
  public long getPersistedOffset() {
    return offsetPersistedInSnowflake.get();
  }

  /** Returns the current value of {@code processedOffset}. */
  public long getProcessedOffset() {
    return processedOffset.get();
  }

  /** Returns the offset most recently stored in {@code lastAppendRowsOffset}. */
  public long getLastAppendRowsOffset() {
    return lastAppendRowsOffset;
  }

  // Expose AtomicLong refs for telemetry binding
  // These return the live counters themselves, not snapshots of their values.
  public AtomicLong persistedOffsetRef() {
    return offsetPersistedInSnowflake;
  }

  public AtomicLong processedOffsetRef() {
    return processedOffset;
  }

  public AtomicLong consumerGroupOffsetRef() {
    return currentConsumerGroupOffset;
  }

  /**
   * Asks the sink task context to continue from {@code committedOffset + 1} for this partition.
   * When {@code committedOffset} equals {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} nothing is
   * reset and only an informational message is logged.
   *
   * @param committedOffset the committed offset, or the no-offset sentinel
   */
  private void resetKafkaOffset(long committedOffset) {
    if (committedOffset != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
      sinkTaskContext.offset(topicPartition, committedOffset + 1L);
    } else {
      LOGGER.info(
          "TopicPartitionChannel:{}, offset token is NULL, will rely on Kafka to send us the"
              + " correct offset instead",
          channelName);
    }
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientFactory.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClientFactory;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import java.util.concurrent.atomic.AtomicInteger;

/** Factory for creating Snowpipe Streaming clients. Shared by all connectors. */
public class StreamingClientFactory {

  // Supplier reference is here so that we can swap it to mocked one in the tests
  private static volatile StreamingClientSupplier ingestClientSupplier =
      new StreamingClientSupplierImpl();

  // Incremented once per created client; the value is appended to the client name prefix.
  private static final AtomicInteger createdClientId = new AtomicInteger(0);

  /** Sets a custom ingest client supplier. This method is used in tests only.
*/ public static void setStreamingClientSupplier(final StreamingClientSupplier supplier) { ingestClientSupplier = supplier; } /** Resets the ingest client supplier to default. This method is used in tests only. */ public static void resetStreamingClientSupplier() { ingestClientSupplier = new StreamingClientSupplierImpl(); } static SnowflakeStreamingIngestClient createClient( final String pipeName, final SinkTaskConfig config, final StreamingClientProperties streamingClientProperties) { String clientName = clientName(streamingClientProperties); String dbName = config.getSnowflakeDatabase(); String schemaName = config.getSnowflakeSchema(); return ingestClientSupplier.get( clientName, dbName, schemaName, pipeName, streamingClientProperties); } private static String clientName(final StreamingClientProperties streamingClientProperties) { return streamingClientProperties.clientNamePrefix + createdClientId.incrementAndGet(); } static final class StreamingClientSupplierImpl implements StreamingClientSupplier { @Override public SnowflakeStreamingIngestClient get( final String clientName, final String dbName, final String schemaName, final String pipeName, final StreamingClientProperties streamingClientProperties) { // Quote the pipe name to handle lowercase / special characters in the name. 
return SnowflakeStreamingIngestClientFactory.builder( clientName, dbName, schemaName, '"' + pipeName + '"') .setProperties(streamingClientProperties.clientProperties) .setParameterOverrides(streamingClientProperties.parameterOverrides) .build(); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPool.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.client; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties; import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; /** * Manages clients for a single connector. Tracks which tasks use which pipes and only closes * clients when no tasks are using them. * *

Client creation is dispatched to the connector's I/O thread pool so that multiple pipes can * initialize in parallel. * *

Thread safety is achieved via a single {@link ConcurrentHashMap} with per-key atomic {@code * compute()} calls — no explicit locking is needed. The actual blocking wait for client readiness * ({@code future.join()}) happens outside the atomic section so that other pipes can proceed in * parallel. */ public class StreamingClientPool { private static final KCLogger LOGGER = new KCLogger(StreamingClientPool.class.getName()); private final String connectorName; private final ConcurrentHashMap pipes = new ConcurrentHashMap<>(); private final ExecutorService ioExecutor; /** * A client shared by one or more tasks. Holds a {@link CompletableFuture} so that client creation * can be kicked off asynchronously, allowing multiple pipes to initialize in parallel. */ static class RefCountedClient { final CompletableFuture clientFuture; private final Set taskIds = ConcurrentHashMap.newKeySet(); RefCountedClient( String pipeName, String connectorName, SinkTaskConfig config, StreamingClientProperties streamingClientProperties, TaskMetrics taskMetrics, ExecutorService executor) { LOGGER.info( "Creating new streaming client for pipe: {}, connector: {}", pipeName, connectorName); this.clientFuture = CompletableFuture.supplyAsync( () -> { try (TaskMetrics.TimingContext ignored = taskMetrics.timeSdkClientCreate()) { return StreamingClientFactory.createClient( pipeName, config, streamingClientProperties); } }, executor); } void addTask(String taskId) { taskIds.add(taskId); } boolean hasTask(String taskId) { return taskIds.contains(taskId); } /** Removes the task and returns {@code true} if no tasks remain (client is unreferenced). */ boolean removeTask(String taskId) { return taskIds.remove(taskId) && taskIds.isEmpty(); } int taskCount() { return taskIds.size(); } /** Copies all task registrations from another entry into this one. 
*/ void copyTasksFrom(RefCountedClient other) { taskIds.addAll(other.taskIds); } void close(String pipeName, String connectorName) { LOGGER.info( "Closing client for pipe {} in connector {} (last task stopped)", pipeName, connectorName); clientFuture.join().close(); } } StreamingClientPool(final String connectorName) { this.connectorName = connectorName; this.ioExecutor = ThreadPools.getIoExecutor(connectorName); LOGGER.info("Created client manager for connector: {}", connectorName); } /** * Asynchronously gets or creates a client for the given task and pipe. The returned future * completes when the client is ready. */ CompletableFuture getClientAsync( final String taskId, final String pipeName, final SinkTaskConfig config, final StreamingClientProperties streamingClientProperties, final TaskMetrics taskMetrics) { RefCountedClient entry = pipes.compute( pipeName, (key, current) -> { if (current == null) { current = new RefCountedClient( pipeName, connectorName, config, streamingClientProperties, taskMetrics, ioExecutor); } current.addTask(taskId); return current; }); return entry.clientFuture.whenComplete( (client, error) -> { if (error != null) { // Only remove if the entry still holds the same (failed) future. pipes.compute(pipeName, (key, current) -> current == entry ? 
null : current); } else { LOGGER.info( "Task {} now using pipe {} for connector {}, total tasks on this pipe: {}", taskId, pipeName, connectorName, entry.taskCount()); } }); } long getClientCountForTask(final String taskId) { return pipes.values().stream().filter(entry -> entry.hasTask(taskId)).count(); } void closeTaskClients(final String taskId) { LOGGER.info("Releasing clients for task {} in connector {}", taskId, connectorName); for (String pipeName : pipes.keySet()) { pipes.compute( pipeName, (key, entry) -> { if (entry == null) { return null; } if (entry.removeTask(taskId)) { entry.close(pipeName, connectorName); return null; } return entry; }); } } /** * Atomically replaces the client for a pipe if the current client matches the given invalid * client. Uses compare-and-swap semantics: if another caller already replaced the entry, the * existing new client is returned without creating a second one. * * @param taskId the ID of the task requesting recreation; registered on the replacement entry so * the pool does not prematurely evict it on task-local cleanup * @param pipeName the pipe whose client should be replaced * @param invalidClient the client instance that the caller believes is invalid (identity check) * @param config task config for creating the replacement client * @param streamingClientProperties streaming client properties * @param taskMetrics metrics for timing the new client creation * @return the new (or already-replaced) client */ SnowflakeStreamingIngestClient recreateClient( final String taskId, final String pipeName, final SnowflakeStreamingIngestClient invalidClient, final SinkTaskConfig config, final StreamingClientProperties streamingClientProperties, final TaskMetrics taskMetrics) { // Captured inside compute() so the old client can be closed outside the lock. 
AtomicReference clientToClose = new AtomicReference<>();

    // The lambda below decides, atomically for this pipe's entry, whether to reuse the current
    // entry or to install a replacement. Every branch registers taskId on the returned entry.
    RefCountedClient chosenEntry =
        pipes.compute(
            pipeName,
            (key, current) -> {
              if (current == null) {
                LOGGER.warn(
                    "recreateClient called for pipe {} but no entry exists in connector {}."
                        + " Creating a fresh entry.",
                    pipeName,
                    connectorName);
                return createReplacement(
                    taskId, pipeName, null, config, streamingClientProperties, taskMetrics);
              }

              // Check if the current entry still holds the invalid client (CAS guard).
              // Use timeout=0 to avoid blocking the compute() supplier on I/O: a client
              // whose future hasn't completed yet cannot possibly be the invalid client the
              // caller just observed, so we can assume it's a valid replacement already in flight.
              SnowflakeStreamingIngestClient currentClient;
              try {
                currentClient = current.clientFuture.get(0, TimeUnit.MILLISECONDS);
              } catch (TimeoutException timeout) {
                LOGGER.info(
                    "recreateClient for pipe {} in connector {}: current entry's future not"
                        + " yet complete, assuming replacement already in flight",
                    pipeName,
                    connectorName);
                current.addTask(taskId);
                return current;
              } catch (CompletionException | ExecutionException e) {
                // Current entry failed to create — replace it unconditionally.
                LOGGER.warn(
                    "recreateClient for pipe {}: current entry has a failed client future,"
                        + " replacing unconditionally",
                    pipeName);
                return createReplacement(
                    taskId, pipeName, current, config, streamingClientProperties, taskMetrics);
              } catch (InterruptedException e) {
                // Restore the interrupt flag before surfacing the interruption unchecked.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
              }

              // Identity comparison: a different instance means someone already replaced it.
              if (currentClient != invalidClient) {
                LOGGER.info(
                    "recreateClient for pipe {} in connector {}: client already replaced"
                        + " by another caller, reusing existing entry",
                    pipeName,
                    connectorName);
                current.addTask(taskId);
                return current;
              }

              // CAS matches — replace with a new entry, preserving task registrations.
              LOGGER.info(
                  "Recreating streaming client for pipe {} in connector {}."
                      + " Old client will be closed best-effort.",
                  pipeName,
                  connectorName);
              // Capture old client for best-effort close outside the compute() lock.
              clientToClose.set(currentClient);
              return createReplacement(
                  taskId, pipeName, current, config, streamingClientProperties, taskMetrics);
            });

    // Best-effort close of the old (invalid) client outside the compute() lock
    // to avoid blocking the ConcurrentHashMap bucket during I/O.
    SnowflakeStreamingIngestClient oldClient = clientToClose.get();
    if (oldClient != null) {
      try {
        oldClient.close();
      } catch (Exception e) {
        LOGGER.warn(
            "Best-effort close of invalid client for pipe {} failed: {}",
            pipeName,
            e.getMessage());
      }
    }

    // Block for the chosen entry's client only after the map operation has finished.
    return joinAndEvictOnFailure(pipeName, chosenEntry);
  }

  /**
   * Creates a new {@link RefCountedClient} for the given pipe, inheriting task registrations from
   * {@code previous} if non-null, and always registering {@code taskId}. Centralizing this logic
   * ensures the calling task is always registered so the pool does not prematurely evict a
   * freshly-created entry during subsequent task-local cleanup.
   */
  private RefCountedClient createReplacement(
      final String taskId,
      final String pipeName,
      final RefCountedClient previous,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {
    // Constructing the entry already submits client creation to ioExecutor.
    RefCountedClient fresh =
        new RefCountedClient(
            pipeName, connectorName, config, streamingClientProperties, taskMetrics, ioExecutor);
    if (previous != null) {
      fresh.copyTasksFrom(previous);
    }
    fresh.addTask(taskId);
    return fresh;
  }

  /**
   * Joins the entry's client future and evicts the entry from the pool if the future has failed, so
   * the next caller gets a fresh entry instead of retrying a broken one.
   */
  private SnowflakeStreamingIngestClient joinAndEvictOnFailure(
      final String pipeName, final RefCountedClient entry) {
    try {
      return entry.clientFuture.join();
    } catch (CompletionException e) {
      // Evict only if the map still holds this exact entry; a newer entry must be left alone.
      pipes.compute(pipeName, (key, current) -> current == entry ? null : current);
      // Surface the underlying unchecked failure instead of the CompletionException wrapper.
      if (e.getCause() instanceof RuntimeException) {
        throw (RuntimeException) e.getCause();
      }
      throw e;
    }
  }

  /** Returns true if there are no remaining clients or task registrations. */
  boolean isEmpty() {
    return pipes.isEmpty();
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPools.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import static com.google.common.base.Strings.isNullOrEmpty;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.v2.ClientRecreationException;
import dev.failsafe.Failsafe;
import dev.failsafe.FailsafeException;
import dev.failsafe.RetryPolicy;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.kafka.connect.errors.ConnectException;

/**
 * JVM-global registry of {@link StreamingClientPool} objects, keyed by connector name.
 *
 *

Multiple Kafka Connect connector instances (i.e. different connector configs) can run in the * same JVM process. Each gets its own {@link StreamingClientPool}, but they all share this static * registry because Kafka Connect only passes String config values to tasks — there is no way to * inject a shared object directly. Tasks look up their pool by connector name at startup. */ public class StreamingClientPools { private static final KCLogger LOGGER = new KCLogger(StreamingClientPools.class.getName()); // Map: connectorName → StreamingClientPool private static final Map connectors = new ConcurrentHashMap<>(); private StreamingClientPools() {} /** * Gets or creates a client for the given connector, task, and pipe. Multiple tasks can share the * same client. Kafka Connect guarantees that no two tasks in the same connector can work on the * same partition. It means that two tasks will never work with given channel at the same time, * because channel names are scoped to connector_name + topic_name + partition_id * * @param connectorName the name of the connector * @param taskId the ID of the task requesting the client * @param pipeName the pipe name * @param config parsed task config * @param streamingClientProperties streaming client properties * @param taskMetrics metrics to record client creation time (noop-safe) * @return the client for this pipe * @throws IllegalArgumentException if connectorName, taskId, or pipeName is null or empty */ public static SnowflakeStreamingIngestClient getClient( final String connectorName, final String taskId, final String pipeName, final SinkTaskConfig config, final StreamingClientProperties streamingClientProperties, final TaskMetrics taskMetrics) { try { return getClientAsync( connectorName, taskId, pipeName, config, streamingClientProperties, taskMetrics) .join(); } catch (CompletionException e) { Throwable cause = e.getCause(); if (cause instanceof RuntimeException) { throw (RuntimeException) cause; } throw new ConnectException( 
"Unexpected error creating streaming client for pipe: " + pipeName, cause); } } /** * Asynchronously gets or creates a client for the given connector, task, and pipe. The returned * future completes when the client is ready. */ public static CompletableFuture getClientAsync( final String connectorName, final String taskId, final String pipeName, final SinkTaskConfig config, final StreamingClientProperties streamingClientProperties, final TaskMetrics taskMetrics) { if (isNullOrEmpty(connectorName)) { throw new IllegalArgumentException("connectorName cannot be null or empty"); } if (isNullOrEmpty(taskId)) { throw new IllegalArgumentException("taskId cannot be null or empty"); } if (isNullOrEmpty(pipeName)) { throw new IllegalArgumentException("pipeName cannot be null or empty"); } return getPool(connectorName) .getClientAsync(taskId, pipeName, config, streamingClientProperties, taskMetrics); } private static StreamingClientPool getPool(final String connectorName) { return connectors.computeIfAbsent(connectorName, k -> new StreamingClientPool(connectorName)); } public static long getClientCountForTask(final String connectorName, final String taskId) { StreamingClientPool pool = connectors.get(connectorName); if (pool == null) { return 0; } return pool.getClientCountForTask(taskId); } /** * Atomically replaces the client for a pipe if the current client matches the given invalid * client. Uses compare-and-swap semantics: if another caller already replaced the entry, the * existing new client is returned without creating a second one. 
* * @param connectorName the connector name * @param taskId the ID of the task requesting recreation; registered on the replacement entry so * the pool does not prematurely evict it on task-local cleanup * @param pipeName the pipe whose client should be replaced * @param invalidClient the client instance the caller believes is invalid (identity check) * @param config task config for creating the replacement client * @param streamingClientProperties streaming client properties * @param taskMetrics metrics for timing the new client creation * @return the new (or already-replaced) client */ public static SnowflakeStreamingIngestClient recreateClient( final String connectorName, final String taskId, final String pipeName, final SnowflakeStreamingIngestClient invalidClient, final SinkTaskConfig config, final StreamingClientProperties streamingClientProperties, final TaskMetrics taskMetrics) { try { return Failsafe.with(recreateClientRetryPolicy(pipeName)) .get( () -> getPool(connectorName) .recreateClient( taskId, pipeName, invalidClient, config, streamingClientProperties, taskMetrics)); } catch (FailsafeException e) { // Retries exhausted — wrap as ClientRecreationException so the batch // loop can rewind offsets instead of crashing the task. Throwable cause = e.getCause() != null ? e.getCause() : e; throw ClientRecreationException.wrap(cause); } } /** * Delay between client-creation retries. Pipe failover typically takes a few seconds to stabilize * on the server side, and back-to-back retries with no delay would all hit the same in-flight * failover window and fail before the server finishes. * *

Note: this delay is per-invocation. {@link #recreateClient} can be called concurrently by * multiple {@link * com.snowflake.kafka.connector.internal.streaming.v2.SnowpipeStreamingPartitionChannel}s on the * same pipe. The pool's CAS dedupes to a single fresh client, but each caller runs its own * Failsafe retry schedule — so from the pool's perspective, client creation can happen more than * once per {@code CLIENT_CREATION_RETRY_DELAY} window across concurrent callers. This is * acceptable: each individual channel still retries at ~5s cadence, so its total recovery window * spans {@code MAX_CLIENT_CREATION_RETRIES * CLIENT_CREATION_RETRY_DELAY = ~15s}. When reading * logs, expect to see overlapping retry schedules across channels on the same pipe during a * failover event. */ private static final Duration CLIENT_CREATION_RETRY_DELAY = Duration.ofSeconds(5); /** * Maximum retry attempts when a replacement client also fails with a client-invalid error during * {@link #recreateClient}. Three attempts provide enough headroom for transient failover windows * while keeping total blocking time bounded (each attempt creates a fresh SDK client). */ private static final int MAX_CLIENT_CREATION_RETRIES = 3; /** * Retries replacement-client creation when the SDK reports a client-invalid error (e.g., pipe * failover still in flight). The pool evicts the failed entry on each attempt, so the retry * creates a fresh client. Non-client-invalid errors fall through immediately. */ private static RetryPolicy recreateClientRetryPolicy( String pipeName) { return RetryPolicy.builder() .handleIf( e -> e instanceof RuntimeException && ClientRecreationException.isClientInvalidError(e)) .withMaxAttempts(MAX_CLIENT_CREATION_RETRIES) .withDelay(CLIENT_CREATION_RETRY_DELAY) .onRetry( event -> LOGGER.warn( "Replacement client for pipe {} failed with client-invalid error" + " (attempt {}/{}): {}. 
Retrying after {}.", pipeName, event.getAttemptCount(), MAX_CLIENT_CREATION_RETRIES, event.getLastException().getMessage(), CLIENT_CREATION_RETRY_DELAY)) .onRetriesExceeded( event -> LOGGER.error( "Replacement client for pipe {} failed after {} attempts: {}", pipeName, event.getAttemptCount(), event.getException().getMessage())) .build(); } /** * Releases all clients used by a specific task. Clients that are still used by other tasks remain * open. Only closes clients when the last task using them stops. When the pool becomes empty (no * remaining clients or tasks), the pool is removed from the registry. * * @param connectorName the name of the connector * @param taskId the ID of the task */ public static void closeTaskClients(final String connectorName, final String taskId) { connectors.compute( connectorName, (key, pool) -> { if (pool == null) { LOGGER.warn( "Attempted to release task {} for unknown connector: {}", taskId, connectorName); return null; } pool.closeTaskClients(taskId); if (pool.isEmpty()) { LOGGER.info("All tasks released for connector: {}", connectorName); return null; } return pool; }); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientSupplier.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.client; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties; public interface StreamingClientSupplier { SnowflakeStreamingIngestClient get( String clientName, String dbName, String schemaName, String pipeName, StreamingClientProperties streamingClientProperties); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/migration/Ssv1MigrationMode.java ================================================ package 
com.snowflake.kafka.connector.internal.streaming.v2.migration; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT; import java.util.Arrays; import java.util.Locale; import java.util.stream.Collectors; /** * Controls whether the connector reads committed offsets from SSv1 channels during migration from * KC v3 to KC v4. Only consulted when the SSv2 channel has no committed offset yet. */ public enum Ssv1MigrationMode { /** Do not query SSv1 at all (default, current behavior). */ SKIP, /** * If SSv2 has no committed offset, query SSv1 and use its offset as the starting point. If the * SSv1 channel is not found, fall through to the consumer group offset. */ BEST_EFFORT, /** * If SSv2 has no committed offset, query SSv1 and use its offset as the starting point. If the * SSv1 channel is not found, fail the channel open so the operator can investigate. */ STRICT; /** * Parses a config string into a migration mode (case-insensitive). Falls back to {@link * com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams#SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT * SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT} for null or empty input. Throws {@link * IllegalArgumentException} for unrecognized values, including the config key and valid options. */ public static Ssv1MigrationMode fromConfig(String value) { if (value == null || value.trim().isEmpty()) { value = SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT; } String normalized = value.trim().toUpperCase(Locale.ROOT); try { return valueOf(normalized); } catch (IllegalArgumentException e) { String validValues = Arrays.stream(values()) .map(v -> v.name().toLowerCase(Locale.ROOT)) .collect(Collectors.joining(", ")); throw new IllegalArgumentException( "Invalid value '" + value.trim() + "' for config '" + SNOWFLAKE_SSV1_OFFSET_MIGRATION + "'. 
Valid values are: " + validValues, e); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/migration/Ssv1MigrationResponse.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.migration; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.annotations.VisibleForTesting; import javax.annotation.Nullable; /** * Deserialized response from SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET. The three possible outcomes are: * *

    *
  • {@code ssv1ChannelFound == false} — the SSv1 channel does not exist *
  • {@code ssv1ChannelFound == true, migratedOffset == null} — channel exists but has no * committed offset *
  • {@code ssv1ChannelFound == true, migratedOffset != null} — offset was migrated successfully *
*/ @JsonIgnoreProperties(ignoreUnknown = true) public class Ssv1MigrationResponse { @JsonProperty("ssv1_channel_found") boolean ssv1ChannelFound; @Nullable @JsonProperty("migrated_offset") Long migratedOffset; /** Creates a response representing a channel that was not found. */ @VisibleForTesting public static Ssv1MigrationResponse channelNotFound() { Ssv1MigrationResponse response = new Ssv1MigrationResponse(); response.ssv1ChannelFound = false; return response; } /** Creates a response representing a channel that exists but has no committed offset. */ @VisibleForTesting public static Ssv1MigrationResponse channelFoundNoOffset() { Ssv1MigrationResponse response = new Ssv1MigrationResponse(); response.ssv1ChannelFound = true; return response; } /** Creates a response representing a successful migration with the given offset. */ @VisibleForTesting public static Ssv1MigrationResponse migrated(long offset) { Ssv1MigrationResponse response = new Ssv1MigrationResponse(); response.ssv1ChannelFound = true; response.migratedOffset = offset; return response; } public boolean isSsv1ChannelFound() { return ssv1ChannelFound; } @Nullable public Long getMigratedOffset() { return migratedOffset; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/service/BatchOffsetFetcher.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.service; import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE; import com.snowflake.ingest.streaming.ChannelStatus; import com.snowflake.ingest.streaming.ChannelStatusBatch; import com.snowflake.ingest.streaming.SFException; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.internal.KCLogger; import 
com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties; import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.kafka.common.TopicPartition; /** * Fetches committed offsets for topic partitions in batches, grouped by pipe. Makes at most one * network call per SDK client (i.e. per pipe), regardless of the number of partitions. */ public class BatchOffsetFetcher { private static final KCLogger LOGGER = new KCLogger(BatchOffsetFetcher.class.getName()); private final String connectorName; private final String taskId; private final SinkTaskConfig taskConfig; private final StreamingClientProperties streamingClientProperties; private final boolean tolerateErrors; private final ExecutorService ioExecutor; private final TaskMetrics taskMetrics; public BatchOffsetFetcher( String connectorName, String taskId, SinkTaskConfig taskConfig, ExecutorService ioExecutor, TaskMetrics taskMetrics) { this.connectorName = connectorName; this.taskId = taskId; this.taskConfig = taskConfig; this.streamingClientProperties = StreamingClientProperties.from(taskConfig); this.tolerateErrors = taskConfig.isTolerateErrors(); this.ioExecutor = ioExecutor; this.taskMetrics = taskMetrics; } /** * Fetches committed offsets for the given partitions using the SDK's batch channel-status API. 
* Makes at most one network call per pipe, regardless of partition count.
   *
   * @param partitions the partitions to query
   * @param channelLookup function to look up the TopicPartitionChannel for a given partition
   * @return map of TopicPartition to the offset safe to commit to Kafka (committed + 1), only
   *     containing entries where a valid offset was found
   */
  public Map getCommittedOffsets(
      Collection partitions, Function> channelLookup) {
    PartitionsByTopic grouped = PartitionsByTopic.groupByTopic(partitions, channelLookup);

    // Partitions without a channel are only reported; they contribute nothing to the result.
    grouped.topicToPartitionsWithoutChannels.forEach(
        (topic, uninitializedPartitions) ->
            LOGGER.warn(
                "Topic: {} has partition(s) not yet initialized to get offset: {}",
                topic,
                uninitializedPartitions));

    // Concurrent map because each pipe's fetch runs on ioExecutor and writes into it.
    Map result = new ConcurrentHashMap<>();

    // One asynchronous fetch per pipe.
    CompletableFuture[] futures =
        grouped.pipeNameToChannels.entrySet().stream()
            .map(entry -> fetchOffsetsAsync(entry.getKey(), entry.getValue(), result))
            .toArray(CompletableFuture[]::new);

    try {
      CompletableFuture.allOf(futures).join();
    } catch (CompletionException e) {
      // Surface the underlying unchecked failure instead of the CompletionException wrapper.
      if (e.getCause() instanceof RuntimeException) {
        throw (RuntimeException) e.getCause();
      }
      throw e;
    }

    return result;
  }

  /**
   * Runs the fetch for one pipe on {@code ioExecutor} and merges its offsets into {@code result}.
   * An {@link SFException} is logged and swallowed, so that pipe's channels are simply missing
   * from the result; any other exception completes the returned future exceptionally.
   */
  private CompletableFuture fetchOffsetsAsync(
      String pipeName,
      Map channelsByPartition,
      Map result) {
    return CompletableFuture.runAsync(
        () -> {
          try {
            result.putAll(getCommittedOffsetsForPipe(pipeName, channelsByPartition));
          } catch (SFException e) {
            LOGGER.error(
                "Failed to fetch committed offsets for pipe: {}, skipping {} channel(s)",
                pipeName,
                channelsByPartition.size(),
                e);
          }
        },
        ioExecutor);
  }

  /**
   * Queries the status of all given channels of one pipe in a single {@code getChannelStatus}
   * call and returns the offsets of the channels that reported one.
   *
   * @throws SFException if {@code getChannelStatus} fails after the SDK exhausts its internal
   *     retries (exponential backoff on transient HTTP errors)
   */
  private Map getCommittedOffsetsForPipe(
      String pipeName, Map channelsByPartition) {
    List channelNames =
        channelsByPartition.values().stream()
            .map(TopicPartitionChannel::getChannelName)
            .collect(Collectors.toList());

    SnowflakeStreamingIngestClient client =
        StreamingClientPools.getClient(
            connectorName, taskId, pipeName, taskConfig, streamingClientProperties, taskMetrics);

    // Only the status call itself is timed.
    final ChannelStatusBatch batch;
    try (TaskMetrics.TimingContext ignored = taskMetrics.timeOffsetFetch()) {
      batch = client.getChannelStatus(channelNames);
    }

    Map result = new HashMap<>();
    channelsByPartition.forEach(
        (topicPartition, channel) -> {
          String channelName = channel.getChannelName();
          ChannelStatus status = batch.getChannelStatusBatch().get(channelName);
          if (status == null) {
            // This should never happen but we can still recover by simply skipping this channel.
            // There is no obligation to return any committed offsets in `preCommit`.
            LOGGER.warn("No status returned for channel: {}", channelName);
            return;
          }
          long offset = channel.processChannelStatus(status, tolerateErrors);
          LOGGER.info(
              "Fetched snowflake committed offset: [{}] for channel [{}]", offset, channelName);
          // The no-offset sentinel is left out of the result.
          if (offset != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
            result.put(topicPartition, offset);
          }
        });
    return result;
  }

  /** Result of splitting partitions into those with a channel and those without one. */
  public static class PartitionsByTopic {
    /** Partitions with initialized channels, grouped by pipe name */
    public final Map> pipeNameToChannels;

    /** Partitions without an initialized channel, grouped by topic */
    public final Map> topicToPartitionsWithoutChannels;

    PartitionsByTopic(
        Map> pipeNameToChannels,
        Map> topicToPartitionsWithoutChannels) {
      this.pipeNameToChannels = pipeNameToChannels;
      this.topicToPartitionsWithoutChannels = topicToPartitionsWithoutChannels;
    }

    /**
     * Looks up every partition through {@code channelLookup}. Partitions with a channel are
     * grouped under the channel's pipe name (insertion-ordered per pipe); the rest are grouped
     * under their topic name.
     */
    public static PartitionsByTopic groupByTopic(
        Collection partitions,
        Function> channelLookup) {
      Map> pipeNameToChannels = new HashMap<>();
      Map> topicToPartitionsWithoutChannels = new HashMap<>();

      for (TopicPartition topicPartition : partitions) {
        channelLookup
            .apply(topicPartition)
            .ifPresentOrElse(
                channel ->
                    pipeNameToChannels
                        .computeIfAbsent(channel.getPipeName(), k -> new LinkedHashMap<>())
                        .put(topicPartition, channel),
                () ->
                    topicToPartitionsWithoutChannels
                        .computeIfAbsent(topicPartition.topic(), k -> new HashSet<>())
                        .add(topicPartition));
      }
      return new PartitionsByTopic(pipeNameToChannels, topicToPartitionsWithoutChannels);
    }
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/service/PartitionChannelManager.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.service;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.StreamingErrorHandler;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.streaming.v2.SnowpipeStreamingPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Collection;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import
java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.SinkTaskContext; /** * Manages the lifecycle of {@link TopicPartitionChannel} instances for a single Kafka Connect task. * Handles channel creation, opening, closing, and lookup. */ public class PartitionChannelManager { private static final KCLogger LOGGER = new KCLogger(PartitionChannelManager.class.getName()); /** * Creates a {@link TopicPartitionChannel} for a single partition during {@link #startPartitions}. * Production code uses {@link #buildChannel}; tests inject a lambda. */ @FunctionalInterface interface PartitionChannelBuilder { TopicPartitionChannel build( TopicPartition topicPartition, String tableName, String channelName, String pipeName); } private final SnowflakeTelemetryService telemetryService; private final KafkaRecordErrorReporter kafkaRecordErrorReporter; private final Optional metricsJmxReporter; private final TaskMetrics taskMetrics; private final SinkTaskContext sinkTaskContext; private final SinkTaskConfig taskConfig; private final SnowflakeConnectionService conn; private final PartitionChannelBuilder partitionChannelBuilder; private final Map partitionChannels; private final Map shouldEvolveSchemaCache = new ConcurrentHashMap<>(); public PartitionChannelManager( SnowflakeTelemetryService telemetryService, SinkTaskConfig taskConfig, KafkaRecordErrorReporter kafkaRecordErrorReporter, SinkTaskContext sinkTaskContext, Optional metricsJmxReporter, TaskMetrics taskMetrics, SnowflakeConnectionService conn) { this.telemetryService = telemetryService; this.taskConfig = taskConfig; this.kafkaRecordErrorReporter = kafkaRecordErrorReporter; this.sinkTaskContext = sinkTaskContext; this.metricsJmxReporter = metricsJmxReporter; this.taskMetrics = taskMetrics; this.conn = conn; this.partitionChannelBuilder = this::buildChannel; this.partitionChannels = new ConcurrentHashMap<>(); } 
@VisibleForTesting PartitionChannelManager( SinkTaskConfig taskConfig, PartitionChannelBuilder partitionChannelBuilder) { this.taskConfig = taskConfig; this.partitionChannelBuilder = partitionChannelBuilder; this.partitionChannels = new ConcurrentHashMap<>(); this.telemetryService = null; this.kafkaRecordErrorReporter = null; this.sinkTaskContext = null; this.metricsJmxReporter = Optional.empty(); this.taskMetrics = null; this.conn = null; } /** Gets a unique identifier consisting of connector name, topic name and partition number. */ @VisibleForTesting public static String makeChannelName( final String connectorName, final String topic, final int partition) { final String separator = "_"; return connectorName + separator + topic + separator + partition; } private String getChannelName(TopicPartition topicPartition) { return makeChannelName( taskConfig.getConnectorName(), topicPartition.topic(), topicPartition.partition()); } private String getTableName(TopicPartition topicPartition) { return Utils.getTableName( topicPartition.topic(), taskConfig.getTopicToTableMap(), taskConfig.isEnableSanitization()); } /** * Creates and registers channels for the given partitions. 
* * @param partitions collection of topic partitions to open channels for * @param tableToPipeMapping pre-resolved mapping of table name to pipe name; the caller is * responsible for ensuring tables exist and resolving the correct pipe for each table */ public void startPartitions( Collection partitions, Map tableToPipeMapping) { LOGGER.info( "Starting {} partitions for connector: {}, task: {}", partitions.size(), taskConfig.getConnectorName(), taskConfig.getTaskId()); warmUpStreamingClients(tableToPipeMapping); for (TopicPartition topicPartition : partitions) { final String tableName = getTableName(topicPartition); final String pipeName = tableToPipeMapping.get(tableName); final String channelName = getChannelName(topicPartition); LOGGER.info( "Creating streaming channel {} for {}, table: {}, pipe: {}", channelName, topicPartition, tableName, pipeName); final TopicPartitionChannel partitionChannel = partitionChannelBuilder.build(topicPartition, tableName, channelName, pipeName); partitionChannels.put(channelName, partitionChannel); LOGGER.info("Successfully created streaming channel: {}", channelName); } } private TopicPartitionChannel buildChannel( TopicPartition topicPartition, String tableName, String channelName, String pipeName) { final StreamingErrorHandler streamingErrorHandler = new StreamingErrorHandler(taskConfig, kafkaRecordErrorReporter, telemetryService); final StreamingClientProperties streamingClientProperties = StreamingClientProperties.from(taskConfig); final SnowflakeStreamingIngestClient streamingClient = StreamingClientPools.getClient( taskConfig.getConnectorName(), taskConfig.getTaskId(), pipeName, taskConfig, streamingClientProperties, taskMetrics); final PartitionOffsetTracker offsetTracker = new PartitionOffsetTracker(topicPartition, this.sinkTaskContext, channelName); final SnowflakeTelemetryChannelStatus telemetryChannelStatus = new SnowflakeTelemetryChannelStatus( tableName, taskConfig.getConnectorName(), channelName, 
System.currentTimeMillis(), this.metricsJmxReporter, offsetTracker.persistedOffsetRef(), offsetTracker.processedOffsetRef(), offsetTracker.consumerGroupOffsetRef()); final ExecutorService openChannelIoExecutor = ThreadPools.getOpenChannelIoExecutor(taskConfig.getConnectorName()); final boolean shouldEvolveSchema = (taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE) && shouldEvolveSchemaCache.computeIfAbsent( tableName, t -> conn.shouldEvolveSchema(t, taskConfig.getSnowflakeRole())); // KC v3 defaulted to V1 channel naming: {topic}_{partition}. // Customers who set snowflake.streaming.channel.name.include.connector.name=true // in KC v3 used V2 naming: {connectorName}_{topic}_{partition} (same as KC v4). final Ssv1MigrationMode ssv1MigrationMode = taskConfig.getSsv1MigrationMode(); final Optional ssv1ChannelName; if (ssv1MigrationMode != Ssv1MigrationMode.SKIP) { String topic = topicPartition.topic(); int partition = topicPartition.partition(); ssv1ChannelName = Optional.of( taskConfig.isSsv1MigrationIncludeConnectorName() ? taskConfig.getConnectorName() + "_" + topic + "_" + partition : topic + "_" + partition); } else { ssv1ChannelName = Optional.empty(); } return new SnowpipeStreamingPartitionChannel( tableName, channelName, pipeName, streamingClient, openChannelIoExecutor, this.telemetryService, telemetryChannelStatus, offsetTracker, taskConfig, streamingErrorHandler, this.taskMetrics, shouldEvolveSchema, this.conn, ssv1ChannelName); } /** * Pre-warms the {@link StreamingClientPools} cache by creating clients for all distinct pipes in * parallel. Subsequent per-partition calls to {@link StreamingClientPools#getClient} in {@link * #buildChannel} will return the cached clients immediately. * *

Skipped when using the test constructor (conn is null). */ private void warmUpStreamingClients(Map tableToPipeMapping) { if (conn == null) { return; } final StreamingClientProperties streamingClientProperties = StreamingClientProperties.from(taskConfig); CompletableFuture[] clientFutures = tableToPipeMapping.values().stream() .distinct() .map( pipeName -> StreamingClientPools.getClientAsync( taskConfig.getConnectorName(), taskConfig.getTaskId(), pipeName, taskConfig, streamingClientProperties, taskMetrics)) .toArray(CompletableFuture[]::new); try { CompletableFuture.allOf(clientFutures).join(); } catch (CompletionException e) { if (e.getCause() instanceof RuntimeException) { throw (RuntimeException) e.getCause(); } throw e; } } public void waitForAllChannelsToCommitData() { int channelCount = partitionChannels.size(); if (channelCount == 0) { return; } LOGGER.info("Starting parallel flush for {} channels", channelCount); CompletableFuture[] futures = partitionChannels.values().stream() .map(TopicPartitionChannel::waitForLastProcessedRecordCommitted) .toArray(CompletableFuture[]::new); CompletableFuture.allOf(futures).join(); LOGGER.info("Completed parallel flush for {} channels", channelCount); } public void closeAll() { LOGGER.info( "Closing all {} partition channels for connector: {}, task: {}", partitionChannels.size(), taskConfig.getConnectorName(), taskConfig.getTaskId()); CompletableFuture[] futures = partitionChannels.values().stream() .map(TopicPartitionChannel::closeChannelAsync) .toArray(CompletableFuture[]::new); CompletableFuture.allOf(futures).join(); partitionChannels.clear(); LOGGER.info( "Completed closing all partition channels for connector: {}, task: {}", taskConfig.getConnectorName(), taskConfig.getTaskId()); } /** * This function is called during rebalance. * *

<p>All the channels are closed. The client is still active. Upon rebalance (inside {@link
   * com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)}), we will reopen the channel.
   *
   * <p>
We will wipe the cache partitionChannels so that in {@link * com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)} we reinstantiate and fetch * offsetToken * * @param partitions a list of topic partition */ public void close(Collection partitions) { LOGGER.info( "Closing {} partitions for connector: {}, task: {}", partitions.size(), taskConfig.getConnectorName(), taskConfig.getTaskId()); CompletableFuture[] futures = partitions.stream() .map(this::getChannel) .filter(Optional::isPresent) .map(Optional::get) .map( channel -> channel .closeChannelAsync() .thenAccept(__ -> partitionChannels.remove(channel.getChannelName()))) .toArray(CompletableFuture[]::new); CompletableFuture.allOf(futures).join(); LOGGER.info( "Closed {} partitions, remaining {} open partitions are: {}", partitions.size(), partitionChannels.size(), partitionChannels.keySet().toString()); } /** Returns the channel for the given name, or empty if not found. */ public Optional getChannel(String channelName) { return Optional.ofNullable(partitionChannels.get(channelName)); } /** Returns the channel for the given TopicPartition, or empty if not found. */ public Optional getChannel(TopicPartition topicPartition) { String channelName = makeChannelName( taskConfig.getConnectorName(), topicPartition.topic(), topicPartition.partition()); return getChannel(channelName); } public Map getPartitionChannels() { return partitionChannels; } /** Blocks until all partition channels have finished initialization. 
*/
  @VisibleForTesting
  public void awaitAllPartitions() {
    // Invokes awaitInitialization() on each channel currently held in partitionChannels.
    partitionChannels.values().forEach(TopicPartitionChannel::awaitInitialization);
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/service/ThreadPools.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.service;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * JVM-global registry of per-connector thread pools.
 *
 *

<ul>
 *   <li>ioExecutor — an unbounded cached thread pool for bursty blocking I/O: SDK client
 *       creation and batch offset fetching ({@code getChannelStatus} HTTP calls). Threads are
 *       created on demand and reclaimed after 60 s of idleness.
 *   <li>openChannelIoExecutor — a fixed-size thread pool that rate-limits channel open
 *       operations. The size is controlled by {@code snowflake.open.channel.io.threads}.
 * </ul>
 *
 * <p>
Like {@link com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools},
 * this class uses a static {@link ConcurrentHashMap} keyed by connector name. Each connector gets
 * its own pools, and the pools are shut down when the last task for a connector calls {@link
 * #closeForTask(String)}.
 */
public class ThreadPools {
  private static final KCLogger LOGGER = new KCLogger(ThreadPools.class.getName());

  // Registry of pools, keyed by connector name.
  private static final Map connectorPools = new ConcurrentHashMap<>();

  // Static-only holder; the private constructor prevents instantiation.
  private ThreadPools() {}

  /** Holds the executors and a reference count of tasks currently using them. */
  private static class ConnectorThreadPool {
    final ExecutorService ioExecutor;
    final ExecutorService openChannelIoExecutor;
    // Starts at zero; registerTask increments it and closeForTask decrements it.
    final AtomicInteger refCount = new AtomicInteger(0);

    /**
     * Creates both executors for one connector.
     *
     * @param connectorName used in the log messages and as the thread-name prefix
     * @param openChannelIoThreads requested size of the open-channel pool; values below 1 are
     *     raised to 1
     */
    ConnectorThreadPool(String connectorName, int openChannelIoThreads) {
      LOGGER.info("Creating I/O thread pool for connector: {}", connectorName);
      this.ioExecutor =
          Executors.newCachedThreadPool(new DaemonThreadFactory(connectorName + "-io"));
      // newFixedThreadPool rejects sizes <= 0, so clamp the configured value to at least one.
      int maxThreads = Math.max(1, openChannelIoThreads);
      LOGGER.info(
          "Creating channel thread pool for connector: {}, threads: {}", connectorName, maxThreads);
      this.openChannelIoExecutor =
          Executors.newFixedThreadPool(
              maxThreads, new DaemonThreadFactory(connectorName + "-channel"));
    }
  }

  /**
   * Returns the I/O executor (cached thread pool) for the given connector. The pool must have been
   * created by a prior call to {@link #registerTask(String, SinkTaskConfig)}.
   *
   * @throws IllegalStateException if no pool is registered under {@code connectorName}
   */
  public static ExecutorService getIoExecutor(final String connectorName) {
    ConnectorThreadPool pool = connectorPools.get(connectorName);
    if (pool == null) {
      throw new IllegalStateException("No thread pool registered for connector: " + connectorName);
    }
    return pool.ioExecutor;
  }

  /**
   * Returns the open-channel executor (fixed-size thread pool) for the given connector. The pool
   * must have been created by a prior call to {@link #registerTask(String, SinkTaskConfig)}.
*/
  public static ExecutorService getOpenChannelIoExecutor(final String connectorName) {
    final ConnectorThreadPool registered = connectorPools.get(connectorName);
    if (registered != null) {
      return registered.openChannelIoExecutor;
    }
    throw new IllegalStateException("No thread pool registered for connector: " + connectorName);
  }

  /**
   * Adds one task to the connector's reference count, creating the connector's pools first when
   * none are registered yet. Every call must later be matched by {@link #closeForTask(String)} so
   * the pools can be shut down once no task uses them.
   */
  public static void registerTask(final String connectorName, final SinkTaskConfig config) {
    connectorPools.compute(
        connectorName,
        (name, existing) -> {
          // Pools are created lazily, inside the map's compute call.
          final ConnectorThreadPool active =
              existing != null
                  ? existing
                  : new ConnectorThreadPool(connectorName, config.getOpenChannelIoThreads());
          active.refCount.incrementAndGet();
          return active;
        });
  }

  /**
   * Removes one task from the connector's reference count. When the count reaches zero both
   * executors are shut down and the registry entry is removed. Does nothing when no pools are
   * registered for the connector.
   */
  public static void closeForTask(final String connectorName) {
    connectorPools.computeIfPresent(
        connectorName,
        (name, pool) -> {
          final int remaining = pool.refCount.decrementAndGet();
          if (remaining != 0) {
            return pool;
          }
          LOGGER.info("Shutting down thread pools for connector: {}", connectorName);
          pool.ioExecutor.shutdownNow();
          pool.openChannelIoExecutor.shutdownNow();
          // Returning null removes the mapping.
          return null;
        });
  }

  /**
   * The context class loader is captured at factory creation time because Kafka Connect uses a
   * PluginClassLoader that must be on the thread context for the SDK's native library loading
   * (FFIBootstrap) to find resources inside plugin JARs.
*/
  private static final class DaemonThreadFactory implements ThreadFactory {
    // Source of the numeric suffix in thread names; the first thread gets 0.
    private final AtomicInteger counter = new AtomicInteger(0);
    private final String prefix;
    // Context class loader of the thread that constructed this factory.
    private final ClassLoader contextClassLoader;

    /**
     * @param prefix thread-name prefix; threads are named {@code prefix-N}
     */
    DaemonThreadFactory(String prefix) {
      this.prefix = prefix;
      this.contextClassLoader = Thread.currentThread().getContextClassLoader();
    }

    /**
     * Creates a daemon thread named {@code prefix-N} whose context class loader is the one
     * captured in the constructor.
     */
    @Override
    public Thread newThread(Runnable r) {
      Thread t = new Thread(r, prefix + "-" + counter.getAndIncrement());
      t.setDaemon(true);
      t.setContextClassLoader(contextClassLoader);
      return t;
    }
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryBasicInfo.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.snowflake.kafka.connector.internal.KCLogger;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;

/** Minimum information that needs to be sent to Snowflake through the Telemetry API. */
public abstract class SnowflakeTelemetryBasicInfo {
  public final String tableName;
  public final SnowflakeTelemetryService.TelemetryType telemetryType;

  public static final KCLogger LOGGER = new KCLogger(SnowflakeTelemetryBasicInfo.class.getName());

  /**
   * Base constructor. Accepts a table name and a telemetry type.
   *
   * @param tableName table name; rejected by {@code Preconditions.checkArgument} when null or
   *     empty
   * @param telemetryType telemetry type stored on this instance; not validated here
   */
  public SnowflakeTelemetryBasicInfo(
      final String tableName, SnowflakeTelemetryService.TelemetryType telemetryType) {
    Preconditions.checkArgument(
        !Strings.isNullOrEmpty(tableName), "tableName cannot be null or empty");
    this.tableName = tableName;
    this.telemetryType = telemetryType;
  }

  /**
   * Adds the required fields into the given ObjectNode which will then be used as payload in
   * Telemetry API
   *
   * @param msg ObjectNode in which extra fields needs to be added.
*/ public abstract void dumpTo(ObjectNode msg); /** * @return true if it would suggest that their was no update to corresponding implementation's * member variables. Or, in other words, the corresponding partition didnt receive any * records, in which case we would not call telemetry API. */ public abstract boolean isEmpty(); } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryService.java ================================================ package com.snowflake.kafka.connector.internal.telemetry; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.streaming.IngestionMethodConfig; import java.sql.Connection; import java.util.Map; import java.util.Set; import net.snowflake.client.internal.jdbc.telemetry.Telemetry; import net.snowflake.client.internal.jdbc.telemetry.TelemetryClient; import net.snowflake.client.internal.jdbc.telemetry.TelemetryUtil; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.JsonNode; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode; import org.apache.kafka.common.utils.AppInfoParser; public class SnowflakeTelemetryService { private final KCLogger LOGGER = new KCLogger(SnowflakeTelemetryService.class.getName()); private static final ObjectMapper MAPPER = new ObjectMapper(); // constant string list private static final String SOURCE = "source"; private static final String TYPE = "type"; private static final String KAFKA_CONNECTOR = "kafka_connector"; static final String INGESTION_METHOD = "snowflake.ingestion.method"; private static final String DATA = "data"; private static final String MAX_TASKS = "max_tasks"; private static final String START_TIME = 
"start_time"; private static final String END_TIME = "end_time"; private static final String APP_NAME = "app_name"; private static final String TASK_ID = "task_id"; private static final String ERROR_DETAIL = "error_detail"; private static final String TIME = "unix_time"; private static final String VERSION = "version"; private static final String KAFKA_VERSION = "kafka_version"; private static final String IS_CHANNEL_CLOSING = "is_channel_closing"; public static final String JDK_VERSION = "jdk_version"; public static final String JDK_DISTRIBUTION = "jdk_distribution"; private static final String TOPICS = "topics"; // Telemetry instance fetched from JDBC private final Telemetry telemetry; // Snowflake Kafka connector name defined in JSON private String name = null; private String taskID = null; public SnowflakeTelemetryService(Connection conn) { this.telemetry = TelemetryClient.createTelemetry(conn); } public SnowflakeTelemetryService(Telemetry telemetry) { this.telemetry = telemetry; } public void setAppName(String name) { this.name = name; } public void setTaskID(String taskID) { this.taskID = taskID; } public void reportKafkaConnectStart( final long startTime, final Map userProvidedConfig) { ObjectNode dataObjectNode = getObjectNode(); String jdkVersion = System.getProperty("java.version"); String jdkDistribution = System.getProperty("java.vendor"); dataObjectNode.put(START_TIME, startTime); dataObjectNode.put(KAFKA_VERSION, AppInfoParser.getVersion()); dataObjectNode.put(JDK_VERSION, jdkVersion); dataObjectNode.put(JDK_DISTRIBUTION, jdkDistribution); addUserConnectorPropertiesToDataNode(userProvidedConfig, dataObjectNode); send(TelemetryType.KAFKA_START, dataObjectNode); } public void reportKafkaConnectStop(final long startTime) { ObjectNode msg = getObjectNode(); msg.put(START_TIME, startTime); msg.put(END_TIME, System.currentTimeMillis()); send(TelemetryType.KAFKA_STOP, msg); } public void reportKafkaConnectFatalError(final String errorDetail) { ObjectNode msg 
= getObjectNode(); msg.put(TIME, System.currentTimeMillis()); msg.put(ERROR_DETAIL, errorDetail); send(TelemetryType.KAFKA_FATAL_ERROR, msg); } public void reportKafkaConnectFatalError( final String errorDetail, final String channelName, final String tableName, final String pipeName) { ObjectNode msg = getObjectNode(); msg.put(TIME, System.currentTimeMillis()); msg.put(ERROR_DETAIL, errorDetail); if (channelName != null) { msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME, channelName); } if (tableName != null) { msg.put(TelemetryConstants.TABLE_NAME, tableName); } if (pipeName != null) { msg.put(TelemetryConstants.PIPE_NAME, pipeName); } send(TelemetryType.KAFKA_FATAL_ERROR, msg); } /** * Reports connector's partition usage. * * @param partitionStatus SnowflakeTelemetryBasicInfo object * @param isClosing is the underlying channel closing */ public void reportKafkaPartitionUsage( final SnowflakeTelemetryBasicInfo partitionStatus, boolean isClosing) { ObjectNode msg = getObjectNode(); partitionStatus.dumpTo(msg); msg.put(IS_CHANNEL_CLOSING, isClosing); send(partitionStatus.telemetryType, msg); } /** * Reports connector partition start. * * @param partitionCreation SnowflakeTelemetryBasicInfo object */ public void reportKafkaPartitionStart(final SnowflakeTelemetryBasicInfo partitionCreation) { ObjectNode msg = getObjectNode(); partitionCreation.dumpTo(msg); send(partitionCreation.telemetryType, msg); } /** Reports a one-shot SSv1 offset migration attempt and its outcome for a single channel. */ public void reportSsv1Migration(final SnowflakeTelemetryBasicInfo migration) { ObjectNode msg = getObjectNode(); migration.dumpTo(msg); send(TelemetryType.KAFKA_SSV1_MIGRATION, msg); } /** * Creates the default ObjectNode which will be part of every telemetry being sent to Snowflake. * *

<p>Format:
   *
   * <pre>
   * {
   *  "app_name": "&lt;connector name&gt;",
   *  "task_id": "1",
   *  "snowflake.ingestion.method": "&lt;SNOWPIPE_STREAMING.toString()&gt;" for {@link IngestionMethodConfig}
   * }
   * </pre>
* * @return An ObjectNode which is by default always created with certain defined properties in it. */ ObjectNode getObjectNode() { ObjectNode msg = MAPPER.createObjectNode(); msg.put(APP_NAME, getAppName()); msg.put(TASK_ID, getTaskID()); msg.put(INGESTION_METHOD, IngestionMethodConfig.SNOWPIPE_STREAMING.toString()); return msg; } /** * JsonNode data is wrapped into another ObjectNode which looks like this: * *
<pre>
   *   {
   *   "data": {
   *     "app_name": "&lt;connector name&gt;",
   *     "task_id": "-1"
   *   },
   *   "source": "kafka_connector",
   *   "type": "kafka_start/&lt;any other TelemetryType value&gt;",
   *   "version": "snowflake_kc_version"
   * }
   *
   * </pre>
* * @param type type of Data * @param data JsonData to wrap in a json field called data */ private void send(TelemetryType type, JsonNode data) { ObjectNode msg = MAPPER.createObjectNode(); msg.put(SOURCE, KAFKA_CONNECTOR); msg.put(TYPE, type.toString()); msg.set(DATA, data); msg.put(VERSION, Utils.VERSION); // version number try { telemetry.addLogToBatch(TelemetryUtil.buildJobData(msg)); LOGGER.debug("sending telemetry data: {} of type:{}", data.toString(), type.toString()); telemetry.sendBatchAsync(); } catch (Exception e) { LOGGER.error("Failed to send telemetry data: {}, Error: {}", data.toString(), e.getMessage()); } } private String getAppName() { if (name == null || name.isEmpty()) { LOGGER.warn("appName in telemetry service is empty"); return "empty_appName"; } return name; } private String getTaskID() { if (taskID == null || taskID.isEmpty()) { LOGGER.warn("taskID in telemetry service is empty"); return "empty_taskID"; } return taskID; } // IMPORTANT: update this set when adding new credential/secret config params. private static final Set SENSITIVE_KEYS = Set.of( KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, KafkaConnectorConfigParams.JVM_PROXY_USERNAME, KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, KafkaConnectorConfigParams.HTTPS_PROXY_USER, KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD, KafkaConnectorConfigParams.HTTP_PROXY_USER, KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD); /** * Adds all user-provided connector config to the telemetry payload, excluding sensitive keys * (credentials, passwords). Future config additions are automatically included. */ private void addUserConnectorPropertiesToDataNode( final Map userProvidedConfig, final ObjectNode dataObjectNode) { for (Map.Entry entry : userProvidedConfig.entrySet()) { if (!SENSITIVE_KEYS.contains(entry.getKey())) { dataObjectNode.put(entry.getKey(), entry.getValue()); } } } /** Types of telemetry events that can be sent. 
*/ public enum TelemetryType { KAFKA_START("kafka_start"), KAFKA_STOP("kafka_stop"), KAFKA_FATAL_ERROR("kafka_fatal_error"), KAFKA_CHANNEL_USAGE("kafka_channel_usage"), KAFKA_CHANNEL_START("kafka_channel_start"), KAFKA_SSV1_MIGRATION("kafka_ssv1_migration"); private final String name; TelemetryType(String name) { this.name = name; } @Override public String toString() { return this.name; } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryServiceFactory.java ================================================ package com.snowflake.kafka.connector.internal.telemetry; import java.sql.Connection; /** * Factory class which produces the telemetry service which essentially has a telemetry client * instance. */ public final class SnowflakeTelemetryServiceFactory { private SnowflakeTelemetryServiceFactory() {} public static SnowflakeTelemetryServiceBuilder builder(Connection conn) { return new SnowflakeTelemetryServiceBuilder(conn); } /** Builder for TelemetryService */ public static final class SnowflakeTelemetryServiceBuilder { private final SnowflakeTelemetryService service; /** * @param conn snowflake connection is required for telemetry service */ SnowflakeTelemetryServiceBuilder(Connection conn) { this.service = new SnowflakeTelemetryService(conn); } /** * @param name connector name * @return builder instance */ public SnowflakeTelemetryServiceBuilder setAppName(String name) { this.service.setAppName(name); return this; } /** * @param taskID taskId * @return builder instance */ public SnowflakeTelemetryServiceBuilder setTaskID(String taskID) { this.service.setTaskID(taskID); return this; } public SnowflakeTelemetryService build() { return this.service; } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/TelemetryConstants.java ================================================ package 
com.snowflake.kafka.connector.internal.telemetry; /** * Placeholder for all constants used for Sending information from Connector to Snowflake through * Telemetry API */ public final class TelemetryConstants { public static final String TABLE_NAME = "table_name"; public static final String CONNECTOR_NAME = "connector_name"; public static final String PROCESSED_OFFSET = "processed-offset"; public static final String START_TIME = "start_time"; public static final String UNIX_TIME = "unix_time"; public static final String ERROR_DETAIL = "error_detail"; // ************ Streaming Constants ************// public static final String OFFSET_PERSISTED_IN_SNOWFLAKE = "persisted-in-snowflake-offset"; public static final String LATEST_CONSUMER_OFFSET = "latest-consumer-offset"; public static final String TOPIC_PARTITION_CHANNEL_NAME = "topic_partition_channel_name"; public static final String TOPIC_PARTITION_CHANNEL_CREATION_TIME = "topic_partition_channel_creation_time"; public static final String TOPIC_PARTITION_CHANNEL_CLOSE_TIME = "topic_partition_channel_close_time"; public static final String VALIDATION_FAILURE_COUNT = "validation_failure_count"; public static final String ERROR_TOLERATED_COUNT = "error_tolerated_count"; public static final String CHANNEL_RECOVERY_COUNT = "channel_recovery_count"; public static final String VALIDATION_DISABLED = "validation_disabled"; public static final String ROWS_INSERTED_COUNT = "rows_inserted_count"; public static final String ROWS_PARSED_COUNT = "rows_parsed_count"; public static final String ROWS_ERROR_COUNT = "rows_error_count"; public static final String SERVER_AVG_PROCESSING_LATENCY_MS = "server_avg_processing_latency_ms"; public static final String DATABASE_NAME = "database_name"; public static final String SCHEMA_NAME = "schema_name"; public static final String PIPE_NAME = "pipe_name"; public static final String STATUS_CODE = "status_code"; public static final String LAST_ERROR_TIMESTAMP = "last_error_timestamp"; public 
static final String LAST_ERROR_OFFSET_TOKEN_UPPER_BOUND = "last_error_offset_token_upper_bound"; public static final String BACKPRESSURE_RETRY_COUNT = "backpressure_retry_count"; public static final String APPEND_ROW_FALLBACK_COUNT = "append_row_fallback_count"; public static final String SCHEMA_EVOLUTION_FAILURE_COUNT = "schema_evolution_failure_count"; // SSv1 offset migration public static final String SSV1_MIGRATION_MODE = "ssv1_migration_mode"; public static final String SSV1_MIGRATION_OUTCOME = "ssv1_migration_outcome"; public static final String SSV1_CHANNEL_NAME = "ssv1_channel_name"; public static final String SSV1_MIGRATED_OFFSET = "ssv1_migrated_offset"; // ********** ^ Streaming Constants ^ **********// } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/BinaryStringUtils.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/BinaryStringUtils.java * * Modifications: * - Only unicodeCharactersCount() method retained (only method used by validation) * - Package changed to com.snowflake.kafka.connector.internal.validation * * Copyright (c) 2023 Snowflake Computing Inc. All rights reserved. 
*/ package com.snowflake.kafka.connector.internal.validation; public class BinaryStringUtils { /** Returns the number of unicode code points in a string */ public static int unicodeCharactersCount(String s) { return s.codePointCount(0, s.length()); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ByteArraySerializer.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/ByteArraySerializer.java * * Modifications: * - Package changed to com.snowflake.kafka.connector.internal.validation * * Copyright (c) 2021-2022 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.validation; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonSerializer; import com.fasterxml.jackson.databind.SerializerProvider; import java.io.IOException; /** * Serialize Java byte arrays as JSON arrays of numbers instead of the default Jackson * base64-encoding. */ public class ByteArraySerializer extends JsonSerializer { @Override public void serialize(byte[] value, JsonGenerator gen, SerializerProvider serializers) throws IOException { gen.writeStartArray(); for (byte v : value) { gen.writeNumber(v); } gen.writeEndArray(); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ColumnLogicalType.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java * * Modifications: * - Extracted ColumnLogicalType enum from AbstractRowBuffer class * - Package changed to com.snowflake.kafka.connector.internal.validation * * Copyright (c) 2022-2024 Snowflake Computing Inc. All rights reserved. 
*/
package com.snowflake.kafka.connector.internal.validation;

/** Snowflake table column logical type */
public enum ColumnLogicalType {
  ANY,
  BOOLEAN(1),
  ROWINDEX,
  NULL(15),
  REAL(8),
  FIXED(2),
  TEXT(9),
  CHAR,
  BINARY(10),
  DATE(7),
  TIME(6),
  TIMESTAMP_LTZ(3),
  TIMESTAMP_NTZ(4),
  TIMESTAMP_TZ(5),
  INTERVAL,
  RAW,
  ARRAY(13, true),
  OBJECT(12, true),
  VARIANT(11, true),
  ROW,
  SEQUENCE,
  FUNCTION,
  USER_DEFINED_TYPE,
  ;

  // Value stored for constants declared without an explicit ordinal.
  private static final int INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL = -1;

  // ordinal should be in sync with the server side scanner
  // Note: this is a separate field exposed through getOrdinal(); it is not Enum#ordinal().
  private final int ordinal;

  // whether it is a composite data type: array, object or variant
  private final boolean object;

  ColumnLogicalType() {
    // no valid server side ordinal by default
    this(INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL);
  }

  ColumnLogicalType(int ordinal) {
    // Constants declared with only an ordinal are not composite types.
    this(ordinal, false);
  }

  ColumnLogicalType(int ordinal, boolean object) {
    this.ordinal = ordinal;
    this.object = object;
  }

  /**
   * Ordinal to encode the data type for the server side scanner
   *
   * <p>currently used for Parquet format
   *
   * @return the explicitly assigned ordinal, or -1 for constants declared without one
   */
  public int getOrdinal() {
    return ordinal;
  }

  /** Whether the data type is a composite type: OBJECT, VARIANT, ARRAY. */
  public boolean isObject() {
    return object;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ColumnPhysicalType.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java
 *
 * Modifications:
 * - Extracted ColumnPhysicalType enum from AbstractRowBuffer class
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2022-2024 Snowflake Computing Inc. All rights reserved.
 */
package com.snowflake.kafka.connector.internal.validation;

/** Snowflake table column physical type */
public enum ColumnPhysicalType {
  ROWINDEX(9),
  DOUBLE(7),
  SB1(1),
  SB2(2),
  SB4(3),
  SB8(4),
  SB16(5),
  LOB(8),
  BINARY,
  ROW(10),
  ;

  // Value stored for constants declared without an explicit ordinal (BINARY above).
  private static final int INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL = -1;

  // ordinal should be in sync with the server side scanner
  // Note: this is a separate field exposed through getOrdinal(); it is not Enum#ordinal().
  private final int ordinal;

  ColumnPhysicalType() {
    // no valid server side ordinal by default
    this(INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL);
  }

  ColumnPhysicalType(int ordinal) {
    this.ordinal = ordinal;
  }

  /**
   * Ordinal to encode the data type for the server side scanner
   *
   * <p>
currently used for Parquet format */ public int getOrdinal() { return ordinal; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ColumnSchema.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. * * This file provides integration between SSv1 validation code and KC v4. */ package com.snowflake.kafka.connector.internal.validation; import java.sql.ResultSet; import java.sql.SQLException; /** * Represents the schema of a Snowflake table column for validation purposes. Constructed from JDBC * ResultSet (DESCRIBE TABLE or system function). */ public class ColumnSchema { /** * Maximum byte length for TEXT/VARCHAR columns, matching SSv1 SDK's BYTES_16_MB limit. SSv1 SDK * enforces that strings can never be larger than 16MB bytes, even if the VARCHAR character length * would theoretically allow more (e.g., VARCHAR(16777216) with 4-byte UTF-8 chars could be 64MB, * but is capped at 16MB). * * @see DataValidationUtil line 721 in SSv1 SDK */ private static final int MAX_LOB_SIZE_BYTES = 16 * 1024 * 1024; // 16,777,216 bytes private final String name; private final ColumnLogicalType logicalType; private final ColumnPhysicalType physicalType; private final boolean nullable; private final Integer precision; private final Integer scale; private final Integer length; private final Integer byteLength; private final String collation; private final boolean hasDefault; private final boolean isAutoincrement; /** Full constructor with default and autoincrement metadata. 
*/ public ColumnSchema( String name, ColumnLogicalType logicalType, ColumnPhysicalType physicalType, boolean nullable, Integer precision, Integer scale, Integer length, Integer byteLength, String collation, boolean hasDefault, boolean isAutoincrement) { this.name = name; this.logicalType = logicalType; this.physicalType = physicalType; this.nullable = nullable; this.precision = precision; this.scale = scale; this.length = length; this.byteLength = byteLength; this.collation = collation; this.hasDefault = hasDefault; this.isAutoincrement = isAutoincrement; } /** Backward-compatible constructor (no default/autoincrement metadata). */ public ColumnSchema( String name, ColumnLogicalType logicalType, ColumnPhysicalType physicalType, boolean nullable, Integer precision, Integer scale, Integer length, Integer byteLength, String collation) { this( name, logicalType, physicalType, nullable, precision, scale, length, byteLength, collation, false, false); } /** * Construct ColumnSchema from DESCRIBE TABLE ResultSet row. * *

Thread-safety: This method is NOT thread-safe. Caller must synchronize if sharing ResultSet. * *

Resource management: Caller is responsible for closing the ResultSet. * *

ResultSet state: Must be positioned at a valid row before calling. * * @param rs ResultSet positioned at a DESCRIBE TABLE row (must not be closed) * @return ColumnSchema * @throws SQLException if column metadata cannot be read or ResultSet is closed/invalid * @throws IllegalArgumentException if ResultSet is null or closed */ public static ColumnSchema fromDescribeTableRow(ResultSet rs) throws SQLException { if (rs == null || rs.isClosed()) { throw new IllegalArgumentException("ResultSet must be open and positioned at a row"); } String name = rs.getString("name"); String typeStr = rs.getString("type"); String nullStr = rs.getString("null?"); boolean hasDefault = false; boolean isAutoincrement = false; try { String defaultVal = rs.getString("default"); hasDefault = defaultVal != null && !defaultVal.isEmpty(); String autoinc = rs.getString("autoincrement"); isAutoincrement = autoinc != null && !autoinc.isEmpty(); } catch (SQLException e) { // default/autoincrement columns not available (e.g., in test mocks) } return fromDescribeTableFields(name, typeStr, nullStr, hasDefault, isAutoincrement); } /** * Construct ColumnSchema from individual DESCRIBE TABLE fields. * * @param name Column name * @param typeStr Type string (e.g. "NUMBER(38,0)", "VARCHAR(16777216)") * @param nullStr Nullable flag ("Y" or "N") * @return ColumnSchema */ public static ColumnSchema fromDescribeTableFields(String name, String typeStr, String nullStr) { boolean nullable = "Y".equals(nullStr); // Parse type string to extract logical type and parameters TypeInfo typeInfo = parseTypeString(typeStr); return new ColumnSchema( name, typeInfo.logicalType, typeInfo.physicalType, nullable, typeInfo.precision, typeInfo.scale, typeInfo.length, typeInfo.byteLength, null); // DESCRIBE TABLE doesn't return collation } /** Construct ColumnSchema from DESCRIBE TABLE fields including default/autoincrement metadata. 
*/ public static ColumnSchema fromDescribeTableFields( String name, String typeStr, String nullStr, boolean hasDefault, boolean isAutoincrement) { boolean nullable = "Y".equals(nullStr); TypeInfo typeInfo = parseTypeString(typeStr); return new ColumnSchema( name, typeInfo.logicalType, typeInfo.physicalType, nullable, typeInfo.precision, typeInfo.scale, typeInfo.length, typeInfo.byteLength, null, hasDefault, isAutoincrement); } private static class TypeInfo { ColumnLogicalType logicalType; ColumnPhysicalType physicalType; Integer precision; Integer scale; Integer length; Integer byteLength; } /** Parse Snowflake type string (e.g., "NUMBER(38,0)", "VARCHAR(16777216)") into TypeInfo. */ private static TypeInfo parseTypeString(String typeStr) { // Input validation if (typeStr == null || typeStr.trim().isEmpty()) { throw new IllegalArgumentException("Type string cannot be null or empty"); } TypeInfo info = new TypeInfo(); // Extract base type and parameters String baseType; String params = null; String trimmedType = typeStr.trim(); int parenIdx = trimmedType.indexOf('('); if (parenIdx > 0) { baseType = trimmedType.substring(0, parenIdx).toUpperCase(); // Use lastIndexOf to handle nested types like OBJECT(a NUMBER(38,0), b VARCHAR) int closeParenIdx = trimmedType.lastIndexOf(')'); if (closeParenIdx <= parenIdx) { throw new IllegalArgumentException( "Malformed type string (missing closing parenthesis): " + typeStr); } params = trimmedType.substring(parenIdx + 1, closeParenIdx).trim(); } else { baseType = trimmedType.toUpperCase(); } // Map to logical and physical types switch (baseType) { case "NUMBER": case "NUMERIC": case "DECIMAL": case "INT": case "INTEGER": case "BIGINT": case "SMALLINT": case "TINYINT": case "BYTEINT": info.logicalType = ColumnLogicalType.FIXED; info.physicalType = ColumnPhysicalType.SB16; if (params != null && params.contains(",")) { String[] parts = params.split(","); try { info.precision = Integer.parseInt(parts[0].trim()); info.scale = 
Integer.parseInt(parts[1].trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid numeric parameter in type string: " + typeStr, e); } } else if (params != null) { try { info.precision = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid numeric parameter in type string: " + typeStr, e); } info.scale = 0; } else { info.precision = 38; info.scale = 0; } break; case "FLOAT": case "FLOAT4": case "FLOAT8": case "DOUBLE": case "DOUBLE PRECISION": case "REAL": info.logicalType = ColumnLogicalType.REAL; info.physicalType = ColumnPhysicalType.DOUBLE; break; case "VARCHAR": case "STRING": case "TEXT": case "CHAR": case "CHARACTER": info.logicalType = ColumnLogicalType.TEXT; info.physicalType = ColumnPhysicalType.LOB; if (params != null) { try { info.length = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid length parameter in type string: " + typeStr, e); } // Cap at MAX_LOB_SIZE_BYTES (SSv1 SDK limit: strings never exceed 16MB bytes) // Use long to prevent integer overflow if length is corrupted/malformed long byteLengthLong = (long) info.length * 4; info.byteLength = (int) Math.min(MAX_LOB_SIZE_BYTES, byteLengthLong); } else { info.length = 16777216; // Default VARCHAR max // Cap at MAX_LOB_SIZE_BYTES (SSv1 SDK limit: strings never exceed 16MB bytes) // Use long to prevent integer overflow if length is corrupted/malformed long byteLengthLong = (long) info.length * 4; info.byteLength = (int) Math.min(MAX_LOB_SIZE_BYTES, byteLengthLong); } break; case "BINARY": case "VARBINARY": info.logicalType = ColumnLogicalType.BINARY; info.physicalType = ColumnPhysicalType.BINARY; if (params != null) { try { info.byteLength = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid length parameter in type string: " + typeStr, e); } } else { info.byteLength = 8388608; // 
Default BINARY max } break; case "BOOLEAN": info.logicalType = ColumnLogicalType.BOOLEAN; info.physicalType = ColumnPhysicalType.SB1; break; case "DATE": info.logicalType = ColumnLogicalType.DATE; info.physicalType = ColumnPhysicalType.SB8; break; case "TIME": info.logicalType = ColumnLogicalType.TIME; info.physicalType = ColumnPhysicalType.SB8; if (params != null) { try { info.scale = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid scale parameter in type string: " + typeStr, e); } } else { info.scale = 9; // Default TIME scale } break; case "TIMESTAMP": case "DATETIME": info.logicalType = ColumnLogicalType.TIMESTAMP_NTZ; info.physicalType = ColumnPhysicalType.SB8; if (params != null) { try { info.scale = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid scale parameter in type string: " + typeStr, e); } } else { info.scale = 9; // Default TIMESTAMP scale } break; case "TIMESTAMP_LTZ": info.logicalType = ColumnLogicalType.TIMESTAMP_LTZ; info.physicalType = ColumnPhysicalType.SB8; if (params != null) { try { info.scale = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid scale parameter in type string: " + typeStr, e); } } else { info.scale = 9; } break; case "TIMESTAMP_NTZ": info.logicalType = ColumnLogicalType.TIMESTAMP_NTZ; info.physicalType = ColumnPhysicalType.SB8; if (params != null) { try { info.scale = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid scale parameter in type string: " + typeStr, e); } } else { info.scale = 9; } break; case "TIMESTAMP_TZ": info.logicalType = ColumnLogicalType.TIMESTAMP_TZ; info.physicalType = ColumnPhysicalType.SB8; if (params != null) { try { info.scale = Integer.parseInt(params.trim()); } catch (NumberFormatException e) { throw new IllegalArgumentException( "Invalid 
scale parameter in type string: " + typeStr, e); } } else { info.scale = 9; } break; case "VARIANT": info.logicalType = ColumnLogicalType.VARIANT; info.physicalType = ColumnPhysicalType.LOB; break; case "OBJECT": // Reject structured OBJECT types like OBJECT(a INT, b TEXT) // SSv1 SDK only supports unstructured OBJECT if (params != null && !params.trim().isEmpty()) { throw new IllegalArgumentException( "Structured OBJECT types are not supported by Snowpipe Streaming. " + "Use unstructured OBJECT instead. Type: " + typeStr); } info.logicalType = ColumnLogicalType.OBJECT; info.physicalType = ColumnPhysicalType.LOB; break; case "ARRAY": // Reject structured ARRAY types like ARRAY(INT) // SSv1 SDK only supports unstructured ARRAY if (params != null && !params.trim().isEmpty()) { throw new IllegalArgumentException( "Structured ARRAY types are not supported by Snowpipe Streaming. " + "Use unstructured ARRAY instead. Type: " + typeStr); } info.logicalType = ColumnLogicalType.ARRAY; info.physicalType = ColumnPhysicalType.LOB; break; default: // Unknown type - will be caught by validateSchema info.logicalType = null; info.physicalType = null; } return info; } public String getName() { return name; } public ColumnLogicalType getLogicalType() { return logicalType; } public ColumnPhysicalType getPhysicalType() { return physicalType; } public boolean isNullable() { return nullable; } public Integer getPrecision() { return precision; } public Integer getScale() { return scale; } public Integer getLength() { return length; } public Integer getByteLength() { return byteLength; } public String getCollation() { return collation; } public boolean hasDefault() { return hasDefault; } public boolean isAutoincrement() { return isAutoincrement; } /** True when the column value is filled by the server (has DEFAULT or is AUTOINCREMENT). 
*/
  public boolean isServerFilled() {
    return hasDefault || isAutoincrement;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DataValidationUtil.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java
 *
 * Modifications:
 * - Iceberg-specific validation methods removed (not needed for Kafka Connector)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021-2024 Snowflake Computing Inc. All rights reserved.
 */
package com.snowflake.kafka.connector.internal.validation;

import static com.snowflake.kafka.connector.internal.validation.BinaryStringUtils.unicodeCharactersCount;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.ser.std.ToStringSerializer;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.OffsetTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import java.util.function.Supplier;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.output.StringBuilderWriter;

/** Utility class for parsing and validating inputs based on Snowflake types */
class DataValidationUtil {

  /**
   * Seconds limit used for integer-stored timestamp scale guessing. Value needs to be aligned with
   * the value from {@link SnowflakeDateTimeFormat#parse}
   */
  private static final long SECONDS_LIMIT_FOR_EPOCH = 31536000000L;

  /**
   * Milliseconds limit used for integer-stored timestamp scale guessing. Value needs to be aligned
   * with the value from {@link SnowflakeDateTimeFormat#parse}
   */
  private static final long MILLISECONDS_LIMIT_FOR_EPOCH = SECONDS_LIMIT_FOR_EPOCH * 1000L;

  /**
   * Microseconds limit used for integer-stored timestamp scale guessing. Value needs to be aligned
   * with the value from {@link SnowflakeDateTimeFormat#parse}
   */
  private static final long MICROSECONDS_LIMIT_FOR_EPOCH = SECONDS_LIMIT_FOR_EPOCH * 1000000L;

  public static final int BYTES_8_MB = 8 * 1024 * 1024;
  public static final int BYTES_16_MB = 2 * BYTES_8_MB;

  // TODO SNOW-664249: There is a few-byte mismatch between the value sent by the user and its
  // server-side representation. Validation leaves a small buffer for this difference.
  static final int MAX_SEMI_STRUCTURED_LENGTH = BYTES_16_MB - 64;

  // Shared mapper; the static initializer below registers the custom serializers on it.
  private static final ObjectMapper objectMapper = new ObjectMapper();

  // Shared factory for the streaming parser/generator pair used on String inputs.
  private static final JsonFactory factory =
      new JsonFactory()
          // Handle duplicate fields in JSON objects by ourselves
          .configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, false);

  // The version of Jackson we are using does not support serialization of date objects from the
  // java.time package. Here we define a module with custom java.time serializers. Additionally, we
  // define custom serializer for byte[] because the Jackson default is to serialize it as
  // base64-encoded string, and we would like to serialize it as JSON array of numbers.
  static {
    SimpleModule module = new SimpleModule();
    module.addSerializer(byte[].class, new ByteArraySerializer());
    module.addSerializer(ZonedDateTime.class, new ZonedDateTimeSerializer());
    module.addSerializer(LocalTime.class, new ToStringSerializer());
    module.addSerializer(OffsetTime.class, new ToStringSerializer());
    module.addSerializer(LocalDate.class, new ToStringSerializer());
    module.addSerializer(LocalDateTime.class, new ToStringSerializer());
    module.addSerializer(OffsetDateTime.class, new ToStringSerializer());
    module.addSerializer(DuplicateKeyValidatedObject.class, new DuplicateKeyValidatingSerializer());
    objectMapper.registerModule(module);
  }

  // Created after the static initializer above has run (textual order), i.e. once the module has
  // been registered on objectMapper.
  private static final ObjectWriter objectWriter = objectMapper.writer();

  // Caching the powers of 10 that are used for checking the range of numbers because computing them
  // on-demand is expensive.
  private static final BigDecimal[] POWER_10 = makePower10Table();

  /**
   * Builds the power-of-ten lookup table by wrapping each of the first {@code Power10Util.sb16Size}
   * entries of {@code Power10Util.sb16Table} in a BigDecimal.
   */
  private static BigDecimal[] makePower10Table() {
    BigDecimal[] power10 = new BigDecimal[Power10Util.sb16Size];
    for (int i = 0; i < Power10Util.sb16Size; i++) {
      power10[i] = new BigDecimal(Power10Util.sb16Table[i]);
    }
    return power10;
  }

  /**
   * Validates and parses input as JSON. All types in the object tree must be valid variant types,
   * see {@link DataValidationUtil#isAllowedSemiStructuredType}.
* * @param input Object to validate * @return JSON tree representing the input */ private static JsonNode validateAndParseSemiStructuredAsJsonTree( String columnName, Object input, String snowflakeType, final long insertRowIndex) { if (input instanceof String) { String stringInput = (String) input; verifyValidUtf8(stringInput, columnName, snowflakeType, insertRowIndex); try { return objectMapper.readTree(stringInput); } catch (JsonProcessingException e) { throw valueFormatNotAllowedException( columnName, snowflakeType, "Not a valid JSON", insertRowIndex); } } else if (isAllowedSemiStructuredType(input)) { return objectMapper.valueToTree(input); } throw typeNotAllowedException( columnName, input.getClass(), snowflakeType, new String[] { "String", "Primitive data types and their arrays", "java.time.*", "List", "Map", "T[]" }, insertRowIndex); } /** * Validates and parses input as JSON. All types in the object tree must be valid variant types, * see {@link DataValidationUtil#isAllowedSemiStructuredType}. * * @param input Object to validate * @return Minified JSON string */ private static String validateAndParseSemiStructured( String columnName, Object input, String snowflakeType, final long insertRowIndex) { if (input instanceof String) { final String stringInput = (String) input; verifyValidUtf8(stringInput, columnName, snowflakeType, insertRowIndex); final StringBuilderWriter resultWriter = new StringBuilderWriter(stringInput.length()); Stack> fieldsByLevel = new Stack<>(); try (final JsonParser parser = factory.createParser(stringInput); final JsonGenerator generator = factory.createGenerator(resultWriter)) { while (parser.nextToken() != null) { final JsonToken token = parser.currentToken(); if (token.isNumeric()) { // If the current token is a number, we cannot just copy the current event because it // would write token the token from double (or big decimal), whose scientific notation // may have been altered during deserialization. 
We want to preserve the scientific // notation from the user input, so we write the current numer as text. generator.writeNumber(parser.getText()); } else { // Validates duplicate JSON object fields if (token == JsonToken.START_OBJECT) { fieldsByLevel.push(new DuplicateDetector<>()); } if (token == JsonToken.END_OBJECT) { fieldsByLevel.pop(); } if (token == JsonToken.FIELD_NAME) { // We need to strip trailing nulls from the field name to match the behavior of the // server side json parser. See SNOW-1772196 for more details. String strippedFieldName = Utils.stripTrailingNulls(parser.currentName()); if (fieldsByLevel.peek().isDuplicate(strippedFieldName)) { throw valueFormatNotAllowedException( columnName, snowflakeType, String.format("Not a valid JSON: duplicate field %s", strippedFieldName), insertRowIndex); } } generator.copyCurrentEvent(parser); } } } catch (JsonParseException e) { throw valueFormatNotAllowedException( columnName, snowflakeType, "Not a valid JSON", insertRowIndex); } catch (IOException e) { if (e.getMessage().contains("Duplicate field")) { throw valueFormatNotAllowedException( columnName, snowflakeType, "Not a valid JSON: duplicate field", insertRowIndex); } throw new SFExceptionValidation( e, ErrorCode.IO_ERROR, String.format( "Cannot create JSON Parser or JSON generator for column %s of type %s, rowIndex:%d", columnName, snowflakeType, insertRowIndex)); } // We return the minified string from the result writer return resultWriter.toString(); } else if (isAllowedSemiStructuredType(input)) { try { String result = objectWriter.writeValueAsString(new DuplicateKeyValidatedObject(input)); verifyValidUtf8(result, columnName, snowflakeType, insertRowIndex); return result; } catch (JsonProcessingException e) { throw valueFormatNotAllowedException( columnName, snowflakeType, e.getMessage(), insertRowIndex); } } throw typeNotAllowedException( columnName, input.getClass(), snowflakeType, new String[] { "String", "Primitive data types and their arrays", 
"java.time.*", "List", "Map", "T[]" }, insertRowIndex); } /** * Validates and parses input as JSON. All types in the object tree must be valid variant types, * see {@link DataValidationUtil#isAllowedSemiStructuredType}. * * @param input Object to validate * @param insertRowIndex * @return JSON string representing the input */ static String validateAndParseVariant(String columnName, Object input, long insertRowIndex) { JsonNode node = validateAndParseSemiStructuredAsJsonTree(columnName, input, "VARIANT", insertRowIndex); // Missing nodes are not valid json, ingest them as NULL instead if (node.isMissingNode()) { return null; } String output = node.toString(); int stringLength = output.getBytes(StandardCharsets.UTF_8).length; if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) { throw valueFormatNotAllowedException( columnName, "VARIANT", String.format( "Variant too long: length=%d maxLength=%d", stringLength, MAX_SEMI_STRUCTURED_LENGTH), insertRowIndex); } return output; } /** * Validates and parses input for VARIANT columns, returning a native Java object (Map, List, or * primitive) instead of a JSON string. For String inputs this avoids the serialize→re-parse * roundtrip of {@link #validateAndParseVariant}. 
* * @param input Object to validate * @param insertRowIndex * @return Native Java object (Map, List, String, Number, Boolean, or null for missing nodes) */ static Object validateAndParseVariantAsObject( String columnName, Object input, long insertRowIndex) { JsonNode node = validateAndParseSemiStructuredAsJsonTree(columnName, input, "VARIANT", insertRowIndex); if (node.isMissingNode()) { return null; } String output = node.toString(); int stringLength = output.getBytes(StandardCharsets.UTF_8).length; if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) { throw valueFormatNotAllowedException( columnName, "VARIANT", String.format( "Variant too long: length=%d maxLength=%d", stringLength, MAX_SEMI_STRUCTURED_LENGTH), insertRowIndex); } try { return objectMapper.treeToValue(node, Object.class); } catch (JsonProcessingException e) { // Should never happen: node was already validated by validateAndParseSemiStructuredAsJsonTree throw new IllegalStateException("Failed to convert validated JsonNode to Object", e); } } /** * Validates and parses input as JSON. All types in the object tree must be valid variant types, * see {@link DataValidationUtil#isAllowedSemiStructuredType}. * * @param input Object to validate * @param insertRowIndex * @return JSON string representing the input */ static String validateAndParseVariantNew(String columnName, Object input, long insertRowIndex) { final String result = validateAndParseSemiStructured(columnName, input, "VARIANT", insertRowIndex); // Empty json strings are ingested as nulls if (result.isEmpty()) { return null; } int stringLength = result.getBytes(StandardCharsets.UTF_8).length; if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) { throw valueFormatNotAllowedException( columnName, "VARIANT", String.format( "Variant too long: length=%d maxLength=%d", stringLength, MAX_SEMI_STRUCTURED_LENGTH), insertRowIndex); } return result; } /** * Validates that passed object is allowed data type for semi-structured columns (i.e. 
VARIANT, * ARRAY, OBJECT). For non-trivial types like maps, arrays or lists, it recursively traverses the * object tree and validates that all types in the tree are also allowed. Allowed Java types: * *

    *
  • primitive types (int, long, boolean, ...) *
  • String *
  • BigInteger *
  • BigDecimal *
  • LocalTime *
  • OffsetTime *
  • LocalDate *
  • LocalDateTime *
  • OffsetDateTime *
  • ZonedDateTime *
  • Map where T is an allowed semi-structured type *
  • List where T is an allowed semi-structured type *
  • primitive arrays (char[], int[], ...) *
  • T[] where T is an allowed semi-structured type *
* * @param o Object to validate * @return If the passed object is allowed for ingestion into semi-structured column */ static boolean isAllowedSemiStructuredType(Object o) { // Allow null if (o == null) { return true; } // Allow string if (o instanceof String) { return true; } // Allow all primitive Java data types if (o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte || o instanceof Float || o instanceof Double || o instanceof Boolean || o instanceof Character) { return true; } // Allow BigInteger and BigDecimal if (o instanceof BigInteger || o instanceof BigDecimal) { return true; } // Allow supported types from java.time package if (o instanceof java.time.LocalTime || o instanceof OffsetTime || o instanceof LocalDate || o instanceof LocalDateTime || o instanceof ZonedDateTime || o instanceof OffsetDateTime) { return true; } // Map is allowed, as long as T is also a supported semi-structured type if (o instanceof Map) { boolean allKeysAreStrings = ((Map) o).keySet().stream().allMatch(x -> x instanceof String); if (!allKeysAreStrings) { return false; } boolean allValuesAreAllowed = ((Map) o) .values().stream().allMatch(DataValidationUtil::isAllowedSemiStructuredType); return allValuesAreAllowed; } // Allow arrays of primitive data types if (o instanceof byte[] || o instanceof short[] || o instanceof int[] || o instanceof long[] || o instanceof float[] || o instanceof double[] || o instanceof boolean[] || o instanceof char[]) { return true; } // Allow arrays of allowed semi-structured objects if (o.getClass().isArray()) { return Arrays.stream((Object[]) o).allMatch(DataValidationUtil::isAllowedSemiStructuredType); } // Allow lists consisting of allowed semi-structured objects if (o instanceof List) { return ((List) o).stream().allMatch(DataValidationUtil::isAllowedSemiStructuredType); } // If nothing matches, reject the input return false; } /** * Validates and parses JSON array. 
Non-array types are converted into single-element arrays. All * types in the array tree must be valid variant types, see {@link * DataValidationUtil#isAllowedSemiStructuredType}. * * @param input Object to validate * @param insertRowIndex * @return JSON array representing the input */ static String validateAndParseArray(String columnName, Object input, long insertRowIndex) { JsonNode jsonNode = validateAndParseSemiStructuredAsJsonTree(columnName, input, "ARRAY", insertRowIndex); // Non-array values are ingested as single-element arrays, mimicking the Worksheets behavior if (!jsonNode.isArray()) { jsonNode = objectMapper.createArrayNode().add(jsonNode); } String output = jsonNode.toString(); // Throw an exception if the size is too large int stringLength = output.getBytes(StandardCharsets.UTF_8).length; if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) { throw valueFormatNotAllowedException( columnName, "ARRAY", String.format( "Array too large. length=%d maxLength=%d", stringLength, MAX_SEMI_STRUCTURED_LENGTH), insertRowIndex); } return output; } /** * Validates and parses input for ARRAY columns, returning a native Java List instead of a JSON * string. For String inputs this avoids the serialize→re-parse roundtrip of {@link * #validateAndParseArray}. * * @param input Object to validate * @param insertRowIndex * @return Native Java List */ @SuppressWarnings("unchecked") static List validateAndParseArrayAsList( String columnName, Object input, long insertRowIndex) { JsonNode jsonNode = validateAndParseSemiStructuredAsJsonTree(columnName, input, "ARRAY", insertRowIndex); if (!jsonNode.isArray()) { jsonNode = objectMapper.createArrayNode().add(jsonNode); } String output = jsonNode.toString(); int stringLength = output.getBytes(StandardCharsets.UTF_8).length; if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) { throw valueFormatNotAllowedException( columnName, "ARRAY", String.format( "Array too large. 
length=%d maxLength=%d", stringLength, MAX_SEMI_STRUCTURED_LENGTH), insertRowIndex); } try { return objectMapper.treeToValue(jsonNode, List.class); } catch (JsonProcessingException e) { // Should never happen: node was already validated by validateAndParseSemiStructuredAsJsonTree throw new IllegalStateException("Failed to convert validated JsonNode to List", e); } } /** * Validates and parses JSON array. Non-array types are converted into single-element arrays. All * types in the array tree must be valid variant types, see {@link * DataValidationUtil#isAllowedSemiStructuredType}. * * @param input Object to validate * @param insertRowIndex * @return JSON array representing the input */ static String validateAndParseArrayNew(String columnName, Object input, long insertRowIndex) { String result = validateAndParseSemiStructured(columnName, input, "ARRAY", insertRowIndex); if (result.isEmpty()) { // Empty input is ingested as an array of null result = JsonToken.START_ARRAY.asString() + JsonToken.VALUE_NULL.asString() + JsonToken.END_ARRAY.asString(); } else if (!result.startsWith(JsonToken.START_ARRAY.asString())) { // Non-array values are ingested as single-element arrays, mimicking the Worksheets behavior result = JsonToken.START_ARRAY.asString() + result + JsonToken.END_ARRAY.asString(); } // Throw an exception if the size is too large int stringLength = result.getBytes(StandardCharsets.UTF_8).length; if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) { throw valueFormatNotAllowedException( columnName, "ARRAY", String.format( "Array too large. length=%d maxLength=%d", stringLength, MAX_SEMI_STRUCTURED_LENGTH), insertRowIndex); } return result; } /** * Validates and parses JSON object. Input is rejected if the value does not represent JSON object * (e.g. String '{}' or Map). All types in the object tree must be valid variant types, * see {@link DataValidationUtil#isAllowedSemiStructuredType}. 
*
   * @param input Object to validate
   * @param insertRowIndex
   * @return JSON object representing the input
   */
  static String validateAndParseObject(String columnName, Object input, long insertRowIndex) {
    JsonNode jsonNode =
        validateAndParseSemiStructuredAsJsonTree(columnName, input, "OBJECT", insertRowIndex);
    // Unlike ARRAY columns, non-object values are not wrapped; they are rejected
    if (!jsonNode.isObject()) {
      throw valueFormatNotAllowedException(columnName, "OBJECT", "Not an object", insertRowIndex);
    }

    String output = jsonNode.toString();
    // Throw an exception if the size is too large (measured in UTF-8 bytes)
    int stringLength = output.getBytes(StandardCharsets.UTF_8).length;
    if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) {
      throw valueFormatNotAllowedException(
          columnName,
          "OBJECT",
          String.format(
              "Object too large. length=%d maxLength=%d",
              stringLength, MAX_SEMI_STRUCTURED_LENGTH),
          insertRowIndex);
    }
    return output;
  }

  /**
   * Validates and parses JSON object, working on the JSON text returned by
   * validateAndParseSemiStructured rather than on a JSON tree. Input is rejected if the resulting
   * text does not start with the object-open token. All types in the object tree must be valid
   * variant types, see {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON object representing the input
   */
  static String validateAndParseObjectNew(String columnName, Object input, long insertRowIndex) {
    final String result =
        validateAndParseSemiStructured(columnName, input, "OBJECT", insertRowIndex);
    // An object is recognized purely by its first token
    if (!result.startsWith(JsonToken.START_OBJECT.asString())) {
      throw valueFormatNotAllowedException(columnName, "OBJECT", "Not an object", insertRowIndex);
    }
    // Throw an exception if the size is too large (measured in UTF-8 bytes)
    int stringLength = result.getBytes(StandardCharsets.UTF_8).length;
    if (stringLength > MAX_SEMI_STRUCTURED_LENGTH) {
      throw valueFormatNotAllowedException(
          columnName,
          "OBJECT",
          String.format(
              "Object too large. length=%d maxLength=%d",
              stringLength, MAX_SEMI_STRUCTURED_LENGTH),
          insertRowIndex);
    }
    return result;
  }

  /**
   * Converts user input to offset date time, which is the canonical representation of dates and
   * timestamps.
   *
   * <p>java.time inputs are converted directly; values without zone information (LocalDateTime,
   * LocalDate) are placed in {@code defaultTimezone}, while Instant is always read as UTC. String
   * inputs are trimmed and then tried, in this order, as ZonedDateTime, OffsetDateTime,
   * LocalDateTime, LocalDate and finally as an integer-stored epoch value; the first successful
   * parse wins.
   *
   * <p>A null input matches none of the instanceof checks and reaches {@code input.getClass()},
   * so it fails with a NullPointerException. NOTE(review): callers presumably filter nulls before
   * calling this method; confirm.
   *
   * @param columnName Column name, used in validation error messages
   * @param typeName Snowflake type name reported in validation error messages
   * @param input Value to convert
   * @param defaultTimezone Zone applied to inputs that carry no zone information
   * @param insertRowIndex Row index, used in validation error messages
   * @return The input as an OffsetDateTime
   */
  private static OffsetDateTime inputToOffsetDateTime(
      String columnName,
      String typeName,
      Object input,
      ZoneId defaultTimezone,
      final long insertRowIndex) {
    if (input instanceof OffsetDateTime) {
      return (OffsetDateTime) input;
    }

    if (input instanceof ZonedDateTime) {
      return ((ZonedDateTime) input).toOffsetDateTime();
    }

    if (input instanceof LocalDateTime) {
      return ((LocalDateTime) input).atZone(defaultTimezone).toOffsetDateTime();
    }

    if (input instanceof LocalDate) {
      // A bare date is interpreted as midnight of that day in the default timezone
      return ((LocalDate) input).atStartOfDay().atZone(defaultTimezone).toOffsetDateTime();
    }

    if (input instanceof Instant) {
      // Just like integer-stored timestamps, instants are always interpreted in UTC
      return ((Instant) input).atZone(ZoneOffset.UTC).toOffsetDateTime();
    }

    if (input instanceof String) {
      String stringInput = ((String) input).trim();
      // Each attempt below lives in its own scope; catchParsingError yields null when the text
      // does not match that format, in which case the next format is tried.
      {
        // First, try to parse ZonedDateTime
        ZonedDateTime zoned = catchParsingError(() -> ZonedDateTime.parse(stringInput));
        if (zoned != null) {
          return zoned.toOffsetDateTime();
        }
      }

      {
        // Next, try to parse OffsetDateTime
        OffsetDateTime offset = catchParsingError(() -> OffsetDateTime.parse(stringInput));
        if (offset != null) {
          return offset;
        }
      }

      {
        // Alternatively, try to parse LocalDateTime
        LocalDateTime localDateTime = catchParsingError(() -> LocalDateTime.parse(stringInput));
        if (localDateTime != null) {
          return localDateTime.atZone(defaultTimezone).toOffsetDateTime();
        }
      }

      {
        // Alternatively, try to parse LocalDate
        LocalDate localDate = catchParsingError(() -> LocalDate.parse(stringInput));
        if (localDate != null) {
          return localDate.atStartOfDay().atZone(defaultTimezone).toOffsetDateTime();
        }
      }

      {
        // Alternatively, try to parse integer-stored timestamp
        // Just like in Snowflake, integer-stored timestamps are always in UTC
        Instant instant = catchParsingError(() -> parseInstantGuessScale(stringInput));
        if (instant != null) {
          return instant.atOffset(ZoneOffset.UTC);
        }
      }

      // Couldn't parse anything, throw an exception
      throw valueFormatNotAllowedException(
          columnName,
          typeName,
          "Not a valid value, see"
              + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview"
              + " for the list of supported formats",
          insertRowIndex);
    }

    // Type is not supported, throw an exception
    // NOTE(review): Instant is accepted above but is not named in the list reported here
    throw typeNotAllowedException(
        columnName,
        input.getClass(),
        typeName,
        new String[] {"String", "LocalDate", "LocalDateTime", "ZonedDateTime", "OffsetDateTime"},
        insertRowIndex);
  }

  /**
   * Runs a parsing operation and turns a parsing failure into a null result. Only
   * DateTimeParseException and NumberFormatException are swallowed; any other exception
   * propagates to the caller.
   *
   * <p>NOTE(review): the signature uses T without a visible declaration; a type parameter list
   * was probably lost when this text was extracted. Confirm against the repository.
   *
   * @param op Parsing operation to run
   * @return Result of the operation, or null if it failed to parse
   */
  private static T catchParsingError(Supplier op) {
    try {
      return op.get();
    } catch (DateTimeParseException | NumberFormatException e) {
      return null;
    }
  }

  /**
   * Validates and parses input for TIMESTAMP_NTZ, TIMESTAMP_LTZ and TIMESTAMP_TZ Snowflake types.
   * Allowed Java types:
   *
   *
    *
  • String *
  • LocalDate *
  • LocalDateTime *
  • OffsetDateTime *
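  • ZonedDateTime *
  • Instant (always interpreted in UTC) *
  • Integer, Long (integer-stored epoch value, scale is auto-detected) *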
* * @param columnName Column name, used in validation error messages * @param input String date in valid format, seconds past the epoch or java.time.* object. Accepts * fractional seconds with precision up to the column's scale * @param scale decimal scale of timestamp 16 byte integer * @param defaultTimezone Input, which does not carry timezone information is going to be * interpreted in the default timezone. * @param trimTimezone Whether timezone information should be removed from the resulting date, * should be true for TIMESTAMP_NTZ columns. * @param insertRowIndex * @return TimestampWrapper */ static TimestampWrapper validateAndParseTimestamp( String columnName, Object input, int scale, ZoneId defaultTimezone, boolean trimTimezone, long insertRowIndex) { // Integer/Long epoch values from Kafka JsonConverter — delegate to the same // scale-guessing logic used for string-encoded epochs. Only whole numbers // (Integer, Long) are accepted; fractional types (float, double, BigDecimal) // and BigInteger remain rejected to match SSv1 behavior. if (input instanceof Integer || input instanceof Long) { input = input.toString(); } OffsetDateTime offsetDateTime = inputToOffsetDateTime(columnName, "TIMESTAMP", input, defaultTimezone, insertRowIndex); if (trimTimezone) { offsetDateTime = offsetDateTime.withOffsetSameLocal(ZoneOffset.UTC); } if (offsetDateTime.getYear() < 1 || offsetDateTime.getYear() > 9999) { throw new SFExceptionValidation( ErrorCode.INVALID_VALUE_ROW, String.format( "Timestamp out of representable inclusive range of years between 1 and 9999," + " rowIndex:%d, column:%s, value:%s", insertRowIndex, columnName, offsetDateTime)); } return new TimestampWrapper(offsetDateTime, scale); } /** * Validates a timestamp value and returns an ISO-formatted string. Unlike {@link * #validateAndParseTimestamp} (which returns a {@link TimestampWrapper} for Parquet * serialization), this method returns a human-readable ISO string suitable for passing to the * SSv2 SDK. * *

This is used by RowValidator to normalize Integer/Long epoch values into unambiguous ISO * strings, so the Snowflake backend interprets them correctly regardless of channel timezone. * *

Note: Unlike {@link #validateAndParseTimestamp}, this method omits the {@code scale} * parameter because it only handles Integer/Long epoch inputs which have no fractional seconds. * * @param columnName Column name, used in error messages * @param input Timestamp value (Integer, Long, String, or java.time.* object) * @param defaultTimezone Timezone for inputs without timezone info * @param trimTimezone true for TIMESTAMP_NTZ (strip timezone), false for LTZ/TZ * @param insertRowIndex Row index for error messages * @return ISO timestamp string (e.g., "2024-01-15T10:00" for NTZ, "2024-01-15T10:00Z" for LTZ) */ static String validateAndFormatTimestamp( String columnName, Object input, ZoneId defaultTimezone, boolean trimTimezone, long insertRowIndex) { if (input instanceof Integer || input instanceof Long) { input = input.toString(); } OffsetDateTime offsetDateTime = inputToOffsetDateTime(columnName, "TIMESTAMP", input, defaultTimezone, insertRowIndex); if (trimTimezone) { offsetDateTime = offsetDateTime.withOffsetSameLocal(ZoneOffset.UTC); } if (offsetDateTime.getYear() < 1 || offsetDateTime.getYear() > 9999) { throw new SFExceptionValidation( ErrorCode.INVALID_VALUE_ROW, String.format( "Timestamp out of representable inclusive range of years between 1 and 9999," + " rowIndex:%d, column:%s, value:%s", insertRowIndex, columnName, offsetDateTime)); } return trimTimezone ? offsetDateTime.toLocalDateTime().toString() : offsetDateTime.toString(); } /** * Converts input to string, validates that length is less than max allowed string size * https://docs.snowflake.com/en/sql-reference/data-types-text.html#varchar. Allowed data types: * *

    *
  • String *
  • Number *
  • boolean *
  • char *
* * @param input Object to validate and parse to String * @param maxLengthOptional Maximum allowed length of the output String, if empty then uses * maximum allowed by Snowflake * (https://docs.snowflake.com/en/sql-reference/data-types-text.html#varchar) * @param insertRowIndex */ static String validateAndParseString( String columnName, Object input, Optional maxLengthOptional, long insertRowIndex) { String output; if (input instanceof String) { output = (String) input; verifyValidUtf8(output, columnName, "STRING", insertRowIndex); } else if (input instanceof Number) { output = new BigDecimal(input.toString()).stripTrailingZeros().toPlainString(); } else if (input instanceof Boolean || input instanceof Character) { output = input.toString(); } else { throw typeNotAllowedException( columnName, input.getClass(), "STRING", new String[] {"String", "Number", "boolean", "char"}, insertRowIndex); } byte[] utf8Bytes = output.getBytes(StandardCharsets.UTF_8); // Strings can never be larger than 16MB if (utf8Bytes.length > BYTES_16_MB) { throw valueFormatNotAllowedException( columnName, "STRING", String.format( "String too long: length=%d bytes maxLength=%d bytes", utf8Bytes.length, BYTES_16_MB), insertRowIndex); } // If max allowed length is specified (e.g. VARCHAR(10)), the number of unicode characters must // not exceed this value maxLengthOptional.ifPresent( maxAllowedCharacters -> { int actualCharacters = unicodeCharactersCount(output); if (actualCharacters > maxAllowedCharacters) { throw valueFormatNotAllowedException( columnName, "STRING", String.format( "String too long: length=%d characters maxLength=%d characters", actualCharacters, maxAllowedCharacters), insertRowIndex); } }); return output; } /** * Returns a BigDecimal representation of the input. Strings of the form "1.23E4" will be treated * as being written in * scientific notation (e.g. 1.23 * 10^4). Does not perform any size * validation. Allowed Java types: *
  • byte, short, int, long *
  • float, double *
  • BigInteger, BigDecimal *
  • String */ static BigDecimal validateAndParseBigDecimal( String columnName, Object input, long insertRowIndex) { if (input instanceof BigDecimal) { return (BigDecimal) input; } else if (input instanceof BigInteger) { return new BigDecimal((BigInteger) input); } else if (input instanceof Byte || input instanceof Short || input instanceof Integer || input instanceof Long) { return BigDecimal.valueOf(((Number) input).longValue()); } else if (input instanceof Float || input instanceof Double) { try { return BigDecimal.valueOf(((Number) input).doubleValue()); } catch (NumberFormatException e) { /* NaN and infinity are not allowed */ throw valueFormatNotAllowedException( columnName, "NUMBER", "Not a valid number", insertRowIndex); } } else if (input instanceof String) { try { final String stringInput = ((String) input).trim(); return new BigDecimal(stringInput); } catch (NumberFormatException e) { throw valueFormatNotAllowedException( columnName, "NUMBER", "Not a valid number", insertRowIndex); } } else { throw typeNotAllowedException( columnName, input.getClass(), "NUMBER", new String[] { "int", "long", "byte", "short", "float", "double", "BigDecimal", "BigInteger", "String" }, insertRowIndex); } } /** * Returns the number of days between the epoch and the passed date. Allowed Java types: * *
      *
    • String *
    • {@link LocalDate} *
    • {@link LocalDateTime} *
    • {@link OffsetDateTime} *
    • {@link ZonedDateTime} *
    • {@link Instant} *
    */ static int validateAndParseDate(String columnName, Object input, long insertRowIndex) { OffsetDateTime offsetDateTime = inputToOffsetDateTime(columnName, "DATE", input, ZoneOffset.UTC, insertRowIndex); if (offsetDateTime.getYear() < -9999 || offsetDateTime.getYear() > 9999) { throw new SFExceptionValidation( ErrorCode.INVALID_VALUE_ROW, String.format( "Date out of representable inclusive range of years between -9999 and 9999," + " rowIndex:%d, column:%s, value:%s", insertRowIndex, columnName, offsetDateTime)); } return Math.toIntExact(offsetDateTime.toLocalDate().toEpochDay()); } /** * Validates input for data type BINARY. Allowed Java types: * *
      *
    • byte[] *
    • String (hex-encoded) *
    * * @param input Array to validate * @param maxLengthOptional Max array length, defaults to 8MB, which is the max allowed length for * BINARY column * @param insertRowIndex * @return Validated array */ static byte[] validateAndParseBinary( String columnName, Object input, Optional maxLengthOptional, long insertRowIndex) { byte[] output; if (input instanceof byte[]) { // byte[] is a mutable object, we need to create a defensive copy to protect against // concurrent modifications of the array, which could lead to mismatch between data // and metadata byte[] originalInputArray = (byte[]) input; output = new byte[originalInputArray.length]; System.arraycopy(originalInputArray, 0, output, 0, originalInputArray.length); } else if (input instanceof String) { try { String stringInput = ((String) input).trim(); output = Hex.decodeHex(stringInput); } catch (DecoderException e) { throw valueFormatNotAllowedException( columnName, "BINARY", "Not a valid hex string", insertRowIndex); } } else { throw typeNotAllowedException( columnName, input.getClass(), "BINARY", new String[] {"byte[]", "String"}, insertRowIndex); } int maxLength = maxLengthOptional.orElse(BYTES_8_MB); if (output.length > maxLength) { throw valueFormatNotAllowedException( columnName, "BINARY", String.format("Binary too long: length=%d maxLength=%d", output.length, maxLength), insertRowIndex); } return output; } /** * Returns the number of units since 00:00, depending on the scale (scale=0: seconds, scale=3: * milliseconds, scale=9: nanoseconds). Allowed Java types: * *
      *
    • String *
    • {@link LocalTime} *
    • {@link OffsetTime} *
    */ static BigInteger validateAndParseTime( String columnName, Object input, int scale, long insertRowIndex) { if (input instanceof LocalTime) { LocalTime localTime = (LocalTime) input; return BigInteger.valueOf(localTime.toNanoOfDay()).divide(Power10Util.sb16Table[9 - scale]); } else if (input instanceof OffsetTime) { return validateAndParseTime( columnName, ((OffsetTime) input).toLocalTime(), scale, insertRowIndex); } else if (input instanceof String) { String stringInput = ((String) input).trim(); { // First, try to parse LocalTime LocalTime localTime = catchParsingError(() -> LocalTime.parse(stringInput)); if (localTime != null) { return validateAndParseTime(columnName, localTime, scale, insertRowIndex); } } { // Alternatively, try to parse OffsetTime OffsetTime offsetTime = catchParsingError((() -> OffsetTime.parse(stringInput))); if (offsetTime != null) { return validateAndParseTime(columnName, offsetTime.toLocalTime(), scale, insertRowIndex); } } { // Alternatively, try to parse integer-stored time Instant parsedInstant = catchParsingError(() -> parseInstantGuessScale(stringInput)); if (parsedInstant != null) { return validateAndParseTime( columnName, LocalDateTime.ofInstant(parsedInstant, ZoneOffset.UTC).toLocalTime(), scale, insertRowIndex); } } throw valueFormatNotAllowedException( columnName, "TIME", "Not a valid time, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview" + " for the list of supported formats", insertRowIndex); } else { throw typeNotAllowedException( columnName, input.getClass(), "TIME", new String[] {"String", "LocalTime", "OffsetTime"}, insertRowIndex); } } /** * Attempts to parse integer-stored date from string input. Tries to guess the scale according to * the rules documented at * https://docs.snowflake.com/en/user-guide/date-time-input-output.html#auto-detection-of-integer-stored-date-time-and-timestamp-values. 
* * @param input String to parse, must represent a valid long * @return Instant representing the input * @throws NumberFormatException If the input in not a valid long */ private static Instant parseInstantGuessScale(String input) { BigInteger epochNanos; try { long val = Long.parseLong(input); if (val > -SECONDS_LIMIT_FOR_EPOCH && val < SECONDS_LIMIT_FOR_EPOCH) { epochNanos = BigInteger.valueOf(val).multiply(Power10Util.sb16Table[9]); } else if (val > -MILLISECONDS_LIMIT_FOR_EPOCH && val < MILLISECONDS_LIMIT_FOR_EPOCH) { epochNanos = BigInteger.valueOf(val).multiply(Power10Util.sb16Table[6]); } else if (val > -MICROSECONDS_LIMIT_FOR_EPOCH && val < MICROSECONDS_LIMIT_FOR_EPOCH) { epochNanos = BigInteger.valueOf(val).multiply(Power10Util.sb16Table[3]); } else { epochNanos = BigInteger.valueOf(val); } } catch (NumberFormatException e) { // The input is bigger than max long value, treat it as nano-seconds directly epochNanos = new BigInteger(input); } return Instant.ofEpochSecond( epochNanos.divide(Power10Util.sb16Table[9]).longValue(), epochNanos.remainder(Power10Util.sb16Table[9]).longValue()); } /** * Converts input to double value. Allowed Java types: * *
      *
    • Number *
    • String *
    * * @param input * @param insertRowIndex */ static double validateAndParseReal(String columnName, Object input, long insertRowIndex) { if (input instanceof Float) { return Double.parseDouble(input.toString()); } else if (input instanceof Number) { return ((Number) input).doubleValue(); } else if (input instanceof String) { String stringInput = ((String) input).trim(); try { return Double.parseDouble(stringInput); } catch (NumberFormatException err) { stringInput = stringInput.toLowerCase(); switch (stringInput) { case "nan": return Double.NaN; case "inf": return Double.POSITIVE_INFINITY; case "-inf": return Double.NEGATIVE_INFINITY; default: throw valueFormatNotAllowedException( columnName, "REAL", "Not a valid decimal number", insertRowIndex); } } } throw typeNotAllowedException( columnName, input.getClass(), "REAL", new String[] {"Number", "String"}, insertRowIndex); } static int validateAndParseBoolean(String columnName, Object input, long insertRowIndex) { if (input instanceof Boolean) { return (boolean) input ? 1 : 0; } else if (input instanceof Number) { return new BigDecimal(input.toString()).compareTo(BigDecimal.ZERO) == 0 ? 0 : 1; } else if (input instanceof String) { return convertStringToBoolean(columnName, (String) input, insertRowIndex) ? 1 : 0; } throw typeNotAllowedException( columnName, input.getClass(), "BOOLEAN", new String[] {"boolean", "Number", "String"}, insertRowIndex); } static void checkValueInRange( String columnName, BigDecimal bigDecimalValue, int scale, int precision, final long insertRowIndex) { BigDecimal comparand = (precision >= scale) && (precision - scale) < POWER_10.length ? 
POWER_10[precision - scale] : BigDecimal.TEN.pow(precision - scale); if (bigDecimalValue.abs().compareTo(comparand) >= 0) { throw new SFExceptionValidation( ErrorCode.INVALID_FORMAT_ROW, String.format( "Number out of representable exclusive range of (-1e%s..1e%s), rowIndex:%d," + " column:%s, value:%s", precision - scale, precision - scale, insertRowIndex, columnName, bigDecimalValue)); } } static void checkFixedLengthByteArray( String columnName, byte[] bytes, int length, final long insertRowIndex) { if (bytes.length != length) { throw new SFExceptionValidation( ErrorCode.INVALID_VALUE_ROW, String.format( "Binary length mismatch: expected:%d, actual:%d, rowIndex:%d, column:%s", length, bytes.length, insertRowIndex, columnName)); } } static Set allowedBooleanStringsLowerCased = Sets.newHashSet("1", "0", "yes", "no", "y", "n", "t", "f", "true", "false", "on", "off"); private static boolean convertStringToBoolean( String columnName, String value, final long insertRowIndex) { String normalizedInput = value.toLowerCase().trim(); if (!allowedBooleanStringsLowerCased.contains(normalizedInput)) { throw valueFormatNotAllowedException( columnName, "BOOLEAN", "Not a valid boolean, see" + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean" + " for the list of supported formats", insertRowIndex); } return "1".equals(normalizedInput) || "yes".equals(normalizedInput) || "y".equals(normalizedInput) || "t".equals(normalizedInput) || "true".equals(normalizedInput) || "on".equals(normalizedInput); } /** * Create exception that a Java type cannot be ingested into a specific Snowflake column type * * @param javaType Java type failing the validation * @param snowflakeType Target Snowflake column type * @param allowedJavaTypes Java types supported for the Java type */ private static SFExceptionValidation typeNotAllowedException( String columnName, Class javaType, String snowflakeType, String[] allowedJavaTypes, final long insertRowIndex) { return 
new SFExceptionValidation( ErrorCode.INVALID_FORMAT_ROW, String.format( "Object of type %s cannot be ingested into Snowflake column %s of type %s, rowIndex:%d", javaType.getName(), columnName, snowflakeType, insertRowIndex), String.format( String.format("Allowed Java types: %s", String.join(", ", allowedJavaTypes)))); } /** * Create exception when the Java type is correct, but the value is invalid (e.g. boolean cannot * be parsed from a string) * *

    Note: Do not log actual Object Value * * @param columnName Column Name * @param snowflakeType Snowflake column type * @param reason Reason why value format is not allowed. * @param rowIndex Index of the Input row primarily for debugging purposes. * @return SFExceptionValidation is thrown */ // Package-private: used by RowValidator for consistent error formatting static SFExceptionValidation valueFormatNotAllowedException( String columnName, String snowflakeType, String reason, final long rowIndex) { return new SFExceptionValidation( ErrorCode.INVALID_VALUE_ROW, String.format( "Value cannot be ingested into Snowflake column %s of type %s, rowIndex:%d, reason: %s", columnName, snowflakeType, rowIndex, reason)); } /** * Validates that a string is valid UTF-8 string. It catches situations like unmatched high/low * UTF-16 surrogate, for example. */ private static void verifyValidUtf8( String input, String columnName, String dataType, final long insertRowIndex) { String roundTripStr = new String(input.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); if (!input.equals(roundTripStr)) { throw valueFormatNotAllowedException( columnName, dataType, "Invalid Unicode string", insertRowIndex); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DuplicateDetector.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/DuplicateDetector.java * * Modifications: * - Package changed to com.snowflake.kafka.connector.internal.validation * * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.validation; import java.util.HashSet; import java.util.Set; /** * A utility class that detects duplicate objects. Optimized for Json objects with a small number of * keys. 
*/
public class DuplicateDetector<T> {
  // The first two distinct keys live in plain fields, so objects with at most two keys never
  // allocate a set. A null field means "slot not used yet".
  private T firstKey;
  private T secondKey;

  // Created lazily once a third distinct key shows up
  private Set<T> keys;

  /**
   * Records the key and reports whether an equal key has been recorded before.
   *
   * <p>Null keys are not tracked: because null marks an unused slot, passing null is never
   * reported as a duplicate.
   *
   * @param key Key to record
   * @return true if an equal key was passed to this detector earlier, false otherwise
   */
  public boolean isDuplicate(T key) {
    if (firstKey == null) {
      firstKey = key;
      return false;
    }
    if (firstKey.equals(key)) {
      return true;
    }

    if (secondKey == null) {
      secondKey = key;
      return false;
    }
    if (secondKey.equals(key)) {
      return true;
    }

    // Third and later distinct keys are tracked in the set; add() returns false for a repeat
    if (keys == null) {
      keys = new HashSet<>();
    }
    return !keys.add(key);
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DuplicateKeyValidatedObject.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/DuplicateKeyValidatedObject.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/**
 * A wrapper for an Object that is going to be validated by {@link
 * DuplicateKeyValidatingSerializer}.
 */
public class DuplicateKeyValidatedObject {
  private final Object object;

  /**
   * @param object Object to wrap; it is stored as-is, without copying
   */
  public DuplicateKeyValidatedObject(Object object) {
    this.object = object;
  }

  /**
   * @return The wrapped object
   */
  public Object getObject() {
    return object;
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DuplicateKeyValidatingSerializer.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/DuplicateKeyValidatingSerializer.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
*/

package com.snowflake.kafka.connector.internal.validation;

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.List;
import java.util.Map;

/**
 * A custom Jackson serializer that validates Objects by removing trailing nulls in keys for
 * duplication check. See SNOW-1772196 for more details.
 *
 * <p>Only the duplicate check uses the stripped key; the key written to the output is the original
 * one.
 *
 * <p>NOTE(review): the generic type arguments (on JsonSerializer, Map, Map.Entry, List and
 * DuplicateDetector) are not visible in this text and were probably lost during extraction.
 * Confirm against the repository.
 */
public class DuplicateKeyValidatingSerializer extends JsonSerializer {
  /** Serializes the wrapped object, failing on duplicate keys in any nested map. */
  @Override
  public void serialize(
      DuplicateKeyValidatedObject value, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    sanitizeAndWrite(value.getObject(), gen, serializers);
  }

  /**
   * Writes the object recursively: maps become JSON objects, lists and arrays become JSON arrays,
   * everything else is delegated to the provider's default serialization.
   *
   * @throws JsonGenerationException if two keys of the same map are equal after stripping
   *     trailing nulls
   */
  private void sanitizeAndWrite(Object object, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    if (object == null) {
      gen.writeNull();
      return;
    }

    if (object instanceof Map) {
      gen.writeStartObject();
      Map map = (Map) object;
      // One detector per map: duplicates are only detected among keys of the same object
      DuplicateDetector duplicateDetector = new DuplicateDetector<>();
      for (Map.Entry entry : map.entrySet()) {
        // Keys are converted with toString(), so a null key fails here with NullPointerException
        String key = entry.getKey().toString();
        String strippedKey = Utils.stripTrailingNulls(key);
        if (duplicateDetector.isDuplicate(strippedKey)) {
          throw new JsonGenerationException("Duplicate key in JSON object: " + key, gen);
        }
        // The original key is written, not the stripped one
        gen.writeFieldName(key);
        sanitizeAndWrite(entry.getValue(), gen, serializers);
      }
      gen.writeEndObject();
    } else if (object instanceof List) {
      gen.writeStartArray();
      for (Object item : (List) object) {
        sanitizeAndWrite(item, gen, serializers);
      }
      gen.writeEndArray();
    } else if (object.getClass().isArray()) {
      gen.writeStartArray();
      if (object.getClass().getComponentType().isPrimitive()) {
        // Primitive arrays cannot be cast to Object[]; read them reflectively. Their elements
        // cannot contain maps, so default serialization is used directly.
        final int length = Array.getLength(object);
        for (int i = 0; i < length; i++) {
          serializers.defaultSerializeValue(Array.get(object, i), gen);
        }
      } else {
        for (Object item : (Object[]) object) {
          sanitizeAndWrite(item, gen, serializers);
        }
      }
      gen.writeEndArray();
    } else {
      serializers.defaultSerializeValue(object, gen);
    }
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ErrorCode.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/ErrorCode.java
 *
 * Modifications:
 * - Only validation-related error codes retained (INVALID_FORMAT_ROW, INVALID_VALUE_ROW, UNKNOWN_DATA_TYPE, UNSUPPORTED_DATA_TYPE, IO_ERROR, INTERNAL_ERROR)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021-2024 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/** Ingest SDK internal error codes (validation subset) */
public enum ErrorCode {
  INTERNAL_ERROR("0001"),
  INVALID_FORMAT_ROW("0004"),
  UNKNOWN_DATA_TYPE("0005"),
  IO_ERROR("0020"),
  UNSUPPORTED_DATA_TYPE("0029"),
  INVALID_VALUE_ROW("0030");

  // NOTE(review): presumably the base name of the resource bundle holding the message
  // templates for these codes; confirm against the code that loads it.
  public static final String errorMessageResource =
      "com.snowflake.kafka.connector.internal.validation.ingest_error_messages";

  /** Snowflake internal message associated to the error. */
  private final String messageCode;

  /**
   * Construct a new error code specification given Snowflake internal error code.
   *
   * @param messageCode Snowflake internal error code
   */
  ErrorCode(String messageCode) {
    this.messageCode = messageCode;
  }

  /**
   * @return The Snowflake internal error code given at construction
   */
  public String getMessageCode() {
    return messageCode;
  }

  /** Renders both the enum constant name and its message code. */
  @Override
  public String toString() {
    return "ErrorCode{" + "name=" + this.name() + ", messageCode=" + messageCode + "}";
  }
}

================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/Power10Util.java
================================================
package com.snowflake.kafka.connector.internal.validation;

import java.math.BigInteger;

/**
 * Powers of 10 used for timestamp/time scaling and validation.
/**
 * Powers of 10 used for timestamp/time scaling and validation. Replicates the semantics of
 * Snowflake JDBC internal Power10 so the connector does not depend on JDBC internal APIs (removed
 * in JDBC 4.x).
 */
public final class Power10Util {
  private Power10Util() {}

  /** Size of the power tables (10^0 through 10^9). */
  public static final int sb16Size = 10;

  /** 10^i as int for i in [0, 9]. Used for timestamp fraction scaling. */
  public static final int[] intTable = new int[sb16Size];

  /** 10^i as BigInteger for i in [0, 9]. Used for time/timestamp validation and scaling. */
  public static final BigInteger[] sb16Table = new BigInteger[sb16Size];

  static {
    // Build the tables with exact integer arithmetic. Casting Math.pow(10, i) to int truncates a
    // floating-point result, which would be off by one if pow ever returned a value just below
    // the exact power.
    int power = 1;
    for (int i = 0; i < sb16Size; i++) {
      intTable[i] = power;
      sb16Table[i] = BigInteger.valueOf(power);
      if (i + 1 < sb16Size) {
        // 10^9 is the largest power of ten that fits in an int; do not compute 10^10.
        power *= 10;
      }
    }
  }
}

    Thread-safety: This class is thread-safe. The schema map is immutably captured at construction * time. Multiple threads can safely call validateRow() on the same RowValidator instance * concurrently. */ public class RowValidator { private static final Logger logger = LoggerFactory.getLogger(RowValidator.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final Map columnSchemaMap; /** * Default timezone for timestamp parsing, matching SSv1 SDK behavior. * *

    When parsing timestamps without timezone information (e.g., "2024-03-06 10:00:00"), this * timezone determines how the timestamp is interpreted. Must match SSv1 SDK's * OpenChannelRequest.DEFAULT_DEFAULT_TIMEZONE to ensure identical validation behavior. * *

    SSv1 SDK uses America/Los_Angeles, not UTC. */ private final ZoneId defaultTimezone = ZoneId.of("America/Los_Angeles"); public RowValidator(Map columnSchemaMap) { // Input validation Objects.requireNonNull(columnSchemaMap, "columnSchemaMap cannot be null"); if (columnSchemaMap.isEmpty()) { throw new IllegalArgumentException("columnSchemaMap cannot be empty"); } // Defensive copy for thread safety this.columnSchemaMap = Collections.unmodifiableMap(new HashMap<>(columnSchemaMap)); } /** * Validate a row against the table schema. Performs both structural validation (column presence, * NOT NULL checks) and type/value validation. * *

    Side effect: For BINARY columns, hex string values in the row are replaced in-place * with their {@code byte[]} equivalents so the Ingest SDK receives an unambiguous type. * * @param row Map of column name to value (may be mutated for BINARY normalization) * @return ValidationResult indicating success or failure with error details */ public ValidationResult validateRow(Map row) { // Input validation Objects.requireNonNull(row, "row cannot be null"); // Column names are expected to be already normalized (raw internal names) by the caller. // When column identifier normalization is enabled, SnowflakeSinkRecord sanitizes keys // at record creation time. DESCRIBE TABLE results are already raw names. // Step 1: Structural validation (matching AbstractRowBuffer.verifyInputColumns) Set colNames = row.keySet(); Set extraCols = detectExtraColumns(colNames); Set missingNotNullCols = detectMissingNotNullColumns(colNames); Set nullNotNullCols = detectNullValuesInNotNullColumns(row); if (!extraCols.isEmpty() || !missingNotNullCols.isEmpty() || !nullNotNullCols.isEmpty()) { return ValidationResult.structuralError(extraCols, missingNotNullCols, nullNotNullCols); } // Step 2: Type/value validation (dispatch to DataValidationUtil) for (Map.Entry entry : row.entrySet()) { String colName = entry.getKey(); Object value = entry.getValue(); ColumnSchema col = columnSchemaMap.get(colName); // These conditions should have been caught by structural validation above. // If we reach here, it indicates a bug in structural validation logic. 
if (col == null) { throw new IllegalStateException( "Column " + colName + " not found in schema but was not caught by structural validation"); } if (value == null) { // Null values are valid for nullable columns, skip type validation if (col.isNullable()) { continue; // Valid null for nullable column } // Null value in NOT NULL column should have been caught by structural validation throw new IllegalStateException( "Null value for NOT NULL column " + colName + " but was not caught by structural validation"); } // Skip type validation for the legacy RECORD_CONTENT wrapper column. // In non-schematized mode, this column contains the raw payload (e.g. a plain string, bytes, // or object) and should accept any value the connector places there. // Otherwise, VARIANT client-side validation requires that the payload is a complex object. if (Utils.TABLE_COLUMN_CONTENT.equals(colName)) { continue; } try { Object normalized = validateAndNormalizeColumnValue(col, value); // Reference equality: same object returned for types that don't need normalization if (normalized != value) { entry.setValue(normalized); } } catch (SFExceptionValidation e) { return ValidationResult.typeError(colName, e.getMessage()); } } return ValidationResult.valid(); } /** * Validate a single column value using DataValidationUtil, and return the canonical form to * ingest. */ private Object validateAndNormalizeColumnValue(ColumnSchema col, Object value) throws SFExceptionValidation { // insertRowIndex parameter is used for error messages - use 0 for now final long insertRowIndex = 0; switch (col.getLogicalType()) { case BOOLEAN: // SSv2 SDK only accepts Boolean — normalize to avoid silent drops. // Pre-reject non-0/1 Numbers for KC v3 parity: KC v3's StreamingRecordMapper stringified // all values, and SSv1's convertStringToBoolean rejects e.g. "42". 
if (value instanceof Number && !(value instanceof Boolean)) { BigDecimal bd = new BigDecimal(value.toString()); if (bd.compareTo(BigDecimal.ZERO) != 0 && bd.compareTo(BigDecimal.ONE) != 0) { throw DataValidationUtil.valueFormatNotAllowedException( col.getName(), "BOOLEAN", "Only 0 and 1 are accepted for numeric boolean values", insertRowIndex); } } return DataValidationUtil.validateAndParseBoolean(col.getName(), value, insertRowIndex) == 1 ? Boolean.TRUE : Boolean.FALSE; case FIXED: // Note: DataValidationUtil.validateAndParseBigDecimal doesn't check precision/scale // It just parses the value. Precision/scale checking would need to be done separately // if needed, but SSv1 didn't enforce it at validation time either. DataValidationUtil.validateAndParseBigDecimal(col.getName(), value, insertRowIndex); break; case REAL: DataValidationUtil.validateAndParseReal(col.getName(), value, insertRowIndex); break; case TEXT: case CHAR: // DVU.validateAndParseString only accepts String, Number, boolean, char — it rejects // Map/Collection. However, KC v3's StreamingRecordMapper serialized all non-textual // JsonNodes to JSON strings via Jackson before the SDK saw them. We replicate that // pipeline-level serialization so v4-compat handles Map/Collection inputs the same way. 
if (value instanceof Map || value instanceof Collection) { try { String json = OBJECT_MAPPER.writeValueAsString(value); DataValidationUtil.validateAndParseString( col.getName(), json, Optional.ofNullable(col.getLength()), insertRowIndex); return json; } catch (JsonProcessingException e) { throw DataValidationUtil.valueFormatNotAllowedException( col.getName(), "STRING", "Cannot serialize " + value.getClass().getSimpleName() + " to JSON", insertRowIndex); } } DataValidationUtil.validateAndParseString( col.getName(), value, Optional.ofNullable(col.getLength()), insertRowIndex); break; case BINARY: // The SSv2 interprets String values for BINARY columns as either hex or base64 // depending on the server-side parameter ENABLE_SSV2_DEFAULT_BINARY_FORMAT_BASE64. // Returning byte[] sidesteps this ambiguity: byte[] is accepted uniformly regardless of // how that parameter is set. return DataValidationUtil.validateAndParseBinary( col.getName(), value, Optional.ofNullable(col.getByteLength()), insertRowIndex); case DATE: DataValidationUtil.validateAndParseDate(col.getName(), value, insertRowIndex); break; case TIME: DataValidationUtil.validateAndParseTime( col.getName(), value, col.getScale() != null ? col.getScale() : 9, insertRowIndex); break; case TIMESTAMP_NTZ: return validateAndNormalizeTimestamp(col, value, /* trimTimezone= */ true, insertRowIndex); case TIMESTAMP_LTZ: case TIMESTAMP_TZ: return validateAndNormalizeTimestamp(col, value, /* trimTimezone= */ false, insertRowIndex); case VARIANT: // When input is a String, the SSv2 SDK stores it as a JSON-quoted string (e.g. // '{"a":1}' → '"{\\"a\\":1}"'), whereas SSv1 stored the parsed native object. // validateAndParseVariantAsObject returns a native Java object (Map/List/primitive) // so the SDK receives the right type. 
if (value instanceof String) { return DataValidationUtil.validateAndParseVariantAsObject( col.getName(), value, insertRowIndex); } DataValidationUtil.validateAndParseVariant(col.getName(), value, insertRowIndex); break; case ARRAY: // SSv2 SDK wraps a String value for an ARRAY column as a single-element array (e.g. // "[1,2,3]" → ["[1,2,3]"]), while SSv1 parsed the string into a proper array. // validateAndParseArrayAsList returns a native List so the SDK gets the right type. if (value instanceof String) { return DataValidationUtil.validateAndParseArrayAsList( col.getName(), value, insertRowIndex); } DataValidationUtil.validateAndParseArray(col.getName(), value, insertRowIndex); break; case OBJECT: // No normalization needed: SSv2 SDK correctly parses JSON strings for OBJECT columns // (unlike VARIANT/ARRAY). Passing the original String value through is safe. DataValidationUtil.validateAndParseObject(col.getName(), value, insertRowIndex); break; default: throw new SFExceptionValidation( ErrorCode.UNKNOWN_DATA_TYPE, col.getName(), col.getLogicalType()); } return value; } /** * Validate and optionally normalize a timestamp value. Integer/Long epoch values are converted to * ISO strings so the SSv2 SDK interprets them correctly; other types are validated in place. */ private Object validateAndNormalizeTimestamp( ColumnSchema col, Object value, boolean trimTimezone, long insertRowIndex) throws SFExceptionValidation { if (value instanceof Integer || value instanceof Long) { return DataValidationUtil.validateAndFormatTimestamp( col.getName(), value, defaultTimezone, trimTimezone, insertRowIndex); } DataValidationUtil.validateAndParseTimestamp( col.getName(), value, col.getScale() != null ? col.getScale() : 9, defaultTimezone, trimTimezone, insertRowIndex); return value; } /** Detect columns in the row that don't exist in the table schema. 
*/ private Set detectExtraColumns(Set unquotedRowCols) { Set extraCols = new HashSet<>(); for (String unquotedName : unquotedRowCols) { if (!columnSchemaMap.containsKey(unquotedName)) { extraCols.add(unquotedName); } } return extraCols; } /** Detect NOT NULL columns that are missing from the row, excluding server-filled columns. */ private Set detectMissingNotNullColumns(Set unquotedRowCols) { Set missingNotNullCols = new HashSet<>(); for (Map.Entry entry : columnSchemaMap.entrySet()) { String colName = entry.getKey(); ColumnSchema col = entry.getValue(); if (!col.isNullable() && !col.isServerFilled() && !unquotedRowCols.contains(colName)) { missingNotNullCols.add(colName); } } return missingNotNullCols; } /** Detect NOT NULL columns that have null values in the row. */ private Set detectNullValuesInNotNullColumns(Map normalizedRow) { Set nullNotNullCols = new HashSet<>(); for (Map.Entry entry : normalizedRow.entrySet()) { String colName = entry.getKey(); // Already normalized // Validate column name is not empty if (colName == null || colName.trim().isEmpty()) { logger.warn("Skipping validation for empty column name"); continue; } Object value = entry.getValue(); ColumnSchema col = columnSchemaMap.get(colName); if (col != null && !col.isNullable() && value == null) { nullNotNullCols.add(colName); } } return nullNotNullCols; } /** * Static validator for unsupported types at channel open time. Throws SFExceptionValidation if * the schema contains unsupported types. 
* * @param schema Map of column name to ColumnSchema * @throws SFExceptionValidation if unsupported types are found */ public static void validateSchema(Map schema) throws SFExceptionValidation { for (ColumnSchema col : schema.values()) { if (col.getLogicalType() == null) { throw new SFExceptionValidation(ErrorCode.UNKNOWN_DATA_TYPE, col.getName()); } // Reject collated columns (not supported in SSv1 validation) if (col.getCollation() != null && !col.getCollation().isEmpty()) { throw new SFExceptionValidation( ErrorCode.UNSUPPORTED_DATA_TYPE, "Collated columns not supported", col.getName()); } // GEOGRAPHY and GEOMETRY are not in ColumnLogicalType enum // They would show up as null logicalType and be caught above } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/SFExceptionValidation.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/SFException.java * * Modifications: * - Package changed to com.snowflake.kafka.connector.internal.validation * - Class renamed to SFExceptionValidation to avoid conflict with com.snowflake.ingest.streaming.SFException * * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.validation; import java.text.MessageFormat; import java.util.ResourceBundle; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** Snowflake exception for client-side validation */ public class SFExceptionValidation extends RuntimeException { static final Logger logger = LoggerFactory.getLogger(SFExceptionValidation.class); static final ResourceBundle errorMessageBundle = ResourceBundle.getBundle(ErrorCode.errorMessageResource); private Throwable cause; private String vendorCode; private Object[] params; private static String getErrorMessage(final ErrorCode errorCode, final Object... 
/**
 * Builds a validation exception whose message is the error code's message template formatted with
 * the given parameters.
 *
 * @param cause underlying throwable handed to the superclass and returned by {@link #getCause()};
 *     the two-argument constructor passes {@code null}
 * @param errorCode error code whose message code selects the template and is kept as the vendor
 *     code
 * @param params values substituted into the message template; the array is stored as passed
 */
public SFExceptionValidation(Throwable cause, ErrorCode errorCode, Object... params) {
  super(getErrorMessage(errorCode, params), cause);
  this.cause = cause;
  this.params = params;
  this.vendorCode = errorCode.getMessageCode();
}
*/ package com.snowflake.kafka.connector.internal.validation; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.LoadingCache; /** * Normalizes SQL identifiers to their raw internal column names, matching server-side storage. * *

    Rules: * *

      *
    • Quoted identifier {@code "MyCol"} → strips quotes, preserves case → {@code MyCol} *
    • Quoted with escaped quotes {@code "col""name"} → strips quotes, unescapes → {@code * col"name} *
    • Unquoted identifier {@code myCol} → uppercases → {@code MYCOL} *
    * *

    Note: The methods in this class have to be kept in sync with the respective methods on server * side. */ public class SqlIdentifierNormalizer { /** Maximum number of normalized identifiers to store in cache */ static final int NORMALIZED_IDENTIFIER_CACHE_MAX_SIZE = 30000; /** Cache storing normalized identifiers */ private static final LoadingCache normalizedIdentifierCache; static { normalizedIdentifierCache = Caffeine.newBuilder() .maximumSize(NORMALIZED_IDENTIFIER_CACHE_MAX_SIZE) .build(SqlIdentifierNormalizer::normalizeSqlIdentifierInternal); } /** * Normalize a SQL identifier to its raw internal column name. Uses a cache to avoid repeated * computation for the same identifier. * * @param sqlIdentifier the SQL identifier (may be quoted or unquoted) * @return the raw internal column name */ public static String normalizeSqlIdentifier(String sqlIdentifier) { return normalizedIdentifierCache.get(sqlIdentifier); } /** * Normalize a SQL identifier to its raw internal column name. * *

    Normalises the column name to how it is stored internally. This function needs to keep in * sync with server side normalisation. * * @param sqlIdentifier SQL identifier to normalize * @return raw internal column name */ private static String normalizeSqlIdentifierInternal(String sqlIdentifier) { int length = sqlIdentifier.length(); if (length == 0) { return sqlIdentifier; } // If this is an identifier that starts and ends with double quotes, // remove them - accounting for escaped double quotes. // Differs from the second condition in that this one allows repeated // double quotes if (sqlIdentifier.charAt(0) == '"' && (length >= 2 && sqlIdentifier.charAt(length - 1) == '"' && // Condition that the string contains no single double-quotes // but allows repeated double-quotes !sqlIdentifier.substring(1, length - 1).replace("\"\"", "").contains("\""))) { // Remove quotes and turn escaped double-quotes to single double-quotes return sqlIdentifier.substring(1, length - 1).replace("\"\"", "\""); } // If this is an identifier that starts and ends with double quotes, // remove them. Internal single double-quotes are not allowed. 
else if (sqlIdentifier.charAt(0) == '"' && (length >= 2 && sqlIdentifier.charAt(length - 1) == '"' && !sqlIdentifier.substring(1, length - 1).contains("\""))) { // Remove the quotes return sqlIdentifier.substring(1, length - 1); } // unquoted string that can have escaped spaces else { // replace escaped spaces in unquoted name if (sqlIdentifier.contains("\\ ")) { sqlIdentifier = sqlIdentifier.replace("\\ ", " "); } return sqlIdentifier.toUpperCase(); } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/TimestampWrapper.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/TimestampWrapper.java * * Modifications: * - Package changed to com.snowflake.kafka.connector.internal.validation * * Copyright (c) 2023 Snowflake Computing Inc. All rights reserved. */ package com.snowflake.kafka.connector.internal.validation; import java.math.BigDecimal; import java.math.BigInteger; import java.math.RoundingMode; import java.time.OffsetDateTime; /** * This class represents the outcome of timestamp parsing and validation. It contains methods needed * to serialize timestamps into Parquet. */ public class TimestampWrapper { /** Epoch seconds */ private final long epoch; /** Fractional part of the second */ private final int fraction; /** Timezone offset in seconds */ private final int timezoneOffsetSeconds; /** Scale of the timestamp column (0-9) */ private final int scale; /** * How many bits should be reserver for the timezone part. Needs to be aligned with {@link * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#BITS_FOR_TIMEZONE} */ private static final int BITS_FOR_TIMEZONE = 14; /** * Mask of the timezone bits. 
Needs to be aligned with {@link * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#MASK_OF_TIMEZONE} */ private static final int MASK_OF_TIMEZONE = (1 << BITS_FOR_TIMEZONE) - 1; /** Create a new instance from {@link OffsetDateTime} and its scale. */ public TimestampWrapper(OffsetDateTime offsetDateTime, int scale) { if (scale < 0 || scale > 9) { throw new IllegalArgumentException( String.format("Scale must be between 0 and 9, actual: %d", scale)); } this.epoch = offsetDateTime.toEpochSecond(); this.fraction = offsetDateTime.getNano() / Power10Util.intTable[9 - scale] * Power10Util.intTable[9 - scale]; this.timezoneOffsetSeconds = offsetDateTime.getOffset().getTotalSeconds(); this.scale = scale; } /** * Convert the timestamp to a binary representation. Needs to be aligned with {@link * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#toBinary}. */ public BigInteger toBinary(boolean includeTimezone) { BigDecimal timeInNs = BigDecimal.valueOf(epoch).scaleByPowerOfTen(9).add(new BigDecimal(fraction)); BigDecimal scaledTime = timeInNs.scaleByPowerOfTen(scale - 9); scaledTime = scaledTime.setScale(0, RoundingMode.DOWN); BigInteger fcpInt = scaledTime.unscaledValue(); if (includeTimezone) { int offsetMin = timezoneOffsetSeconds / 60; assert offsetMin >= -1440 && offsetMin <= 1440; offsetMin += 1440; fcpInt = fcpInt.shiftLeft(14); fcpInt = fcpInt.add(BigInteger.valueOf(offsetMin & MASK_OF_TIMEZONE)); } return fcpInt; } /** Get epoch in seconds */ public long getEpochSecond() { return epoch; } /** Get fractional part of a second */ public int getFraction() { return fraction; } /** Get timezone offset in seconds */ public int getTimezoneOffsetSeconds() { return timezoneOffsetSeconds; } /** * Get timezone index, 1440 means UTC. 
/** Utility methods for validation */
class Utils {
  /**
   * Removes every NUL character ({@code U+0000}) from the end of a string. NUL characters that are
   * followed by any other character are left in place.
   *
   * @param key input string
   * @return {@code key} itself when it does not end with a NUL character, otherwise the prefix of
   *     {@code key} up to the last non-NUL character
   */
  public static String stripTrailingNulls(String key) {
    final int originalLength = key.length();

    // Walk backwards to the last character that is not NUL (-1 when there is none).
    int lastKept = originalLength - 1;
    for (; lastKept >= 0; lastKept--) {
      if (key.charAt(lastKept) != '\u0000') {
        break;
      }
    }

    final int keptLength = lastKept + 1;
    if (keptLength == originalLength) {
      return key;
    }
    return key.substring(0, keptLength);
  }
}
*/ public class ValidationResult { private final boolean valid; private final boolean hasTypeError; private final boolean hasStructuralError; private final String valueError; private final String columnName; private final Set extraColNames; private final Set missingNotNullColNames; private final Set nullValueForNotNullColNames; private ValidationResult( boolean valid, boolean hasTypeError, boolean hasStructuralError, String valueError, String columnName, Set extraColNames, Set missingNotNullColNames, Set nullValueForNotNullColNames) { this.valid = valid; this.hasTypeError = hasTypeError; this.hasStructuralError = hasStructuralError; this.valueError = valueError; this.columnName = columnName; // Create defensive immutable copies of all sets for thread safety this.extraColNames = Collections.unmodifiableSet(new HashSet<>(extraColNames)); this.missingNotNullColNames = Collections.unmodifiableSet(new HashSet<>(missingNotNullColNames)); this.nullValueForNotNullColNames = Collections.unmodifiableSet(new HashSet<>(nullValueForNotNullColNames)); } /** Create a valid result */ public static ValidationResult valid() { return new ValidationResult( true, false, false, null, null, Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); } /** Create a type/value error result */ public static ValidationResult typeError(String columnName, String errorMessage) { return new ValidationResult( false, true, false, errorMessage, columnName, Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); } /** Create a structural error result */ public static ValidationResult structuralError( Set extraColNames, Set missingNotNullColNames, Set nullValueForNotNullColNames) { return new ValidationResult( false, false, true, null, null, extraColNames, missingNotNullColNames, nullValueForNotNullColNames); } public boolean isValid() { return valid; } public boolean hasTypeError() { return hasTypeError; } public boolean hasStructuralError() { return hasStructuralError; 
} public String getValueError() { return valueError; } public String getColumnName() { return columnName; } public Set getExtraColNames() { return extraColNames; } public Set getMissingNotNullColNames() { return missingNotNullColNames; } public Set getNullValueForNotNullColNames() { return nullValueForNotNullColNames; } /** * Check if this structural error can be resolved with schema evolution. * *

    Matches KC v3 behavior where ALL structural errors trigger schema evolution: - Extra * columns: YES - add via ALTER TABLE ADD COLUMN - Null in NOT NULL: YES - drop constraint via * ALTER TABLE DROP NOT NULL - Missing NOT NULL columns: YES - drop constraint via ALTER TABLE * DROP NOT NULL (KC v3 behavior) * *

    KC v3's InsertErrorMapper.java joined missingNotNullColNames and nullValueForNotNullColNames * into a single list of columns to drop NOT NULL. We maintain this behavior. * * @return true if the error can be resolved with schema evolution */ public boolean needsSchemaEvolution() { return hasStructuralError && (!extraColNames.isEmpty() || !nullValueForNotNullColNames.isEmpty() || !missingNotNullColNames.isEmpty()); } /** * Check if this structural error cannot be resolved with schema evolution. * *

    In KC v3, all structural errors (extra columns, missing NOT NULL, null NOT NULL) were * resolvable via schema evolution. We maintain the same behavior for backwards compatibility. * * @return true if the error is unresolvable (always false for structural errors) */ public boolean hasUnresolvableError() { // All structural errors are resolvable via schema evolution (matches KC v3 behavior) return false; } public String getErrorType() { if (hasTypeError) { return "type_error"; } else if (hasStructuralError) { return "structural_error"; } else { return "unknown"; } } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ZonedDateTimeSerializer.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/ZonedDateTimeSerializer.java * * Modifications: * - Package changed to com.snowflake.kafka.connector.internal.validation * * Copyright (c) 2021-2022 Snowflake Computing Inc. All rights reserved. 
*/ package com.snowflake.kafka.connector.internal.validation; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonSerializer; import com.fasterxml.jackson.databind.SerializerProvider; import java.io.IOException; import java.time.ZonedDateTime; /** Snowflake does not support parsing zones, so serialize it in offset instead */ public class ZonedDateTimeSerializer extends JsonSerializer { @Override public void serialize(ZonedDateTime value, JsonGenerator gen, SerializerProvider serializers) throws IOException { gen.writeString(value.toOffsetDateTime().toString()); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/records/KafkaRecordConverter.java ================================================ package com.snowflake.kafka.connector.records; import com.snowflake.kafka.connector.internal.KCLogger; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.time.ZoneId; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import org.apache.kafka.connect.data.ConnectSchema; import org.apache.kafka.connect.data.Date; import org.apache.kafka.connect.data.Decimal; import org.apache.kafka.connect.data.Field; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.data.Time; import org.apache.kafka.connect.data.Timestamp; import org.apache.kafka.connect.header.Header; import org.apache.kafka.connect.header.Headers; public final class KafkaRecordConverter { private static final KCLogger LOGGER = new KCLogger(KafkaRecordConverter.class.getName()); private static final int MAX_SNOWFLAKE_NUMBER_PRECISION = 
38; private static final ConcurrentHashMap, Optional> SCHEMA_TYPE_CACHE = new ConcurrentHashMap<>(); private static final DateTimeFormatter ISO_DATE_TIME_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(ZoneOffset.UTC); private static final DateTimeFormatter TIME_FORMAT = DateTimeFormatter.ofPattern("HH:mm:ss.SSSXXX").withZone(ZoneId.systemDefault()); private KafkaRecordConverter() {} /** * Converts a Kafka Connect value with its schema directly to a Map suitable for Snowflake * streaming ingest. */ public static Map convertToMap(Schema schema, Object value) { if (LOGGER.isDebugEnabled()) { LOGGER.debug( "Converting record to map. Schema: {}, valueType: {}", schema != null ? schema.type() : "null", value != null ? value.getClass().getSimpleName() : "null"); } if (value == null) { return new HashMap<>(); } if (value instanceof Map) { return convertMapToMap((Map) value, schema); } if (value instanceof Struct) { return convertStructToMap((Struct) value); } throw SnowflakeErrors.ERROR_5015.getException( "Cannot schematize record. Record value must be a Map or Struct. Consider using kafka" + " HoistField transformer to wrap the value of the record."); } public static Map convertHeaders(Headers headers) { Map result = new HashMap<>(); if (headers == null) { LOGGER.trace("Headers is null, returning empty map"); return result; } for (Header header : headers) { Object headerValue = convertValue(header.schema(), header.value()); result.put(header.key(), headerValue == null ? null : String.valueOf(headerValue)); } return result; } public static Object convertKey(Schema keySchema, Object key) { if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Converting key. Schema: {}, keyType: {}", keySchema != null ? keySchema.type() : "null", key != null ? 
key.getClass().getSimpleName() : "null"); } if (key == null) { LOGGER.trace("Key is null, returning null"); return null; } return convertValue(keySchema, key); } private static Map convertStructToMap(Struct struct) { Map result = new HashMap<>(); Schema schema = struct.schema(); for (Field field : schema.fields()) { if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Converting struct field: {}, schema: {}", field.name(), field.schema() != null ? field.schema().type() : "null"); } Object fieldValue = convertValue(field.schema(), struct.get(field)); result.put(field.name(), fieldValue); } return result; } private static Map convertMapToMap(Map map, Schema schema) { Map result = new LinkedHashMap<>(); Schema valueSchema = schema != null ? schema.valueSchema() : null; for (Map.Entry entry : map.entrySet()) { String key = String.valueOf(entry.getKey()); Object convertedValue; if (entry.getValue() instanceof Map) { convertedValue = convertMapToMap((Map) entry.getValue(), valueSchema); } else if (entry.getValue() instanceof Struct) { convertedValue = convertStructToMap((Struct) entry.getValue()); } else { convertedValue = convertValue(valueSchema, entry.getValue()); } result.put(key, convertedValue); } return result; } static Object convertValue(Schema schema, Object value) { if (value == null) { if (schema == null) { LOGGER.trace("Value is null with no schema, returning null"); return null; } if (schema.defaultValue() != null) { return convertValue(schema, schema.defaultValue()); } if (schema.isOptional()) { LOGGER.trace("Value is null for optional field, returning null"); return null; } throw SnowflakeErrors.ERROR_5015.getException( "Conversion error: null value for field that is required and has no default value"); } final Schema.Type schemaType = getSchemaType(schema, value); if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Converting value of type {} with schemaType {}", value.getClass().getSimpleName(), schemaType); } // Validate that value type matches schema type when 
schema is present if (schema != null) { validateValueType(schema.type(), value); } switch (schemaType) { case INT8: case INT16: case BOOLEAN: LOGGER.trace("Passthrough for primitive type: {}", schemaType); return value; case INT32: return convertInt32(schema, value); case INT64: return convertInt64(schema, value); case FLOAT32: return handleFloatSpecialValues((Float) value); case FLOAT64: return handleDoubleSpecialValues((Double) value); case STRING: LOGGER.trace("Converting to String"); return value.toString(); case BYTES: return convertBytes(schema, value); case ARRAY: return convertArray(schema, value); case MAP: return convertMapValue(schema, value); case STRUCT: return convertStructToMap((Struct) value); default: throw SnowflakeErrors.ERROR_5015.getException("Couldn't convert " + value + " to Object."); } } private static void validateValueType(Schema.Type schemaType, Object value) { boolean valid; switch (schemaType) { case INT8: valid = value instanceof Byte; break; case INT16: valid = value instanceof Short; break; case INT32: valid = value instanceof Integer || value instanceof java.util.Date; break; case INT64: valid = value instanceof Long || value instanceof java.util.Date; break; case FLOAT32: valid = value instanceof Float; break; case FLOAT64: valid = value instanceof Double; break; case BOOLEAN: valid = value instanceof Boolean; break; case STRING: valid = value instanceof String; break; case BYTES: valid = value instanceof byte[] || value instanceof ByteBuffer || value instanceof BigDecimal; break; case ARRAY: valid = value instanceof Collection; break; case MAP: valid = value instanceof Map; break; case STRUCT: valid = value instanceof Struct; break; default: valid = false; } if (!valid) { throw SnowflakeErrors.ERROR_5015.getException( "Type mismatch: expected " + schemaType + " but got " + value.getClass().getName()); } } private static Schema.Type getSchemaType(Schema schema, Object value) { if (schema != null) { return schema.type(); } 
LOGGER.trace( "No schema provided, inferring type from value class: {}", value.getClass().getName()); // Handle collections and maps before checking primitive schema types // ConnectSchema.schemaType() only matches exact classes, not subclasses if (value instanceof Map) { return Schema.Type.MAP; } if (value instanceof Collection) { return Schema.Type.ARRAY; } Optional cachedType = SCHEMA_TYPE_CACHE.computeIfAbsent( value.getClass(), clazz -> Optional.ofNullable(ConnectSchema.schemaType(clazz))); if (cachedType.isPresent()) { return cachedType.get(); } if (value instanceof java.util.Date) { return Schema.Type.INT64; } throw SnowflakeErrors.ERROR_5015.getException( "Java class " + value.getClass() + " does not have corresponding schema type."); } private static Object convertInt32(Schema schema, Object value) { if (schema != null && Date.LOGICAL_NAME.equals(schema.name())) { LOGGER.trace("Converting INT32 Date logical type to ISO format"); return ISO_DATE_TIME_FORMAT.format(((java.util.Date) value).toInstant()); } if (schema != null && Time.LOGICAL_NAME.equals(schema.name())) { LOGGER.trace("Converting INT32 Time logical type to time format"); return TIME_FORMAT.format(((java.util.Date) value).toInstant()); } LOGGER.trace("Passthrough for INT32 value"); return value; } private static Object convertInt64(Schema schema, Object value) { if (schema != null && Timestamp.LOGICAL_NAME.equals(schema.name())) { LOGGER.trace("Converting INT64 Timestamp logical type to string"); return ISO_DATE_TIME_FORMAT.format(((java.util.Date) value).toInstant()); } LOGGER.trace("Passthrough for INT64 value"); return value; } private static Object convertBytes(Schema schema, Object value) { if (schema != null && Decimal.LOGICAL_NAME.equals(schema.name())) { BigDecimal bigDecimalValue = (BigDecimal) value; if (bigDecimalValue.precision() > MAX_SNOWFLAKE_NUMBER_PRECISION) { if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Converting Decimal with precision {} (exceeds max {}) to string", 
bigDecimalValue.precision(), MAX_SNOWFLAKE_NUMBER_PRECISION); } return bigDecimalValue.toString(); } return bigDecimalValue; } LOGGER.trace("Converting bytes to byte[]"); return toByteArray(value); } private static byte[] toByteArray(Object value) { if (value instanceof byte[]) { return (byte[]) value; } if (value instanceof ByteBuffer) { ByteBuffer byteBuffer = (ByteBuffer) value; if (byteBuffer.hasArray()) { return byteBuffer.array(); } ByteBuffer clone = ByteBuffer.allocate(byteBuffer.capacity()); byteBuffer.rewind(); clone.put(byteBuffer); byteBuffer.rewind(); clone.flip(); return clone.array(); } throw SnowflakeErrors.ERROR_5015.getException( "Invalid type for bytes type: " + value.getClass()); } private static List convertArray(Schema schema, Object value) { Collection collection = (Collection) value; List result = new ArrayList<>(collection.size()); Schema elementSchema = schema != null ? schema.valueSchema() : null; if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Array element schema: {}", elementSchema != null ? elementSchema.type() : "null"); } for (Object elem : collection) { result.add(convertValue(elementSchema, elem)); } return result; } private static Object convertMapValue(Schema schema, Object value) { Map map = (Map) value; boolean useObjectMode = shouldUseObjectMode(schema, map); if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Converting nested Map with {} entries, useObjectMode: {}", map.size(), useObjectMode); } if (useObjectMode) { Map result = new LinkedHashMap<>(); Schema valueSchema = schema != null ? schema.valueSchema() : null; for (Map.Entry entry : map.entrySet()) { String key = String.valueOf(entry.getKey()); result.put(key, convertValue(valueSchema, entry.getValue())); } return result; } else { // Non-string keys: use array encoding [[key, value], [key, value], ...] List> result = new ArrayList<>(); Schema keySchema = schema != null ? schema.keySchema() : null; Schema valueSchema = schema != null ? 
schema.valueSchema() : null; if (LOGGER.isTraceEnabled()) { LOGGER.trace( "Array mode key schema: {}, value schema: {}", keySchema != null ? keySchema.type() : "null", valueSchema != null ? valueSchema.type() : "null"); } for (Map.Entry entry : map.entrySet()) { List pair = new ArrayList<>(2); pair.add(convertValue(keySchema, entry.getKey())); pair.add(convertValue(valueSchema, entry.getValue())); result.add(pair); } return result; } } private static boolean shouldUseObjectMode(Schema schema, Map map) { if (schema != null) { return schema.keySchema() != null && schema.keySchema().type() == Schema.Type.STRING; } // For schemaless, check if all keys are strings for (Object key : map.keySet()) { if (!(key instanceof String)) { return false; } } return true; } private static Object handleFloatSpecialValues(Float value) { if (Float.isNaN(value)) { return "NaN"; } if (Float.isInfinite(value)) { return value > 0 ? "Inf" : "-Inf"; } return value; } private static Object handleDoubleSpecialValues(Double value) { if (Double.isNaN(value)) { return "NaN"; } if (Double.isInfinite(value)) { return value > 0 ? 
"Inf" : "-Inf"; } return value; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/records/SnowflakeMetadataConfig.java ================================================ package com.snowflake.kafka.connector.records; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT; import com.google.common.base.MoreObjects; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import java.util.HashMap; import java.util.Map; import java.util.Optional; public class SnowflakeMetadataConfig { final boolean createtimeFlag; final boolean connectorPushTimeFlag; final boolean topicFlag; final boolean offsetAndPartitionFlag; final boolean allFlag; /** initialize with default config */ public SnowflakeMetadataConfig() { this(new HashMap<>()); } /** * Set flag to false only if metadata config is not set to "true" in config. 
* * @param config a String to String map of configs */ public SnowflakeMetadataConfig(Map config) { createtimeFlag = getMetadataProperty(config, SNOWFLAKE_METADATA_CREATETIME); topicFlag = getMetadataProperty(config, SNOWFLAKE_METADATA_TOPIC); offsetAndPartitionFlag = getMetadataProperty(config, SNOWFLAKE_METADATA_OFFSET_AND_PARTITION); allFlag = getMetadataProperty(config, SNOWFLAKE_METADATA_ALL); connectorPushTimeFlag = Optional.ofNullable(config.get(SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME)) .map(Boolean::parseBoolean) .orElse(SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT); } public boolean shouldIncludeAllMetadata() { return allFlag; } private static boolean getMetadataProperty(Map config, String property) { String value = Optional.ofNullable(config.get(property)) .orElse(KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT); return Boolean.parseBoolean(value); } @Override public String toString() { return MoreObjects.toStringHelper(this) .add("createtimeFlag", createtimeFlag) .add("connectorPushTimeFlag", connectorPushTimeFlag) .add("topicFlag", topicFlag) .add("offsetAndPartitionFlag", offsetAndPartitionFlag) .add("allFlag", allFlag) .toString(); } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/records/SnowflakeSinkRecord.java ================================================ package com.snowflake.kafka.connector.records; import static com.snowflake.kafka.connector.Utils.TABLE_COLUMN_METADATA; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.internal.validation.SqlIdentifierNormalizer; import java.time.Instant; import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.Field; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import 
org.apache.kafka.connect.sink.SinkRecord; /** * A lightweight wrapper for Kafka SinkRecords that stores data in the format required by the * Snowflake Streaming Ingest SDK ({@code Map}). */ public final class SnowflakeSinkRecord { static final String OFFSET = "offset"; static final String TOPIC = "topic"; static final String PARTITION = "partition"; static final String KEY = "key"; static final String CONNECTOR_PUSH_TIME = "SnowflakeConnectorPushTime"; static final String HEADERS = "headers"; private final Map content; private final Map metadata; private final Schema schema; private final RecordState state; private final Exception brokenReason; public enum RecordState { VALID, TOMBSTONE, BROKEN } private SnowflakeSinkRecord( Map content, Map metadata, Schema schema, RecordState state, Exception brokenReason) { this.content = content; this.metadata = metadata; this.schema = schema; this.state = state; this.brokenReason = brokenReason; } public static SnowflakeSinkRecord from( SinkRecord record, SnowflakeMetadataConfig metadataConfig, boolean enableSchematization, boolean enableColumnIdentifierNormalization) { return from( record, metadataConfig, Instant.now(), enableSchematization, enableColumnIdentifierNormalization); } public static SnowflakeSinkRecord from( SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime, boolean enableSchematization, boolean enableColumnIdentifierNormalization) { // First validate the key if present - a broken key means a broken record if (record.key() != null && record.keySchema() != null) { try { KafkaRecordConverter.convertKey(record.keySchema(), record.key()); } catch (Exception e) { return createBrokenRecord(record, metadataConfig, connectorPushTime, e); } } if (record.value() == null) { return createTombstoneRecord(record, metadataConfig, connectorPushTime); } try { Map content; Schema schema = record.valueSchema(); if (enableSchematization) { content = KafkaRecordConverter.convertToMap(schema, 
record.value()); if (enableColumnIdentifierNormalization) { content = normalizeColumnNames(content); schema = normalizeSchemaFieldNames(schema); } } else { content = wrapValueAsRecordContent(schema, record.value()); schema = RECORD_CONTENT_WRAPPER_SCHEMA; } Map metadata = buildMetadata(record, metadataConfig, connectorPushTime); return new SnowflakeSinkRecord(content, metadata, schema, RecordState.VALID, null); } catch (Exception e) { return createBrokenRecord(record, metadataConfig, connectorPushTime, e); } } /** * Wraps the record value under the {@code RECORD_CONTENT} key. * *

    For structured types (Map/Struct) the value is converted to a Map so the SDK infers VARIANT. * *

    For primitive types the converted value is placed directly into the map. The SSv2 SDK * serializes the map to NDJSON via Jackson, which handles native Java types (String, Number, * Boolean) correctly for VARIANT columns. Unlike KCv3/SSv1 (which required JSON-serialized * strings because SSv1 re-parsed them via {@code readTree}), SSv2 passes NDJSON straight to the * server — so JSON-serializing here would produce double-quoted strings. */ private static Map wrapValueAsRecordContent(Schema schema, Object value) { Map content = new HashMap<>(); Object convertedValue; if (value instanceof Map || value instanceof Struct) { convertedValue = KafkaRecordConverter.convertToMap(schema, value); } else { convertedValue = KafkaRecordConverter.convertValue(schema, value); } content.put(Utils.TABLE_COLUMN_CONTENT, convertedValue); return content; } /** * Builds a synthetic Struct schema declaring {@code RECORD_CONTENT} as STRUCT (→ VARIANT). * *

    Assumptions: * *

      *
    • RECORD_CONTENT is always a VARIANT column in Snowflake, regardless of the Kafka value * type. Even bare strings (from StringConverter) must land as VARIANT, not VARCHAR. *
    • STRUCT is used because {@link * com.snowflake.kafka.connector.internal.schemaevolution.SnowflakeColumnTypeMapper} maps * STRUCT to "VARIANT". If schema evolution needs to ADD this column, it must infer VARIANT. *
    • This only applies to standard Snowflake tables. Iceberg tables with typed RECORD_CONTENT * columns would need a different schema strategy. *
    */ private static final Schema RECORD_CONTENT_WRAPPER_SCHEMA = SchemaBuilder.struct() .field(Utils.TABLE_COLUMN_CONTENT, SchemaBuilder.struct().optional().build()) .build(); private static SnowflakeSinkRecord createTombstoneRecord( SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) { Map metadata = buildMetadata(record, metadataConfig, connectorPushTime); return new SnowflakeSinkRecord( Collections.emptyMap(), metadata, record.valueSchema(), RecordState.TOMBSTONE, null); } private static SnowflakeSinkRecord createBrokenRecord( SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime, Exception reason) { Map metadata = buildMetadataSafe(record, metadataConfig, connectorPushTime); return new SnowflakeSinkRecord( Collections.emptyMap(), metadata, record.valueSchema(), RecordState.BROKEN, reason); } private static Map buildMetadataSafe( SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) { final Map metadata = buildMetadataBase(record, metadataConfig, connectorPushTime); // For broken records, store key as string if conversion fails if (record.key() != null) { try { Object convertedKey = KafkaRecordConverter.convertKey(record.keySchema(), record.key()); metadata.put(KEY, convertedKey); } catch (Exception e) { metadata.put(KEY, String.valueOf(record.key())); } } // Add headers (these should be safe to convert) if (record.headers() != null && !record.headers().isEmpty()) { try { metadata.put(HEADERS, KafkaRecordConverter.convertHeaders(record.headers())); } catch (Exception e) { // Skip headers if conversion fails } } return metadata; } private static Map buildMetadata( SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) { final Map metadata = buildMetadataBase(record, metadataConfig, connectorPushTime); // Add key to metadata addKeyToMetadata(record, metadata); // Add headers if (record.headers() != null && !record.headers().isEmpty()) { 
metadata.put(HEADERS, KafkaRecordConverter.convertHeaders(record.headers())); } return metadata; } private static Map buildMetadataBase( SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) { final Map metadata = new HashMap<>(); if (metadataConfig.topicFlag) { metadata.put(TOPIC, record.topic()); } if (metadataConfig.offsetAndPartitionFlag) { metadata.put(OFFSET, record.kafkaOffset()); metadata.put(PARTITION, record.kafkaPartition()); } if (record.timestampType() != TimestampType.NO_TIMESTAMP_TYPE && metadataConfig.createtimeFlag) { metadata.put(record.timestampType().name, record.timestamp()); } if (connectorPushTime != null && metadataConfig.connectorPushTimeFlag) { metadata.put(CONNECTOR_PUSH_TIME, connectorPushTime.toEpochMilli()); } return metadata; } private static void addKeyToMetadata(SinkRecord record, Map metadata) { if (record.key() == null) { return; } Schema keySchema = record.keySchema(); Object key = record.key(); try { // Always use convertKey to ensure type validation when schema is present Object convertedKey = KafkaRecordConverter.convertKey(keySchema, key); metadata.put(KEY, convertedKey); } catch (Exception e) { // If key conversion fails, store the key as a string representation metadata.put(KEY, String.valueOf(key)); } } public Schema getSchema() { return schema; } private static Schema normalizeSchemaFieldNames(Schema schema) { if (schema == null || schema.type() != Schema.Type.STRUCT) { return schema; } SchemaBuilder builder = SchemaBuilder.struct(); if (schema.name() != null) { builder.name(schema.name()); } if (schema.isOptional()) { builder.optional(); } for (Field field : schema.fields()) { String normalizedName = SqlIdentifierNormalizer.normalizeSqlIdentifier(field.name()); builder.field(normalizedName, field.schema()); } return builder.build(); } private static Map normalizeColumnNames(Map content) { Map normalized = new HashMap<>(content.size()); for (Map.Entry entry : content.entrySet()) { 
normalized.put( SqlIdentifierNormalizer.normalizeSqlIdentifier(entry.getKey()), entry.getValue()); } return normalized; } public Map getContent() { return content; } public Map getContentWithMetadata(boolean includeMetadata) { if (!includeMetadata || metadata.isEmpty()) { return content; } Map result = new HashMap<>(content); result.put(TABLE_COLUMN_METADATA, metadata); return result; } public Map getMetadata() { return metadata; } public RecordState getState() { return state; } public boolean isValid() { return state == RecordState.VALID; } public boolean isTombstone() { return state == RecordState.TOMBSTONE; } public boolean isBroken() { return state == RecordState.BROKEN; } public Exception getBrokenReason() { return brokenReason; } } ================================================ FILE: src/main/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergDDLTypes.java ================================================ package com.snowflake.kafka.connector.streaming.iceberg; public class IcebergDDLTypes { public static String ICEBERG_METADATA_OBJECT_SCHEMA = "OBJECT(" + "offset LONG," + "topic STRING," + "partition INTEGER," + "key STRING," + "CreateTime BIGINT," + "SnowflakeConnectorPushTime BIGINT," + "headers MAP(VARCHAR, VARCHAR)" + ")"; } ================================================ FILE: src/main/resources/com/snowflake/kafka/connector/ingest_error_messages.properties ================================================ # # Copyright (c) 2024 Snowflake Computing Inc. All rights reserved. # # # Exception messages. # These error code are associated to symbols in ErrorCode.java # 0001=Ingest client internal error: {0}. 0002=Required value is null, Key: {0}. 0003=Required value is empty, Key: {0}. 0004=The given row cannot be converted to the internal format: {0}. {1} 0005=Unknown data type for column: {0}. logical: {1}, physical: {2}. 0006=Register blob request failed: {0}. 0007=Open channel request failed: {0}. 0008=Failed to construct HTTP request: {0}. 
0009=Client configure request failed: {0}. 0010=Missing {0} in config file. 0011=Failed to upload blob. 0012=Failed to cleanup resources during {0}. 0013=Channel {0} is invalid and might contain uncommitted rows, please consider reopening the channel to restart. Channel invalidation cause: "{1}". 0014=Channel {0} is closed, please reopen the channel to restart. 0015=Invalid Snowflake URL, URL format: 'https://..snowflakecomputing.com:443', 'https://' and ':443' are optional. 0016=Client is closed, please recreate to restart. 0017=Invalid private key, private key should be a valid PEM RSA private key. 0018=Invalid encrypted private key or passphrase, failed to decrypt private key. 0019=Invalid table data in chunk. 0020=Ingest client encountered IO error. 0021=Unable to connect to streaming ingest stage: {0}. 0022=Unable to create key pair from the provided private key. 0023=MD5 hashing algorithm is not available. 0024=Get channel status request failed: {0}. 0025=One or more channels {0} might contain uncommitted rows due to server side errors, please consider reopening the channels to replay the data loading by using the latest persistent offset token. 0026=Invalid collation string: {0}. {1} 0027=Failure during data encryption. 0028=Get channel status indicates Channel {0} is invalid with status code {1}, please reopen the channel. 0029=Data type not supported: {0} 0030=The given row cannot be converted to the internal format due to invalid value: {0} 0031=The given row exceeds the maximum allowed row size {0} 0032=URI builder fail to build url: {0} 0033=OAuth token refresh failure: {0} 0034=Invalid config parameter: {0} 0035=Failed to load {0}. If you use FIPS, import BouncyCastleFipsProvider in the application: {1} 0036=Failed to drop channel: {0} 0037=Deployment ID mismatch, Client was created on: {0}, Got upload location for: {1}. Please restart client: {2}. 0038=Generate presigned URLs request failed: {0}. 0039=Refresh Table Information request failed: {0}. 
================================================ FILE: src/main/resources/com/snowflake/kafka/connector/internal/validation/ingest_error_messages.properties ================================================ # # COPIED FROM SNOWFLAKE INGEST SDK V1 # Source: snowflake-ingest-java/src/main/resources/net/snowflake/ingest/ingest_error_messages.properties # # Modifications: # - Only validation-related error codes retained (0001, 0004, 0005, 0020, 0029, 0030) # - Resource path changed to match new package structure # # Copyright (c) 2024 Snowflake Computing Inc. All rights reserved. # # # Exception messages. # These error codes are associated to symbols in ErrorCode.java # 0001=Ingest client internal error: {0}. 0004=The given row cannot be converted to the internal format: {0}. {1} 0005=Unknown data type for column: {0}. logical: {1}, physical: {2}. 0020=Ingest client encountered IO error. 0029=Data type not supported: {0} 0030=The given row cannot be converted to the internal format due to invalid value: {0} ================================================ FILE: src/test/java/com/snowflake/kafka/connector/CachingConfigValidatorTest.java ================================================ package com.snowflake.kafka.connector; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertThrows; import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import 
com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator; import java.util.Map; import java.util.stream.Stream; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; class CachingConfigValidatorTest { private final ConnectorConfigValidator validator = new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator()); private static Stream validCacheExpirations() { return Stream.of(Arguments.of("30000", "60000"), Arguments.of("3600000", "7200000")); } private static Stream validCacheEnabledDisabled() { return Stream.of( Arguments.of("true", "true"), Arguments.of("True", "True"), Arguments.of("TRUE", "TRUE"), Arguments.of("false", "false"), Arguments.of("False", "False"), Arguments.of("FALSE", "FALSE")); } @ParameterizedTest(name = "[{index}] {2}") @MethodSource("validCacheExpirations") void test_valid_expirations(String tableExpireMs, String pipeExpireMs) { Map config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build(); config.put(CACHE_TABLE_EXISTS_EXPIRE_MS, tableExpireMs); config.put(CACHE_PIPE_EXISTS_EXPIRE_MS, pipeExpireMs); assertDoesNotThrow(() -> validator.validateConfig(config)); } @ParameterizedTest(name = "[{index}] {2}") @MethodSource("validCacheEnabledDisabled") void test_valid_enabled_disabled(String tableExists, String pipeExists) { Map config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build(); config.put(CACHE_TABLE_EXISTS, tableExists); config.put(CACHE_PIPE_EXISTS, pipeExists); assertDoesNotThrow(() -> validator.validateConfig(config)); } private static Stream invalidConfigurationProvider() { return Stream.of( Arguments.of(CACHE_TABLE_EXISTS_EXPIRE_MS, "0", "Should reject zero table expiration"), Arguments.of(CACHE_TABLE_EXISTS_EXPIRE_MS, "-1", "Should reject negative table expiration"), Arguments.of(CACHE_PIPE_EXISTS_EXPIRE_MS, "0", "Should reject zero pipe expiration"), Arguments.of( 
CACHE_PIPE_EXISTS_EXPIRE_MS, "-5000", "Should reject negative pipe expiration"), Arguments.of( CACHE_TABLE_EXISTS_EXPIRE_MS, "invalid", "Should reject non-numeric table expiration"), Arguments.of( CACHE_PIPE_EXISTS_EXPIRE_MS, "not a number", "Should reject non-numeric pipe expiration"), Arguments.of( CACHE_TABLE_EXISTS, "blag blag", "Should reject invalid boolean for table exists"), Arguments.of(CACHE_TABLE_EXISTS, "ture", "Should reject typo in boolean for table exists"), Arguments.of(CACHE_TABLE_EXISTS, "1", "Should reject numeric boolean for table exists"), Arguments.of( CACHE_TABLE_EXISTS, "yes", "Should reject non-boolean string for table exists"), Arguments.of(CACHE_PIPE_EXISTS, "0", "Should reject numeric boolean for pipe exists"), Arguments.of(CACHE_PIPE_EXISTS, "no", "Should reject non-boolean string for pipe exists")); } @ParameterizedTest(name = "[{index}] {2}") @MethodSource("invalidConfigurationProvider") void testInvalidCacheConfiguration(String configKey, String configValue, String description) { Map config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build(); config.put(configKey, configValue); assertThrows( SnowflakeKafkaConnectorException.class, () -> validator.validateConfig(config), description); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/ConnectClusterBaseIT.java ================================================ package com.snowflake.kafka.connector; import static org.awaitility.Awaitility.await; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.streaming.FakeIngestClientSupplier; import com.snowflake.kafka.connector.internal.streaming.FakeSnowflakeStreamingIngestClient; import java.time.Duration; import java.util.HashMap; import java.util.Map; import org.apache.kafka.connect.json.JsonConverter; import 
org.apache.kafka.connect.runtime.ConnectorConfig; import org.apache.kafka.connect.sink.SinkConnector; import org.apache.kafka.connect.storage.StringConverter; import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.TestInstance; /** Base class for integration tests using an embedded Kafka Connect cluster. */ @TestInstance(TestInstance.Lifecycle.PER_CLASS) public abstract class ConnectClusterBaseIT { protected EmbeddedConnectCluster connectCluster; protected final FakeIngestClientSupplier fakeClientSupplier = new FakeIngestClientSupplier(); static final Integer TASK_NUMBER = 1; @BeforeAll public void beforeAll() { Map workerConfig = new HashMap<>(); workerConfig.put("plugin.discovery", "hybrid_warn"); // this parameter decides how often preCommit is called on the task workerConfig.put("offset.flush.interval.ms", "5000"); connectCluster = new EmbeddedConnectCluster.Builder() .name("kafka-push-connector-connect-cluster") .numWorkers(1) .workerProps(workerConfig) .build(); connectCluster.start(); } @AfterAll public void afterAll() { if (connectCluster != null) { connectCluster.stop(); connectCluster = null; } } protected FakeSnowflakeStreamingIngestClient getOpenedFakeIngestClient(String connectorName) { await("channelsCreated") .atMost(Duration.ofSeconds(60)) .ignoreExceptions() .until( () -> !getFakeSnowflakeStreamingIngestClient(connectorName) .getOpenedChannels() .isEmpty()); return getFakeSnowflakeStreamingIngestClient(connectorName); } protected void waitForOpenedFakeIngestClient(String connectorName) { getOpenedFakeIngestClient(connectorName); } protected final Map defaultProperties(String topicName, String connectorName) { Map config = TestUtils.transformProfileFileToConnectorConfiguration(false); config.put(SinkConnector.TOPICS_CONFIG, topicName); config.put( ConnectorConfig.CONNECTOR_CLASS_CONFIG, 
SnowflakeStreamingSinkConnector.class.getName()); config.put(ConnectorConfig.TASKS_MAX_CONFIG, TASK_NUMBER.toString()); config.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName()); config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName()); config.put(KafkaConnectorConfigParams.NAME, connectorName); config.put(KafkaConnectorConfigParams.VALUE_CONVERTER_SCHEMAS_ENABLE, "false"); config.put( KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, "true"); return config; } protected final void waitForConnectorRunning(String connectorName) { try { connectCluster .assertions() .assertConnectorAndAtLeastNumTasksAreRunning( connectorName, 1, "The connector did not start."); } catch (InterruptedException e) { throw new IllegalStateException("The connector is not running"); } } protected final void waitForConnectorDoesNotExist(String connectorName) { try { connectCluster .assertions() .assertConnectorDoesNotExist(connectorName, "Failed to stop the connector"); } catch (InterruptedException e) { throw new IllegalStateException("The connector is not running"); } } protected final void waitForConnectorStopped(String connectorName) { try { connectCluster .assertions() .assertConnectorIsStopped(connectorName, "Connector should be stopped"); } catch (InterruptedException e) { throw new IllegalStateException("The connector is not running"); } } private FakeSnowflakeStreamingIngestClient getFakeSnowflakeStreamingIngestClient( String connectorName) { // Connector names are sanitized/uppercased by Utils.convertAppName() in the connector Map config = new HashMap<>(); config.put(KafkaConnectorConfigParams.NAME, connectorName); Utils.convertAppName(config); String sanitizedConnectorName = config.get(KafkaConnectorConfigParams.NAME); return fakeClientSupplier.getFakeIngestClients().stream() .filter((client) -> client.getClientName().contains(sanitizedConnectorName)) .findFirst() .orElseThrow(); } 
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectorConfigValidatorLogsTest.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.internal.TestUtils.generateAESKey;
import static com.snowflake.kafka.connector.internal.TestUtils.generatePrivateKey;
import static com.snowflake.kafka.connector.internal.TestUtils.getConfig;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.PrivateKeyTool;
import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.security.PrivateKey;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/** Checks that validating a configuration does not write the private key passphrase to sf.log. */
public class ConnectorConfigValidatorLogsTest {

  private final ConnectorConfigValidator connectorConfigValidator =
      new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator());

  @Test
  public void testRSAPasswordOutput() throws Exception {
    // given
    PrivateKey privateKey = generatePrivateKey();
    String testPasswd = "TestPassword1234!";
    String testKey = generateAESKey(privateKey, testPasswd.toCharArray());
    Map<String, String> testConf = getConfig();
    // put() overwrites any key already present in the base config
    testConf.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, testKey);
    testConf.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, testPasswd);

    // when
    connectorConfigValidator.validateConfig(testConf);

    // then
    PrivateKeyTool.parsePrivateKey(testKey, testPasswd);
    Assertions.assertFalse(logFileContains(testPasswd));
  }

  // Note that sf.log accumulates logs between the consecutive test runs
  // That's why it's very hard to test many scenarios without hacks like test ordering and deleting
  // log file
  /**
   * Scans {@code sf.log} in the working directory line by line.
   *
   * @param str text to look for
   * @return {@code true} if any line of the log contains {@code str}
   * @throws IOException if the log file cannot be opened or read
   */
  private boolean logFileContains(String str) throws IOException {
    File log = new File("sf.log");
    // try-with-resources closes the reader on every path, including the early return
    try (BufferedReader reader = new BufferedReader(new FileReader(log))) {
      String line;
      while ((line = reader.readLine()) != null) {
        if (line.contains(str)) {
          return true;
        }
      }
      return false;
    }
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectorConfigValidatorTest.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_USER;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_USE_PROXY;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME;
import static
com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME;
import static com.snowflake.kafka.connector.internal.TestUtils.getConfig;
import static org.assertj.core.api.Assertions.assertThatCode;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.AuthenticatorType;
import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.kafka.connect.storage.Converter;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.CsvSource;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Unit tests for {@link DefaultConnectorConfigValidator}: each test builds a connector
 * configuration map, optionally breaks it, and checks whether {@code validateConfig} accepts it
 * or rejects it with a {@link SnowflakeKafkaConnectorException} naming the offending setting.
 */
public class ConnectorConfigValidatorTest {

  // subset of valid community converters
  public static final List<Converter> COMMUNITY_CONVERTER_SUBSET =
      Arrays.asList(
          new org.apache.kafka.connect.json.JsonConverter(),
          new io.confluent.connect.avro.AvroConverter());

  private final ConnectorConfigValidator connectorConfigValidator =
      new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator());

  /** Argument source for {@link #shouldValidateCorrectConfig(Map)}. */
  public static Stream<Arguments> validConfigs() {
    return Stream.of(
        Arguments.of(SnowflakeSinkConnectorConfigBuilder.streamingConfig().build()),
        Arguments.of(SnowflakeSinkConnectorConfigBuilder.streamingConfig().build()));
  }

  @ParameterizedTest(name = "Valid config: {0}")
  @MethodSource("validConfigs")
  public void shouldValidateCorrectConfig(Map<String, String> config) {
    // no exception thrown
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testConfig() {
    Map<String, String> config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testConfig_ConvertedInvalidAppName() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withName("testConfig.snowflake-connector")
            .build();
    Utils.convertAppName(config);
    connectorConfigValidator.validateConfig(config);
  }

  /** Removing any single one of the listed properties must be reported by name. */
  @ParameterizedTest
  @CsvSource({
    NAME,
    SNOWFLAKE_URL_NAME,
    SNOWFLAKE_USER_NAME,
    SNOWFLAKE_DATABASE_NAME,
    SNOWFLAKE_SCHEMA_NAME,
    SNOWFLAKE_PRIVATE_KEY,
    SNOWFLAKE_ROLE_NAME
  })
  public void shouldThrowExForEmptyProperty(String prop) {
    Map<String, String> config = getConfig();
    config.remove(prop);
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(prop);
  }

  @Test
  public void testCorrectProxyHost() {
    Map<String, String> config = getConfig();
    config.put(JVM_PROXY_HOST, "127.0.0.1");
    config.put(JVM_PROXY_PORT, "3128");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testEmptyPort() {
    Map<String, String> config = getConfig();
    config.put(JVM_PROXY_HOST, "127.0.0.1");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(JVM_PROXY_HOST);
  }

  @Test
  public void testEmptyHost() {
    Map<String, String> config = getConfig();
    config.put(JVM_PROXY_PORT, "3128");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(JVM_PROXY_PORT);
  }

  /**
   * Checks that {@code Utils.enableJVMProxy} appends the configured non-proxy hosts to the ones
   * already present in the JVM system property.
   */
  @Test
  public void testNonProxyHosts() {
    String oldNonProxyHosts = System.getProperty(HTTP_NON_PROXY_HOSTS);
    try {
      System.setProperty(HTTP_NON_PROXY_HOSTS, "host1.com|host2.com|localhost");
      Map<String, String> config = getConfig();
      config.put(JVM_PROXY_HOST, "127.0.0.1");
      config.put(JVM_PROXY_PORT, "3128");
      config.put(
          KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS,
          "*.snowflakecomputing.com|*.amazonaws.com");
      Utils.enableJVMProxy(config);
      Assert.assertEquals(
          "host1.com|host2.com|localhost|*.snowflakecomputing.com|*.amazonaws.com",
          System.getProperty(HTTP_NON_PROXY_HOSTS));
    } finally {
      // Always undo the JVM-wide changes, even when the assertion above fails, so that the
      // proxy settings do not leak into other tests running in the same JVM.
      if (oldNonProxyHosts != null) {
        System.setProperty(HTTP_NON_PROXY_HOSTS, oldNonProxyHosts);
      } else {
        System.clearProperty(HTTP_NON_PROXY_HOSTS);
      }
      // clear properties to prevent other tests from failing
      System.clearProperty(HTTP_USE_PROXY);
      System.clearProperty(HTTP_PROXY_HOST);
      System.clearProperty(HTTP_PROXY_PORT);
      System.clearProperty(HTTPS_PROXY_HOST);
      System.clearProperty(HTTPS_PROXY_PORT);
      System.clearProperty(HTTPS_PROXY_USER);
      System.clearProperty(HTTPS_PROXY_PASSWORD);
    }
  }

  @Test
  public void testIllegalTopicMap() {
    Map<String, String> config = getConfig();
    config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "$@#$#@%^$12312");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(SNOWFLAKE_TOPICS2TABLE_MAP);
  }

  @Test
  public void testIllegalTableName() {
    Map<String, String> config = getConfig();
    config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "topic1:\"unterminated");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining("Unterminated quoted token");
  }

  @Test
  public void testDuplicatedTopic() {
    Map<String, String> config = getConfig();
    config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "topic1:table1,topic1:table2");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining("Duplicate topic: topic1");
  }

  @Test
  public void testDuplicatedTableName() {
    Map<String, String> config = getConfig();
    config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "topic1:table1,topic2:table1");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testNameMapCovered() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.TOPICS, "!@#,$%^,test");
    config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "!@#:table1,$%^:table2");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testBehaviorOnNullValuesConfig_valid_value() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES, "IGNORE");
    connectorConfigValidator.validateConfig(config);

    config.put(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES, "DEFAULT");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testBehaviorOnNullValuesConfig_invalid_value() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES, "INVALID");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES);
  }

  @Test
  public void testJMX_valid_value() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.JMX_OPT, "true");
    connectorConfigValidator.validateConfig(config);

    config.put(KafkaConnectorConfigParams.JMX_OPT, "False");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testJMX_invalid_value() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.JMX_OPT, "INVALID");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.JMX_OPT);
  }

  @Test
  public void testIngestionTypeConfig_valid_value_snowpipe() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testIngestionTypeConfig_valid_value_snowpipe_streaming() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testIngestionTypeConfig_invalid_snowpipe_streaming() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
  }

  /** These error tests are not going to enforce errors if they are not passed as configs. */
  @Test
  public void testErrorTolerance_AllowedValues() {
    Map<String, String> config = getConfig();
    config.put(ERRORS_TOLERANCE_CONFIG, ConnectorConfigTools.ErrorTolerance.ALL.toString());
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    connectorConfigValidator.validateConfig(config);

    config.put(ERRORS_TOLERANCE_CONFIG, ConnectorConfigTools.ErrorTolerance.NONE.toString());
    connectorConfigValidator.validateConfig(config);

    config.put(ERRORS_TOLERANCE_CONFIG, "all");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testErrorTolerance_DisallowedValues() {
    Map<String, String> config = getConfig();
    config.put(ERRORS_TOLERANCE_CONFIG, "INVALID");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(ERRORS_TOLERANCE_CONFIG);
  }

  @Test
  public void testErrorLog_AllowedValues() {
    Map<String, String> config = getConfig();
    config.put(ERRORS_LOG_ENABLE_CONFIG, "true");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    connectorConfigValidator.validateConfig(config);

    config.put(ERRORS_LOG_ENABLE_CONFIG, "FALSE");
    connectorConfigValidator.validateConfig(config);

    config.put(ERRORS_LOG_ENABLE_CONFIG, "TRUE");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testErrorLog_DisallowedValues() {
    Map<String, String> config = getConfig();
    config.put(ERRORS_LOG_ENABLE_CONFIG, "INVALID");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG);
  }

  @Test
  public void testValidKeyAndValueConvertersForStreamingSnowpipe() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    // Class#getName() yields the fully qualified class name expected in converter settings;
    // Class#toString() would prepend "class " and never match a real converter name.
    COMMUNITY_CONVERTER_SUBSET.forEach(
        converter -> {
          config.put(KafkaConnectorConfigParams.KEY_CONVERTER, converter.getClass().getName());
          connectorConfigValidator.validateConfig(config);
        });

    COMMUNITY_CONVERTER_SUBSET.forEach(
        converter -> {
          config.put(KafkaConnectorConfigParams.VALUE_CONVERTER, converter.getClass().getName());
          connectorConfigValidator.validateConfig(config);
        });
  }

  @Test
  public void testUnsupportedConverter() {
    Map<String, String> config = getConfig();
    config.put(
        KafkaConnectorConfigParams.VALUE_CONVERTER,
        "org.apache.kafka.connect.storage.StringConverter");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining("org.apache.kafka.connect.storage.StringConverter");
  }

  @Test
  public void testStreamingProviderOverrideConfig_validWithSnowpipeStreaming() {
    Map<String, String> config = getConfig();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "a:b,c:d,e:100,f:true");
    connectorConfigValidator.validateConfig(config);
  }

  @Test
  public void testInvalidEmptyConfig() {
    Map<String, String> config = new HashMap<>();
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(SNOWFLAKE_DATABASE_NAME)
        .hasMessageContaining(SNOWFLAKE_SCHEMA_NAME)
        .hasMessageContaining(SNOWFLAKE_PRIVATE_KEY)
        .hasMessageContaining(SNOWFLAKE_USER_NAME)
        .hasMessageContaining(NAME)
        .hasMessageContaining(SNOWFLAKE_ROLE_NAME)
        .hasMessageContaining(SNOWFLAKE_URL_NAME);
  }

  // removes each of the following params iteratively to test if the log/exception has all the
  // expected removed params
  @Test
  public void testMultipleInvalidConfigs() {
    List<String> emptyParams =
        Arrays.asList(
            SNOWFLAKE_DATABASE_NAME,
            SNOWFLAKE_SCHEMA_NAME,
            SNOWFLAKE_PRIVATE_KEY,
            SNOWFLAKE_USER_NAME,
            NAME,
            SNOWFLAKE_ROLE_NAME,
            SNOWFLAKE_URL_NAME);
    List<String> paramsToRemove = new ArrayList<>();
    for (String param : emptyParams) {
      paramsToRemove.add(param);
      this.invalidConfigRunner(paramsToRemove);
    }
  }

  @Test
  public void shouldValidateSSv2Config() {
    Map<String, String> config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldThrowExceptionWhenRoleNotDefinedForSSv2() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().withoutRole().build();
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(SNOWFLAKE_ROLE_NAME);
  }

  // -- Compatibility validation tests --

  @Test
  public void shouldPassWhenCompatValidateEnabledAndAllCompatSettingsCorrect() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldPassWhenCompatValidateEnabledAndSchematizationExplicitlyTrue() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, "true");
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndValidationModeWrong() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION)
        .hasMessageContaining("client_side");
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndSchematizationNotSet() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.remove(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION);
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION)
        .hasMessageContaining("not explicitly set");
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndColumnNormalizationWrong() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "false");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION);
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndTableSanitizationWrong() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(
        KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
        "false");
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION);
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndMultipleSettingsWrong() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, "true");
    // offset.migration is not set → fails; but since it defaults to skip,
    // include.connector.name is not required.
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION)
        .hasMessageContaining(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION)
        .hasMessageContaining(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION);
  }

  @Test
  public void shouldPassWhenCompatValidateEnabledAndOffsetMigrationIsSkip() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, "skip");
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndOffsetMigrationNotSet() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.remove(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION);
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION)
        .hasMessageContaining("not explicitly set");
  }

  @Test
  public void shouldPassWhenCompatValidateEnabledAndOffsetMigrationStrict() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, "strict");
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldPassWhenCompatValidateEnabledAndOffsetMigrationBestEffort() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, "best_effort");
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldFailWhenCompatValidateEnabledAndIncludeConnectorNameNotSet() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.remove(
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME);
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME)
        .hasMessageContaining("not explicitly set");
  }

  @Test
  public void shouldPassWhenCompatValidateEnabledAndSkipWithoutIncludeConnectorName() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, "skip");
    config.remove(
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME);
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldPassWhenCompatValidateEnabledAndIncludeConnectorNameTrue() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME,
        "true");
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldPassWhenCompatValidateDisabled() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(false)
            .build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldPassWhenCompatValidateDefaultAndAllCompatSettingsCorrect() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().withV3CompatibilitySettings().build();
    config.remove(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC);
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void shouldFailWhenCompatValidateDefaultAndNoCompatSettings() {
    Map<String, String> config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    config.remove(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC);
    // offset.migration is not set → fails; but since it defaults to skip,
    // include.connector.name is not required.
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION)
        .hasMessageContaining(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION)
        .hasMessageContaining(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION);
  }

  @Test
  public void testOAuthAuthenticator() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withOauthRefreshToken("refresh_token")
            .withoutPrivateKey()
            .build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void testOAuthWithoutRefreshToken_clientCredentialsGrant() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withoutPrivateKey()
            .build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void testOAuthWithTokenEndpoint() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withOauthRefreshToken("refresh_token")
            .withOauthTokenEndpoint("https://login.example.com/oauth2/v2.0/token")
            .withoutPrivateKey()
            .build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  @Test
  public void testInvalidAuthenticator() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator("invalid_authenticator")
            .build();
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR);
  }

  @Test
  public void testOAuthEmptyClientId() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientSecret("client_secret")
            .withoutPrivateKey()
            .build();
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID);
  }

  @Test
  public void testOAuthEmptyClientSecret() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withoutPrivateKey()
            .build();
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET);
  }

  @Test
  public void testOAuthDoesNotRequirePrivateKey() {
    Map<String, String> config =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withOauthRefreshToken("refresh_token")
            .withoutPrivateKey()
            .build();
    assertThatCode(() -> connectorConfigValidator.validateConfig(config))
        .doesNotThrowAnyException();
  }

  /**
   * Removes the given parameters from the default test configuration and verifies that
   * validation fails with a message mentioning every removed parameter.
   *
   * @param paramsToRemove configuration keys to drop before validating
   */
  private void invalidConfigRunner(List<String> paramsToRemove) {
    Map<String, String> config = getConfig();
    for (String configParam : paramsToRemove) {
      config.remove(configParam);
    }
    try {
      connectorConfigValidator.validateConfig(config);
    } catch (SnowflakeKafkaConnectorException exception) {
      for (String configParam : paramsToRemove) {
        // explicit JUnit assertion: unlike the `assert` keyword it does not depend on -ea
        Assert.assertTrue(
            "Validation message should mention removed parameter: " + configParam,
            exception.getMessage().contains(configParam));
      }
      return;
    }
    // Reaching this point means validation accepted an incomplete configuration.
    Assert.fail("Expected SnowflakeKafkaConnectorException after removing " + paramsToRemove);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectorIT.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_PASSWORD;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_USERNAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME;
import static
com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME; import static com.snowflake.kafka.connector.Utils.TASK_ID; import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME; import static com.snowflake.kafka.connector.internal.TestUtils.transformProfileFileToConnectorConfiguration; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.SnowflakeDataSourceFactory; import java.util.*; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.kafka.common.config.Config; import org.apache.kafka.common.config.ConfigValue; import org.junit.Test; public class ConnectorIT { static final String[] allPropertiesList = { SNOWFLAKE_URL_NAME, SNOWFLAKE_USER_NAME, SNOWFLAKE_SCHEMA_NAME, SNOWFLAKE_DATABASE_NAME, SNOWFLAKE_METADATA_ALL, SNOWFLAKE_METADATA_TOPIC, SNOWFLAKE_METADATA_OFFSET_AND_PARTITION, SNOWFLAKE_METADATA_CREATETIME, SNOWFLAKE_TOPICS2TABLE_MAP, SNOWFLAKE_PRIVATE_KEY, JVM_PROXY_PORT, JVM_PROXY_HOST, SNOWFLAKE_PRIVATE_KEY_PASSPHRASE }; static final Set 
allProperties = new HashSet<>(Arrays.asList(allPropertiesList)); private static void assertPropHasError( final Map validateMap, final String[] propArray) { List propList = Arrays.asList(propArray); for (String prop : allProperties) { if (propList.contains(prop)) { assert !validateMap.get(prop).errorMessages().isEmpty(); } else { assert validateMap.get(prop).errorMessages().isEmpty(); } } } private static Map toValidateMap(final Map config) { SnowflakeStreamingSinkConnector sinkConnector = new SnowflakeStreamingSinkConnector(); Config result = sinkConnector.validate(config); return Utils.validateConfigToMap(result); } static Map getEmptyConfig() { Map config = new HashMap<>(); return config; } static Map getCorrectConfig() { Map config = transformProfileFileToConnectorConfiguration(false); config.remove(SnowflakeDataSourceFactory.SF_WAREHOUSE); config.remove(KafkaConnectorConfigParams.NAME); config.remove(TASK_ID); return config; } @Test public void testValidateErrorConfigImproved() { // Given: a configuration with intentionally invalid values Map config = new HashMap<>(); config.put(SNOWFLAKE_URL_NAME, ""); config.put(SNOWFLAKE_USER_NAME, ""); config.put(SNOWFLAKE_DATABASE_NAME, ""); config.put(SNOWFLAKE_PRIVATE_KEY, ""); config.put(SNOWFLAKE_ROLE_NAME, ""); config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, ""); config.put(SNOWFLAKE_SCHEMA_NAME, ""); config.put(SNOWFLAKE_METADATA_TOPIC, "falseee"); config.put(SNOWFLAKE_METADATA_OFFSET_AND_PARTITION, "falseee"); config.put(SNOWFLAKE_METADATA_CREATETIME, "falseee"); config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "jfsja,,"); Map validateMap = toValidateMap(config); // Optional properties that should NOT have errors (even when empty or missing) Set optionalProperties = new HashSet<>( Arrays.asList( SNOWFLAKE_PRIVATE_KEY, JVM_PROXY_PORT, JVM_PROXY_HOST, SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, SNOWFLAKE_METADATA_ALL)); // Required properties or properties with format validation that SHOULD have errors Set requiredOrValidatedProperties = 
new HashSet<>( Arrays.asList( SNOWFLAKE_URL_NAME, // empty string - required SNOWFLAKE_USER_NAME, // empty string - required SNOWFLAKE_DATABASE_NAME, // empty string - required SNOWFLAKE_SCHEMA_NAME, // empty string - required SNOWFLAKE_METADATA_TOPIC, // invalid boolean "falseee" KafkaConnectorConfigParams .SNOWFLAKE_METADATA_OFFSET_AND_PARTITION, // invalid boolean "falseee" KafkaConnectorConfigParams .SNOWFLAKE_METADATA_CREATETIME, // invalid boolean "falseee" SNOWFLAKE_TOPICS2TABLE_MAP // invalid format "jfsja,," )); // Assert: optional properties should have NO errors for (String optionalProp : optionalProperties) { ConfigValue configValue = validateMap.get(optionalProp); assert configValue != null : String.format("Property '%s' not found in validation results", optionalProp); assert configValue.errorMessages().isEmpty() : String.format( "Optional property '%s' should not have errors, but has: %s", optionalProp, configValue.errorMessages()); } // Assert: required/validated properties SHOULD have errors for (String requiredProp : requiredOrValidatedProperties) { ConfigValue configValue = validateMap.get(requiredProp); assert configValue != null : String.format("Property '%s' not found in validation results", requiredProp); assert !configValue.errorMessages().isEmpty() : String.format( "Required/validated property '%s' should have validation errors but has none. 
" + "Current value: '%s'", requiredProp, configValue.value()); } } @Test public void testValidateEmptyConfig() { Map validateMap = toValidateMap(getEmptyConfig()); assertPropHasError( validateMap, new String[] { SNOWFLAKE_USER_NAME, SNOWFLAKE_URL_NAME, SNOWFLAKE_SCHEMA_NAME, SNOWFLAKE_DATABASE_NAME, }); } @Test public void testValidateCorrectConfig() { Map validateMap = toValidateMap(getCorrectConfig()); assertPropHasError(validateMap, new String[] {}); } @Test public void testValidateErrorURLFormatConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_URL_NAME, "https://google.com"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {SNOWFLAKE_URL_NAME}); } @Test public void testValidateErrorURLAccountConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_URL_NAME, "wronggAccountt.snowflakecomputing.com:443"); Map validateMap = toValidateMap(config); assertPropHasError( validateMap, new String[] {SNOWFLAKE_USER_NAME, SNOWFLAKE_URL_NAME, SNOWFLAKE_PRIVATE_KEY}); } @Test public void testValidateErrorUserConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_USER_NAME, "wrongUser"); Map validateMap = toValidateMap(config); assertPropHasError( validateMap, new String[] {SNOWFLAKE_USER_NAME, SNOWFLAKE_URL_NAME, SNOWFLAKE_PRIVATE_KEY}); } @Test public void testValidateErrorPasswordConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY, "wrongPassword"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY}); } @Test public void testValidateEmptyPasswordConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY, ""); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY}); } @Test public void testValidateNullPasswordConfig() { Map config = getCorrectConfig(); config.remove(SNOWFLAKE_PRIVATE_KEY); Map validateMap = toValidateMap(config); 
assertPropHasError(validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY}); } @Test public void testValidateFilePasswordConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY, " ${file:/"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {}); } @Test public void testValidateConfigProviderPasswordConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY, " ${configProvider:/"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {}); } @Test public void testValidateFilePassphraseConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, " ${file:/"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {}); } @Test public void testValidateConfigProviderPassphraseConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, " ${configProvider:/"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {}); } @Test public void testValidateErrorPassphraseConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, "wrongPassphrase"); Map validateMap = toValidateMap(config); assertPropHasError( validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY, SNOWFLAKE_PRIVATE_KEY_PASSPHRASE}); } @Test public void testValidateErrorDatabaseConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_DATABASE_NAME, "wrongDatabase"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {SNOWFLAKE_DATABASE_NAME}); } @Test public void testValidateErrorSchemaConfig() { Map config = getCorrectConfig(); config.put(SNOWFLAKE_SCHEMA_NAME, "wrongSchema"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {SNOWFLAKE_SCHEMA_NAME}); } @Test public void testErrorProxyHostConfig() { Map config = getCorrectConfig(); config.put(JVM_PROXY_HOST, "localhost"); Map validateMap = 
toValidateMap(config); assertPropHasError(validateMap, new String[] {JVM_PROXY_HOST, JVM_PROXY_PORT}); } @Test public void testErrorProxyPortConfig() { Map config = getCorrectConfig(); config.put(JVM_PROXY_PORT, "8080"); Map validateMap = toValidateMap(config); assertPropHasError(validateMap, new String[] {JVM_PROXY_HOST, JVM_PROXY_PORT}); } @Test public void testProxyHostPortConfig() { Map config = getCorrectConfig(); config.put(JVM_PROXY_HOST, "localhost"); config.put(JVM_PROXY_PORT, "8080"); Utils.validateProxySettings(config); } @Test public void testErrorProxyUsernameConfig() { Map config = getCorrectConfig(); config.put(JVM_PROXY_HOST, "localhost"); config.put(JVM_PROXY_PORT, "8080"); config.put(JVM_PROXY_USERNAME, "user"); Map invalidConfigs = Utils.validateProxySettings(config); assert invalidConfigs.containsKey(JVM_PROXY_USERNAME); } @Test public void testErrorProxyPasswordConfig() { Map config = getCorrectConfig(); config.put(JVM_PROXY_HOST, "localhost"); config.put(JVM_PROXY_PORT, "8080"); config.put(JVM_PROXY_PASSWORD, "pass"); Map invalidConfigs = Utils.validateProxySettings(config); assert invalidConfigs.containsKey(JVM_PROXY_PASSWORD); } @Test public void testProxyUsernamePasswordConfig() { Map config = getCorrectConfig(); config.put(JVM_PROXY_HOST, "localhost"); config.put(JVM_PROXY_PORT, "3128"); config.put(JVM_PROXY_USERNAME, "admin"); config.put(JVM_PROXY_PASSWORD, "test"); Utils.validateProxySettings(config); } @Test public void testConnectorComprehensive() { Map config = transformProfileFileToConnectorConfiguration(false); SnowflakeStreamingSinkConnector sinkConnector = new SnowflakeStreamingSinkConnector(); sinkConnector.start(config); assert sinkConnector.taskClass().equals(SnowflakeSinkTask.class); List> taskConfigs = sinkConnector.taskConfigs(2); assert taskConfigs.get(0).get(TASK_ID).equals("0"); assert taskConfigs.get(0).get(NAME).equals(TEST_CONNECTOR_NAME); assert taskConfigs.get(1).get(TASK_ID).equals("1"); sinkConnector.stop(); assert 
sinkConnector.version().equals(Utils.VERSION); } @Test public void testConnectorComprehensiveNegative() throws Exception { Map config = transformProfileFileToConnectorConfiguration(false); SnowflakeStreamingSinkConnector sinkConnector = new SnowflakeStreamingSinkConnector(); ExecutorService testThread = Executors.newSingleThreadExecutor(); testThread.submit( () -> { // After 10 minutes this thread will throw error. 10 minutes is too long // for this test, so kill the thread after 6 seconds, which should have // covered enough lines. sinkConnector.taskConfigs(2); }); Thread.sleep(6000); testThread.shutdownNow(); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/InjectQueryRunner.java ================================================ package com.snowflake.kafka.connector; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** * Annotation to mark fields and parameters for QueryRunner injection. Use with {@link * InjectQueryRunnerExtension}. 
*/ @Target({ElementType.FIELD, ElementType.PARAMETER}) @Retention(RetentionPolicy.RUNTIME) public @interface InjectQueryRunner {} ================================================ FILE: src/test/java/com/snowflake/kafka/connector/InjectQueryRunnerExtension.java ================================================ package com.snowflake.kafka.connector; import com.snowflake.kafka.connector.internal.SnowflakeDataSourceFactory; import java.lang.reflect.Field; import org.apache.commons.dbutils.QueryRunner; import org.junit.jupiter.api.extension.BeforeEachCallback; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.api.extension.ParameterContext; import org.junit.jupiter.api.extension.ParameterResolver;

/**
 * JUnit 5 extension that supplies {@link QueryRunner} instances built on {@link
 * SnowflakeDataSourceFactory#get()}.
 *
 * <p>Before each test, every field declared directly on the test class and annotated with {@link
 * InjectQueryRunner} receives a new runner; for non-static nested test classes the enclosing
 * instances are processed the same way. Parameters annotated with {@link InjectQueryRunner} and
 * typed as {@link QueryRunner} are resolved too.
 */
public class InjectQueryRunnerExtension implements BeforeEachCallback, ParameterResolver {

  /** Populates annotated fields on the current test instance before each test method. */
  @Override
  public void beforeEach(final ExtensionContext context) throws Exception {
    final Object instance = context.getRequiredTestInstance();
    injectFields(instance, instance.getClass());
  }

  /**
   * Assigns a new runner to each annotated field declared on {@code testClass}, then repeats the
   * process for the enclosing instance when one can be reached.
   */
  private void injectFields(final Object testInstance, Class<?> testClass) throws Exception {
    for (final Field candidate : testClass.getDeclaredFields()) {
      if (!candidate.isAnnotationPresent(InjectQueryRunner.class)) {
        continue;
      }
      candidate.setAccessible(true);
      candidate.set(testInstance, getQueryRunner());
    }

    final Class<?> outerClass = testClass.getEnclosingClass();
    if (outerClass == null) {
      // Top-level class: nothing further to walk.
      return;
    }

    final Field outerReference = getEnclosingInstanceField(testClass);
    if (outerReference == null) {
      // No reference to an outer instance was found on this class.
      return;
    }

    outerReference.setAccessible(true);
    final Object outerInstance = outerReference.get(testInstance);
    if (outerInstance != null) {
      injectFields(outerInstance, outerClass);
    }
  }

  /**
   * Looks up the compiler-generated {@code this$N} field that links an inner class instance to
   * its enclosing instance.
   *
   * @return the first matching synthetic field, or {@code null} when none exists or the lookup
   *     fails
   */
  private Field getEnclosingInstanceField(final Class<?> innerClass) {
    Field match = null;
    try {
      final Field[] declared = innerClass.getDeclaredFields();
      for (int i = 0; i < declared.length && match == null; i++) {
        final Field field = declared[i];
        if (field.isSynthetic() && field.getName().startsWith("this$")) {
          match = field;
        }
      }
    } catch (final Exception e) {
      // Best effort: a failed lookup is treated the same as "no enclosing instance".
    }
    return match;
  }

  /** Accepts only parameters that carry the annotation and are declared as QueryRunner. */
  @Override
  public boolean supportsParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    if (!parameterContext.getParameter().isAnnotationPresent(InjectQueryRunner.class)) {
      return false;
    }
    return parameterContext.getParameter().getType().equals(QueryRunner.class);
  }

  /** Returns a new runner for a supported parameter. */
  @Override
  public Object resolveParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    return getQueryRunner();
  }

  /** Creates a runner over the data source returned by the factory. */
  private QueryRunner getQueryRunner() {
    return new QueryRunner(SnowflakeDataSourceFactory.get());
  }
}
================================================ FILE: src/test/java/com/snowflake/kafka/connector/InjectSnowflakeDataSource.java ================================================ package com.snowflake.kafka.connector; import java.lang.annotation.ElementType; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** * Annotation to mark fields and parameters for DataSource injection. Use with {@link * InjectSnowflakeDataSourceExtension}.
*/ @Target({ElementType.FIELD, ElementType.PARAMETER}) @Retention(RetentionPolicy.RUNTIME) public @interface InjectSnowflakeDataSource {} ================================================ FILE: src/test/java/com/snowflake/kafka/connector/InjectSnowflakeDataSourceExtension.java ================================================ package com.snowflake.kafka.connector; import com.snowflake.kafka.connector.internal.SnowflakeDataSourceFactory; import java.lang.reflect.Field; import javax.sql.DataSource; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.extension.BeforeEachCallback; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.api.extension.ParameterContext; import org.junit.jupiter.api.extension.ParameterResolver;

/**
 * JUnit 5 extension that supplies the {@link DataSource} returned by {@link
 * SnowflakeDataSourceFactory#get()}.
 *
 * <p>Before each test, every field declared directly on the test class and annotated with {@link
 * InjectSnowflakeDataSource} is assigned that data source; for non-static nested test classes the
 * enclosing instances are processed the same way. Parameters annotated with {@link
 * InjectSnowflakeDataSource} and typed as {@link DataSource} are resolved too.
 */
@Order(1)
public class InjectSnowflakeDataSourceExtension implements BeforeEachCallback, ParameterResolver {

  /** Populates annotated fields on the current test instance before each test method. */
  @Override
  public void beforeEach(final ExtensionContext context) throws Exception {
    final Object instance = context.getRequiredTestInstance();
    injectFields(instance, instance.getClass());
  }

  /**
   * Assigns the data source to each annotated field declared on {@code testClass}, then repeats
   * the process for the enclosing instance when one can be reached.
   */
  private void injectFields(final Object testInstance, Class<?> testClass) throws Exception {
    for (final Field candidate : testClass.getDeclaredFields()) {
      if (!candidate.isAnnotationPresent(InjectSnowflakeDataSource.class)) {
        continue;
      }
      candidate.setAccessible(true);
      candidate.set(testInstance, SnowflakeDataSourceFactory.get());
    }

    final Class<?> outerClass = testClass.getEnclosingClass();
    if (outerClass == null) {
      // Top-level class: nothing further to walk.
      return;
    }

    final Field outerReference = getEnclosingInstanceField(testClass);
    if (outerReference == null) {
      // No reference to an outer instance was found on this class.
      return;
    }

    outerReference.setAccessible(true);
    final Object outerInstance = outerReference.get(testInstance);
    if (outerInstance != null) {
      injectFields(outerInstance, outerClass);
    }
  }

  /**
   * Looks up the compiler-generated {@code this$N} field that links an inner class instance to
   * its enclosing instance.
   *
   * @return the first matching synthetic field, or {@code null} when none exists or the lookup
   *     fails
   */
  private Field getEnclosingInstanceField(final Class<?> innerClass) {
    Field match = null;
    try {
      final Field[] declared = innerClass.getDeclaredFields();
      for (int i = 0; i < declared.length && match == null; i++) {
        final Field field = declared[i];
        if (field.isSynthetic() && field.getName().startsWith("this$")) {
          match = field;
        }
      }
    } catch (final Exception e) {
      // Best effort: a failed lookup is treated the same as "no enclosing instance".
    }
    return match;
  }

  /** Accepts only parameters that carry the annotation and are declared as DataSource. */
  @Override
  public boolean supportsParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    if (!parameterContext.getParameter().isAnnotationPresent(InjectSnowflakeDataSource.class)) {
      return false;
    }
    return parameterContext.getParameter().getType().equals(DataSource.class);
  }

  /** Returns the factory's data source for a supported parameter. */
  @Override
  public Object resolveParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    return SnowflakeDataSourceFactory.get();
  }
}
================================================ FILE: src/test/java/com/snowflake/kafka/connector/LegacySchemaToggleIT.java ================================================ package com.snowflake.kafka.connector; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.streaming.FakeSnowflakeStreamingIngestChannel; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory; import java.time.Duration; import java.util.Map; import org.apache.commons.dbutils.QueryRunner; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @ExtendWith({InjectSnowflakeDataSourceExtension.class, InjectQueryRunnerExtension.class}) public class LegacySchemaToggleIT extends ConnectClusterBaseIT { private static final int PARTITION_COUNT = 1; private static final String RECORD_CONTENT = "RECORD_CONTENT"; private static final String RECORD_METADATA = "RECORD_METADATA"; private String topicName;
private String connectorName; private ObjectMapper objectMapper = new ObjectMapper(); @InjectQueryRunner private QueryRunner queryRunner; @BeforeEach void before() { topicName = TestUtils.randomTableName(); connectorName = String.format("%s_connector", topicName); connectCluster.kafka().createTopic(topicName, PARTITION_COUNT); TestUtils.createTableWithMetadataColumn(topicName); StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier); } @AfterEach void after() { connectCluster.kafka().deleteTopic(topicName); connectCluster.deleteConnector(connectorName); StreamingClientFactory.resetStreamingClientSupplier(); TestUtils.dropTable(topicName); TestUtils.dropPipe(topicName + "-STREAMING"); } @Test void test_legacyMode_jsonConverter_wrapsInRecordContent() throws Exception { final Map config = defaultProperties(topicName, connectorName); config.put("snowflake.enable.schematization", "false"); config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side"); connectCluster.configureConnector(connectorName, config); waitForConnectorRunning(connectorName); waitForOpenedFakeIngestClient(connectorName); connectCluster .kafka() .produce(topicName, objectMapper.writeValueAsString(Map.of("city", "Portland", "age", 25))); await() .timeout(Duration.ofSeconds(30)) .pollInterval(Duration.ofSeconds(1)) .untilAsserted( () -> { assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()) .isEqualTo(1); FakeSnowflakeStreamingIngestChannel channel = getOpenedFakeIngestClient(connectorName).getOpenedChannels().get(0); final Map row = channel.getAppendedRows().get(0); assertThat(row).containsKeys(RECORD_METADATA, RECORD_CONTENT); assertThat(row.get(RECORD_CONTENT)).isInstanceOf(Map.class); @SuppressWarnings("unchecked") Map contentMap = (Map) row.get(RECORD_CONTENT); assertThat(contentMap).containsEntry("city", "Portland"); assertThat(contentMap).containsEntry("age", 25L); }); } @Test void test_legacyMode_stringConverter_wrapsInRecordContent() 
{ final Map config = defaultProperties(topicName, connectorName); config.put("snowflake.enable.schematization", "false"); config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side"); config.put("value.converter", "org.apache.kafka.connect.storage.StringConverter"); connectCluster.configureConnector(connectorName, config); waitForConnectorRunning(connectorName); waitForOpenedFakeIngestClient(connectorName); connectCluster.kafka().produce(topicName, "raw string payload"); await() .timeout(Duration.ofSeconds(30)) .pollInterval(Duration.ofSeconds(1)) .untilAsserted( () -> { assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()) .isEqualTo(1); FakeSnowflakeStreamingIngestChannel channel = getOpenedFakeIngestClient(connectorName).getOpenedChannels().get(0); final Map row = channel.getAppendedRows().get(0); assertThat(row).containsKeys(RECORD_METADATA, RECORD_CONTENT); assertThat(row.get(RECORD_CONTENT)).isEqualTo("raw string payload"); }); } @Test void test_legacyMode_defaultSchematization_doesNotWrap() throws Exception { final Map config = defaultProperties(topicName, connectorName); config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side"); config.put( Constants.KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, "false"); connectCluster.configureConnector(connectorName, config); waitForConnectorRunning(connectorName); waitForOpenedFakeIngestClient(connectorName); connectCluster .kafka() .produce(topicName, objectMapper.writeValueAsString(Map.of("city", "Portland"))); await() .timeout(Duration.ofSeconds(30)) .pollInterval(Duration.ofSeconds(1)) .untilAsserted( () -> { assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()) .isEqualTo(1); FakeSnowflakeStreamingIngestChannel channel = getOpenedFakeIngestClient(connectorName).getOpenedChannels().get(0); final Map row = channel.getAppendedRows().get(0); assertThat(row).containsKey("city"); 
assertThat(row).doesNotContainKey(RECORD_CONTENT); }); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/SchemaEvolutionAvroSrIT.java ================================================ package com.snowflake.kafka.connector; import static com.snowflake.kafka.connector.internal.TestUtils.assertTableColumnCount; import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry; import com.snowflake.kafka.connector.internal.TestUtils; import io.confluent.connect.avro.AvroConverter; import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry; import io.confluent.kafka.serializers.KafkaAvroSerializer; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Map; import java.util.Properties; import org.apache.avro.Conversions; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; import org.apache.kafka.common.serialization.StringSerializer; import org.apache.kafka.connect.runtime.ConnectorConfig; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** * Integration test for schema evolution using Avro with Schema Registry. Tests that the table is * updated with correct column types when records with different Avro schemas are sent from multiple * topics. 
*/ class SchemaEvolutionAvroSrIT extends SchemaEvolutionBase { private static final String MOCK_SCHEMA_REGISTRY_URL = "mock://test-schema-registry"; private static final String PERFORMANCE_STRING = "PERFORMANCE_STRING"; private static final String PERFORMANCE_CHAR = "PERFORMANCE_CHAR"; private static final String RATING_INT = "RATING_INT"; private static final String RATING_DOUBLE = "RATING_DOUBLE"; private static final String APPROVAL = "APPROVAL"; private static final String TIME_MILLIS = "TIME_MILLIS"; private static final String TIMESTAMP_MILLIS = "TIMESTAMP_MILLIS"; private static final String DATE = "DATE"; private static final String DECIMAL = "DECIMAL"; private static final String SOME_FLOAT_NAN = "SOME_FLOAT_NAN"; private static final String RECORD_METADATA = "RECORD_METADATA"; private static final Map EXPECTED_SCHEMA = new HashMap(); static { EXPECTED_SCHEMA.put(PERFORMANCE_STRING, "VARCHAR"); EXPECTED_SCHEMA.put(PERFORMANCE_CHAR, "VARCHAR"); EXPECTED_SCHEMA.put(RATING_INT, "NUMBER"); EXPECTED_SCHEMA.put(RATING_DOUBLE, "FLOAT"); EXPECTED_SCHEMA.put(APPROVAL, "BOOLEAN"); EXPECTED_SCHEMA.put(SOME_FLOAT_NAN, "FLOAT"); EXPECTED_SCHEMA.put(TIME_MILLIS, "TIME"); EXPECTED_SCHEMA.put(TIMESTAMP_MILLIS, "TIMESTAMP"); EXPECTED_SCHEMA.put(DATE, "DATE"); EXPECTED_SCHEMA.put(DECIMAL, "VARCHAR"); EXPECTED_SCHEMA.put(RECORD_METADATA, "VARIANT"); } private static final String VALUE_SCHEMA_0 = "{\"type\": \"record\",\"name\": \"value_schema_0\",\"fields\": [ {\"name\":" + " \"PERFORMANCE_CHAR\", \"type\": \"string\"}, {\"name\": \"PERFORMANCE_STRING\"," + " \"type\": \"string\"}," + " {\"name\":\"TIME_MILLIS\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}}," + "{\"name\":\"DATE\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"DECIMAL\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\"," + " \"precision\":4, \"scale\":2}}," + "{\"name\":\"TIMESTAMP_MILLIS\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}}," + " 
{\"name\": \"RATING_INT\", \"type\": \"int\"}]}"; private static final String VALUE_SCHEMA_1 = "{" + "\"type\": \"record\"," + "\"name\": \"value_schema_1\"," + "\"fields\": [" + " {\"name\": \"RATING_DOUBLE\", \"type\": \"float\"}," + " {\"name\": \"PERFORMANCE_STRING\", \"type\": \"string\"}," + " {\"name\": \"APPROVAL\", \"type\": \"boolean\"}," + " {\"name\": \"SOME_FLOAT_NAN\", \"type\": \"float\"}" + "]" + "}"; private static final String SCHEMA_REGISTRY_SCOPE = "test-schema-registry"; private static final int COL_NUM = 11; private KafkaProducer avroProducer; @BeforeEach void beforeEach() { avroProducer = createAvroProducer(); } @AfterEach void afterEach() { if (avroProducer != null) { avroProducer.close(); } MockSchemaRegistry.dropScope(SCHEMA_REGISTRY_SCOPE); } @Test void testSchemaEvolutionWithMultipleTopicsAndAvroSr() throws Exception { // given final Map config = createConnectorConfig(); config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, AvroConverter.class.getName()); config.put("value.converter.schema.registry.url", MOCK_SCHEMA_REGISTRY_URL); connectCluster.configureConnector(connectorName, config); waitForConnectorRunning(connectorName); // when sendRecordsToTopic0(); sendRecordsToTopic1(); // then final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT; assertWithRetry(() -> snowflake.tableExist(tableName)); assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == expectedTotalRecords); assertTableColumnCount(tableName, COL_NUM); TestUtils.checkTableSchema(tableName, EXPECTED_SCHEMA); } private KafkaProducer createAvroProducer() { final Properties props = new Properties(); props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, connectCluster.kafka().bootstrapServers()); props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class.getName()); props.put("schema.registry.url", MOCK_SCHEMA_REGISTRY_URL); return new 
KafkaProducer<>(props, new StringSerializer(), createAvroSerializer()); } private KafkaAvroSerializer createAvroSerializer() { final SchemaRegistryClient schemaRegistryClient = MockSchemaRegistry.getClientForScope(SCHEMA_REGISTRY_SCOPE); final KafkaAvroSerializer serializer = new KafkaAvroSerializer(schemaRegistryClient); serializer.configure(Map.of("schema.registry.url", MOCK_SCHEMA_REGISTRY_URL), false); return serializer; } private void sendRecordsToTopic0() { final Schema schema = new Schema.Parser().parse(VALUE_SCHEMA_0); for (int i = 0; i < RECORD_COUNT; i++) { final GenericRecord record = createTopic0Record(schema); avroProducer.send(new ProducerRecord<>(topic0, "key-" + i, record)); } avroProducer.flush(); } private void sendRecordsToTopic1() { final Schema schema = new Schema.Parser().parse(VALUE_SCHEMA_1); for (int i = 0; i < RECORD_COUNT; i++) { final GenericRecord record = createTopic1Record(schema); avroProducer.send(new ProducerRecord<>(topic1, "key-" + i, record)); } avroProducer.flush(); }

  /**
   * Builds one record for the first topic, filling every field the test sends on that topic.
   *
   * <p>The decimal field is encoded as Avro bytes: the literal {@code 0.03125} is rounded
   * half-up to the scale declared by the field's decimal logical type and then converted with
   * {@link Conversions.DecimalConversion}.
   *
   * @param schema record schema; its {@code DECIMAL} field is read here and cast to a decimal
   *     logical type
   * @return a populated record bound to {@code schema}
   */
  private GenericRecord createTopic0Record(final Schema schema) {
    Schema decimalSchema = schema.getField(DECIMAL).schema();
    LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) decimalSchema.getLogicalType();
    BigDecimal value = new BigDecimal("0.03125");
    // setScale(int, int) and BigDecimal.ROUND_HALF_UP are deprecated; the RoundingMode overload
    // is the supported equivalent. Fully qualified so the file's import list stays untouched.
    BigDecimal scaledValue =
        value.setScale(decimalType.getScale(), java.math.RoundingMode.HALF_UP);
    ByteBuffer byteBuffer =
        new Conversions.DecimalConversion().toBytes(scaledValue, decimalSchema, decimalType);

    final GenericRecord record = new GenericData.Record(schema);
    record.put(PERFORMANCE_STRING, "Excellent");
    record.put(PERFORMANCE_CHAR, "A");
    record.put(RATING_INT, 100);
    record.put(TIME_MILLIS, 10);
    record.put(TIMESTAMP_MILLIS, 12);
    record.put(DECIMAL, byteBuffer);
    record.put(DATE, 11);
    return record;
  }
private GenericRecord createTopic1Record(final Schema schema) { final GenericRecord record = new GenericData.Record(schema); record.put(PERFORMANCE_STRING, "Excellent"); record.put(RATING_DOUBLE, 0.99f);
record.put(APPROVAL, true);
    record.put(SOME_FLOAT_NAN, Float.NaN);
    return record;
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/SchemaEvolutionBase.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.util.Map;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.storage.StringConverter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;

/**
 * Shared fixture for schema evolution integration tests: two topics that are both mapped onto one
 * randomly named table, plus a connection service for running queries against Snowflake.
 */
abstract class SchemaEvolutionBase extends ConnectClusterBaseIT {

  static final int PARTITION_COUNT = 1;
  static final int RECORD_COUNT = 100;
  static final int TOPIC_COUNT = 2;

  final ObjectMapper objectMapper = new ObjectMapper();

  String tableName;
  String connectorName;
  String topic0;
  String topic1;
  SnowflakeConnectionService snowflake;

  /** Creates fresh topic/table/connector names, both topics and the Snowflake connection. */
  @BeforeEach
  void before() {
    tableName = TestUtils.randomTableName();
    connectorName = tableName + "_connector";
    topic0 = tableName + "0";
    topic1 = tableName + "1";

    connectCluster.kafka().createTopic(topic0, PARTITION_COUNT);
    connectCluster.kafka().createTopic(topic1, PARTITION_COUNT);

    // existence caches are switched off for the connection used by the tests themselves
    Map connectionProps = TestUtils.transformProfileFileToConnectorConfiguration(false);
    connectionProps.put(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, "false");
    connectionProps.put(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, "false");
    snowflake =
        SnowflakeConnectionServiceFactory.builder().setProperties(connectionProps).build();

    StreamingClientFactory.resetStreamingClientSupplier();
  }

  /** Removes both topics, the connector and the table created for the test. */
  @AfterEach
  void after() {
    connectCluster.kafka().deleteTopic(topic0);
    connectCluster.kafka().deleteTopic(topic1);
    connectCluster.deleteConnector(connectorName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(tableName);
  }

  /**
   * Builds the connector configuration shared by the schema evolution tests.
   *
   * @return default connector properties for both topics, with both topics mapped to {@code
   *     tableName}, a String key converter, DLQ settings and client-side validation
   */
  Map createConnectorConfig() {
    final String topicList = String.join(",", topic0, topic1);
    final String topicToTableMapping =
        String.format("%s:%s,%s:%s", topic0, tableName, topic1, tableName);

    final Map props = defaultProperties(topicList, connectorName);
    props.put(SNOWFLAKE_TOPICS2TABLE_MAP, topicToTableMapping);
    props.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
    props.put("value.converter.schemas.enable", "false");
    props.put("errors.tolerance", "none");
    props.put("errors.log.enable", "true");
    props.put("errors.deadletterqueue.topic.name", "DLQ_TOPIC");
    props.put("errors.deadletterqueue.topic.replication.factor", "1");
    props.put("jmx", "true");
    // Schema evolution type inference tests depend on client-side validation behavior
    props.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "client_side");
    return props;
  }

  /** Produces a single record with a null value (a tombstone) to the given topic. */
  void sendTombstoneRecords(final String topic) {
    // Send null tombstone
    connectCluster.kafka().produce(topic, null);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/SchemaEvolutionJsonIT.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;
import static com.snowflake.kafka.connector.internal.TestUtils.assertColumnNullable;
import static com.snowflake.kafka.connector.internal.TestUtils.assertTableColumnCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.node.ObjectNode;
import
com.snowflake.kafka.connector.internal.TestUtils;
import java.util.Map;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.junit.jupiter.api.Test;

/**
 * Schema evolution integration tests that feed schemaless JSON records through the {@link
 * JsonConverter} and verify the resulting table columns in Snowflake.
 */
class SchemaEvolutionJsonIT extends SchemaEvolutionBase {

  @Test
  void testSchemaEvolutionWithMultipleTopics() throws Exception {
    // two topics write to the same table. Each topic sends unique set of columns. Test that after
    // ingestion all expected columns are present in the database

    // given
    final Map config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // when
    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    // then
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT + 2; // + 2 tombstone records
    makeCommonAssertions(expectedTotalRecords);
  }

  @Test
  void testSchemaEvolutionIgnoreTombstone() throws Exception {
    // given
    final Map config = createConnectorConfig();
    config.put("behavior.on.null.values", "IGNORE");
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // when
    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    // then - tombstones are ignored, so only the regular records are counted
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT;
    makeCommonAssertions(expectedTotalRecords);
  }

  @Test
  void removeNotNullConstraint() throws Exception {
    // test that schema evolution is able to remove NON NULL constraint from the column

    // given
    final Map config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

    // COL1 has not null constraint
    snowflake.executeQueryWithParameters(
        "CREATE OR REPLACE TABLE "
            + tableName
            + " (RECORD_METADATA VARIANT, COL1 VARCHAR NOT NULL, COL2 VARCHAR)"
            + " ENABLE_SCHEMA_EVOLUTION = true");

    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    final ObjectNode fullRow = objectMapper.createObjectNode();
    fullRow.put("col1", "col1value");
    fullRow.put("col2", "col2value");

    // inserting normal non null columns
    connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(fullRow));

    // then
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == 1);
    assertTableColumnCount(tableName, 3);
    TestUtils.checkTableSchema(
        tableName,
        Map.of(
            "COL1", "VARCHAR",
            "COL2", "VARCHAR",
            "RECORD_METADATA", "VARIANT"));
    assertColumnNullable(tableName, "COL1", false);

    // col1 not initialized
    final ObjectNode rowWithNullValue = objectMapper.createObjectNode();
    rowWithNullValue.put("col2", "col2value");

    // now insert row with col1 == null
    connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(rowWithNullValue));
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == 2);

    // constraint has been removed
    assertColumnNullable(tableName, "COL1", true);
  }

  @Test
  void testSchemaEvolutionIgnoreTombstoneAfterSmt() throws Exception {
    // given
    final Map config = createConnectorConfig();
    config.put("behavior.on.null.values", "IGNORE");
    config.put("errors.tolerance", "all");
    config.put(
        SNOWFLAKE_TOPICS2TABLE_MAP,
        topic0 + ":" + tableName); // reading only from one topic for this test
    config.put("transforms", "extractField");
    config.put(
        "transforms.extractField.type", "org.apache.kafka.connect.transforms.ExtractField$Value");
    config.put("transforms.extractField.field", "optionalField");
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // produce records that should result in null value after SMT transformation
    for (int i = 0; i < RECORD_COUNT; i++) {
      final ObjectNode record = objectMapper.createObjectNode();
      record.put("PERFORMANCE_STRING", "Excellent");
      record.put("APPROVAL", true);
      connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(record));
    }

    // produce records that should result in non-null value after SMT transformation
    for (int i = 0; i < RECORD_COUNT; i++) {
      final ObjectNode record = objectMapper.createObjectNode();
      final ObjectNode optionalFieldValue = objectMapper.createObjectNode();
      optionalFieldValue.put("hasSomething", true);
      record.set("optionalField", optionalFieldValue);
      connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(record));
    }

    // then
    final int expectedTotalRecords =
        RECORD_COUNT; // not 2x, just half of the records produced should get into destination table
    assertWithRetry(() -> snowflake.tableExist(tableName));
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == expectedTotalRecords);
    assertTableColumnCount(tableName, 2);
    TestUtils.checkTableSchema(
        tableName,
        Map.of(
            "HASSOMETHING", "BOOLEAN",
            "RECORD_METADATA", "VARIANT"));
  }

  @Test
  void testSchemaEvolutionDropTable() throws Exception {
    // given
    final Map config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    // then
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT + 2; // +2 tombstone records
    makeCommonAssertions(expectedTotalRecords);

    // wait 10 secs to make sure precommit advances consumer group offset and
    // the connector does not reingest the same records after the restart
    // precommit frequency is decided by offset.flush.interval.ms parameter
    Thread.sleep(10000);

    TestUtils.dropTable(tableName);
    connectCluster.restartConnectorAndTasks(connectorName, false, true, false);
    waitForConnectorRunning(connectorName);

    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    makeCommonAssertions(expectedTotalRecords);
  }

  /** Produces {@code RECORD_COUNT} JSON records to topic0. */
  private void sendRecordsToTopic0() throws JsonProcessingException {
    // Record schema for topic 0: PERFORMANCE_STRING, RATING_INT
    for (int i = 0; i < RECORD_COUNT; i++) {
      connectCluster.kafka().produce(topic0, createTopic0Record());
    }
  }

  /** Produces {@code RECORD_COUNT} JSON records to topic1. */
  private void sendRecordsToTopic1() throws JsonProcessingException {
    // Record schema for topic 1: PERFORMANCE_STRING, RATING_DOUBLE, APPROVAL
    for (int i = 0; i < RECORD_COUNT; i++) {
      connectCluster.kafka().produce(topic1, createTopic1Record());
    }
  }

  /** Serializes a topic0 record ({@code PERFORMANCE_STRING}, {@code RATING_INT}) to JSON. */
  private String createTopic0Record() throws JsonProcessingException {
    final ObjectNode record = objectMapper.createObjectNode();
    record.put("PERFORMANCE_STRING", "Excellent");
    record.put("RATING_INT", 100);
    return objectMapper.writeValueAsString(record);
  }

  /**
   * Serializes a topic1 record ({@code PERFORMANCE_STRING}, {@code RATING_DOUBLE}, {@code
   * APPROVAL}) to JSON.
   */
  private String createTopic1Record() throws JsonProcessingException {
    final ObjectNode record = objectMapper.createObjectNode();
    record.put("PERFORMANCE_STRING", "Excellent");
    record.put("RATING_DOUBLE", 0.99);
    record.put("APPROVAL", true);
    return objectMapper.writeValueAsString(record);
  }

  /**
   * Verifies that the table exists, holds the expected number of rows and has the five columns
   * produced by merging the topic0 and topic1 record shapes.
   *
   * @param expectedTotalRecords number of rows the table must eventually contain
   */
  private void makeCommonAssertions(final int expectedTotalRecords) throws Exception {
    assertWithRetry(() -> snowflake.tableExist(tableName));
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == expectedTotalRecords);
    assertTableColumnCount(tableName, 5);
    TestUtils.checkTableSchema(
        tableName,
        Map.of(
            "PERFORMANCE_STRING", "VARCHAR",
            "RECORD_METADATA", "VARIANT",
            "RATING_INT", "NUMBER",
            "APPROVAL", "BOOLEAN",
            "RATING_DOUBLE", "FLOAT"));
  }

  @Test
  void testSnowpipeStreamingSchemaEvolution() throws Exception {
    // Test schema evolution with streaming ingestion using interactive table
    // Migrated from test_snowpipe_streaming_schema_evolution.py

    // given - create interactive table with schema evolution enabled
    final int partitionCount = 3;
    final int recordsPerPartition = 1000;
    final int schemaEvolutionRecordCount = 100;
    final int initialRecordCount = recordsPerPartition - schemaEvolutionRecordCount;

    final String streamingTopic = tableName + "_streaming";
    connectCluster.kafka().createTopic(streamingTopic, partitionCount);

    // the extra topic is not known to the shared @AfterEach cleanup, so it is removed in finally
    // to avoid leaking it when any assertion below fails
    try {
      // Create interactive table with schema evolution enabled
      System.out.println("Creating interactive table: " + tableName);
      snowflake.executeQueryWithParameters(
          "CREATE OR REPLACE INTERACTIVE TABLE "
              + tableName
              + " (RECORD_METADATA VARIANT, FIELDNAME VARCHAR) "
              + "CLUSTER BY (FIELDNAME) "
              + "ENABLE_SCHEMA_EVOLUTION = TRUE");
      System.out.println("Interactive table created successfully");

      final Map config = defaultProperties(streamingTopic, connectorName);
      config.put(
          ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG,
          org.apache.kafka.connect.storage.StringConverter.class.getName());
      config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
      config.put("value.converter.schemas.enable", "false");
      config.put("errors.tolerance", "none");
      config.put("errors.log.enable", "true");
      config.put("errors.deadletterqueue.topic.name", "DLQ_TOPIC");
      config.put("errors.deadletterqueue.topic.replication.factor", "1");
      config.put("jmx", "true");
      config.put(SNOWFLAKE_TOPICS2TABLE_MAP, streamingTopic + ":" + tableName);

      connectCluster.configureConnector(connectorName, config);
      waitForConnectorRunning(connectorName);

      // when - send records with initial schema, then evolved schema
      for (int partition = 0; partition < partitionCount; partition++) {
        // First, send records with initial schema (only fieldName)
        for (int i = 0; i < initialRecordCount; i++) {
          final ObjectNode record = objectMapper.createObjectNode();
          record.put("fieldName", String.valueOf(i));
          connectCluster
              .kafka()
              .produce(
                  streamingTopic, partition, "key-" + i, objectMapper.writeValueAsString(record));
        }

        // Then, send records with evolved schema (fieldName + newField)
        for (int i = 0; i < schemaEvolutionRecordCount; i++) {
          final ObjectNode record = objectMapper.createObjectNode();
          record.put("fieldName", String.valueOf(i + initialRecordCount));
          record.put("newField", "new_" + i);
          connectCluster
              .kafka()
              .produce(
                  streamingTopic,
                  partition,
                  "key-" + (i + initialRecordCount),
                  objectMapper.writeValueAsString(record));
        }
      }

      // Send tombstone records to each partition
      for (int partition = 0; partition < partitionCount; partition++) {
        connectCluster.kafka().produce(streamingTopic, partition, "tombstone-key", null);
      }

      // then - verify schema evolution occurred
      final int expectedTotalRecords =
          recordsPerPartition * partitionCount + partitionCount; // +partitionCount for tombstones

      // Verify table exists and record count matches expected
      System.out.println("Checking if table exists: " + tableName);
      System.out.println("Table exists: " + snowflake.tableExist(tableName));
      assertWithRetry(
          () -> {
            boolean exists = snowflake.tableExist(tableName);
            System.out.println("Table exists check: " + exists);
            return exists;
          });
      System.out.println("Table exists check passed, now checking row count");
      assertWithRetry(
          () -> {
            int rowCount = TestUtils.getNumberOfRows(tableName);
            System.out.println(
                "Current row count: " + rowCount + ", expected: " + expectedTotalRecords);
            return rowCount == expectedTotalRecords;
          });

      // Verify schema contains expected columns including the evolved NEWFIELD column
      TestUtils.checkTableSchema(
          tableName,
          Map.of(
              "FIELDNAME", "VARCHAR",
              "NEWFIELD", "VARCHAR",
              "RECORD_METADATA", "VARIANT"));
    } finally {
      // cleanup
      connectCluster.kafka().deleteTopic(streamingTopic);
    }
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/SinkTaskIT.java
================================================
package com.snowflake.kafka.connector;

import static org.assertj.core.api.Assertions.assertThat;
import static org.testcontainers.shaded.org.awaitility.Awaitility.await;

import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.TestUtils;
import
com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class SinkTaskIT { private static final int PARTITION = 0; private static final int RECORD_COUNT = 10000; private String topicName; private SnowflakeConnectionService snowflakeConnectionService; @BeforeEach public void setup() { topicName = TestUtils.randomTableName(); snowflakeConnectionService = TestUtils.getConnectionService(); snowflakeConnectionService.createTableWithOnlyMetadataColumn(topicName); } @AfterEach public void after() { TestUtils.dropTable(topicName); TestUtils.dropPipe(topicName + "-STREAMING"); } @Test public void testPreCommit() { SnowflakeSinkTask sinkTask = new SnowflakeSinkTask(); Map offsetMap = new HashMap<>(); sinkTask.preCommit(offsetMap); } @Test public void testSinkTask() throws Exception { Map config = TestUtils.transformProfileFileToConnectorConfiguration(true); ConnectorConfigTools.setDefaultValues(config); config.put(Utils.TASK_ID, "0"); SnowflakeSinkTask sinkTask = new SnowflakeSinkTask(); final TopicPartition topicPartition = new TopicPartition(topicName, PARTITION); sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition))); sinkTask.start(config); ArrayList topicPartitions = new ArrayList<>(); topicPartitions.add(topicPartition); sinkTask.open(topicPartitions); sinkTask.awaitInitialization(); // 
commit offset should skip when offset=0 Map offsetMap = new HashMap<>(); offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(0)); offsetMap = sinkTask.preCommit(offsetMap); assertThat(offsetMap).isEmpty(); // send regular data List records = createSinkRecords(PARTITION, RECORD_COUNT); sinkTask.put(records); // Wait for all records to be committed and verify offset long expectedOffset = records.get(records.size() - 1).kafkaOffset() + 1; await() .atMost(60, TimeUnit.SECONDS) .untilAsserted( () -> { Map committed = sinkTask.preCommit(Map.of(topicPartition, new OffsetAndMetadata(0))); assertThat(committed) .containsKey(topicPartition) .extractingByKey(topicPartition) .satisfies(offset -> assertThat(offset.offset()).isEqualTo(expectedOffset)); }); sinkTask.close(topicPartitions); sinkTask.stop(); } @Test public void testSinkTaskNegative() throws Exception { Map config = TestUtils.transformProfileFileToConnectorConfiguration(true); ConnectorConfigTools.setDefaultValues(config); config.put(Utils.TASK_ID, "0"); SnowflakeSinkTask sinkTask = new SnowflakeSinkTask(); TopicPartition topicPartition = new TopicPartition(topicName, PARTITION); sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition))); sinkTask.start(config); sinkTask.start(config); assertThat(sinkTask.version()).isEqualTo(Utils.VERSION); ArrayList topicPartitions = new ArrayList<>(); topicPartitions.add(topicPartition); // Test put and precommit without open // commit offset Map offsetMap = new HashMap<>(); offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(0)); offsetMap = sinkTask.preCommit(offsetMap); sinkTask.close(topicPartitions); // send regular data List records = createSinkRecords(PARTITION, RECORD_COUNT); sinkTask.put(records); // commit offset sinkTask.preCommit(offsetMap); sinkTask.close(topicPartitions); sinkTask.stop(); } /** * Tests that multiple sink tasks can concurrently process data for different partitions of the * same topic. 
Each task handles its own partition and should correctly track offsets. */ @Test public void testMultipleSinkTasks() throws Exception { final int partition0 = 0; final int partition1 = 1; SnowflakeSinkTask task0 = new SnowflakeSinkTask(); SnowflakeSinkTask task1 = new SnowflakeSinkTask(); List topicPartitions0 = List.of(new TopicPartition(topicName, partition0)); List topicPartitions1 = List.of(new TopicPartition(topicName, partition1)); try { // Start both tasks Map task0Config = TestUtils.transformProfileFileToConnectorConfiguration(false); ConnectorConfigTools.setDefaultValues(task0Config); task0Config.put(Utils.TASK_ID, "0"); task0.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartitions0.get(0)))); task0.start(task0Config); Map task1Config = TestUtils.transformProfileFileToConnectorConfiguration(false); ConnectorConfigTools.setDefaultValues(task1Config); task1Config.put(Utils.TASK_ID, "1"); task1.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartitions1.get(0)))); task1.start(task1Config); // Open partitions task0.open(topicPartitions0); task1.open(topicPartitions1); task0.awaitInitialization(); task1.awaitInitialization(); // Put records to both tasks task0.put(createSinkRecords(partition0, RECORD_COUNT)); task1.put(createSinkRecords(partition1, RECORD_COUNT)); // Wait for offsets to be committed and verify TopicPartition tp0 = topicPartitions0.get(0); TopicPartition tp1 = topicPartitions1.get(0); await() .atMost(60, TimeUnit.SECONDS) .untilAsserted( () -> { Map offsetMap0 = task0.preCommit(Map.of(tp0, new OffsetAndMetadata(0))); assertThat(offsetMap0) .containsKey(tp0) .extractingByKey(tp0) .satisfies(offset -> assertThat(offset.offset()).isEqualTo(RECORD_COUNT)); }); await() .atMost(60, TimeUnit.SECONDS) .untilAsserted( () -> { Map offsetMap1 = task1.preCommit(Map.of(tp1, new OffsetAndMetadata(0))); assertThat(offsetMap1) .containsKey(tp1) .extractingByKey(tp1) .satisfies(offset -> 
assertThat(offset.offset()).isEqualTo(RECORD_COUNT)); }); } finally { // Always cleanup even if test fails task0.close(topicPartitions0); task1.close(topicPartitions1); task0.stop(); task1.stop(); } } @Test public void testTopicToTableRegex() { Map config = TestUtils.transformProfileFileToConnectorConfiguration(false); ConnectorConfigTools.setDefaultValues(config); SnowflakeSinkTaskForStreamingIT.testTopicToTableRegexMain(config); } private List createSinkRecords(int partition, int count) { JsonConverter jsonConverter = new JsonConverter(); jsonConverter.configure(Map.of("schemas.enable", "false"), false); String json = "{ \"f1\" : \"v1\" }"; SchemaAndValue schemaAndValue = jsonConverter.toConnectData(topicName, json.getBytes(StandardCharsets.UTF_8)); List records = new ArrayList<>(count); for (int i = 0; i < count; i++) { records.add( new SinkRecord( topicName, partition, null, null, schemaAndValue.schema(), schemaAndValue.value(), i, System.currentTimeMillis(), TimestampType.CREATE_TIME)); } return records; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/SinkTaskProxyIT.java ================================================ package com.snowflake.kafka.connector; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.EmbeddedProxyServer; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.TestUtils; import java.util.Map; import java.util.Optional; import org.junit.After; import org.junit.Assert; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; /** * Integration tests for Snowflake Sink Task proxy configuration. Uses Testcontainers with a real * Squid proxy server running in Docker to test JVM proxy settings with authentication. 
* The proxy server uses a random available port on the host to avoid conflicts.
 *
 * <p>Each test method gets its own proxy server instance via JUnit {@code @Rule}, ensuring tests
 * can run in parallel without port conflicts.
 *
 * <p>Note: This test requires Docker to be installed and running.
 */
public class SinkTaskProxyIT {

  private static final String PROXY_USERNAME = "admin";
  private static final String PROXY_PASSWORD = "test";

  @Rule
  public final EmbeddedProxyServer proxyServer =
      new EmbeddedProxyServer(PROXY_USERNAME, PROXY_PASSWORD);

  /** Clears the JVM-wide proxy system properties so they do not leak into other tests. */
  @After
  public void testCleanup() {
    TestUtils.resetProxyParametersInJVM();
  }

  /**
   * Starts the task with an unreachable proxy and expects the start to fail, while the JVM proxy
   * properties must already have been set from the configuration. Currently ignored.
   */
  @Test(expected = SnowflakeKafkaConnectorException.class)
  @Ignore
  public void testSinkTaskProxyConfigMock() {
    Map config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);
    config.put(Utils.TASK_ID, "0");

    config.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "wronghost");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, "9093"); // wrongport
    config.put(KafkaConnectorConfigParams.JVM_PROXY_USERNAME, "user");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, "password");
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();
    try {
      sinkTask.start(config);
    } catch (SnowflakeKafkaConnectorException e) {
      // JUnit assertions are used instead of the `assert` keyword: they are evaluated regardless
      // of the JVM -ea flag and report a missing property as a failure rather than an NPE
      Assert.assertEquals("true", System.getProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY));
      Assert.assertEquals(
          "wronghost", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST));
      Assert.assertEquals("9093", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT));
      Assert.assertEquals(
          "wronghost", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST));
      Assert.assertEquals("9093", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT));
      Assert.assertEquals("", System.getProperty(Utils.JDK_HTTP_AUTH_TUNNELING));
      Assert.assertEquals("user", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER));
      Assert.assertEquals(
          "password", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD));
      Assert.assertEquals("user", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER));
      Assert.assertEquals(
          "password", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD));
      // unset the system parameters please.
      TestUtils.resetProxyParametersInJVM();
      throw e;
    }
  }

  /**
   * Tests that the Snowflake Sink Task properly configures JVM proxy settings. This test verifies
   * that the JVM system properties are correctly set when proxy configuration is provided, without
   * actually connecting through a proxy or to Snowflake.
   *
   * <p>This is a focused unit test that verifies the proxy configuration logic.
   */
  @Test
  public void testProxyJvmPropertiesConfiguration() {
    Map config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);

    // Configure proxy settings
    config.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "test-proxy.example.com");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, "8080");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_USERNAME, proxyServer.getUsername());
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, proxyServer.getPassword());

    // Set proxy properties (this is what the connector does internally)
    Utils.enableJVMProxy(config);

    // Verify all JVM proxy properties are set correctly
    Assert.assertEquals("true", System.getProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY));
    Assert.assertEquals(
        "test-proxy.example.com", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST));
    Assert.assertEquals("8080", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT));
    Assert.assertEquals(
        "test-proxy.example.com", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST));
    Assert.assertEquals("8080", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT));
    Assert.assertEquals(
        proxyServer.getUsername(), System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER));
    Assert.assertEquals(
        proxyServer.getPassword(),
        System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD));
    Assert.assertEquals(
        proxyServer.getUsername(), System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER));
    Assert.assertEquals(
        proxyServer.getPassword(),
        System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD));
  }

  /**
   * Starts the task through the embedded proxy and verifies both the JVM proxy properties and that
   * a Snowflake connection service was created.
   */
  @Test
  public void testSinkTaskProxyConfig() {
    Map config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);
    config.put(Utils.TASK_ID, "0");

    int proxyPort = proxyServer.getPort();
    config.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "localhost");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, String.valueOf(proxyPort));
    config.put(KafkaConnectorConfigParams.JVM_PROXY_USERNAME, proxyServer.getUsername());
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, proxyServer.getPassword());

    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();
    sinkTask.start(config);

    // stop the started task even when one of the checks below fails
    try {
      Assert.assertEquals("true", System.getProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY));
      Assert.assertEquals(
          "localhost", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST));
      Assert.assertEquals(
          String.valueOf(proxyPort),
          System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT));
      Assert.assertEquals(
          "localhost", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST));
      Assert.assertEquals(
          String.valueOf(proxyPort),
          System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT));
      Assert.assertEquals("", System.getProperty(Utils.JDK_HTTP_AUTH_TUNNELING));
      Assert.assertEquals(
          proxyServer.getUsername(),
          System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER));
      Assert.assertEquals(
          proxyServer.getPassword(),
          System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD));
      Assert.assertEquals(
          proxyServer.getUsername(),
          System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER));
      Assert.assertEquals(
          proxyServer.getPassword(),
          System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD));

      // Verify the snowflake connection service was created successfully
      Optional optSfConnectionService = sinkTask.getSnowflakeConnection();
      Assert.assertTrue(optSfConnectionService.isPresent());
    } finally {
      // Cleanup
      sinkTask.stop();
    }
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/SmtIT.java
================================================
package com.snowflake.kafka.connector;

import static org.apache.kafka.connect.runtime.ConnectorConfig.TRANSFORMS_CONFIG;
import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG;
import static
org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.streaming.FakeSnowflakeStreamingIngestChannel; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory; import java.time.Duration; import java.util.List; import java.util.Map; import java.util.function.UnaryOperator; import java.util.stream.Stream; import org.apache.commons.dbutils.QueryRunner; import org.apache.kafka.connect.json.JsonConverter; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @ExtendWith({InjectSnowflakeDataSourceExtension.class, InjectQueryRunnerExtension.class}) public class SmtIT extends ConnectClusterBaseIT { private static final int PARTITION_COUNT = 1; public static final String RECORD_METADATA = "RECORD_METADATA"; public static final String RECORD_CONTENT = "record_content"; private String topicName; private String connectorName; private ObjectMapper objectMapper = new ObjectMapper(); @InjectQueryRunner private QueryRunner queryRunner; @BeforeEach void before() { topicName = TestUtils.randomTableName(); connectorName = String.format("%s_connector", topicName); connectCluster.kafka().createTopic(topicName, PARTITION_COUNT); TestUtils.createTableWithMetadataColumn(topicName); StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier); } @AfterEach void after() { connectCluster.kafka().deleteTopic(topicName); connectCluster.deleteConnector(connectorName); StreamingClientFactory.resetStreamingClientSupplier(); TestUtils.dropTable(topicName); TestUtils.dropPipe(topicName + 
"-STREAMING"); } @Test void test_with_record_content_variant_added_by_smt() throws Exception { final Map config = defaultProperties(topicName, connectorName); config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side"); config.put( Constants.KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, "false"); config.put("transforms", "add_record_content"); config.put( "transforms.add_record_content.type", "org.apache.kafka.connect.transforms.HoistField$Value"); config.put("transforms.add_record_content.field", RECORD_CONTENT); connectCluster.configureConnector(connectorName, config); waitForConnectorRunning(connectorName); waitForOpenedFakeIngestClient(connectorName); connectCluster.kafka().produce(topicName, getTestJsonContent()); // then await() .timeout(Duration.ofSeconds(30)) .pollInterval(Duration.ofSeconds(1)) .untilAsserted( () -> { assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()) .isEqualTo(1); // get first open channel, there is going to be only one because partition count is 1 FakeSnowflakeStreamingIngestChannel openedChannels = getOpenedFakeIngestClient(connectorName).getOpenedChannels().get(0); assertThat(openedChannels.getAppendedRows()).hasSize(1); final Map firstRow = openedChannels.getAppendedRows().get(0); assertThat(firstRow).containsKeys(RECORD_METADATA, RECORD_CONTENT); assertThat(firstRow) .hasEntrySatisfying( RECORD_METADATA, value -> { assertThat(value).isInstanceOf(Map.class); }); assertThat(firstRow) .hasEntrySatisfying( RECORD_CONTENT, value -> { assertThat(value).isInstanceOf(Map.class); }); }); } @ParameterizedTest @CsvSource({"DEFAULT, 10, 18", "IGNORE, 0, -1"}) // -1 means No offset registered void testIfSmtReturningNullsIngestDataCorrectly( String behaviorOnNull, int expectedRecordNumber, int expectedLastOffset) { // given connectCluster.configureConnector( connectorName, smtProperties(topicName, connectorName, behaviorOnNull)); 
waitForConnectorRunning(connectorName); waitForOpenedFakeIngestClient(connectorName); // when // Send 20 messages: 10x "{}" (becomes null after ExtractField SMT) alternating with // 10x {"message":"value"} (becomes String "value" after SMT - treated as broken record) Stream.iterate(0, UnaryOperator.identity()) .limit(10) .flatMap(v -> Stream.of("{}", "{\"message\":\"value\"}")) .forEach(message -> connectCluster.kafka().produce(topicName, message)); // then // For DEFAULT mode: 10 tombstones are inserted at even offsets (0,2,4,...,18), last offset=18 // For IGNORE mode: nulls are skipped, broken records don't insert, no rows appended final String expectedOffsetToken = expectedLastOffset >= 0 ? String.valueOf(expectedLastOffset) : null; await() .timeout(Duration.ofSeconds(30)) .pollInterval(Duration.ofSeconds(1)) .untilAsserted( () -> { assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()) .isEqualTo(expectedRecordNumber); List openedChannels = getOpenedFakeIngestClient(connectorName).getOpenedChannels(); // get first open channel, there is going to be only one because partition count is 1 String offsetToken = openedChannels.get(0).getLatestCommittedOffsetToken(); assertThat(openedChannels).hasSize(PARTITION_COUNT); assertThat(offsetToken).isEqualTo(expectedOffsetToken); }); } @Test void testIfSmtExtractingNestedStructuresWorksCorrectly() { connectCluster.configureConnector( connectorName, smtProperties(topicName, connectorName, "IGNORE")); waitForConnectorRunning(connectorName); waitForOpenedFakeIngestClient(connectorName); final String message = "{\"message\":{\"title\":\"abcd\", \"length\":5999}}"; connectCluster.kafka().produce(topicName, message); await() .timeout(Duration.ofSeconds(30)) .pollInterval(Duration.ofSeconds(2)) .untilAsserted( () -> { assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()) .isEqualTo(1); List openedChannels = getOpenedFakeIngestClient(connectorName).getOpenedChannels(); // get first open 
channel, there is going to be only one because partition count is 1 String offsetToken = openedChannels.get(0).getLatestCommittedOffsetToken(); assertThat(offsetToken).isEqualTo("0"); }); } private Map smtProperties( String smtTopic, String smtConnector, String behaviorOnNull) { Map config = defaultProperties(smtTopic, smtConnector); config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side"); config.put(VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName()); config.put("value.converter.schemas.enable", "false"); config.put("behavior.on.null.values", behaviorOnNull); config.put(TRANSFORMS_CONFIG, "extractField"); config.put( "transforms.extractField.type", "org.apache.kafka.connect.transforms.ExtractField$Value"); config.put("transforms.extractField.field", "message"); // Allow broken records (plain Strings after SMT) to be sent to DLQ instead of failing config.put("errors.tolerance", "all"); config.put("errors.deadletterqueue.topic.name", "DLQ_TOPIC"); config.put("errors.deadletterqueue.topic.replication.factor", "1"); return config; } private String getTestJsonContent() throws JsonProcessingException { return objectMapper.writeValueAsString( Map.of( "city", "Pcim Górny", "age", 30, "married", true, "has cat", true, "! 
@&$#* has Łułósżź", true, "skills", List.of("sitting", "standing", "eating"), "family", Map.of("son", "Jack", "daughter", "Anna"))); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/SnowflakeSinkTaskAuthorizationExceptionTrackerTest.java ================================================ package com.snowflake.kafka.connector; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.TestUtils; import java.util.Map; import java.util.stream.Stream; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; class SnowflakeSinkTaskAuthorizationExceptionTrackerTest { @Test public void shouldThrowExceptionOnAuthorizationError() { // given SnowflakeSinkTaskAuthorizationExceptionTracker tracker = new SnowflakeSinkTaskAuthorizationExceptionTracker(); Map config = TestUtils.getConfig(); config.put(ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS, "true"); tracker.updateStateOnTaskStart(config); // when tracker.reportPrecommitException(new Exception("Authorization failed after retry")); // then Assertions.assertThrows( SnowflakeKafkaConnectorException.class, tracker::throwExceptionIfAuthorizationFailed); } @Test public void shouldNotThrowExceptionWhenNoExceptionReported() { // given SnowflakeSinkTaskAuthorizationExceptionTracker tracker = new SnowflakeSinkTaskAuthorizationExceptionTracker(); Map config = TestUtils.getConfig(); config.put(ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS, "true"); tracker.updateStateOnTaskStart(config); // expect Assertions.assertDoesNotThrow(tracker::throwExceptionIfAuthorizationFailed); } @ParameterizedTest @MethodSource("noExceptionConditions") 
public void shouldNotThrowException(boolean enabled, String exceptionMessage) { // given SnowflakeSinkTaskAuthorizationExceptionTracker tracker = new SnowflakeSinkTaskAuthorizationExceptionTracker(); Map config = TestUtils.getConfig(); config.put(ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS, Boolean.toString(enabled)); tracker.updateStateOnTaskStart(config); // when tracker.reportPrecommitException(new Exception(exceptionMessage)); // then Assertions.assertDoesNotThrow(tracker::throwExceptionIfAuthorizationFailed); } public static Stream noExceptionConditions() { return Stream.of( Arguments.of(false, "Authorization failed after retry"), Arguments.of(true, "NullPointerException")); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/SnowflakeSinkTaskForStreamingIT.java ================================================ package com.snowflake.kafka.connector; import static com.snowflake.kafka.connector.internal.TestUtils.getConnectionServiceWithEncryptedKey; import static java.lang.String.format; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeSinkService; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext; import java.sql.ResultSet; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.SinkRecord; import 
org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

/**
 * Sink Task IT test which uses {@link
 * com.snowflake.kafka.connector.internal.streaming.SnowflakeSinkServiceV2}
 */
public class SnowflakeSinkTaskForStreamingIT {

  private String topicName;
  private static final int partition = 0;
  private TopicPartition topicPartition;

  /** Creates a fresh two-column table named after a random topic before every test. */
  @BeforeEach
  public void beforeEach() {
    topicName = TestUtils.randomTableName();
    topicPartition = new TopicPartition(topicName, partition);

    getConnectionServiceWithEncryptedKey()
        .executeQueryWithParameters(
            format("create or replace table %s (record_metadata variant, f1 varchar)", topicName));
  }

  @AfterEach
  public void afterEach() {
    TestUtils.dropTable(topicName);
    // Drop the associated streaming pipe to prevent account-level pipe limit errors
    TestUtils.dropPipe(topicName + "-STREAMING");
  }

  /** Puts a single record through a started task and waits for preCommit to report offset 1. */
  @Test
  public void testSinkTask() throws Exception {
    Map config = getConfig();
    ConnectorConfigTools.setDefaultValues(config);
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();

    // Inits the sinktaskcontext
    sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));
    sinkTask.start(config);
    ArrayList topicPartitions = new ArrayList<>();
    topicPartitions.add(new TopicPartition(topicName, partition));
    sinkTask.open(topicPartitions);
    sinkTask.awaitInitialization();

    // commit offset
    final Map offsetMap = new HashMap<>();
    offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(0));

    // nothing was written yet, so preCommit is expected to return an empty map
    TestUtils.assertWithRetry(() -> sinkTask.preCommit(offsetMap).size() == 0, 5, 20);

    // send regular data
    List records = TestUtils.createJsonStringSinkRecords(0, 1, topicName, partition);
    sinkTask.put(records);

    // commit offset
    offsetMap.clear();
    offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(10000));

    TestUtils.assertWithRetry(() -> sinkTask.preCommit(offsetMap).size() == 1, 5, 20);
    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(0)).offset() == 1, 5, 20);

    sinkTask.close(topicPartitions);
    sinkTask.stop();
  }

  /**
   * Opens, closes and re-opens the task (the second time with an extra partition), re-sends the
   * same record plus one record for the new partition, and then verifies that the table holds
   * exactly two rows, both with offset 0, coming from two distinct partitions.
   */
  @Test
  public void testSinkTaskWithMultipleOpenClose() throws Exception {
    Map config = getConfig();
    ConnectorConfigTools.setDefaultValues(config);
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();
    // Inits the sinktaskcontext
    sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));

    sinkTask.start(config);
    ArrayList topicPartitions = new ArrayList<>();
    topicPartitions.add(new TopicPartition(topicName, partition));
    sinkTask.open(topicPartitions);
    sinkTask.awaitInitialization();

    final long noOfRecords = 1L;
    final long lastOffsetNo = noOfRecords - 1;

    // send regular data
    List records =
        TestUtils.createJsonStringSinkRecords(0, noOfRecords, topicName, partition);
    sinkTask.put(records);

    // commit offset
    final Map offsetMap = new HashMap<>();
    offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(lastOffsetNo));

    TestUtils.assertWithRetry(() -> sinkTask.preCommit(offsetMap).size() == 1, 5, 20);

    // precommit is one more than offset last inserted
    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(0)).offset() == noOfRecords,
        20,
        5);

    sinkTask.close(topicPartitions);

    // Add one more partition
    topicPartitions.add(new TopicPartition(topicName, partition + 1));

    sinkTask.open(topicPartitions);
    sinkTask.awaitInitialization();

    // trying to put same records
    sinkTask.put(records);

    List recordsWithAnotherPartition =
        TestUtils.createJsonStringSinkRecords(0, noOfRecords, topicName, partition + 1);
    sinkTask.put(recordsWithAnotherPartition);

    // Adding to offsetMap so that this gets into precommit
    offsetMap.put(topicPartitions.get(1), new OffsetAndMetadata(lastOffsetNo));

    TestUtils.assertWithRetry(() -> sinkTask.preCommit(offsetMap).size() == 2, 5, 20);
    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(0)).offset() == 1, 5, 20);
    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(1)).offset() == 1, 5, 20);

    sinkTask.close(topicPartitions);
    sinkTask.stop();

    LinkedList contentResult = new LinkedList<>();
    LinkedList metadataResult = new LinkedList<>();
    // try-with-resources closes the result set even if reading a column throws
    try (ResultSet resultSet = TestUtils.showTable(topicName)) {
      while (resultSet.next()) {
        contentResult.add(resultSet.getString("F1"));
        metadataResult.add(resultSet.getString("RECORD_METADATA"));
      }
    }

    // JUnit assertions instead of the `assert` keyword, which is skipped when the JVM runs
    // without -ea
    Assertions.assertEquals(2, metadataResult.size());
    Assertions.assertEquals(2, contentResult.size());
    ObjectMapper mapper = new ObjectMapper();

    Set partitionsInTable = new HashSet<>();
    metadataResult.forEach(
        s -> {
          try {
            JsonNode metadata = mapper.readTree(s);
            // every record was produced with offset 0; the comparison result must be asserted,
            // not discarded
            Assertions.assertEquals("0", metadata.get("offset").asText());
            partitionsInTable.add(metadata.get("partition").asLong());
          } catch (JsonProcessingException e) {
            Assertions.fail("RECORD_METADATA is not valid JSON: " + s, e);
          }
        });

    Assertions.assertEquals(2, partitionsInTable.size());
  }

  @Test
  public void testTopicToTableRegex() {
    Map config = getConfig();

    testTopicToTableRegexMain(config);
  }

  /**
   * Runs the topic-to-table regex scenarios (two disjoint regexes, then a catch-all regex) against
   * the given base connector configuration.
   *
   * @param config base connector configuration; the topic2table map entry is overwritten in it
   */
  public static void testTopicToTableRegexMain(Map config) {
    // constants
    String catTopicStr1 = "calico_cat";
    String catTopicStr2 = "orange_cat";
    String bigCatTable = "big_cat_table";
    String bigCatTopicRegex = "big.*_.*_cat";
    String bigCatTopicStr1 = "big_calico_cat";
    String bigCatTopicStr2 = "biggest_orange_cat";
    String dogTable = "dog_table";
    String dogTopicRegex = ".*_dog";
    String dogTopicStr1 = "corgi_dog";
    String catchallTable = "animal_table";
    String catchAllRegex = ".*";
    String birdTopicStr1 = "bird";

    // test two regexes. bird should create its own table
    String twoRegexConfig =
        Utils.formatString("{}:{}, {}:{}", bigCatTopicRegex, bigCatTable, dogTopicRegex, dogTable);
    List twoRegexPartitionStrs =
        Arrays.asList(bigCatTopicStr1, bigCatTopicStr2, dogTopicStr1, birdTopicStr1);
    Map twoRegexExpected = new HashMap<>();
    twoRegexExpected.put(bigCatTopicStr1, bigCatTable);
    twoRegexExpected.put(bigCatTopicStr2, bigCatTable);
    twoRegexExpected.put(dogTopicStr1, dogTable);
    twoRegexExpected.put(birdTopicStr1, birdTopicStr1);
    testTopicToTableRegexRunner(config, twoRegexConfig, twoRegexPartitionStrs, twoRegexExpected);

    // test catchall regex
    String catchAllConfig = Utils.formatString("{}:{}", catchAllRegex, catchallTable);
    List catchAllPartitionStrs =
        Arrays.asList(catTopicStr1, catTopicStr2, dogTopicStr1, birdTopicStr1);
    Map catchAllExpected = new HashMap<>();
    catchAllExpected.put(catTopicStr1, catchallTable);
    catchAllExpected.put(catTopicStr2, catchallTable);
    catchAllExpected.put(dogTopicStr1, catchallTable);
    catchAllExpected.put(birdTopicStr1, catchallTable);
    testTopicToTableRegexRunner(config, catchAllConfig, catchAllPartitionStrs, catchAllExpected);
  }

  /**
   * Opens a task built on Mockito spies for one partition per topic in {@code partitionStrList}
   * and verifies that {@code startPartitions} was called exactly once and that every expected
   * topic is among the opened partitions.
   *
   * @param connectorBaseConfig base configuration; the topic2table map entry is written into it
   * @param topic2tableRegex value stored under the topic2table map key
   * @param partitionStrList topic names; element {@code i} becomes partition {@code i}
   * @param expectedTopic2TableConfig expected topic-to-table mapping; only its keys are checked
   */
  private static void testTopicToTableRegexRunner(
      Map connectorBaseConfig,
      String topic2tableRegex,
      List partitionStrList,
      Map expectedTopic2TableConfig) {
    // setup
    connectorBaseConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, topic2tableRegex);

    // setup partitions
    List testPartitions = new ArrayList<>();
    for (int i = 0; i < partitionStrList.size(); i++) {
      testPartitions.add(new TopicPartition(partitionStrList.get(i), i));
    }

    // mocks
    // NOTE(review): the task is opened without start(config), so the topic2table map written
    // above is never handed to the task in this method, and the table names in
    // expectedTopic2TableConfig are not verified - confirm whether that is intended.
    SnowflakeSinkService serviceSpy = Mockito.spy(SnowflakeSinkService.class);
    SnowflakeConnectionService connSpy = Mockito.spy(SnowflakeConnectionService.class);

    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask(serviceSpy, connSpy);

    // test topics were mapped correctly
    sinkTask.open(testPartitions);

    // verify expected num tasks opened
    Mockito.verify(serviceSpy, Mockito.times(1)).startPartitions(Mockito.anyCollection());

    for (String topicStr : expectedTopic2TableConfig.keySet()) {
      TopicPartition topic = null;
      for (TopicPartition currTp : testPartitions) {
        if (currTp.topic().equals(topicStr)) {
          topic = currTp;
        }
      }
      Assertions.assertNotNull(topic, "Expected topic partition was not opened by the task");
    }
  }

  private Map getConfig() {
    return TestUtils.getConnectorConfigurationForStreaming(false);
  }

  /**
   * With auto-generated table name sanitization enabled and no topic2table map, a mixed-case
   * topic name is expected to end up in a table named with the uppercased topic name.
   */
  @Test
  public void testSanitizationEnabledAutoGenerated() throws Exception {
    // Topic with valid identifier that needs uppercasing
    // Use uppercase letters to avoid hash generation
    // (these locals intentionally shadow the per-test fields set up in beforeEach)
    String topicName = "TestTopic" + System.currentTimeMillis();
    TopicPartition topicPartition = new TopicPartition(topicName, 0);

    // When sanitization is enabled, valid identifiers are uppercased
    String expectedTableName = topicName.toUpperCase();
    String pipeName = expectedTableName + "-STREAMING";

    try {
      Map config = TestUtils.getConnectorConfigurationForStreaming(false);
      ConnectorConfigTools.setDefaultValues(config);
      config.put(
          KafkaConnectorConfigParams
              .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
          "true");
      config.put(KafkaConnectorConfigParams.TOPICS, topicName);

      SnowflakeSinkTask task = new SnowflakeSinkTask();
      task.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));
      task.start(config);
      task.open(Collections.singletonList(topicPartition));
      task.awaitInitialization();

      // Create and send records
      List records = TestUtils.createJsonStringSinkRecords(0, 5, topicName, 0);
      task.put(records);

      // Wait for preCommit to confirm data is flushed
      final Map offsetMap = new HashMap<>();
      offsetMap.put(topicPartition, new OffsetAndMetadata(10000));
      TestUtils.assertWithRetry(() -> task.preCommit(offsetMap).size() == 1, 5, 20);

      task.close(Collections.singletonList(topicPartition));
      task.stop();

      SnowflakeConnectionService conn = getConnectionServiceWithEncryptedKey();

      // Verify the table exists and is uppercased
      boolean tableExists = conn.tableExist(expectedTableName);
      Assertions.assertTrue(tableExists, "Should find uppercased table: " + expectedTableName);
      Assertions.assertTrue(
          expectedTableName.matches("^[A-Z_0-9]+$"),
          "Table name should be fully uppercased with only alphanumeric and underscore characters");

      // Verify data; the result set is closed once the rows are counted
      int count = 0;
      try (ResultSet data = TestUtils.showTable(expectedTableName)) {
        while (data.next()) {
          count++;
        }
      }
      Assertions.assertEquals(5, count, "Should have 5 rows");
    } finally {
      // Cleanup table and pipe, also when an assertion above fails, so that failed runs do not
      // leave objects behind in the account
      TestUtils.dropTable(expectedTableName);
      TestUtils.dropPipe(pipeName);
    }
  }

  /**
   * With sanitization disabled and a quoted table name in topic2table.map, the records are
   * expected to land in a table with exactly that mixed-case name.
   */
  @Test
  public void testSanitizationDisabledQuotedMap() throws Exception {
    // Quoting in topic2table.map preserves case through the parser (no uppercasing).
    // (these locals intentionally shadow the per-test fields set up in beforeEach)
    String topicName = "myTopic_" + System.currentTimeMillis();
    String mixedCaseTable = "My_Test_Table_" + System.currentTimeMillis();
    TopicPartition topicPartition = new TopicPartition(topicName, 0);
    String pipeName = mixedCaseTable + "-STREAMING";

    try {
      Map config = TestUtils.getConnectorConfigurationForStreaming(false);
      ConnectorConfigTools.setDefaultValues(config);
      config.put(
          KafkaConnectorConfigParams
              .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
          "false");
      config.put(
          KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP,
          topicName + ":\"" + mixedCaseTable + "\"");
      config.put(KafkaConnectorConfigParams.TOPICS, topicName);

      SnowflakeSinkTask task = new SnowflakeSinkTask();
      task.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));
      task.start(config);
      task.open(Collections.singletonList(topicPartition));
      task.awaitInitialization();

      // Create and send records
      List records = TestUtils.createJsonStringSinkRecords(0, 5, topicName, 0);
      task.put(records);

      // Wait for preCommit to confirm data is flushed
      final Map offsetMap = new HashMap<>();
      offsetMap.put(topicPartition, new OffsetAndMetadata(10000));
      TestUtils.assertWithRetry(() -> task.preCommit(offsetMap).size() == 1, 5, 20);

      task.close(Collections.singletonList(topicPartition));
      task.stop();

      // Verify data in the auto-created table; the result set is closed once the rows are counted
      int count = 0;
      try (ResultSet data = TestUtils.showTable(mixedCaseTable)) {
        while (data.next()) {
          count++;
        }
      }
      Assertions.assertEquals(5, count, "Should have 5 rows in table " + mixedCaseTable);
    } finally {
      // Cleanup table and pipe, also when an assertion above fails
      TestUtils.dropTable(mixedCaseTable);
      TestUtils.dropPipe(pipeName);
    }
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/TopicToTableParserTest.java
================================================
package com.snowflake.kafka.connector;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.junit.Test;

/** Unit tests for {@link TopicToTableParser}: accepted syntax, case handling and error messages. */
public class TopicToTableParserTest {

  @Test
  public void testParseEmptyInput() {
    assertTrue(TopicToTableParser.parse("").isEmpty());
    assertTrue(TopicToTableParser.parse("   ").isEmpty());
  }

  @Test
  public void testParseMultipleEntries() {
    Map expected = new LinkedHashMap<>();
    expected.put("topic_a", "TABLE_A");
    expected.put("topic_b", "TABLE_B");

    assertEquals(expected, TopicToTableParser.parse("topic_a:table_a, topic_b:table_b"));
  }

  @Test
  public void testParseQuotedEntries() {
    // quoted tokens may contain the separators ':' and ',' as well as spaces
    Map expected = new LinkedHashMap<>();
    expected.put("topic:one", "table,one");
    expected.put("topic two", "table two");

    assertEquals(
        expected,
        TopicToTableParser.parse("\"topic:one\":\"table,one\", \"topic two\":\"table two\""));
  }

  @Test
  public void testParseEntriesPreservesOrder() {
    List entries = new TopicToTableParser("first:one, second:two").parseEntries();

    assertEquals(2, entries.size());
    assertEquals("first", entries.get(0).getTopic());
    assertEquals("ONE", entries.get(0).getTable());
    assertEquals("second", entries.get(1).getTopic());
    assertEquals("TWO", entries.get(1).getTable());
  }

  @Test
  public void testParseUppercasesOnlyUnquotedTableTokens() {
    Map expected = new LinkedHashMap<>();
    expected.put("topic", "E");
    expected.put("other_topic", "e");

    assertEquals(expected, TopicToTableParser.parse("topic:e, other_topic:\"e\""));
  }

  @Test
  public void testParseRejectsDuplicateTopics() {
    IllegalArgumentException error = assertParseError("topic:one, topic:two");

    assertEquals("Duplicate topic: topic", error.getMessage());
  }

  @Test
  public void testParseRejectsOverlappingRegexes() {
    IllegalArgumentException error = assertParseError(".*:table_a, .*foo:table_b");

    assertTrue(error.getMessage().contains("Topic regexes cannot overlap"));
    assertTrue(error.getMessage().contains(".*"));
    assertTrue(error.getMessage().contains(".*foo"));
  }

  @Test
  public void testParseRejectsUnterminatedQuotedToken() {
    IllegalArgumentException error = assertParseError("\"topic:table");

    assertTrue(error.getMessage().contains("Unterminated quoted token"));
  }

  @Test
  public void testParseRejectsEmptyQuotedToken() {
    IllegalArgumentException error = assertParseError("\"\":table");

    assertTrue(error.getMessage().contains("Empty quoted token"));
  }

  @Test
  public void testParseRejectsMissingColon() {
    IllegalArgumentException error = assertParseError("topic table");

    assertTrue(error.getMessage().contains("Expected ':'"));
  }

  /**
   * Parses {@code input} and returns the {@link IllegalArgumentException} it raises; fails the
   * test when parsing succeeds.
   */
  private static IllegalArgumentException assertParseError(String input) {
    try {
      TopicToTableParser.parse(input);
      fail("Expected IllegalArgumentException");
      // unreachable: fail() always throws, but the compiler requires a return on this path
      return null;
    } catch (IllegalArgumentException error) {
      return error;
    }
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/UtilsTest.java
================================================
package com.snowflake.kafka.connector;

import static java.util.Arrays.*;
import static java.util.Collections.*;
import static org.assertj.core.api.Fail.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static
org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.TestUtils; import java.util.HashMap; import java.util.List; import java.util.Map; import org.junit.Rule; import org.junit.Test; import org.junit.contrib.java.lang.system.EnvironmentVariables; public class UtilsTest { @Rule public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); @Test public void testObjectIdentifier() { String name = "DATABASE.SCHEMA.TABLE"; assert !Utils.isValidSnowflakeObjectIdentifier(name); String name1 = "table!@#$%^;()"; assert !Utils.isValidSnowflakeObjectIdentifier(name1); } @Test public void testVersionChecker() { assert Utils.checkConnectorVersion(); } @Test public void testGetTableName() { Map topic2table = TopicToTableParser.parse("ab@cd:abcd, 1234:_1234"); assert Utils.getTableName("ab@cd", topic2table, true).equals("ABCD"); assert Utils.getTableName("1234", topic2table, true).equals("_1234"); TestUtils.assertError( SnowflakeErrors.ERROR_0020, () -> Utils.getTableName("", topic2table, true)); TestUtils.assertError( SnowflakeErrors.ERROR_0020, () -> Utils.getTableName(null, topic2table, true)); String topic = "bc*def"; assert Utils.getTableName(topic, topic2table, true) .equals("BC_DEF_" + Math.abs(topic.hashCode())); topic = "12345"; assert Utils.getTableName(topic, topic2table, true) .equals("_12345_" + Math.abs(topic.hashCode())); } @Test public void testGetTableNameRegex() { String catTable = "cat_table"; String dogTable = "dog_table"; String catTopicRegex = ".*_cat"; String dogTopicRegex = ".*_dog"; // test two different regexs Map topic2table = TopicToTableParser.parse( Utils.formatString("{}:{},{}:{}", catTopicRegex, catTable, dogTopicRegex, dogTable)); assert 
Utils.getTableName("calico_cat", topic2table, true).equals("CAT_TABLE"); assert Utils.getTableName("orange_cat", topic2table, true).equals("CAT_TABLE"); assert Utils.getTableName("_cat", topic2table, true).equals("CAT_TABLE"); assert Utils.getTableName("corgi_dog", topic2table, true).equals("DOG_TABLE"); // test new topic should not have wildcard String topic = "bird.*"; assert Utils.getTableName(topic, topic2table, true) .equals("BIRD_" + Math.abs(topic.hashCode())); } @Test public void testConvertAppName() { HashMap config = new HashMap(); config.put(KafkaConnectorConfigParams.NAME, "_aA1"); Utils.convertAppName(config); assert config.get(KafkaConnectorConfigParams.NAME).equals("_AA1"); config.put(KafkaConnectorConfigParams.NAME, "-_aA1"); Utils.convertAppName(config); assert config.get(KafkaConnectorConfigParams.NAME).equals("___AA1_44483871"); config.put(KafkaConnectorConfigParams.NAME, "_aA1-"); Utils.convertAppName(config); assert config.get(KafkaConnectorConfigParams.NAME).equals("_AA1__90688251"); config.put(KafkaConnectorConfigParams.NAME, "testApp.snowflake-connector"); Utils.convertAppName(config); assert config .get(KafkaConnectorConfigParams.NAME) .equals("TESTAPP_SNOWFLAKE_CONNECTOR_36242259"); } @Test public void testIsValidSnowflakeApplicationName() { assert Utils.isValidSnowflakeApplicationName("-_aA1"); assert Utils.isValidSnowflakeApplicationName("aA_1-"); assert !Utils.isValidSnowflakeApplicationName("1aA_-"); assert !Utils.isValidSnowflakeApplicationName("_1.a$"); assert !Utils.isValidSnowflakeApplicationName("(1.f$-_"); } @Test public void testLogMessageBasic() { // no variable String expected = Utils.SF_LOG_TAG + " test message"; assert Utils.formatLogMessage("test message").equals(expected); // 1 variable expected = Utils.SF_LOG_TAG + " 1 test message"; assert Utils.formatLogMessage("{} test message", 1).equals(expected); } @Test public void testLogMessageNulls() { // nulls String expected = Utils.SF_LOG_TAG + " null test message"; assert 
Utils.formatLogMessage("{} test message", (String) null).equals(expected); expected = Utils.SF_LOG_TAG + " some string test null message null"; assert Utils.formatLogMessage("{} test {} message {}", "some string", null, null) .equals(expected); } @Test public void testLogMessageMultiLines() { // 2 variables String expected = Utils.SF_LOG_TAG + " 1 test message\n" + "2 test message"; System.out.println(Utils.formatLogMessage("{} test message\n{} test message", 1, 2)); assert Utils.formatLogMessage("{} test message\n{} test message", 1, 2).equals(expected); // 3 variables expected = Utils.SF_LOG_TAG + " 1 test message\n" + "2 test message\n" + "3 test message"; assert Utils.formatLogMessage("{} test message\n{} test message\n{} test " + "message", 1, 2, 3) .equals(expected); // 4 variables expected = Utils.SF_LOG_TAG + " 1 test message\n" + "2 test message\n" + "3 test message\n" + "4 test message"; assert Utils.formatLogMessage( "{} test message\n{} test message\n{} test " + "message\n{} test message", 1, 2, 3, 4) .equals(expected); } @Test public void testSemanticVersionParsing() { // Test standard version parsing SemanticVersion version311 = new SemanticVersion("3.1.1"); assertEquals(3, version311.major()); assertEquals(1, version311.minor()); assertEquals(1, version311.patch()); assertFalse(version311.isReleaseCandidate()); assertEquals("3.1.1", version311.originalVersion()); // Test version with RC suffix SemanticVersion version400rc = new SemanticVersion("4.0.0-rc"); assertEquals(4, version400rc.major()); assertEquals(0, version400rc.minor()); assertEquals(0, version400rc.patch()); assertTrue(version400rc.isReleaseCandidate()); assertEquals("4.0.0-rc", version400rc.originalVersion()); // Test version with RC1 suffix SemanticVersion version401rc1 = new SemanticVersion("4.0.1-RC1"); assertEquals(4, version401rc1.major()); assertEquals(0, version401rc1.minor()); assertEquals(1, version401rc1.patch()); assertTrue(version401rc1.isReleaseCandidate()); 
assertEquals("4.0.1-RC1", version401rc1.originalVersion()); } @Test public void testSemanticVersionComparison() { SemanticVersion v310 = new SemanticVersion("3.1.0"); SemanticVersion v311 = new SemanticVersion("3.1.1"); SemanticVersion v320 = new SemanticVersion("3.2.0"); SemanticVersion v400 = new SemanticVersion("4.0.0"); SemanticVersion v401 = new SemanticVersion("4.0.1"); SemanticVersion v501 = new SemanticVersion("5.0.1"); // Test less than assertTrue(v310.compareTo(v311) < 0); assertTrue(v311.compareTo(v320) < 0); assertTrue(v320.compareTo(v400) < 0); assertTrue(v400.compareTo(v401) < 0); assertTrue(v310.compareTo(v501) < 0); // Test greater than assertTrue(v311.compareTo(v310) > 0); assertTrue(v320.compareTo(v311) > 0); assertTrue(v400.compareTo(v320) > 0); assertTrue(v401.compareTo(v400) > 0); assertTrue(v501.compareTo(v401) > 0); // Test equals SemanticVersion v311_2 = new SemanticVersion("3.1.1"); assertEquals(0, v311.compareTo(v311_2)); assertEquals(v311, v311_2); // Test RC versions are treated same as non-RC for comparison (major.minor.patch only) SemanticVersion v400rc = new SemanticVersion("4.0.0-rc"); assertEquals(0, v400.compareTo(v400rc)); } @Test public void testSemanticVersionInvalidFormat() { try { new SemanticVersion("invalid"); fail("Should have thrown IllegalArgumentException"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("Invalid version format")); } try { new SemanticVersion("1.2"); fail("Should have thrown IllegalArgumentException"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("Invalid version format")); } } @Test public void testFindRecommendedVersion() { // v4.0.0 should recommend v5.0.0 (highest available) List availableVersions = asList("3.3.1", "4.0.0", "4.0.1", "4.1.0", "5.0.0"); SemanticVersion current = new SemanticVersion("4.0.0"); String recommended = Utils.findRecommendedVersion(current, availableVersions); assertEquals("5.0.0", recommended); } @Test public void 
testFindRecommendedVersionFiltersRCVersions() { // Scenario 3: Should not recommend RC versions List availableVersions = asList("3.1.1", "3.2.0-rc", "3.2.0-RC1", "4.0.0-rc"); SemanticVersion current = new SemanticVersion("4.1.1"); String recommended = Utils.findRecommendedVersion(current, availableVersions); assertNull(recommended); // No stable version available newer than 3.1.1 } @Test public void testFindRecommendedVersionNoUpgradeAvailable() { // Current is already latest List availableVersions = asList("4.1.0", "4.2.0", "4.3.1"); SemanticVersion current = new SemanticVersion("4.3.1"); String recommended = Utils.findRecommendedVersion(current, availableVersions); assertNull(recommended); } @Test public void testFindRecommendedVersionWithEmptyList() { // Empty version list should return null List availableVersions = emptyList(); SemanticVersion current = new SemanticVersion("3.1.1"); String recommended = Utils.findRecommendedVersion(current, availableVersions); assertNull(recommended); } @Test public void testFindRecommendedVersionWithInvalidVersions() { // Invalid versions should be skipped List availableVersions = asList("3.1.1", "invalid", "3.2.0", "bad.version", "3.3.0"); SemanticVersion current = new SemanticVersion("3.1.1"); String recommended = Utils.findRecommendedVersion(current, availableVersions); assertEquals("3.3.0", recommended); } @Test public void testFindRecommendedVersionOnlyRCVersionsAvailable() { // Only RC versions newer than current - should return null List availableVersions = asList("3.1.0", "3.1.1", "3.2.0-RC", "3.3.0-rc1"); SemanticVersion current = new SemanticVersion("3.1.1"); String recommended = Utils.findRecommendedVersion(current, availableVersions); assertNull(recommended); } @Test public void testSanitizationToggle() { Map emptyMap = new HashMap<>(); // Sanitization enabled (v3 compatible) String uppercased = Utils.getTableName("MyTopic", emptyMap, true); assertEquals("MYTOPIC", uppercased, "Valid identifier should be 
uppercased"); String sanitized = Utils.getTableName("my-topic", emptyMap, true); assertTrue( sanitized.startsWith("MY_TOPIC_"), "Invalid identifier should be sanitized+uppercased"); assertTrue(sanitized.matches("^[A-Z_0-9]+$"), "Should be fully uppercased"); // Sanitization disabled (pass through) String passedThrough = Utils.getTableName("MyTopic", emptyMap, false); assertEquals("MyTopic", passedThrough, "Should pass through unchanged"); String invalid = Utils.getTableName("my-topic", emptyMap, false); assertEquals("my-topic", invalid, "Invalid identifier should pass through"); } @Test public void testMapEntriesBypassSanitization() { Map map = TopicToTableParser.parse("myTopic:\"My-Table\",otherTopic:MixedCase"); // Quoted table names preserve case; unquoted are uppercased at parse time assertEquals("My-Table", Utils.getTableName("myTopic", map, true)); assertEquals("My-Table", Utils.getTableName("myTopic", map, false)); assertEquals("MIXEDCASE", Utils.getTableName("otherTopic", map, true)); assertEquals("MIXEDCASE", Utils.getTableName("otherTopic", map, false)); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/builder/SinkRecordBuilder.java ================================================ package com.snowflake.kafka.connector.builder; import com.google.common.base.Preconditions; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.sink.SinkRecord; public class SinkRecordBuilder { private final String topic; private final int partition; private Schema keySchema = Schema.STRING_SCHEMA; private Object key = "key"; private Schema valueSchema = Schema.STRING_SCHEMA; private Object value = "{\"name\":123}"; private long offset = 0; private Long timestamp = null; private TimestampType timestampType = TimestampType.NO_TIMESTAMP_TYPE; private SinkRecordBuilder(String topic, int partition) { 
this.topic = topic;
    this.partition = partition;
  }

  public static SinkRecordBuilder forTopicPartition(String topic, int partition) {
    return new SinkRecordBuilder(topic, partition);
  }

  public SinkRecord build() {
    return new SinkRecord(
        topic, partition, keySchema, key, valueSchema, value, offset, timestamp, timestampType);
  }

  public SinkRecordBuilder withKeySchema(Schema keySchema) {
    this.keySchema = keySchema;
    return this;
  }

  public SinkRecordBuilder withKey(Object key) {
    this.key = key;
    return this;
  }

  public SinkRecordBuilder withValueSchema(Schema valueSchema) {
    this.valueSchema = valueSchema;
    return this;
  }

  public SinkRecordBuilder withValue(Object value) {
    this.value = value;
    return this;
  }

  public SinkRecordBuilder withSchemaAndValue(SchemaAndValue schemaAndValue) {
    this.valueSchema = schemaAndValue.schema();
    this.value = schemaAndValue.value();
    return this;
  }

  public SinkRecordBuilder withOffset(long offset) {
    this.offset = offset;
    return this;
  }

  public SinkRecordBuilder withTimestamp(long timestamp, TimestampType timestampType) {
    Preconditions.checkArgument(
        timestampType != TimestampType.NO_TIMESTAMP_TYPE,
        "NO_TIMESTAMP_TYPE is the default timestampType");
    this.timestamp = timestamp;
    this.timestampType = timestampType;
    return this;
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/ClientValidationConfigTest.java
================================================
package com.snowflake.kafka.connector.config;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.config.ConfigDef;
import org.junit.jupiter.api.Test;

/**
 * Checks how the {@code SNOWFLAKE_VALIDATION} setting is declared in the {@link ConfigDef}
 * returned by {@code ConnectorConfigDefinition.getConfig()}: that the key exists, what its
 * default is, and that the values {@code server_side} and {@code client_side} survive parsing.
 */
public class ClientValidationConfigTest {

  /** The key must be present in the connector's config definition. */
  @Test
  public void testValidationConfigExists() {
    ConfigDef configDef = ConnectorConfigDefinition.getConfig();

    assertNotNull(
        configDef.configKeys().get(SNOWFLAKE_VALIDATION),
        "snowflake.validation should be defined in config");
  }

  /** The declared default must equal {@code SNOWFLAKE_VALIDATION_DEFAULT}. */
  @Test
  public void testValidationDefaultValue() {
    ConfigDef configDef = ConnectorConfigDefinition.getConfig();
    ConfigDef.ConfigKey validationKey = configDef.configKeys().get(SNOWFLAKE_VALIDATION);

    // Fail with a readable message instead of a NullPointerException when the key is missing.
    assertNotNull(validationKey, "snowflake.validation should be defined in config");
    assertEquals(
        SNOWFLAKE_VALIDATION_DEFAULT,
        validationKey.defaultValue,
        "Default value should be server_side");
  }

  /** An explicit {@code server_side} value is returned unchanged by {@code parse}. */
  @Test
  public void testValidationCanBeSetToServerSide() {
    Map<String, String> props = new HashMap<>();
    props.put(SNOWFLAKE_VALIDATION, "server_side");

    assertEquals(
        "server_side",
        parseValidation(props),
        "Should be able to set validation to server_side");
  }

  /** An explicit {@code client_side} value is returned unchanged by {@code parse}. */
  @Test
  public void testValidationCanBeSetToClientSide() {
    Map<String, String> props = new HashMap<>();
    props.put(SNOWFLAKE_VALIDATION, "client_side");

    assertEquals(
        "client_side",
        parseValidation(props),
        "Should be able to set validation to client_side");
  }

  /** Parsing an empty property map yields {@code server_side} for the validation key. */
  @Test
  public void testValidationDefaultsToServerSide() {
    Map<String, String> props = new HashMap<>();

    assertEquals(
        "server_side",
        parseValidation(props),
        "Should default to server_side when not specified");
  }

  /**
   * Parses {@code props} with the connector's config definition and returns the parsed value
   * stored under {@code SNOWFLAKE_VALIDATION}.
   */
  private static Object parseValidation(Map<String, String> props) {
    ConfigDef configDef = ConnectorConfigDefinition.getConfig();
    Map<String, Object> parsed = configDef.parse(props);
    return parsed.get(SNOWFLAKE_VALIDATION);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/SinkTaskConfigTest.java
================================================
package com.snowflake.kafka.connector.config;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.*;
import static org.junit.jupiter.api.Assertions.*;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Utils;
import
com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode; import java.util.HashMap; import java.util.Map; import org.junit.jupiter.api.Test; public class SinkTaskConfigTest { private static Map minimalConfig() { Map config = new HashMap<>(); config.put(NAME, "test_connector"); config.put(Utils.TASK_ID, "0"); config.put(SNOWFLAKE_URL_NAME, "https://account.snowflakecomputing.com"); config.put(SNOWFLAKE_USER_NAME, "user"); config.put(SNOWFLAKE_ROLE_NAME, "role"); config.put(SNOWFLAKE_DATABASE_NAME, "db"); config.put(SNOWFLAKE_SCHEMA_NAME, "schema"); return config; } @Test public void from_minimalConfig_succeeds() { SinkTaskConfig config = SinkTaskConfig.from(minimalConfig()); assertEquals("test_connector", config.getConnectorName()); assertEquals("0", config.getTaskId()); assertTrue(config.getTopicToTableMap().isEmpty()); assertEquals( ConnectorConfigTools.BehaviorOnNullValues.DEFAULT, config.getBehaviorOnNullValues()); assertTrue(config.isJmxEnabled()); assertFalse(config.isTolerateErrors()); assertNull(config.getDlqTopicName()); assertFalse(config.isEnableSanitization()); assertTrue(config.isEnableSchematization()); assertEquals(SnowflakeValidation.SERVER_SIDE, config.getValidation()); assertEquals(50, config.getOpenChannelIoThreads()); assertNotNull(config.getCachingConfig()); assertNotNull(config.getMetadataConfig()); } @Test public void from_missingConnectorName_throws() { Map config = minimalConfig(); config.remove(NAME); IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(config)); assertTrue(e.getMessage().contains("Connector name")); } @Test public void from_missingTaskId_throws() { Map config = minimalConfig(); config.remove(Utils.TASK_ID); IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(config)); assertTrue(e.getMessage().contains("Task ID")); } @Test public void from_emptyConnectorName_throws() { Map config = minimalConfig(); 
config.put(NAME, " "); assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(config)); } @Test public void from_overridesDefaults() { Map config = minimalConfig(); config.put(BEHAVIOR_ON_NULL_VALUES, "ignore"); config.put(JMX_OPT, "false"); config.put(ERRORS_TOLERANCE_CONFIG, "all"); config.put(ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG, "dlq-topic"); config.put(SNOWFLAKE_OPEN_CHANNEL_IO_THREADS, "10"); config.put(SNOWFLAKE_ENABLE_SCHEMATIZATION, "false"); SinkTaskConfig parsed = SinkTaskConfig.from(config); assertEquals( ConnectorConfigTools.BehaviorOnNullValues.IGNORE, parsed.getBehaviorOnNullValues()); assertFalse(parsed.isJmxEnabled()); assertTrue(parsed.isTolerateErrors()); assertEquals("dlq-topic", parsed.getDlqTopicName()); assertEquals(10, parsed.getOpenChannelIoThreads()); assertFalse(parsed.isEnableSchematization()); } @Test public void from_topic2tableMap_parsed() { Map config = minimalConfig(); config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "t1:table1,t2:table2"); SinkTaskConfig parsed = SinkTaskConfig.from(config); assertEquals(2, parsed.getTopicToTableMap().size()); assertEquals("TABLE1", parsed.getTopicToTableMap().get("t1")); assertEquals("TABLE2", parsed.getTopicToTableMap().get("t2")); } @Test public void from_nullMap_treatedAsEmptyAndThrowsForMissingRequired() { // from(null) replaces null with empty map, then validation fails for missing connector name assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(null)); } @Test public void from_defaultMigrationMode_isSkip() { SinkTaskConfig config = SinkTaskConfig.from(minimalConfig()); assertEquals(Ssv1MigrationMode.SKIP, config.getSsv1MigrationMode()); } @Test public void from_migrationMode_bestEffort() { Map config = minimalConfig(); config.put(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "best_effort"); SinkTaskConfig parsed = SinkTaskConfig.from(config); assertEquals(Ssv1MigrationMode.BEST_EFFORT, parsed.getSsv1MigrationMode()); } @Test public void from_migrationMode_strict() { Map 
config = minimalConfig(); config.put(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "strict"); SinkTaskConfig parsed = SinkTaskConfig.from(config); assertEquals(Ssv1MigrationMode.STRICT, parsed.getSsv1MigrationMode()); } @Test public void from_migrationMode_caseInsensitive() { Map config = minimalConfig(); config.put(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "BEST_EFFORT"); SinkTaskConfig parsed = SinkTaskConfig.from(config); assertEquals(Ssv1MigrationMode.BEST_EFFORT, parsed.getSsv1MigrationMode()); } @Test public void from_migrationMode_invalidValue_throws() { Map config = minimalConfig(); config.put(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "invalid_value"); IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(config)); assertTrue(ex.getMessage().contains(SNOWFLAKE_SSV1_OFFSET_MIGRATION)); assertTrue(ex.getMessage().contains("invalid_value")); } @Test public void from_defaultIncludeConnectorName_isFalse() { SinkTaskConfig config = SinkTaskConfig.from(minimalConfig()); assertFalse(config.isSsv1MigrationIncludeConnectorName()); } @Test public void from_includeConnectorNameTrue_isParsed() { Map raw = minimalConfig(); raw.put(SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME, "true"); SinkTaskConfig config = SinkTaskConfig.from(raw); assertTrue(config.isSsv1MigrationIncludeConnectorName()); } @Test public void from_oauthFields_areParsed() { Map raw = minimalConfig(); raw.put(SNOWFLAKE_AUTHENTICATOR, AuthenticatorType.OAUTH.toConfigValue()); raw.put(SNOWFLAKE_OAUTH_CLIENT_ID, "my_client_id"); raw.put(SNOWFLAKE_OAUTH_CLIENT_SECRET, "my_client_secret"); raw.put(SNOWFLAKE_OAUTH_REFRESH_TOKEN, "my_refresh_token"); raw.put(SNOWFLAKE_OAUTH_TOKEN_ENDPOINT, "https://oauth.example.com/token"); SinkTaskConfig config = SinkTaskConfig.from(raw); assertEquals(AuthenticatorType.OAUTH, config.getAuthenticator()); assertEquals("my_client_id", config.getOauthClientId()); assertEquals("my_client_secret", config.getOauthClientSecret().value()); 
assertEquals("my_refresh_token", config.getOauthRefreshToken().value()); assertEquals("https://oauth.example.com/token", config.getOauthTokenEndpoint()); } @Test public void from_privateKeyFields_wrappedAsPassword() { Map raw = minimalConfig(); raw.put(SNOWFLAKE_PRIVATE_KEY, "my_private_key"); raw.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, "my_passphrase"); SinkTaskConfig config = SinkTaskConfig.from(raw); assertEquals("my_private_key", config.getSnowflakePrivateKey().value()); assertEquals("my_passphrase", config.getSnowflakePrivateKeyPassphrase().value()); } @Test public void from_missingPrivateKey_returnsNull() { SinkTaskConfig config = SinkTaskConfig.from(minimalConfig()); assertNull(config.getSnowflakePrivateKey()); assertNull(config.getSnowflakePrivateKeyPassphrase()); } @Test public void from_defaultAuthenticator_isSnowflakeJwt() { SinkTaskConfig config = SinkTaskConfig.from(minimalConfig()); assertEquals(AuthenticatorType.SNOWFLAKE_JWT, config.getAuthenticator()); } @Test public void from_oauthWithoutOptionalFields_succeeds() { Map raw = minimalConfig(); raw.put(SNOWFLAKE_AUTHENTICATOR, AuthenticatorType.OAUTH.toConfigValue()); raw.put(SNOWFLAKE_OAUTH_CLIENT_ID, "client_id"); raw.put(SNOWFLAKE_OAUTH_CLIENT_SECRET, "client_secret"); SinkTaskConfig config = SinkTaskConfig.from(raw); assertEquals(AuthenticatorType.OAUTH, config.getAuthenticator()); assertNull(config.getOauthRefreshToken()); assertNull(config.getOauthTokenEndpoint()); } @Test public void from_skipTaskSpecificConfig_succeedsWithoutTaskId() { Map raw = minimalConfig(); raw.remove(Utils.TASK_ID); SinkTaskConfig config = SinkTaskConfig.from(raw, true); assertEquals("", config.getTaskId()); assertEquals("test_connector", config.getConnectorName()); } @Test public void from_skipTaskSpecificConfig_succeedsWithoutConnectorName() { Map raw = minimalConfig(); raw.remove(NAME); raw.remove(Utils.TASK_ID); SinkTaskConfig config = SinkTaskConfig.from(raw, true); assertEquals("", config.getConnectorName()); 
assertEquals("", config.getTaskId()); } @Test public void from_skipTaskSpecificConfig_false_throwsWithoutTaskId() { Map raw = minimalConfig(); raw.remove(Utils.TASK_ID); assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(raw)); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/config/SinkTaskConfigTestBuilder.java ================================================ package com.snowflake.kafka.connector.config; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.CachingConfig; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode; import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig; import java.util.Collections; import java.util.HashMap; /** * Test-only builder for {@link SinkTaskConfig}. Provides a builder with default values for all * optional fields. Caller must set connectorName and taskId before build(). * *

    Production code uses {@link SinkTaskConfig#from(java.util.Map)}; this class is for tests that
 * need to construct a config without parsing a Map.
 */
public final class SinkTaskConfigTestBuilder {

  /** Holder for the static factory only; not meant to be instantiated. */
  private SinkTaskConfigTestBuilder() {}

  /**
   * Creates a {@link SinkTaskConfig.Builder} whose optional fields are already populated with
   * defaults. connectorName and taskId are left for the caller to set before build().
   */
  public static SinkTaskConfig.Builder builder() {
    final SnowflakeValidation validationDefault =
        SnowflakeValidation.fromConfig(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT);
    final CachingConfig cachingDefault = CachingConfig.fromConfig(Collections.emptyMap());
    final SnowflakeMetadataConfig metadataDefault = new SnowflakeMetadataConfig();

    final SinkTaskConfig.Builder prefilled =
        SinkTaskConfig.builder()
            // Routing, null handling and error reporting.
            .topicToTableMap(new HashMap<>())
            .behaviorOnNullValues(ConnectorConfigTools.BehaviorOnNullValues.DEFAULT)
            .jmxEnabled(KafkaConnectorConfigParams.JMX_OPT_DEFAULT)
            .tolerateErrors(false)
            .errorsLogEnable(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_DEFAULT)
            .dlqTopicName(KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_DEFAULT)
            // Compatibility, schematization and validation switches.
            .enableSanitization(
                KafkaConnectorConfigParams
                    .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT)
            .enableColumnIdentifierNormalization(
                KafkaConnectorConfigParams
                    .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT)
            .enableSchematization(
                KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT)
            .validation(validationDefault)
            // Streaming client, caching and metadata.
            .openChannelIoThreads(
                KafkaConnectorConfigParams.SNOWFLAKE_OPEN_CHANNEL_IO_THREADS_DEFAULT)
            .streamingClientProviderOverrideMap("")
            .cachingConfig(cachingDefault)
            .metadataConfig(metadataDefault)
            // Connection and authentication placeholders.
            .snowflakeUrl("")
            .snowflakeUser("")
            .snowflakeRole("")
            .snowflakePrivateKey(null)
            .snowflakePrivateKeyPassphrase(null)
            .authenticator(AuthenticatorType.SNOWFLAKE_JWT)
            .snowflakeDatabase("")
            .snowflakeSchema("")
            // SSv1 offset migration.
            .ssv1MigrationMode(Ssv1MigrationMode.SKIP)
            .ssv1MigrationIncludeConnectorName(false);
    return prefilled;
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/SnowflakeSinkConnectorConfigBuilder.java
================================================
package com.snowflake.kafka.connector.config; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import java.util.HashMap; import java.util.Map; /** * This is a builder class for the connector config. For now it returns map. Let's change it to a * more convenient abstraction when we have it. */ public class SnowflakeSinkConnectorConfigBuilder { private final Map config = new HashMap(); private SnowflakeSinkConnectorConfigBuilder() {} public static SnowflakeSinkConnectorConfigBuilder streamingConfig() { return commonRequiredFields().withCompatibilityValidate(false); } private static SnowflakeSinkConnectorConfigBuilder commonRequiredFields() { return new SnowflakeSinkConnectorConfigBuilder() .withName("test") .withTopics("topic1,topic2") .withUrl("https://testaccount.snowflake.com:443") .withSchema("testSchema") .withDatabase("testDatabase") .withUser("userName") .withPrivateKey("fdsfsdfsdfdsfdsrqwrwewrwrew42314424") .withRole("role"); } public SnowflakeSinkConnectorConfigBuilder withName(String name) { config.put(KafkaConnectorConfigParams.NAME, name); return this; } public SnowflakeSinkConnectorConfigBuilder withTopics(String topics) { config.put(KafkaConnectorConfigParams.TOPICS, topics); return this; } public SnowflakeSinkConnectorConfigBuilder withUrl(String url) { config.put(SNOWFLAKE_URL_NAME, url); return this; } public SnowflakeSinkConnectorConfigBuilder withDatabase(String database) { config.put(SNOWFLAKE_DATABASE_NAME, 
database); return this; } public SnowflakeSinkConnectorConfigBuilder withSchema(String schema) { config.put(SNOWFLAKE_SCHEMA_NAME, schema); return this; } public SnowflakeSinkConnectorConfigBuilder withUser(String user) { config.put(SNOWFLAKE_USER_NAME, user); return this; } public SnowflakeSinkConnectorConfigBuilder withPrivateKey(String privateKey) { config.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, privateKey); return this; } public SnowflakeSinkConnectorConfigBuilder withRole(String role) { config.put(SNOWFLAKE_ROLE_NAME, role); return this; } public SnowflakeSinkConnectorConfigBuilder withoutRole() { config.remove(SNOWFLAKE_ROLE_NAME); return this; } public SnowflakeSinkConnectorConfigBuilder withAuthenticator(String authenticator) { config.put(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR, authenticator); return this; } public SnowflakeSinkConnectorConfigBuilder withOauthClientId(String clientId) { config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID, clientId); return this; } public SnowflakeSinkConnectorConfigBuilder withOauthClientSecret(String clientSecret) { config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET, clientSecret); return this; } public SnowflakeSinkConnectorConfigBuilder withOauthRefreshToken(String refreshToken) { config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN, refreshToken); return this; } public SnowflakeSinkConnectorConfigBuilder withOauthTokenEndpoint(String tokenEndpoint) { config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT, tokenEndpoint); return this; } public SnowflakeSinkConnectorConfigBuilder withoutPrivateKey() { config.remove(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY); return this; } public SnowflakeSinkConnectorConfigBuilder withCompatibilityValidate(boolean validate) { config.put( KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC, String.valueOf(validate)); return this; } /** * Sets the three value-checked 
settings to their v3 values and explicitly sets schematization. */ public SnowflakeSinkConnectorConfigBuilder withV3CompatibilitySettings() { config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "client_side"); config.put( KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, "true"); config.put( KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION, "true"); config.put(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, "false"); config.put(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, "best_effort"); config.put( KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME, "false"); return this; } public Map build() { return config; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/dlq/InMemoryKafkaRecordErrorReporter.java ================================================ package com.snowflake.kafka.connector.dlq; import static java.util.Collections.unmodifiableList; import java.util.ArrayList; import java.util.List; import org.apache.kafka.connect.sink.SinkRecord; /** * In memory implementation of KafkaRecordErrorReporter which mimics sending records to DLQ. Here we * simply insert records into an ArrayList * *

    Used for testing. */ public final class InMemoryKafkaRecordErrorReporter implements KafkaRecordErrorReporter { private final List reportedRecords = new ArrayList<>(); @Override public void reportError(final SinkRecord record, final Exception e) { reportedRecords.add(new ReportedRecord(record, e)); } public List getReportedRecords() { return unmodifiableList(reportedRecords); } public static final class ReportedRecord { private final SinkRecord record; private final Throwable e; private ReportedRecord(final SinkRecord record, final Throwable e) { this.record = record; this.e = e; } public SinkRecord getRecord() { return record; } public Throwable getException() { return e; } @Override public String toString() { return "ReportedData{" + "record=" + record + ", e=" + e + '}'; } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/CachingSnowflakeConnectionServiceStatsTest.java ================================================ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import java.util.HashMap; import java.util.Map; import org.junit.jupiter.api.Test; /** * Tests for cache statistics logging in CachedSnowflakeConnectionService. Verifies that cache stats * can be logged without exceptions. 
*/
class CachingSnowflakeConnectionServiceStatsTest {

  /** Logging statistics after a mix of first lookups and repeated lookups must not throw. */
  @Test
  void testCacheStatisticsLogging() {
    // Given: A cached service with both caches enabled
    SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class);
    when(mockDelegate.tableExist("TABLE1")).thenReturn(true);
    when(mockDelegate.pipeExist("PIPE1")).thenReturn(true);

    CachingConfig config = createCacheConfig(true, 30000L, true, 30000L);
    CachingSnowflakeConnectionService cachedService =
        new CachingSnowflakeConnectionService(mockDelegate, config);

    // When: Perform some operations
    cachedService.tableExist("TABLE1");
    cachedService.tableExist("TABLE1"); // Cache hit
    cachedService.pipeExist("PIPE1");
    cachedService.pipeExist("PIPE1"); // Cache hit

    // Then: Log statistics (should not throw any exceptions)
    cachedService.logCacheStatistics();
  }

  /** Logging statistics must not throw when both caches are switched off. */
  @Test
  void testCacheStatisticsLoggingWithNoCacheEnabled() {
    // Given: A cached service with no caches enabled
    SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class);

    CachingConfig config = createCacheConfig(false, 30000L, false, 30000L);
    CachingSnowflakeConnectionService cachedService =
        new CachingSnowflakeConnectionService(mockDelegate, config);

    // When: Log statistics with no cache enabled
    cachedService.logCacheStatistics();

    // Then: No exception should be thrown
  }

  /**
   * Builds a {@link CachingConfig} from the four cache properties, each passed as its String
   * form.
   */
  private CachingConfig createCacheConfig(
      boolean cacheTableExists,
      long tableExpirationMs,
      boolean cachePipeExists,
      long pipeExpirationMs) {
    Map<String, String> config = new HashMap<>();
    config.put(CACHE_TABLE_EXISTS, String.valueOf(cacheTableExists));
    config.put(CACHE_TABLE_EXISTS_EXPIRE_MS, String.valueOf(tableExpirationMs));
    config.put(CACHE_PIPE_EXISTS, String.valueOf(cachePipeExists));
    config.put(CACHE_PIPE_EXISTS_EXPIRE_MS, String.valueOf(pipeExpirationMs));
    return CachingConfig.fromConfig(config);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/CachingSnowflakeConnectionServiceTest.java
================================================ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.util.HashMap; import java.util.Map; import org.junit.jupiter.api.Test; class CachingSnowflakeConnectionServiceTest { private static final String TEST_TABLE = "TEST_TABLE"; private static final String TEST_PIPE = "TEST_PIPE"; @Test void testTableExistCacheEnabled_MultipleCalls_DelegateCalledOnce() { SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.tableExist(TEST_TABLE)).thenReturn(true); CachingConfig config = createCacheConfig(true, 30000L, false, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call tableExist multiple times boolean result1 = cachedService.tableExist(TEST_TABLE); boolean result2 = cachedService.tableExist(TEST_TABLE); boolean result3 = cachedService.tableExist(TEST_TABLE); // Then: All calls return true and delegate was called only once assertTrue(result1); assertTrue(result2); assertTrue(result3); verify(mockDelegate, times(1)).tableExist(TEST_TABLE); } @Test void testTableExistCacheDisabled_MultipleCalls_DelegateCalledEveryTime() { // Given: Cache disabled for table existence SnowflakeConnectionService mockDelegate = 
mock(SnowflakeConnectionService.class); when(mockDelegate.tableExist(TEST_TABLE)).thenReturn(true); CachingConfig config = createCacheConfig(false, 30000L, false, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call tableExist multiple times boolean result1 = cachedService.tableExist(TEST_TABLE); boolean result2 = cachedService.tableExist(TEST_TABLE); boolean result3 = cachedService.tableExist(TEST_TABLE); // Then: All calls return true and delegate was called every time assertTrue(result1); assertTrue(result2); assertTrue(result3); verify(mockDelegate, times(3)).tableExist(TEST_TABLE); } @Test void testTableExistCacheEnabled_DifferentTables_DelegateCalledForEach() { // Given: Cache enabled for table existence SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.tableExist("TABLE1")).thenReturn(true); when(mockDelegate.tableExist("TABLE2")).thenReturn(false); CachingConfig config = createCacheConfig(true, 30000L, false, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call tableExist for different tables boolean result1a = cachedService.tableExist("TABLE1"); boolean result1b = cachedService.tableExist("TABLE1"); boolean result2a = cachedService.tableExist("TABLE2"); boolean result2b = cachedService.tableExist("TABLE2"); // Then: Delegate called once per unique table assertTrue(result1a); assertTrue(result1b); assertFalse(result2a); assertFalse(result2b); verify(mockDelegate, times(1)).tableExist("TABLE1"); verify(mockDelegate, times(1)).tableExist("TABLE2"); } @Test void testPipeExistCacheEnabled_MultipleCalls_DelegateCalledOnce() { // Given: Cache enabled for pipe existence SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.pipeExist(TEST_PIPE)).thenReturn(true); CachingConfig config = createCacheConfig(false, 
30000L, true, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call pipeExist multiple times boolean result1 = cachedService.pipeExist(TEST_PIPE); boolean result2 = cachedService.pipeExist(TEST_PIPE); boolean result3 = cachedService.pipeExist(TEST_PIPE); // Then: All calls return true and delegate was called only once assertTrue(result1); assertTrue(result2); assertTrue(result3); verify(mockDelegate, times(1)).pipeExist(TEST_PIPE); } @Test void testPipeExistCacheDisabled_MultipleCalls_DelegateCalledEveryTime() { // Given: Cache disabled for pipe existence SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.pipeExist(TEST_PIPE)).thenReturn(true); CachingConfig config = createCacheConfig(false, 30000L, false, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call pipeExist multiple times boolean result1 = cachedService.pipeExist(TEST_PIPE); boolean result2 = cachedService.pipeExist(TEST_PIPE); boolean result3 = cachedService.pipeExist(TEST_PIPE); // Then: All calls return true and delegate was called every time assertTrue(result1); assertTrue(result2); assertTrue(result3); verify(mockDelegate, times(3)).pipeExist(TEST_PIPE); } @Test void testPipeExistCacheEnabled_DifferentPipes_DelegateCalledForEach() { // Given: Cache enabled for pipe existence SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.pipeExist("PIPE1")).thenReturn(true); when(mockDelegate.pipeExist("PIPE2")).thenReturn(false); CachingConfig config = createCacheConfig(false, 30000L, true, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call pipeExist for different pipes boolean result1a = cachedService.pipeExist("PIPE1"); boolean result1b = cachedService.pipeExist("PIPE1"); 
boolean result2a = cachedService.pipeExist("PIPE2"); boolean result2b = cachedService.pipeExist("PIPE2"); // Then: Delegate called once per unique pipe assertTrue(result1a); assertTrue(result1b); assertFalse(result2a); assertFalse(result2b); verify(mockDelegate, times(1)).pipeExist("PIPE1"); verify(mockDelegate, times(1)).pipeExist("PIPE2"); } @Test void testCacheExpiration_TableExists() throws InterruptedException { // Given: Very short cache expiration SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.tableExist(TEST_TABLE)).thenReturn(true); CachingConfig config = createCacheConfig(true, 100L, false, 30000L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call tableExist, wait for expiration, call again cachedService.tableExist(TEST_TABLE); Thread.sleep(150); // Wait for cache to expire cachedService.tableExist(TEST_TABLE); // Then: Delegate was called twice (cache expired) verify(mockDelegate, times(2)).tableExist(TEST_TABLE); } @Test void testCacheExpiration_PipeExists() throws InterruptedException { // Given: Very short cache expiration SnowflakeConnectionService mockDelegate = mock(SnowflakeConnectionService.class); when(mockDelegate.pipeExist(TEST_PIPE)).thenReturn(true); CachingConfig config = createCacheConfig(false, 30000L, true, 100L); CachingSnowflakeConnectionService cachedService = new CachingSnowflakeConnectionService(mockDelegate, config); // When: Call pipeExist, wait for expiration, call again cachedService.pipeExist(TEST_PIPE); Thread.sleep(150); // Wait for cache to expire cachedService.pipeExist(TEST_PIPE); // Then: Delegate was called twice (cache expired) verify(mockDelegate, times(2)).pipeExist(TEST_PIPE); } private CachingConfig createCacheConfig( boolean cacheTableExists, long tableExpirationMs, boolean cachePipeExists, long pipeExpirationMs) { Map config = new HashMap<>(); config.put(CACHE_TABLE_EXISTS, 
String.valueOf(cacheTableExists)); config.put(CACHE_TABLE_EXISTS_EXPIRE_MS, String.valueOf(tableExpirationMs)); config.put(CACHE_PIPE_EXISTS, String.valueOf(cachePipeExists)); config.put(CACHE_PIPE_EXISTS_EXPIRE_MS, String.valueOf(pipeExpirationMs)); return CachingConfig.fromConfig(config); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/ConnectionServiceIT.java ================================================ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME; import static org.assertj.core.api.AssertionsForClassTypes.assertThat; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Map; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; class ConnectionServiceIT { private final SnowflakeConnectionService conn = buildNoCachingConnection(); private static SnowflakeConnectionService buildNoCachingConnection() { Map config = TestUtils.transformProfileFileToConnectorConfiguration(false); config.put(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, "false"); config.put(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, "false"); return SnowflakeConnectionServiceFactory.builder().setProperties(config).build(); } private final String tableName = TestUtils.randomTableName(); private final String tableName1 = TestUtils.randomTableName(); @Test void testEncryptedKey() { // no exception SnowflakeConnectionServiceFactory.builder() .setProperties(TestUtils.transformProfileFileToConnectorConfiguration(true)) .build(); } @Test void testSetSSLProperties() { Map testConfig = TestUtils.transformProfileFileToConnectorConfiguration(false); testConfig.put( 
KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, "https://sfctest0.snowflakecomputing.com"); assert SnowflakeConnectionServiceFactory.builder() .setProperties(testConfig) .getProperties() .getProperty(InternalUtils.JDBC_SSL) .equals("on"); testConfig.put( KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, "sfctest0.snowflakecomputing.com"); assert SnowflakeConnectionServiceFactory.builder() .setProperties(testConfig) .getProperties() .getProperty(InternalUtils.JDBC_SSL) .equals("on"); testConfig.put( KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, "http://sfctest0.snowflakecomputing.com:400"); assert SnowflakeConnectionServiceFactory.builder() .setProperties(testConfig) .getProperties() .getProperty(InternalUtils.JDBC_SSL) .equals("off"); } @Test void createConnectionService_SnowpipeStreaming() { Map config = TestUtils.getConnectorConfigurationForStreaming(false); ConnectorConfigTools.setDefaultValues(config); SnowflakeConnectionService service = SnowflakeConnectionServiceFactory.builder().setProperties(config).build(); assert service.getConnectorName().equals(TEST_CONNECTOR_NAME); assertThat(service.getTelemetryClient()).isInstanceOf(SnowflakeTelemetryService.class); } @AfterEach void afterEach() { TestUtils.dropTable(tableName); TestUtils.dropTable(tableName1); } @Test void testTableFunctions() throws SQLException { // table doesn't exist assert !conn.tableExist(tableName); // create table TestUtils.createTableWithMetadataColumn(tableName); // table exists assert conn.tableExist(tableName); // insert some value TestUtils.executeQuery("insert into \"" + tableName + "\" values(123)"); ResultSet resultSet = TestUtils.showTable(tableName); // value inserted assert InternalUtils.resultSize(resultSet) == 1; // create table if not exists TestUtils.createTableWithMetadataColumn(tableName); resultSet = TestUtils.showTable(tableName); // table hasn't been overwritten assert InternalUtils.resultSize(resultSet) == 1; // overwrite table 
TestUtils.createTableWithMetadataColumn(tableName, true); resultSet = TestUtils.showTable(tableName); // new table assert InternalUtils.resultSize(resultSet) == 0; // table is compatible assert conn.isTableCompatible(tableName); TestUtils.dropTable(tableName); // dropped table assert !conn.tableExist(tableName); // create incompatible table TestUtils.executeQuery("create table \"" + tableName + "\" (num int)"); assert !conn.isTableCompatible(tableName); TestUtils.dropTable(tableName); } @Test void testConnectionFunction() { SnowflakeConnectionService service = TestUtils.getConnectionService(); assert !service.isClosed(); service.close(); assert service.isClosed(); } /** * Integration test for SNOW-3029864: Verifies that the configured snowflake.role.name is actually * used when establishing JDBC connections for DDL operations (table creation, schema checks, * etc.). */ @Test void testRoleIsUsedInJdbcConnection() throws SQLException { // given - connection service with role from config Map config = TestUtils.transformProfileFileToConnectorConfiguration(true); String expectedRole = config.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME); SnowflakeConnectionService service = SnowflakeConnectionServiceFactory.builder().setProperties(config).build(); String actualRole; // when - get JDBC connection and query current role try (Statement stmt = service.getConnection().createStatement(); ResultSet resultSet = stmt.executeQuery("SELECT CURRENT_ROLE()")) { resultSet.next(); actualRole = resultSet.getString(1); } // then - the active role should match the configured role (case-insensitive, Snowflake // uppercases) assertThat(actualRole) .as("JDBC connection should use the configured snowflake.role.name") .isEqualToIgnoringCase(expectedRole); // and - DDL operations (table creation) should work with this role String testTable = TestUtils.randomTableName(); TestUtils.createTableWithMetadataColumn(testTable); assertThat(service.tableExist(testTable)) .as("Table creation 
should succeed with the configured role") .isTrue(); // cleanup TestUtils.dropTable(testTable); service.close(); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/EmbeddedProxyServer.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package com.snowflake.kafka.connector.internal; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.ServerSocket; import java.net.Socket; import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import org.junit.rules.ExternalResource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Lightweight in-process HTTP CONNECT proxy for testing proxy configurations. Supports basic * authentication and HTTPS tunneling via the CONNECT method. * *

    Can be used as a JUnit Rule to automatically manage the proxy lifecycle per test method.
 *
 * Every request must carry a Proxy-Authorization header with Basic credentials matching the
 * configured username and password, otherwise 407 is returned. Only CONNECT is served; any other
 * method is answered with 405.
 */
public class EmbeddedProxyServer extends ExternalResource {

  private static final Logger LOGGER = LoggerFactory.getLogger(EmbeddedProxyServer.class);

  /** Header name (with colon) matched case-insensitively against each request header line. */
  private static final String PROXY_AUTH_HEADER = "proxy-authorization:";

  /** Authentication scheme prefix expected in the Proxy-Authorization value. */
  private static final String BASIC_PREFIX = "Basic ";

  private final String username;
  private final String password;

  private ServerSocket serverSocket;
  private ExecutorService executor;
  private volatile boolean running;

  public EmbeddedProxyServer(final String username, final String password) {
    this.username = username;
    this.password = password;
  }

  /**
   * Binds to a random free port and starts the daemon accept thread.
   *
   * @throws IllegalStateException if the server is already running
   * @throws RuntimeException if the listening socket cannot be opened
   */
  public final void start() {
    if (serverSocket != null) {
      throw new IllegalStateException("Proxy server is already running");
    }
    try {
      serverSocket = new ServerSocket(0); // random available port
      running = true;
      executor =
          Executors.newCachedThreadPool(
              r -> {
                Thread t = new Thread(r, "proxy-worker");
                t.setDaemon(true);
                return t;
              });

      // Hand the accept thread its own references: stop() nulls the fields, and the loop must
      // not dereference them after that.
      final ServerSocket listener = serverSocket;
      final ExecutorService workers = executor;
      Thread acceptThread = new Thread(() -> acceptLoop(listener, workers), "proxy-accept");
      acceptThread.setDaemon(true);
      acceptThread.start();

      LOGGER.info("Proxy server started on localhost:{}", serverSocket.getLocalPort());
    } catch (IOException e) {
      throw new RuntimeException("Failed to start proxy server: " + e.getMessage(), e);
    }
  }

  /** Accepts clients on {@code listener} until the server is stopped or the socket is closed. */
  private void acceptLoop(ServerSocket listener, ExecutorService workers) {
    while (running && !listener.isClosed()) {
      try {
        Socket client = listener.accept();
        try {
          workers.submit(() -> handleClient(client));
        } catch (java.util.concurrent.RejectedExecutionException e) {
          // The pool was shut down between accept() and submit(); do not leak the socket.
          closeQuietly(client);
        }
      } catch (IOException e) {
        if (running) {
          LOGGER.warn("Accept failed: {}", e.getMessage());
        }
      }
    }
  }

  /**
   * Serves one client connection: reads the request line and headers, enforces authentication and
   * dispatches CONNECT requests. The client socket is always closed before returning.
   */
  private void handleClient(Socket client) {
    try {
      client.setSoTimeout(300_000);
      InputStream in = client.getInputStream();
      OutputStream out = client.getOutputStream();

      // Read the request line and headers
      String requestLine = readLine(in);
      if (requestLine == null) {
        return;
      }
      LOGGER.debug("Proxy request: {}", requestLine);

      String proxyAuth = null;
      String line;
      while ((line = readLine(in)) != null && !line.isEmpty()) {
        // regionMatches(ignoreCase) avoids the locale-dependent result of toLowerCase().
        if (line.regionMatches(true, 0, PROXY_AUTH_HEADER, 0, PROXY_AUTH_HEADER.length())) {
          proxyAuth = line.substring(PROXY_AUTH_HEADER.length()).trim();
        }
      }

      // Check authentication
      if (!checkAuth(proxyAuth)) {
        writeResponse(
            out,
            "HTTP/1.1 407 Proxy Authentication Required\r\n"
                + "Proxy-Authenticate: Basic realm=\"proxy\"\r\n"
                + "Content-Length: 0\r\n\r\n");
        return;
      }

      // Handle CONNECT (HTTPS tunneling)
      if (requestLine.startsWith("CONNECT ")) {
        handleConnect(requestLine, client, out);
      } else {
        // For non-CONNECT, just close — tests only need CONNECT for Snowflake HTTPS
        writeResponse(out, "HTTP/1.1 405 Method Not Allowed\r\nContent-Length: 0\r\n\r\n");
      }
    } catch (Exception e) {
      LOGGER.debug("Client handler error: {}", e.getMessage());
    } finally {
      closeQuietly(client);
    }
  }

  /**
   * Opens the tunnel requested by a "CONNECT host:port HTTP/1.1" line and relays bytes in both
   * directions until the remote side closes. Replies 400 for an unparsable target and 502 when the
   * remote cannot be reached. The caller closes {@code client}.
   *
   * @throws IOException if writing a response to the client fails
   */
  private void handleConnect(String requestLine, Socket client, OutputStream clientOut)
      throws IOException {
    final String badRequest = "HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\n\r\n";

    // Parse "CONNECT host:port HTTP/1.1"
    String[] parts = requestLine.split(" ");
    if (parts.length < 2) {
      writeResponse(clientOut, badRequest);
      return;
    }
    String[] hostPort = parts[1].split(":");
    if (hostPort.length == 0) {
      // Target was just ":"; there is no host to connect to.
      writeResponse(clientOut, badRequest);
      return;
    }
    String host = hostPort[0];
    int port;
    try {
      port = hostPort.length > 1 ? Integer.parseInt(hostPort[1]) : 443;
    } catch (NumberFormatException e) {
      writeResponse(clientOut, badRequest);
      return;
    }

    final Socket remote;
    try {
      remote = new Socket(host, port);
    } catch (IOException | RuntimeException e) {
      // 502 is only meaningful before the tunnel is established.
      writeResponse(clientOut, "HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\n\r\n");
      return;
    }

    // try-with-resources closes the upstream socket on every exit path, which also unblocks the
    // client-to-remote relay thread.
    try (Socket upstream = remote) {
      // Send 200 to client
      writeResponse(clientOut, "HTTP/1.1 200 Connection Established\r\n\r\n");

      // Bidirectional relay
      Thread toRemote = new Thread(() -> relay(client, upstream), "proxy-to-remote");
      toRemote.setDaemon(true);
      toRemote.start();
      relay(upstream, client);
      toRemote.join(5000);
      toRemote.interrupt();
    } catch (InterruptedException e) {
      // Preserve the interrupt for the pool (shutdownNow) instead of swallowing it.
      Thread.currentThread().interrupt();
    }
  }

  /** Copies bytes from {@code from} to {@code to} until end of stream or an I/O error. */
  private static void relay(Socket from, Socket to) {
    try {
      InputStream in = from.getInputStream();
      OutputStream out = to.getOutputStream();
      byte[] buf = new byte[8192];
      int n;
      while ((n = in.read(buf)) != -1) {
        out.write(buf, 0, n);
        out.flush();
      }
    } catch (IOException ignored) {
      // Connection closed
    }
  }

  /**
   * Returns true when {@code proxyAuth} is a Basic credential equal to "username:password".
   * Missing values, other schemes and malformed Base64 all yield false.
   */
  private boolean checkAuth(String proxyAuth) {
    if (proxyAuth == null
        || !proxyAuth.regionMatches(true, 0, BASIC_PREFIX, 0, BASIC_PREFIX.length())) {
      return false;
    }
    final byte[] decoded;
    try {
      decoded = Base64.getDecoder().decode(proxyAuth.substring(BASIC_PREFIX.length()).trim());
    } catch (IllegalArgumentException e) {
      // Not valid Base64: treat as failed authentication so the client receives a 407.
      return false;
    }
    return new String(decoded, StandardCharsets.UTF_8).equals(username + ":" + password);
  }

  /** Writes an ASCII response and flushes it. */
  private static void writeResponse(OutputStream out, String response) throws IOException {
    out.write(response.getBytes(StandardCharsets.US_ASCII));
    out.flush();
  }

  /** Closes a socket, ignoring any failure to do so. */
  private static void closeQuietly(Socket socket) {
    try {
      socket.close();
    } catch (IOException ignored) {
      // nothing useful to do
    }
  }

  /**
   * Reads one line terminated by CRLF or LF, without the terminator.
   *
   * @return the line, or null when the stream ended before any character was read
   */
  private static String readLine(InputStream in) throws IOException {
    StringBuilder sb = new StringBuilder();
    int c;
    while ((c = in.read()) != -1) {
      if (c == '\r') {
        int next = in.read(); // consume \n
        if (next != '\n' && next != -1) {
          // A bare CR is ordinary data: keep it and the character that followed.
          sb.append((char) c);
          sb.append((char) next);
          continue;
        }
        break;
      }
      if (c == '\n') break;
      sb.append((char) c);
    }
    return c == -1 && sb.length() == 0 ? null : sb.toString();
  }

  /**
   * Closes the listening socket and shuts the worker pool down, waiting up to two seconds.
   *
   * @throws IllegalStateException if the server is not running
   */
  public final void stop() {
    if (serverSocket == null) {
      throw new IllegalStateException("Proxy server is not running");
    }
    LOGGER.info("Stopping proxy server on port {}", serverSocket.getLocalPort());
    running = false;
    try {
      serverSocket.close();
    } catch (IOException e) {
      LOGGER.warn("Error closing server socket", e);
    }
    serverSocket = null;
    if (executor != null) {
      executor.shutdownNow();
      try {
        executor.awaitTermination(2, TimeUnit.SECONDS);
      } catch (InterruptedException ignored) {
        Thread.currentThread().interrupt();
      }
      executor = null;
    }
    LOGGER.info("Proxy server stopped");
  }

  public final boolean isRunning() {
    return serverSocket != null && !serverSocket.isClosed();
  }

  /**
   * @return the local port the proxy listens on
   * @throws IllegalStateException if the server is not running
   */
  public final int getPort() {
    if (serverSocket == null) {
      throw new IllegalStateException("Proxy server is not running");
    }
    return serverSocket.getLocalPort();
  }

  public final String getUsername() {
    return username;
  }

  public final String getPassword() {
    return password;
  }

  @Override
  protected final void before() {
    start();
  }

  @Override
  protected final void after() {
    if (isRunning()) {
      stop();
    }
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/InternalUtilsTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.mock.MockResultSetForSizeTest;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.common.config.types.Password; import org.junit.jupiter.api.Test; public class InternalUtilsTest { @Test public void testPrivateKey() { assert TestUtils.assertError( SnowflakeErrors.ERROR_0002, () -> PrivateKeyTool.parsePrivateKey("adfsfsaff", null)); Map connectorConfiguration = TestUtils.transformProfileFileToConnectorConfiguration(true); String privateKey = connectorConfiguration.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY); String pass = connectorConfiguration.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE); // no exception PrivateKeyTool.parsePrivateKey(privateKey, pass); StringBuilder builder = new StringBuilder(); builder.append("-----BEGIN RSA PRIVATE KEY-----\n"); for (int i = 0; i < privateKey.length(); i++) { builder.append(privateKey.charAt(i)); if ((i + 1) % 64 == 0) { builder.append("\n"); } } builder.append("\n-----END RSA PRIVATE KEY-----"); String originalKey = builder.toString(); // no exception PrivateKeyTool.parsePrivateKey(originalKey, pass); } @Test public void testTimestampToDateConversion() { long t = 1563492758649L; assert InternalUtils.timestampToDate(t).equals("2019-07-18T23:32:38Z"); } @Test public void testAssertNotEmpty() { InternalUtils.assertNotEmpty("tableName", "name"); assert TestUtils.assertError( SnowflakeErrors.ERROR_0005, () -> InternalUtils.assertNotEmpty("TABLENAME", null)); assert TestUtils.assertError( SnowflakeErrors.ERROR_0005, () -> InternalUtils.assertNotEmpty("tableName", "")); assert TestUtils.assertError( SnowflakeErrors.ERROR_0006, () -> InternalUtils.assertNotEmpty("pipeName", null)); assert TestUtils.assertError( SnowflakeErrors.ERROR_0006, () -> InternalUtils.assertNotEmpty("pipeName", "")); assert TestUtils.assertError( SnowflakeErrors.ERROR_0001, () -> InternalUtils.assertNotEmpty("conf", null)); assert TestUtils.assertError( SnowflakeErrors.ERROR_0003, () -> InternalUtils.assertNotEmpty("sfdsfdsfd", null)); assert TestUtils.assertError( 
SnowflakeErrors.ERROR_0003, () -> InternalUtils.assertNotEmpty("zxcxzcx", "")); } @Test public void testMakeJdbcDriverProperties() { Map config = TestUtils.transformProfileFileToConnectorConfiguration(true); SnowflakeURL url = TestUtils.getUrl(); SinkTaskConfig parsedConfig = SinkTaskConfig.from(config, true); Properties prop = InternalUtils.makeJdbcDriverProperties(parsedConfig, url); assert prop.containsKey(InternalUtils.JDBC_DATABASE); assert prop.containsKey(InternalUtils.JDBC_PRIVATE_KEY); assert prop.containsKey(InternalUtils.JDBC_SCHEMA); assert prop.containsKey(InternalUtils.JDBC_USER); assert prop.containsKey(InternalUtils.JDBC_SESSION_KEEP_ALIVE); assert prop.containsKey(InternalUtils.JDBC_SSL); assert prop.getProperty(InternalUtils.JDBC_SESSION_KEEP_ALIVE).equals("true"); if (url.sslEnabled()) { assert prop.getProperty(InternalUtils.JDBC_SSL).equals("on"); } else { assert prop.getProperty(InternalUtils.JDBC_SSL).equals("off"); } assert TestUtils.assertError( SnowflakeErrors.ERROR_0013, () -> { Map t = new HashMap<>(config); t.remove(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY); InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url); }); assert TestUtils.assertError( SnowflakeErrors.ERROR_0014, () -> { Map t = new HashMap<>(config); t.remove(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME); InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url); }); assert TestUtils.assertError( SnowflakeErrors.ERROR_0015, () -> { Map t = new HashMap<>(config); t.remove(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME); InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url); }); assert TestUtils.assertError( SnowflakeErrors.ERROR_0016, () -> { Map t = new HashMap<>(config); t.remove(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME); InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url); }); } /** * Regression test for SNOW-3029864: snowflake.role.name must be propagated to the JDBC 
connection * properties so that DDL operations (table creation, schema checks) run under the configured role * rather than the user's default role. */ @Test public void testMakeJdbcDriverProperties_shouldIncludeRoleName() { // given Map config = TestUtils.transformProfileFileToConnectorConfiguration(true); String expectedRole = config.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME); SnowflakeURL url = TestUtils.getUrl(); // when Properties props = InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(config, true), url); // then — the role from connector config must appear in the JDBC properties String rolePropertyKey = JdbcPropertyKeys.ROLE; assertTrue( props.containsKey(rolePropertyKey), "JDBC properties must contain the role property (key='" + rolePropertyKey + "'), but found keys: " + props.keySet()); assertEquals( expectedRole, props.getProperty(rolePropertyKey), "JDBC role property must match the configured snowflake.role.name"); } @Test public void testResultSize() throws SQLException { ResultSet resultSet = new MockResultSetForSizeTest(0); assert InternalUtils.resultSize(resultSet) == 0; resultSet = new MockResultSetForSizeTest(100); assert InternalUtils.resultSize(resultSet) == 100; } @Test public void parseJdbcPropertiesMapTest() { String input = "isInsecureMode:true, disableSamlURLCheck:false, passcodeInPassword:on, foo:bar," + " networkTimeout:100"; SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test") .taskId("0") .jdbcMap(input) .build(); // when Properties jdbcPropertiesMap = InternalUtils.parseJdbcPropertiesMap(config); // then assertEquals(jdbcPropertiesMap.size(), 5); } @Test public void makeJdbcDriverProperties_setsAllFields() { String pemKey = Base64.getEncoder().encodeToString(TestUtils.generatePrivateKey().getEncoded()); SnowflakeURL url = new SnowflakeURL("https://testaccount.snowflakecomputing.com:443"); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") 
.taskId("0") .snowflakeDatabase("MY_DB") .snowflakeSchema("MY_SCHEMA") .snowflakeUser("MY_USER") .snowflakePrivateKey(new Password(pemKey)) .snowflakeRole("MY_ROLE") .snowflakeUrl(url.getFullUrl()) .build(); Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url); assertEquals("MY_DB", props.getProperty(InternalUtils.JDBC_DATABASE)); assertEquals("MY_SCHEMA", props.getProperty(InternalUtils.JDBC_SCHEMA)); assertEquals("MY_USER", props.getProperty(InternalUtils.JDBC_USER)); assertEquals("MY_ROLE", props.getProperty(JdbcPropertyKeys.ROLE)); assertTrue(props.containsKey(InternalUtils.JDBC_PRIVATE_KEY)); assertEquals("on", props.getProperty(InternalUtils.JDBC_SSL)); assertEquals("true", props.getProperty(InternalUtils.JDBC_SESSION_KEEP_ALIVE)); assertEquals("json", props.getProperty(InternalUtils.JDBC_QUERY_RESULT_FORMAT)); } @Test public void makeJdbcDriverProperties_missingPrivateKey_throws() { SnowflakeURL url = new SnowflakeURL("https://testaccount.snowflakecomputing.com:443"); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .snowflakeDatabase("MY_DB") .snowflakeSchema("MY_SCHEMA") .snowflakeUser("MY_USER") .snowflakeRole("MY_ROLE") .snowflakeUrl(url.getFullUrl()) .build(); SnowflakeKafkaConnectorException exception = assertThrows( SnowflakeKafkaConnectorException.class, () -> InternalUtils.makeJdbcDriverProperties(taskConfig, url)); assertEquals("0013", exception.getCode()); } @Test public void makeJdbcDriverProperties_noRole_omitsRoleProperty() { String pemKey = Base64.getEncoder().encodeToString(TestUtils.generatePrivateKey().getEncoded()); SnowflakeURL url = new SnowflakeURL("https://testaccount.snowflakecomputing.com:443"); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .snowflakeDatabase("MY_DB") .snowflakeSchema("MY_SCHEMA") .snowflakeUser("MY_USER") .snowflakePrivateKey(new Password(pemKey)) 
.snowflakeUrl(url.getFullUrl()) .build(); Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url); assertFalse( props.containsKey(JdbcPropertyKeys.ROLE), "JDBC properties should not contain role when role is blank"); } @Test public void makeJdbcDriverProperties_emptyStringRole_omitsRoleProperty() { String pemKey = Base64.getEncoder().encodeToString(TestUtils.generatePrivateKey().getEncoded()); SnowflakeURL url = new SnowflakeURL("https://testaccount.snowflakecomputing.com:443"); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .snowflakeDatabase("MY_DB") .snowflakeSchema("MY_SCHEMA") .snowflakeUser("MY_USER") .snowflakePrivateKey(new Password(pemKey)) .snowflakeUrl(url.getFullUrl()) .snowflakeRole("") .build(); Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url); assertFalse( props.containsKey(JdbcPropertyKeys.ROLE), "JDBC properties should not contain role when role is empty string"); } @Test public void makeJdbcDriverProperties_whitespaceRole_omitsRoleProperty() { String pemKey = Base64.getEncoder().encodeToString(TestUtils.generatePrivateKey().getEncoded()); SnowflakeURL url = new SnowflakeURL("https://testaccount.snowflakecomputing.com:443"); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .snowflakeDatabase("MY_DB") .snowflakeSchema("MY_SCHEMA") .snowflakeUser("MY_USER") .snowflakePrivateKey(new Password(pemKey)) .snowflakeUrl(url.getFullUrl()) .snowflakeRole(" ") .build(); Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url); assertFalse( props.containsKey(JdbcPropertyKeys.ROLE), "JDBC properties should not contain role when role is whitespace"); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/JdbcPropertiesTest.java ================================================ package com.snowflake.kafka.connector.internal; 
import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import com.snowflake.kafka.connector.config.SinkTaskConfig; import java.util.Properties; import org.junit.jupiter.api.Test; public class JdbcPropertiesTest { @Test public void shouldCombineProperties() { // given SnowflakeURL url = TestUtils.getUrl(); SinkTaskConfig parsedConfig = SinkTaskConfig.from(TestUtils.transformProfileFileToConnectorConfiguration(false), true); Properties connection = InternalUtils.makeJdbcDriverProperties(parsedConfig, url); Properties proxy = new Properties(); proxy.put("useProxy", "true"); Properties jdbcMap = new Properties(); jdbcMap.put("insecureMode", "true"); // when JdbcProperties jdbcProperties = JdbcProperties.create(connection, proxy, jdbcMap); // then int givenPropertiesSize = connection.size() + proxy.size() + jdbcMap.size(); int mergedPropertiesSize = jdbcProperties.getProperties().size(); assertEquals(givenPropertiesSize, mergedPropertiesSize); } @Test public void shouldThrowWhen_jdbcMap_overridesConnection() { Properties connection = new Properties(); connection.put("user", "test_user1"); Properties proxy = new Properties(); Properties jdbcMap = new Properties(); jdbcMap.put("user", "test_user2"); jdbcMap.put("insecureMode", "true"); // expect assertThatThrownBy(() -> JdbcProperties.create(connection, proxy, jdbcMap)) .isInstanceOfSatisfying( SnowflakeKafkaConnectorException.class, ex -> { // property key is printed not value assertTrue(ex.getMessage().contains("user")); assertEquals("0031", ex.getCode()); }); } @Test public void shouldThrowWhen_jdbcMap_overridesProxy() { Properties connection = new Properties(); connection.put("user", "test_user1"); Properties proxy = new Properties(); proxy.put("useProxy", "true"); Properties jdbcMap = new Properties(); jdbcMap.put("useProxy", "true"); jdbcMap.put("insecureMode", "false"); // 
expect assertThatThrownBy(() -> JdbcProperties.create(connection, proxy, jdbcMap)) .isInstanceOfSatisfying( SnowflakeKafkaConnectorException.class, ex -> { // property key is printed not value assertTrue(ex.getMessage().contains("useProxy")); assertEquals("0031", ex.getCode()); }); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/KCLoggerTest.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.Utils; import org.junit.Before; import org.junit.Test; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.Mockito; import org.mockito.MockitoAnnotations; import org.slf4j.Logger; import org.slf4j.MDC; public class KCLoggerTest { // test constants private final String name = "test.logger.name"; // mock and test setup, inject logger into KCLogger @Mock(name = "logger") private Logger logger = Mockito.mock(Logger.class); @InjectMocks private KCLogger kcLogger = new KCLogger(this.name); @Before public void before() { this.kcLogger = new KCLogger(this.name); MockitoAnnotations.initMocks(this); } @Test public void testAllLogMessages() { String msg = "super useful logging msg"; String expectedMsg = Utils.formatLogMessage(msg); String formatMsg = "super {} useful {} logging {} msg {}"; String expectedFormattedMsg = Utils.formatLogMessage("super wow useful wow! logging 1 msg yay"); KCLogger.toggleGlobalMdcLoggingContext(false); this.testLogMessagesRunner(msg, expectedMsg); this.testLogMessagesWithFormattingRunner( formatMsg, expectedFormattedMsg, "wow", "wow!", 1, "yay"); } @Test public void testAllLogMessagesWithMDCContext() { String mdcContext = "[mdc context] "; KCLogger.toggleGlobalMdcLoggingContext(true); MDC.put(KCLogger.MDC_CONN_CTX_KEY, mdcContext); String msg = "super useful logging msg"; String expectedMsg = Utils.formatLogMessage(mdcContext + msg); String formatMsg = "super {} useful {} logging {} msg {}"; String expectedFormattedMsg = Utils.formatLogMessage(mdcContext + "super wow useful wow! 
logging 1 msg yay"); this.testLogMessagesRunner(msg, expectedMsg); this.testLogMessagesWithFormattingRunner( formatMsg, expectedFormattedMsg, "wow", "wow!", 1, "yay"); } private void testLogMessagesRunner(String msg, String expectedMsg) { // info Mockito.when(logger.isInfoEnabled()).thenReturn(true); kcLogger.info(msg); Mockito.verify(logger, Mockito.times(1)).info(expectedMsg); // trace Mockito.when(logger.isTraceEnabled()).thenReturn(true); kcLogger.trace(msg); Mockito.verify(logger, Mockito.times(1)).trace(expectedMsg); // debug Mockito.when(logger.isDebugEnabled()).thenReturn(true); kcLogger.debug(msg); Mockito.verify(logger, Mockito.times(1)).debug(expectedMsg); // warn Mockito.when(logger.isWarnEnabled()).thenReturn(true); kcLogger.warn(msg); Mockito.verify(logger, Mockito.times(1)).warn(expectedMsg); // error Mockito.when(logger.isErrorEnabled()).thenReturn(true); kcLogger.error(msg); Mockito.verify(logger, Mockito.times(1)).error(expectedMsg); } private void testLogMessagesWithFormattingRunner( String formatMsg, String expectedFormattedMsg, Object... 
vars) { // info Mockito.when(logger.isInfoEnabled()).thenReturn(true); kcLogger.info(formatMsg, vars); Mockito.verify(logger, Mockito.times(1)).info(expectedFormattedMsg); // trace Mockito.when(logger.isTraceEnabled()).thenReturn(true); kcLogger.trace(formatMsg, vars); Mockito.verify(logger, Mockito.times(1)).trace(expectedFormattedMsg); // debug Mockito.when(logger.isDebugEnabled()).thenReturn(true); kcLogger.debug(formatMsg, vars); Mockito.verify(logger, Mockito.times(1)).debug(expectedFormattedMsg); // warn Mockito.when(logger.isWarnEnabled()).thenReturn(true); kcLogger.warn(formatMsg, vars); Mockito.verify(logger, Mockito.times(1)).warn(expectedFormattedMsg); // error Mockito.when(logger.isErrorEnabled()).thenReturn(true); kcLogger.error(formatMsg, vars); Mockito.verify(logger, Mockito.times(1)).error(expectedFormattedMsg); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/NonEncryptedKeyTestSnowflakeConnection.java ================================================ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.internal.TestUtils.transformProfileFileToConnectorConfiguration; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.config.SinkTaskConfig; import java.sql.Connection; import java.util.Map; import java.util.Properties; import net.snowflake.client.api.driver.SnowflakeDriver; /** Connection to test environment generated from a profile file stored locally. */ public class NonEncryptedKeyTestSnowflakeConnection { /** Given a profile file path name, generate a connection by constructing a snowflake driver. 
*/
  public static Connection getConnection() throws Exception {
    // Typed as Map<String, String>: the URL value read below is passed to a String constructor.
    Map<String, String> connectorConfiguration = transformProfileFileToConnectorConfiguration(false);
    SnowflakeURL url =
        new SnowflakeURL(connectorConfiguration.get(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME));
    Properties properties =
        InternalUtils.makeJdbcDriverProperties(
            SinkTaskConfig.from(connectorConfiguration, true), url);
    return new SnowflakeDriver().connect(url.getJdbcUrl(), properties);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/ResetProxyConfigExec.java
================================================
package com.snowflake.kafka.connector.internal;

import net.snowflake.client.api.exception.SnowflakeSQLException;

/** Command-line entry point that resets the proxy parameters via TestUtils and reports progress. */
public class ResetProxyConfigExec {

  public static void main(String[] args) throws SnowflakeSQLException {
    System.out.println("ResetProxyConfigExec::Start wiping Proxy config");
    TestUtils.resetProxyParametersInJVM();
    System.out.println("ResetProxyConfigExec::Proxy Parameters reset in JVM in JDBC");
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/SchematizationTestUtils.java
================================================
package com.snowflake.kafka.connector.internal;

import java.util.HashMap;
import java.util.Map;

/** Fixture data for schematization tests: expected column types and matching row content. */
public class SchematizationTestUtils {

  // Column name -> Snowflake type name.
  // NOTE(review): declared as a raw Map in this view; the entries are String -> String. Confirm
  // the intended type arguments before parameterizing, since callers depend on this field's type.
  public static final Map SF_JSON_SCHEMA_FOR_TABLE_CREATION;

  static {
    SF_JSON_SCHEMA_FOR_TABLE_CREATION = new HashMap<>();
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("ID_INT8", "NUMBER");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("ID_INT8_OPTIONAL", "VARCHAR");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("ID_INT16", "NUMBER");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("\"id_int32_double_quotes\"", "NUMBER");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("ID_INT64", "NUMBER");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("FIRST_NAME", "VARCHAR");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("RATING_FLOAT32", "FLOAT");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("RATING_FLOAT64", "FLOAT");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("APPROVAL", "BOOLEAN");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("INFO_ARRAY", "ARRAY");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("INFO_MAP", "VARIANT");
    SF_JSON_SCHEMA_FOR_TABLE_CREATION.put("RECORD_METADATA", "VARIANT");
  }

  // Column name -> value for one row. Values are of mixed types (Long, Double, Boolean, String,
  // null); the quoted column of the schema map appears here without its surrounding quotes.
  // NOTE(review): raw Map in this view as well; confirm type arguments before parameterizing.
  public static final Map CONTENT_FOR_JSON_TABLE_CREATION;

  static {
    CONTENT_FOR_JSON_TABLE_CREATION = new HashMap<>();
    CONTENT_FOR_JSON_TABLE_CREATION.put("ID_INT8", 0L);
    CONTENT_FOR_JSON_TABLE_CREATION.put("ID_INT8_OPTIONAL", null);
    CONTENT_FOR_JSON_TABLE_CREATION.put("ID_INT16", 42L);
    CONTENT_FOR_JSON_TABLE_CREATION.put("id_int32_double_quotes", 42L);
    CONTENT_FOR_JSON_TABLE_CREATION.put("ID_INT64", 42L);
    CONTENT_FOR_JSON_TABLE_CREATION.put("FIRST_NAME", "zekai");
    CONTENT_FOR_JSON_TABLE_CREATION.put("RATING_FLOAT32", 0.99);
    CONTENT_FOR_JSON_TABLE_CREATION.put("RATING_FLOAT64", 0.99);
    CONTENT_FOR_JSON_TABLE_CREATION.put("APPROVAL", true);
    CONTENT_FOR_JSON_TABLE_CREATION.put("INFO_ARRAY", "[\"a\",\"b\"]");
    CONTENT_FOR_JSON_TABLE_CREATION.put("INFO_MAP", "{\"field\":3}");
    CONTENT_FOR_JSON_TABLE_CREATION.put("RECORD_METADATA", "RECORD_METADATA_PLACE_HOLDER");
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/SnowflakeConnectionServiceCacheTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS;
import static
com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.HashMap; import java.util.Map; import org.junit.jupiter.api.Test; /** * Tests for CacheConfig class. These tests verify that cache configuration values are properly * parsed and validated. */ class SnowflakeConnectionServiceCacheTest { @Test void testCacheConfigDefaults() { Map configMap = new HashMap<>(); CachingConfig config = CachingConfig.fromConfig(configMap); assertTrue(config.isTableExistsCacheEnabled()); assertEquals(CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT, config.getTableExistsCacheExpireMs()); assertTrue(config.isPipeExistsCacheEnabled()); assertEquals(CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT, config.getPipeExistsCacheExpireMs()); } @Test void testCacheConfigInvalidTableExpiration() { Map configMap = createConfigWithCache(true, 0L, true, 30000L); assertThrows( IllegalArgumentException.class, () -> CachingConfig.fromConfig(configMap), "Should throw exception for non-positive table expiration"); } @Test void testCacheConfigInvalidPipeExpiration() { Map configMap = createConfigWithCache(true, 30000L, true, -100L); assertThrows( IllegalArgumentException.class, () -> CachingConfig.fromConfig(configMap), "Should throw exception for negative pipe expiration"); } @Test void testCacheConfigInvalidNumberFormat() { Map configMap = new HashMap<>(); configMap.put(CACHE_TABLE_EXISTS, "true"); configMap.put(CACHE_TABLE_EXISTS_EXPIRE_MS, "invalid"); configMap.put(CACHE_PIPE_EXISTS, "true"); configMap.put(CACHE_PIPE_EXISTS_EXPIRE_MS, "30000"); assertThrows( IllegalArgumentException.class, () -> CachingConfig.fromConfig(configMap), "Should throw exception for invalid number format"); } private Map createConfigWithCache( boolean cacheTableExists, long tableExpirationMs, boolean 
cachePipeExists, long pipeExpirationMs) { Map config = new HashMap<>(); config.put(CACHE_TABLE_EXISTS, String.valueOf(cacheTableExists)); config.put(CACHE_TABLE_EXISTS_EXPIRE_MS, String.valueOf(tableExpirationMs)); config.put(CACHE_PIPE_EXISTS, String.valueOf(cachePipeExists)); config.put(CACHE_PIPE_EXISTS_EXPIRE_MS, String.valueOf(pipeExpirationMs)); return config; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/SnowflakeDataSourceFactory.java ================================================ package com.snowflake.kafka.connector.internal; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import java.security.PrivateKey; import java.util.Map; import java.util.Properties; import javax.sql.DataSource; import net.snowflake.client.api.driver.SnowflakeDriver; import org.apache.commons.dbcp2.ConnectionFactory; import org.apache.commons.dbcp2.DriverConnectionFactory; import org.apache.commons.dbcp2.PoolableConnection; import org.apache.commons.dbcp2.PoolableConnectionFactory; import org.apache.commons.dbcp2.PoolingDataSource; import org.apache.commons.pool2.impl.GenericObjectPool; /** Factory class for creating DataSource instances using Apache Commons DBCP2 for testing. 
*/ public final class SnowflakeDataSourceFactory { public static final String SF_WAREHOUSE = "sfwarehouse"; // for test only private static DataSource dataSource; private SnowflakeDataSourceFactory() {} public static DataSource get() { if (dataSource != null) { return dataSource; } else { try { final Map conf = TestUtils.getConnectorConfigurationForStreaming(false); final SnowflakeURL url = new SnowflakeURL(conf.get(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME)); // Extract properties from conf Map final String user = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME); final String role = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME); final String privateKeyStr = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY); final String privateKeyPassphrase = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE); final String database = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME); final String schema = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME); final String warehouse = conf.get(SF_WAREHOUSE); // Assert all required properties are present assert user != null : "User must not be null"; assert privateKeyStr != null : "Private key must not be null"; assert database != null : "Database must not be null"; assert schema != null : "Schema must not be null"; assert warehouse != null : "Warehouse must not be null"; // Build connection properties final Properties connectionProperties = new Properties(); connectionProperties.setProperty("authenticator", "snowflake_jwt"); connectionProperties.setProperty("user", user); connectionProperties.setProperty("db", database); connectionProperties.setProperty("schema", schema); connectionProperties.setProperty("role", role); connectionProperties.setProperty("warehouse", warehouse); // JWT key pair auth - set private key final PrivateKey privateKey = PrivateKeyTool.parsePrivateKey(privateKeyStr, privateKeyPassphrase); connectionProperties.put("privateKey", privateKey); // 
Create connection factory with Snowflake driver final SnowflakeDriver driver = new SnowflakeDriver(); final ConnectionFactory connectionFactory = new DriverConnectionFactory(driver, url.getJdbcUrl(), connectionProperties); // Create poolable connection factory final PoolableConnectionFactory poolableConnectionFactory = new PoolableConnectionFactory(connectionFactory, null); // Create the pool with 1 initial connection final GenericObjectPool connectionPool = new GenericObjectPool<>(poolableConnectionFactory); connectionPool.setMaxTotal(10); connectionPool.setMaxIdle(1); connectionPool.setMinIdle(1); poolableConnectionFactory.setPool(connectionPool); dataSource = new PoolingDataSource<>(connectionPool); return dataSource; } catch (final Exception e) { throw new RuntimeException("Failed to create DataSource", e); } } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/SnowflakeURLTest.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ package com.snowflake.kafka.connector.internal; import org.junit.Rule; import org.junit.Test; import org.junit.contrib.java.lang.system.EnvironmentVariables; public class SnowflakeURLTest { @Rule public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); @Test public void createFromValidURL() { String url = "http://account.snowflake.com:80"; SnowflakeURL sfurl = new SnowflakeURL(url); assert !sfurl.sslEnabled(); assert sfurl.getAccount().equals("account"); assert sfurl.getFullUrl().equals("account.snowflake.com:80"); assert sfurl.getPort() == 80; assert sfurl.getScheme().equals("http"); assert sfurl.getJdbcUrl().equals("jdbc:snowflake://" + sfurl.getFullUrl()); url = "https://account.snowflake.com:443"; sfurl = new SnowflakeURL(url); assert sfurl.sslEnabled(); assert sfurl.getScheme().equals("https"); assert sfurl.getAccount().equals("account"); url = " account.snowflake.com:80"; sfurl = new SnowflakeURL(url); assert sfurl.sslEnabled(); assert sfurl.getAccount().equals("account"); assert sfurl.getFullUrl().equals("account.snowflake.com:80"); assert sfurl.getPort() == 80; assert sfurl.getScheme().equals("https"); assert sfurl.getJdbcUrl().equals("jdbc:snowflake://" + sfurl.getFullUrl()); url = "account.snowflake.com"; new SnowflakeURL(url); url = "http://account.snowflake.com "; sfurl = new SnowflakeURL(url); assert !sfurl.sslEnabled(); assert sfurl.getAccount().equals("account"); assert sfurl.getFullUrl().equals("account.snowflake.com:80"); assert sfurl.getPort() == 80; assert sfurl.getScheme().equals("http"); assert sfurl.getJdbcUrl().equals("jdbc:snowflake://" + sfurl.getFullUrl()); url = "https://account.snowflake.com"; new SnowflakeURL(url); url = "https://account.region.aws.privatelink.snowflake.com:443"; sfurl = new SnowflakeURL(url); assert sfurl.getUrlWithoutPort().equals("account.region.aws.privatelink.snowflake.com"); } @Test(expected = SnowflakeKafkaConnectorException.class) public void createFromInvalidURL() { String url = 
"htt://account.snowflake.com:80"; new SnowflakeURL(url); } @Test public void testRegionlessURLString() { String url = "http://org-account.snowflake.com:80"; SnowflakeURL sfurl = new SnowflakeURL(url); assert !sfurl.sslEnabled(); assert sfurl.getAccount().equals("org-account"); assert sfurl.getFullUrl().equals("org-account.snowflake.com:80"); assert sfurl.getPort() == 80; assert sfurl.getScheme().equals("http"); assert sfurl.getJdbcUrl().equals("jdbc:snowflake://" + sfurl.getFullUrl()); } @Test public void testRegionlessWithPrivateLinkURL() { // test with privatelink too String url = "https://org-account.privatelink.snowflake.com:80"; SnowflakeURL sfurl = new SnowflakeURL(url); assert sfurl.sslEnabled(); assert sfurl.getAccount().equals("org-account"); assert sfurl.getFullUrl().equals("org-account.privatelink.snowflake.com:80"); assert sfurl.getPort() == 80; assert sfurl.getScheme().equals("https"); assert sfurl.getJdbcUrl().equals("jdbc:snowflake://" + sfurl.getFullUrl()); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/StandardSnowflakeConnectionServiceDdlTest.java ================================================ package com.snowflake.kafka.connector.internal; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.*; import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos; import java.lang.reflect.Field; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.*; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; /** * Tests for DDL methods in StandardSnowflakeConnectionService: appendColumnsToTable and * alterNonNullableColumns. 
*/ public class StandardSnowflakeConnectionServiceDdlTest { private Connection mockJdbcConn; // Separate stubs for the isIcebergTable SHOW query vs the ALTER DDL query. private PreparedStatement mockShowStmt; private PreparedStatement mockAlterStmt; private ResultSet mockEmptyRs; private StandardSnowflakeConnectionService service; @BeforeEach public void setUp() throws Exception { mockJdbcConn = mock(Connection.class); when(mockJdbcConn.isClosed()).thenReturn(false); // isIcebergTable uses SHOW ICEBERG TABLES LIKE → returns empty ResultSet (non-iceberg) mockShowStmt = mock(PreparedStatement.class); mockEmptyRs = mock(ResultSet.class); when(mockEmptyRs.next()).thenReturn(false); when(mockShowStmt.executeQuery()).thenReturn(mockEmptyRs); // ALTER DDL statement mockAlterStmt = mock(PreparedStatement.class); when(mockJdbcConn.prepareStatement(argThat(s -> s != null && s.startsWith("show")))) .thenReturn(mockShowStmt); when(mockJdbcConn.prepareStatement(argThat(s -> s != null && !s.startsWith("show")))) .thenReturn(mockAlterStmt); service = createServiceWithMockConnection(mockJdbcConn); } private static StandardSnowflakeConnectionService createServiceWithMockConnection( Connection mockConn) throws Exception { org.objenesis.Objenesis objenesis = new org.objenesis.ObjenesisStd(); StandardSnowflakeConnectionService svc = objenesis.newInstance(StandardSnowflakeConnectionService.class); Field connField = StandardSnowflakeConnectionService.class.getDeclaredField("conn"); connField.setAccessible(true); connField.set(svc, mockConn); Field loggerField = StandardSnowflakeConnectionService.class.getDeclaredField("LOGGER"); loggerField.setAccessible(true); loggerField.set(svc, new KCLogger(StandardSnowflakeConnectionService.class.getName())); return svc; } /** Captures the ALTER SQL (second prepareStatement call; first is the SHOW ICEBERG check). 
*/ private String captureAlterSql() throws SQLException { ArgumentCaptor sqlCaptor = ArgumentCaptor.forClass(String.class); verify(mockJdbcConn, times(2)).prepareStatement(sqlCaptor.capture()); return sqlCaptor.getAllValues().get(1); } @Test public void testAppendColumnsToTable_singleColumn_generatesCorrectSql() throws SQLException { Map columns = new LinkedHashMap<>(); columns.put("new_col", new ColumnInfos("VARCHAR", null)); service.appendColumnsToTable("test_table", columns); String sql = captureAlterSql(); // Table name uses identifier(?), column name is quoted inline assertTrue(sql.startsWith("alter table identifier(?) add column if not exists ")); assertTrue(sql.contains("\"new_col\" VARCHAR")); assertTrue(sql.contains("comment 'column created by schema evolution")); // Only the table name is a binding verify(mockAlterStmt).setString(1, "\"test_table\""); verify(mockAlterStmt).execute(); } @Test public void testAppendColumnsToTable_multipleColumns_repeatsIfNotExists() throws SQLException { Map columns = new LinkedHashMap<>(); columns.put("col_a", new ColumnInfos("VARCHAR", null)); columns.put("col_b", new ColumnInfos("NUMBER", null)); service.appendColumnsToTable("test_table", columns); String sql = captureAlterSql(); assertTrue(sql.contains("\"col_a\" VARCHAR")); assertTrue(sql.contains(", if not exists \"col_b\" NUMBER")); verify(mockAlterStmt).setString(1, "\"test_table\""); verify(mockAlterStmt).execute(); } @Test public void testAppendColumnsToTable_withComment_includesDdlComment() throws SQLException { Map columns = new LinkedHashMap<>(); columns.put("col1", new ColumnInfos("INT", "source field doc")); service.appendColumnsToTable("test_table", columns); String sql = captureAlterSql(); assertTrue(sql.contains("INT comment 'source field doc'")); } @Test public void testAppendColumnsToTable_nullMap_doesNothing() throws SQLException { service.appendColumnsToTable("test_table", null); // No SQL calls at all — not even the isIcebergTable check 
verify(mockJdbcConn, never()).prepareStatement(anyString()); } @Test public void testAppendColumnsToTable_emptyMap_doesNothing() throws SQLException { service.appendColumnsToTable("test_table", Collections.emptyMap()); verify(mockJdbcConn, never()).prepareStatement(anyString()); } @Test public void testAppendColumnsToTable_sqlException_throwsError2015() throws SQLException { // isIcebergTable SHOW succeeds (returns empty); only the ALTER fails when(mockJdbcConn.prepareStatement(argThat(s -> s != null && !s.startsWith("show")))) .thenThrow(new SQLException("test error")); Map columns = new LinkedHashMap<>(); columns.put("col1", new ColumnInfos("VARCHAR", null)); SnowflakeKafkaConnectorException ex = assertThrows( SnowflakeKafkaConnectorException.class, () -> service.appendColumnsToTable("test_table", columns)); assertTrue(ex.getMessage().contains("2015")); } @Test public void testAlterNonNullableColumns_singleColumn_generatesCorrectSql() throws SQLException { service.alterNonNullableColumns("test_table", Arrays.asList("COL1")); String sql = captureAlterSql(); // Table name uses identifier(?), column names are quoted inline assertTrue(sql.startsWith("alter table identifier(?) 
alter ")); assertTrue(sql.contains("\"COL1\" drop not null")); assertTrue( sql.contains( "\"COL1\" comment 'column altered to be nullable by schema evolution" + " from Snowflake Kafka Connector'")); verify(mockAlterStmt).setString(1, "\"test_table\""); verify(mockAlterStmt).execute(); } @Test public void testAlterNonNullableColumns_multipleColumns_generatesCorrectSql() throws SQLException { service.alterNonNullableColumns("test_table", Arrays.asList("COL_A", "COL_B")); String sql = captureAlterSql(); assertTrue(sql.contains("\"COL_A\" drop not null")); assertTrue(sql.contains("\"COL_B\" drop not null")); verify(mockAlterStmt).setString(1, "\"test_table\""); verify(mockAlterStmt).execute(); } @Test public void testAppendColumnsToTable_caseSensitiveColumnsQuotedInline() throws SQLException { Map columns = new LinkedHashMap<>(); columns.put("city", new ColumnInfos("VARCHAR", null)); service.appendColumnsToTable("test_table", columns); String sql = captureAlterSql(); // Lowercase "city" is quoted inline to preserve case assertTrue(sql.contains("\"city\" VARCHAR")); } @Test public void testAlterNonNullableColumns_caseSensitiveColumnsQuotedInline() throws SQLException { service.alterNonNullableColumns("test_table", Arrays.asList("city")); String sql = captureAlterSql(); assertTrue(sql.contains("\"city\" drop not null")); assertTrue(sql.contains("\"city\" comment")); } @Test public void testAppendColumnsToTable_embeddedQuotesEscaped() throws SQLException { Map columns = new LinkedHashMap<>(); columns.put("col\"name", new ColumnInfos("VARCHAR", null)); service.appendColumnsToTable("test_table", columns); String sql = captureAlterSql(); // Embedded double quotes are escaped per SQL standard assertTrue(sql.contains("\"col\"\"name\" VARCHAR")); } @Test public void testAlterNonNullableColumns_nullList_doesNothing() throws SQLException { service.alterNonNullableColumns("test_table", null); verify(mockJdbcConn, never()).prepareStatement(anyString()); } @Test public void 
testAlterNonNullableColumns_emptyList_doesNothing() throws SQLException { service.alterNonNullableColumns("test_table", Collections.emptyList()); verify(mockJdbcConn, never()).prepareStatement(anyString()); } @Test public void testAlterNonNullableColumns_sqlException_throwsError2016() throws SQLException { // isIcebergTable SHOW succeeds (returns empty); only the ALTER fails when(mockJdbcConn.prepareStatement(argThat(s -> s != null && !s.startsWith("show")))) .thenThrow(new SQLException("test error")); SnowflakeKafkaConnectorException ex = assertThrows( SnowflakeKafkaConnectorException.class, () -> service.alterNonNullableColumns("test_table", Arrays.asList("COL1"))); assertTrue(ex.getMessage().contains("2016")); } @Test public void testAppendColumnsToTable_icebergTable_usesAlterIcebergTable() throws SQLException { // Simulate isIcebergTable returning true when(mockEmptyRs.next()).thenReturn(true); Map columns = new LinkedHashMap<>(); columns.put("new_col", new ColumnInfos("VARCHAR", null)); service.appendColumnsToTable("iceberg_table", columns); String sql = captureAlterSql(); assertTrue(sql.startsWith("alter iceberg table identifier(?) 
add column if not exists ")); } // --------------------------------------------------------------------------- // shouldEvolveSchema tests // --------------------------------------------------------------------------- @Test public void testShouldEvolveSchema_icebergTable_seEnabled_returnsTrue() throws Exception { // Grant row: grantee_name = role, privilege = OWNERSHIP ResultSet grantRs = mock(ResultSet.class); when(grantRs.next()).thenReturn(true, false); when(grantRs.getString("grantee_name")).thenReturn("TEST_ROLE"); when(grantRs.getString("privilege")).thenReturn("OWNERSHIP"); // SHOW TABLES returns nothing (iceberg table) ResultSet emptyRs = mock(ResultSet.class); when(emptyRs.next()).thenReturn(false); // SHOW ICEBERG TABLES returns a row with enable_schema_evolution = Y ResultSet icebergRs = mock(ResultSet.class); when(icebergRs.next()).thenReturn(true, false); when(icebergRs.getString("enable_schema_evolution")).thenReturn("Y"); Connection conn = mock(Connection.class); when(conn.isClosed()).thenReturn(false); PreparedStatement grantStmt = mock(PreparedStatement.class); when(grantStmt.executeQuery()).thenReturn(grantRs); PreparedStatement showTablesStmt = mock(PreparedStatement.class); when(showTablesStmt.executeQuery()).thenReturn(emptyRs); PreparedStatement showIcebergStmt = mock(PreparedStatement.class); when(showIcebergStmt.executeQuery()).thenReturn(icebergRs); when(conn.prepareStatement(argThat(s -> s != null && s.startsWith("show grants")))) .thenReturn(grantStmt); when(conn.prepareStatement(argThat(s -> s != null && s.equals("show tables like ? limit 1")))) .thenReturn(showTablesStmt); when(conn.prepareStatement( argThat(s -> s != null && s.equals("show iceberg tables like ? 
limit 1")))) .thenReturn(showIcebergStmt); StandardSnowflakeConnectionService svc = createServiceWithMockConnection(conn); assertTrue(svc.shouldEvolveSchema("iceberg_table", "TEST_ROLE")); } @Test public void testShouldEvolveSchema_regularTable_seEnabled_returnsTrue() throws Exception { ResultSet grantRs = mock(ResultSet.class); when(grantRs.next()).thenReturn(true, false); when(grantRs.getString("grantee_name")).thenReturn("TEST_ROLE"); when(grantRs.getString("privilege")).thenReturn("OWNERSHIP"); ResultSet tableRs = mock(ResultSet.class); when(tableRs.next()).thenReturn(true, false); when(tableRs.getString("enable_schema_evolution")).thenReturn("Y"); Connection conn = mock(Connection.class); when(conn.isClosed()).thenReturn(false); PreparedStatement grantStmt = mock(PreparedStatement.class); when(grantStmt.executeQuery()).thenReturn(grantRs); PreparedStatement showTablesStmt = mock(PreparedStatement.class); when(showTablesStmt.executeQuery()).thenReturn(tableRs); when(conn.prepareStatement(argThat(s -> s != null && s.startsWith("show grants")))) .thenReturn(grantStmt); when(conn.prepareStatement(argThat(s -> s != null && s.equals("show tables like ? 
limit 1")))) .thenReturn(showTablesStmt); StandardSnowflakeConnectionService svc = createServiceWithMockConnection(conn); assertTrue(svc.shouldEvolveSchema("regular_table", "TEST_ROLE")); } @Test public void testShouldEvolveSchema_tableNotFound_returnsFalse() throws Exception { ResultSet grantRs = mock(ResultSet.class); when(grantRs.next()).thenReturn(true, false); when(grantRs.getString("grantee_name")).thenReturn("TEST_ROLE"); when(grantRs.getString("privilege")).thenReturn("OWNERSHIP"); ResultSet emptyRs = mock(ResultSet.class); when(emptyRs.next()).thenReturn(false); Connection conn = mock(Connection.class); when(conn.isClosed()).thenReturn(false); PreparedStatement grantStmt = mock(PreparedStatement.class); when(grantStmt.executeQuery()).thenReturn(grantRs); PreparedStatement showTablesStmt = mock(PreparedStatement.class); when(showTablesStmt.executeQuery()).thenReturn(emptyRs); PreparedStatement showIcebergStmt = mock(PreparedStatement.class); when(showIcebergStmt.executeQuery()).thenReturn(emptyRs); when(conn.prepareStatement(argThat(s -> s != null && s.startsWith("show grants")))) .thenReturn(grantStmt); when(conn.prepareStatement(argThat(s -> s != null && s.equals("show tables like ? limit 1")))) .thenReturn(showTablesStmt); when(conn.prepareStatement( argThat(s -> s != null && s.equals("show iceberg tables like ? limit 1")))) .thenReturn(showIcebergStmt); StandardSnowflakeConnectionService svc = createServiceWithMockConnection(conn); assertFalse(svc.shouldEvolveSchema("missing_table", "TEST_ROLE")); } @Test public void testAlterNonNullableColumns_icebergTable_usesAlterIcebergTable() throws SQLException { // Simulate isIcebergTable returning true when(mockEmptyRs.next()).thenReturn(true); service.alterNonNullableColumns("iceberg_table", Arrays.asList("COL1")); String sql = captureAlterSql(); assertTrue(sql.startsWith("alter iceberg table identifier(?) 
alter ")); } @Test public void testCreateTableWithOnlyMetadataColumn_icebergTableAlreadyExists_doesNotThrow() throws SQLException { // Snowflake rejects CREATE TABLE IF NOT EXISTS when the name belongs to an ICEBERG TABLE. // The method should swallow the error and return normally rather than propagating it. SQLException icebergConflict = new SQLException( "SQL compilation error:\nObject 'MY_TABLE' already exists as ICEBERG_TABLE"); when(mockAlterStmt.execute()).thenThrow(icebergConflict); assertDoesNotThrow(() -> service.createTableWithOnlyMetadataColumn("MY_TABLE")); } @Test public void testCreateTableWithOnlyMetadataColumn_otherSqlError_throws() throws SQLException { SQLException otherError = new SQLException("Some other SQL error"); when(mockAlterStmt.execute()).thenThrow(otherError); assertThrows( com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException.class, () -> service.createTableWithOnlyMetadataColumn("MY_TABLE")); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/TestUtils.java ================================================ /* * Copyright (c) 2019 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_HOST; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PORT; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_USER; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_HOST; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_PORT; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_USER; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_USE_PROXY; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE; import static com.snowflake.kafka.connector.Utils.JDK_HTTP_AUTH_TUNNELING; import static org.assertj.core.api.Assertions.assertThat; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategies; import com.fasterxml.jackson.databind.annotation.JsonNaming; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder; import io.confluent.connect.avro.AvroConverter; import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; import java.io.File; import java.io.IOException; import java.io.StringWriter; import java.nio.charset.StandardCharsets; import java.security.KeyPair; import 
java.security.KeyPairGenerator; import java.security.NoSuchAlgorithmException; import java.security.PrivateKey; import java.security.Security; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import java.util.function.Function; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.bouncycastle.asn1.nist.NISTObjectIdentifiers; import org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider; import org.bouncycastle.openssl.jcajce.JcaPEMWriter; import org.bouncycastle.operator.OperatorCreationException; import org.bouncycastle.pkcs.PKCS8EncryptedPrivateKeyInfoBuilder; import org.bouncycastle.pkcs.jcajce.JcaPKCS8EncryptedPrivateKeyInfoBuilder; import org.bouncycastle.pkcs.jcajce.JcePKCSPBEOutputEncryptorBuilder; public class TestUtils { private static final KCLogger log = new KCLogger(TestUtils.class.getName()); private static final Random random = new Random(); public static final String TEST_CONNECTOR_NAME = "TEST_CONNECTOR"; private static final String SNOWFLAKE_CREDENTIAL_FILE_ENV = "SNOWFLAKE_CREDENTIAL_FILE"; private static final ObjectMapper mapper = new ObjectMapper(); private static SnowflakeURL url = null; private static volatile Profile profile = null; // Ephemeral schema: each test run creates its own schema to avoid collisions. 
private static volatile String ephemeralSchema = null;
  // True only while getOrCreateEphemeralSchema() is in the middle of creating the schema.
  private static volatile boolean creatingEphemeralSchema = false;

  /**
   * Credentials profile deserialized from the JSON file named by the SNOWFLAKE_CREDENTIAL_FILE
   * environment variable. JSON keys are snake_case; unknown keys are ignored.
   */
  @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
  @JsonIgnoreProperties(ignoreUnknown = true)
  public static class Profile {
    public String user;
    public String role;
    public String host;
    public String database;
    public String schema;
    public String warehouse;
    public String privateKey;
    public String encryptedPrivateKey;
    public String privateKeyPassphrase;
    public String password;
    public String oauthClientId;
    public String oauthClientSecret;
    public String oauthRefreshToken;
    public String oauthTokenEndpoint;
    public String desRsaKey;
  }

  /** JSON envelope (schema + payload) understood by JsonConverter with schemas.enable=true. */
  public static final String JSON_WITH_SCHEMA =
      "{\n"
          + " \"schema\": {\n"
          + " \"type\": \"struct\",\n"
          + " \"fields\": [\n"
          + " {\n"
          + " \"type\": \"string\",\n"
          + " \"doc\": \"doc\", \n"
          + " \"optional\": false,\n"
          + " \"field\": \"regionid\"\n"
          + " },\n"
          + " {\n"
          + " \"type\": \"string\",\n"
          + " \"optional\": false,\n"
          + " \"field\": \"gender\"\n"
          + " }\n"
          + " ],\n"
          + " \"optional\": false,\n"
          + " \"name\": \"sf.kc.test\"\n"
          + " },\n"
          + " \"payload\": {\n"
          + " \"regionid\": \"Region_5\",\n"
          + " \"gender\": \"FEMALE\"\n"
          + " }\n"
          + "}";

  /** Schemaless JSON payload. */
  public static final String JSON_WITHOUT_SCHEMA = "{\"userid\": \"User_1\"}";

  /**
   * Returns the cached credentials profile, reading it from disk on first use.
   *
   * @return the parsed profile
   * @throws IllegalStateException if the credential-file environment variable is unset or empty
   * @throws RuntimeException if the file cannot be read or parsed
   */
  private static Profile getProfile() {
    if (profile != null) {
      return profile;
    }
    final String credentialFile = System.getenv(SNOWFLAKE_CREDENTIAL_FILE_ENV);
    final boolean missing = credentialFile == null || credentialFile.isEmpty();
    if (missing) {
      throw new IllegalStateException(
          SNOWFLAKE_CREDENTIAL_FILE_ENV + " environment variable is not set");
    }
    try {
      profile = mapper.readValue(new File(credentialFile), Profile.class);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    return profile;
  }

  /**
   * Generates a fresh 2048-bit RSA key pair and returns its private half.
   *
   * @return the generated private key
   * @throws RuntimeException if the RSA algorithm is not available
   */
  public static PrivateKey generatePrivateKey() {
    final KeyPairGenerator rsaGenerator;
    try {
      rsaGenerator = KeyPairGenerator.getInstance("RSA");
    } catch (final NoSuchAlgorithmException e) {
      throw new RuntimeException(e);
    }
    rsaGenerator.initialize(2048);
    return rsaGenerator.generateKeyPair().getPrivate();
  }

  /**
   * Returns the ephemeral schema name for this test run, creating it on first access.
   *
   *

    The name is {@code _<7-char random salt>}. A JVM shutdown hook drops the * schema with CASCADE so all tables/pipes/channels are cleaned up automatically. * *

    A re-entrancy guard ({@code creatingEphemeralSchema}) handles the circular call path: {@code * getOrCreateEphemeralSchema → getConnection → transformProfileFileToConnectorConfiguration → * getOrCreateEphemeralSchema}. During bootstrap the original schema is returned so the JDBC * connection can be established. */ private static String getOrCreateEphemeralSchema() { if (ephemeralSchema != null) { return ephemeralSchema; } synchronized (TestUtils.class) { if (ephemeralSchema != null) { return ephemeralSchema; } // Re-entrancy guard: while we are creating the schema, the JDBC connection we open will // call back into transformProfileFileToConnectorConfiguration → here. Return the original // schema so that bootstrap connection can be established. if (creatingEphemeralSchema) { return getProfile().schema; } creatingEphemeralSchema = true; try { String originalSchema = getProfile().schema; String database = getProfile().database; String salt = randomAlphanumeric(7); String salted = originalSchema + "_" + salt; String fqn = database + "." + salted; log.info("Creating ephemeral test schema: {}", fqn); try (Connection conn = NonEncryptedKeyTestSnowflakeConnection.getConnection(); Statement stmt = conn.createStatement()) { stmt.execute("CREATE SCHEMA IF NOT EXISTS " + fqn); } Runtime.getRuntime() .addShutdownHook( new Thread( () -> { try (Connection c = NonEncryptedKeyTestSnowflakeConnection.getConnection(); Statement s = c.createStatement()) { log.info("Dropping ephemeral test schema: {}", fqn); s.execute("DROP SCHEMA IF EXISTS " + fqn + " CASCADE"); } catch (Exception e) { log.error( "Failed to drop ephemeral test schema {}: {}", fqn, e.getMessage()); } })); ephemeralSchema = salted; return salted; } catch (Exception e) { // Snowflake is unreachable (e.g. unit tests without a live connection). // Fall back to the original schema so unit tests behave exactly as before. 
log.warn("Could not create ephemeral test schema, using original: {}", e.getMessage()); ephemeralSchema = getProfile().schema; return ephemeralSchema; } finally { creatingEphemeralSchema = false; } } } private static String randomAlphanumeric(int length) { String chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; StringBuilder sb = new StringBuilder(length); for (int i = 0; i < length; i++) { sb.append(chars.charAt(random.nextInt(chars.length()))); } return sb.toString(); } public static Map transformProfileFileToConnectorConfiguration( boolean takeEncryptedKeyAndPassword) { Map configuration = new HashMap<>(); Profile p = getProfile(); configuration.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, p.user); configuration.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, p.role); configuration.put(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME, p.database); configuration.put( KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, getOrCreateEphemeralSchema()); configuration.put(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, p.host); configuration.put(SnowflakeDataSourceFactory.SF_WAREHOUSE, p.warehouse); if (takeEncryptedKeyAndPassword) { configuration.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, p.encryptedPrivateKey); configuration.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, p.privateKeyPassphrase); } else { configuration.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, p.privateKey); } // password only appears in test profile if (p.password != null) { configuration.put("password", p.password); } configuration.put(KafkaConnectorConfigParams.NAME, TEST_CONNECTOR_NAME); // enable test query mark configuration.put(Utils.TASK_ID, ""); // ITs test features other than compatibility validation; opt out by default configuration.put( KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC, "false"); return configuration; } public static Map getConnectorConfigurationForStreaming( boolean takeEncryptedKey) { Map configuration = 
transformProfileFileToConnectorConfiguration(takeEncryptedKey); // On top of existing properties, add configuration.put(Utils.TASK_ID, "0"); // Existing tests assume column identifier normalization is enabled (uppercasing JSON keys // to match Snowflake's default uppercase column names). configuration.put( com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION, "true"); return configuration; } /** * @return JDBC config with encrypted private key */ public static String generateAESKey(PrivateKey key, char[] passwd) throws IOException, OperatorCreationException { Security.addProvider(new BouncyCastleFipsProvider()); StringWriter writer = new StringWriter(); JcaPEMWriter pemWriter = new JcaPEMWriter(writer); PKCS8EncryptedPrivateKeyInfoBuilder pkcs8EncryptedPrivateKeyInfoBuilder = new JcaPKCS8EncryptedPrivateKeyInfoBuilder(key); pemWriter.writeObject( pkcs8EncryptedPrivateKeyInfoBuilder.build( new JcePKCSPBEOutputEncryptorBuilder(NISTObjectIdentifiers.id_aes256_CBC) .setProvider("BCFIPS") .build(passwd))); pemWriter.close(); return writer.toString(); } /** * execute sql query * * @param query sql query string * @return result set */ static ResultSet executeQuery(String query) { try { Statement statement = NonEncryptedKeyTestSnowflakeConnection.getConnection().createStatement(); log.debug("Executing query: {}", query); return statement.executeQuery(query); } // if ANY exceptions occur, an illegal state has been reached catch (Exception e) { throw new IllegalStateException(e); } } /** * execute sql query * * @param query sql query string * @param parameter parameter to be inserted at index 1 */ public static void executeQueryWithParameter(String query, String parameter) { try { executeQueryWithParameter( NonEncryptedKeyTestSnowflakeConnection.getConnection(), query, parameter); } catch (Exception e) { throw new RuntimeException("Error executing query: " + query, e); } } /** * execute sql query * 
* @param conn jdbc connection * @param query sql query string * @param parameter parameter to be inserted at index 1 */ public static void executeQueryWithParameter(Connection conn, String query, String parameter) { log.debug("Executing query: {}", query); try { PreparedStatement stmt = conn.prepareStatement(query); stmt.setString(1, parameter); stmt.execute(); stmt.close(); } catch (Exception e) { throw new RuntimeException("Error executing query: " + query, e); } } /** * execute sql query and collect result * * @param query sql query string * @param parameter parameter to be inserted at index 1 * @param resultCollector function to collect result * @return result * @param result type */ public static T executeQueryAndCollectResult( String query, String parameter, Function resultCollector) { try { return executeQueryAndCollectResult( NonEncryptedKeyTestSnowflakeConnection.getConnection(), query, parameter, resultCollector); } catch (Exception e) { throw new RuntimeException("Error executing query: " + query, e); } } /** * execute sql query and collect result * * @param conn jdbc connection * @param query sql query string * @param parameter parameter to be inserted at index 1 * @param resultCollector function to collect result * @return result * @param result type */ public static T executeQueryAndCollectResult( Connection conn, String query, String parameter, Function resultCollector) { try { PreparedStatement stmt = conn.prepareStatement(query); stmt.setString(1, parameter); stmt.execute(); ResultSet resultSet = stmt.getResultSet(); T result = resultCollector.apply(resultSet); resultSet.close(); stmt.close(); return result; } catch (Exception e) { throw new RuntimeException("Error executing query: " + query, e); } } /** * Create a table with a single variant column: record_metadata. 
* * @param tableName table name * @param overwrite if true, execute "create or replace table"; otherwise "create table if not * exists" */ public static void createTableWithMetadataColumn(String tableName, boolean overwrite) { String ddl = overwrite ? "create or replace table \"" + tableName + "\" (record_metadata variant)" : "create table if not exists \"" + tableName + "\" (record_metadata variant)"; executeQuery(ddl); } /** Shorthand for {@link #createTableWithMetadataColumn(String, boolean)} with overwrite=false. */ public static void createTableWithMetadataColumn(String tableName) { createTableWithMetadataColumn(tableName, false); } /** * drop a table * * @param tableName table name */ public static void dropTable(String tableName) { String query = "drop table if exists \"" + tableName + "\""; executeQuery(query); } public static void dropPipe(String pipeName) { // Quote pipe name if it contains special characters like dashes String quotedPipeName = pipeName.contains("-") || pipeName.contains(" ") ? "\"" + pipeName + "\"" : pipeName; executeQuery("drop pipe if exists " + quotedPipeName); } /** Select * from table */ public static ResultSet showTable(String tableName) { String query = "select * from \"" + tableName + "\""; return executeQuery(query); } /** * create a random name for test * * @param objectName e.g. table, stage, pipe * @return kafka_connector_test_objectName_randomNum */ private static String randomName(String objectName) { long num = random.nextLong(); num = num < 0 ? 
(num + 1) * (-1) : num; return "kafka_connector_test_" + objectName + "_" + num; } /** * @return a random table name */ public static String randomTableName() { return randomName("table").toUpperCase(java.util.Locale.ROOT); } public static String randomTopicName() { return randomName("topic"); } static SnowflakeURL getUrl() { if (url == null) { url = new SnowflakeURL(getProfile().host); } return url; } /** * Check Snowflake Error Code in test * * @param error Snowflake error * @param func function throwing exception * @return true is error code is correct, otherwise, false */ public static boolean assertError(SnowflakeErrors error, Runnable func) { try { func.run(); } catch (SnowflakeKafkaConnectorException e) { return e.checkErrorCode(error); } return false; } /** * @return snowflake connection for test */ public static SnowflakeConnectionService getConnectionService() { return SnowflakeConnectionServiceFactory.builder() .setProperties(transformProfileFileToConnectorConfiguration(false)) .build(); } public static SnowflakeConnectionService getConnectionServiceWithEncryptedKey() { return SnowflakeConnectionServiceFactory.builder() .setProperties(getConnectorConfigurationForStreaming(true)) .build(); } /** * Reset proxy parameters in JVM which is enabled during starting a sink Task. 
Call this if your * test/code executes the Utils.enableJVMProxy function */ public static void resetProxyParametersInJVM() { System.setProperty(HTTP_USE_PROXY, ""); System.setProperty(HTTP_PROXY_HOST, ""); System.setProperty(HTTP_PROXY_PORT, ""); System.setProperty(HTTPS_PROXY_HOST, ""); System.setProperty(HTTPS_PROXY_PORT, ""); // No harm in unsetting user password as well System.setProperty(JDK_HTTP_AUTH_TUNNELING, ""); System.setProperty(HTTP_PROXY_USER, ""); System.setProperty(HTTP_PROXY_PASSWORD, ""); System.setProperty(HTTPS_PROXY_USER, ""); System.setProperty(HTTPS_PROXY_PASSWORD, ""); } /** * retrieve table size from snowflake * * @param tableName table name * @return size of table * @throws SQLException if meet connection issue */ public static int tableSize(String tableName) throws SQLException { String query = "show tables like '" + tableName + "'"; ResultSet result = executeQuery(query); if (result.next()) { final int rows = result.getInt("rows"); log.debug("{} table size is: {}", tableName, rows); return rows; } return 0; } /** Interface to define the lambda function to be used by assertWithRetry */ public interface AssertFunction { boolean operate() throws Exception; } /** * Assert with sleep and retry logic * * @param func the lambda function to be asserted defined by interface AssertFunction * @param intervalSec retry time interval in seconds * @param maxRetry max retry times */ public static void assertWithRetry(AssertFunction func, int intervalSec, int maxRetry) throws Exception { int iteration = 1; while (!func.operate()) { if (iteration > maxRetry) { throw new InterruptedException("Max retry exceeded"); } Thread.sleep(intervalSec * 1000L); iteration += 1; } } public static void assertWithRetry(AssertFunction func) throws Exception { assertWithRetry(func, 5, 20); } /* Generate (noOfRecords - startOffset) for a given topic and partition. 
*/ public static List createJsonStringSinkRecords( final long startOffset, final long noOfRecords, final String topicName, final int partitionNo) throws Exception { return createJsonRecords( startOffset, noOfRecords, topicName, partitionNo, null, Collections.singletonMap("schemas.enable", Boolean.toString(false))); } /* Generate (noOfRecords - startOffset) blank records for a given topic and partition. */ public static List createBlankJsonSinkRecords( final long startOffset, final long noOfRecords, final String topicName, final int partitionNo) { return createJsonRecords( startOffset, noOfRecords, topicName, partitionNo, null, Collections.singletonMap("schemas.enable", Boolean.toString(false))); } /* Generate (noOfRecords - startOffset) for a given topic and partition. */ public static List createNativeJsonSinkRecords( final long startOffset, final long noOfRecords, final String topicName, final int partitionNo) { return createJsonRecords( startOffset, noOfRecords, topicName, partitionNo, TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8), Collections.singletonMap("schemas.enable", Boolean.toString(true))); } private static List createJsonRecords( final long startOffset, final long noOfRecords, final String topicName, final int partitionNo, byte[] value, Map converterConfig) { JsonConverter converter = new JsonConverter(); converter.configure(converterConfig, false); SchemaAndValue schemaInputValue = converter.toConnectData("test", value); ArrayList records = new ArrayList<>(); for (long i = startOffset; i < startOffset + noOfRecords; ++i) { records.add( new SinkRecord( topicName, partitionNo, Schema.STRING_SCHEMA, "test", schemaInputValue.schema(), schemaInputValue.value(), i)); } return records; } /* Generate (noOfRecords - startOffset) for a given topic and partition which were essentially avro records */ public static List createBigAvroRecords( final long startOffset, final long noOfRecords, final String topicName, final int partitionNo) { ArrayList 
records = new ArrayList<>(); final int outerSegmentLength = 10; final int innerSegmentLength = 10; List outerSchemas = new ArrayList<>(outerSegmentLength); for (int outerSegment = 0; outerSegment < outerSegmentLength; outerSegment++) { SchemaBuilder outerSegmentSchema = SchemaBuilder.struct().name("segment" + outerSegment); for (int innerSegment = 0; innerSegment < innerSegmentLength; innerSegment++) { outerSegmentSchema.field( "segment_" + outerSegment + "_" + innerSegment, Schema.STRING_SCHEMA); } outerSchemas.add(outerSegmentSchema.build()); } List items = new ArrayList<>(outerSegmentLength); for (int outerSegment = 0; outerSegment < outerSegmentLength; outerSegment++) { Struct outerItem = new Struct(outerSchemas.get(outerSegment)); for (int innerSegment = 0; innerSegment < innerSegmentLength; innerSegment++) { outerItem.put( "segment_" + outerSegment + "_" + innerSegment, "segment_" + outerSegment + "_" + innerSegment); } items.add(outerItem); } SchemaBuilder schemaBuilderBigAvroSegment = SchemaBuilder.struct().name("biggestAvro"); outerSchemas.forEach(schema -> schemaBuilderBigAvroSegment.field(schema.name(), schema)); Struct originalBASegment = new Struct(schemaBuilderBigAvroSegment.build()); for (int i = 0; i < outerSchemas.size(); i++) { originalBASegment.put(outerSchemas.get(i).name(), items.get(i)); } SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient(); AvroConverter avroConverter = new AvroConverter(schemaRegistry); avroConverter.configure( Collections.singletonMap("schema.registry.url", "http://fake-url"), false); byte[] converted = avroConverter.fromConnectData( topicName, schemaBuilderBigAvroSegment.schema(), originalBASegment); SchemaAndValue avroInputValue = avroConverter.toConnectData(topicName, converted); for (long i = startOffset; i < startOffset + noOfRecords; ++i) { records.add( new SinkRecord( topicName, partitionNo, Schema.STRING_SCHEMA, "key" + i, avroInputValue.schema(), avroInputValue.value(), i)); } return records; } /** 
* @deprecated use SnowflakeSinkConnectorConfigBuilder instead */ @Deprecated public static Map getConfig() { return SnowflakeSinkConnectorConfigBuilder.streamingConfig().build(); } /** * Check if the schema of the table matches the provided schema. * * @param tableName the name of the table * @param schemaMap the provided schema */ public static void checkTableSchema(String tableName, Map schemaMap) throws SQLException { // the table should be checked to exist beforehand InternalUtils.assertNotEmpty("tableName", tableName); String describeTableQuery = "desc table " + tableName; ResultSet result = executeQuery(describeTableQuery); int numberOfColumnExpected = schemaMap.size(); int numberOfColumnInTable = 0; while (result.next()) { String colName = result.getString("name"); if (!colName.equals(colName.toUpperCase())) { colName = "\"" + colName + "\""; } final String type = result.getString("type"); log.info("Checking column name: [{}] should have type: [{}]", colName, type); assertThat(type).startsWith(schemaMap.get(colName)); // see if the type of the column in sf is the same as expected (ignoring scale) numberOfColumnInTable++; } assert numberOfColumnExpected == numberOfColumnInTable; } /** * Check if one row retrieved from the table matches the provided content * *

    The assumption is that the rows in the table are the same. * * @param tableName the name of the table * @param contentMap the provided content map from columnName to their value */ public static void checkTableContentOneRow(String tableName, Map contentMap) throws SQLException { InternalUtils.assertNotEmpty("tableName", tableName); String getRowQuery = "select * from " + tableName + " limit 1"; ResultSet result = executeQuery(getRowQuery); result.next(); assert result.getMetaData().getColumnCount() == contentMap.size(); for (int i = 0; i < contentMap.size(); ++i) { String columnName = result.getMetaData().getColumnName(i + 1); Object value = result.getObject(i + 1); if (value != null) { // For map or array if (value instanceof String && (((String) value).startsWith("{") || ((String) value).startsWith("["))) { // Get rid of the formatting added by snowflake value = ((String) value).replace(" ", "").replace("\n", ""); } if ("RECORD_METADATA_PLACE_HOLDER".equals(contentMap.get(columnName))) { continue; } assert value.equals(contentMap.get(columnName)) : "expected: " + contentMap.get(columnName) + " actual: " + value; } else { assert contentMap.get(columnName) == null : "value should be null"; } } } public static Map getTableContentOneRow(String tableName) throws SQLException { String getRowQuery = "select * from " + tableName + " limit 1"; ResultSet result = executeQuery(getRowQuery); result.next(); Map contentMap = new HashMap<>(); for (int i = 0; i < result.getMetaData().getColumnCount(); i++) { contentMap.put(result.getMetaData().getColumnName(i + 1), result.getObject(i + 1)); } return contentMap; } public static int getNumberOfRows(String tableName) throws SQLException { String getRowQuery = "select count(*) from " + tableName; ResultSet result = executeQuery(getRowQuery); result.next(); final int rowsNo = result.getInt(1); log.info("Number or rows: [{}]", rowsNo); return rowsNo; } public static int getNumberOfColumns(String tableName) throws SQLException { 
String getRowQuery = "select * from " + tableName + " limit 1"; ResultSet result = executeQuery(getRowQuery); return result.getMetaData().getColumnCount(); } public static void assertTableRowCount(String tableName, int expectedRowCount) throws SQLException { int actualRowCount = getNumberOfRows(tableName); if (actualRowCount != expectedRowCount) { throw new AssertionError( String.format( "Expected table %s to have %d rows, but it has %d rows", tableName, expectedRowCount, actualRowCount)); } } public static void assertTableColumnCount(String tableName, int expectedColumnCount) throws SQLException { int actualColumnCount = getNumberOfColumns(tableName); if (actualColumnCount != expectedColumnCount) { throw new AssertionError( String.format( "Expected table %s to have %d columns, but it has %d columns", tableName, expectedColumnCount, actualColumnCount)); } } public static void assertTableHasColumn(String tableName, String columnName) throws SQLException { String getRowQuery = "select * from " + tableName + " limit 1"; ResultSet result = executeQuery(getRowQuery); ResultSetMetaData metaData = result.getMetaData(); boolean found = false; for (int i = 1; i <= metaData.getColumnCount(); i++) { if (metaData.getColumnName(i).equalsIgnoreCase(columnName)) { found = true; break; } } if (!found) { throw new AssertionError( String.format( "Expected table %s to have column %s, but it was not found", tableName, columnName)); } } public static List> getTableRows(String tableName) throws SQLException { InternalUtils.assertNotEmpty("tableName", tableName); String getRowQuery = "select * from " + tableName; ResultSet result = executeQuery(getRowQuery); ResultSetMetaData metaData = result.getMetaData(); int columnCount = metaData.getColumnCount(); List> rows = new ArrayList<>(); while (result.next()) { Map row = new HashMap<>(); for (int i = 1; i <= columnCount; i++) { String columnName = metaData.getColumnName(i); Object value = result.getObject(i); row.put(columnName, value); } 
rows.add(row); } return rows; } public static void assertColumnNullable(String tableName, String columnName, boolean isNullable) throws SQLException { InternalUtils.assertNotEmpty("tableName", tableName); InternalUtils.assertNotEmpty("columnName", columnName); String describeTableQuery = "desc table " + tableName; final String isNullableVal = isNullable ? "Y" : "N"; ResultSet result = executeQuery(describeTableQuery); while (result.next()) { String colName = result.getString("name"); String nullable = result.getString("null?"); if (columnName.equals(colName)) { assertThat(nullable).as("Column %s should be nullable", colName).isEqualTo(isNullableVal); } } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/TombstoneRecordIngestionIT.java ================================================ package com.snowflake.kafka.connector.internal; import static com.snowflake.kafka.connector.ConnectorConfigValidatorTest.COMMUNITY_CONVERTER_SUBSET; import static com.snowflake.kafka.connector.internal.TestUtils.getConnectionService; import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext; import com.snowflake.kafka.connector.internal.streaming.SnowflakeSinkServiceV2; import com.snowflake.kafka.connector.internal.streaming.StreamingSinkServiceBuilder; import io.confluent.connect.avro.AvroConverter; import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.data.Schema; import 
org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.storage.Converter; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; class TombstoneRecordIngestionIT { private final int partition = 0; private final String topic = "test"; private String table; private Converter jsonConverter; private Map converterConfig; @BeforeEach void beforeEach() { this.table = TestUtils.randomTableName(); getConnectionService() .executeQueryWithParameters( format( "create or replace table %s (record_metadata variant, gender varchar, regionid" + " varchar)", table)); this.jsonConverter = new JsonConverter(); this.converterConfig = new HashMap<>(); this.converterConfig.put("schemas.enable", "false"); this.jsonConverter.configure(this.converterConfig, false); } @AfterEach void afterEach() { TestUtils.dropTable(table); } @ParameterizedTest(name = "behavior: {0}") @EnumSource(ConnectorConfigTools.BehaviorOnNullValues.class) void testStreamingTombstoneBehavior(ConnectorConfigTools.BehaviorOnNullValues behavior) throws Exception { // setup TopicPartition topicPartition = new TopicPartition(topic, partition); SinkTaskConfig taskConfig = SinkTaskConfig.builderFrom(TestUtils.getConnectorConfigurationForStreaming(false)) .topicToTableMap(Collections.singletonMap(topic, table)) .behaviorOnNullValues(behavior) .build(); SnowflakeSinkServiceV2 service = StreamingSinkServiceBuilder.builder(getConnectionService(), taskConfig) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .build(); service.startPartitions(Collections.singleton(topicPartition)); service.awaitInitialization(); // create one normal record SinkRecord normalRecord = TestUtils.createNativeJsonSinkRecords(0, 1, topic, partition).get(0); // test 
this.testIngestTombstoneRunner(normalRecord, COMMUNITY_CONVERTER_SUBSET, service, behavior); // cleanup service.closeAll(); } @ParameterizedTest(name = "behavior: {0}") @EnumSource(ConnectorConfigTools.BehaviorOnNullValues.class) void testStreamingTombstoneBehaviorWithSchematization( ConnectorConfigTools.BehaviorOnNullValues behavior) throws Exception { // setup TopicPartition topicPartition = new TopicPartition(topic, partition); SinkTaskConfig taskConfig = SinkTaskConfig.builderFrom(TestUtils.getConnectorConfigurationForStreaming(false)) .topicToTableMap(Collections.singletonMap(topic, table)) .behaviorOnNullValues(behavior) .build(); SnowflakeSinkServiceV2 service = StreamingSinkServiceBuilder.builder(getConnectionService(), taskConfig) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .build(); service.startPartitions(Collections.singleton(topicPartition)); service.awaitInitialization(); // create one normal record SinkRecord normalRecord = TestUtils.createNativeJsonSinkRecords(0, 1, topic, partition).get(0); service.insert(normalRecord); // schematization needs first insert for evolution // test this.testIngestTombstoneRunner(normalRecord, COMMUNITY_CONVERTER_SUBSET, service, behavior); // cleanup service.closeAll(); } // all ingestion methods should have the same behavior for tombstone records private void testIngestTombstoneRunner( SinkRecord normalRecord, List converters, SnowflakeSinkService service, ConnectorConfigTools.BehaviorOnNullValues behavior) throws Exception { int offset = 1; // normalRecord should be offset 0 List sinkRecords = new ArrayList<>(); sinkRecords.add(normalRecord); // create tombstone records SchemaAndValue nullRecordInput = this.jsonConverter.toConnectData(topic, null); SinkRecord allNullRecord1 = new SinkRecord(topic, partition, null, null, null, null, offset++); SinkRecord allNullRecord2 = new SinkRecord( topic, partition, null, null, nullRecordInput.schema(), nullRecordInput.value(), 
offset++); SinkRecord allNullRecord3 = new SinkRecord( topic, partition, nullRecordInput.schema(), nullRecordInput.value(), nullRecordInput.schema(), nullRecordInput.value(), offset++); // add tombstone records sinkRecords.addAll(Arrays.asList(allNullRecord1, allNullRecord2, allNullRecord3)); // create and add tombstone records from each converter Map converterConfig = new HashMap<>(); converterConfig.put("schemas.enable", "false"); for (Converter converter : converters) { // handle avro converter if (converter.toString().contains("io.confluent.connect.avro.AvroConverter")) { SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient(); converter = new AvroConverter(schemaRegistry); converterConfig.put("schema.registry.url", "http://fake-url"); } converter.configure(converterConfig, false); SchemaAndValue input = converter.toConnectData(topic, null); sinkRecords.add( new SinkRecord( topic, partition, Schema.STRING_SCHEMA, converter.toString(), input.schema(), input.value(), offset)); offset++; } // insert all records service.insert(sinkRecords); // verify inserted (offset updates happen automatically in streaming) int expectedOffset = behavior == ConnectorConfigTools.BehaviorOnNullValues.DEFAULT ? 
sinkRecords.size() : 1; TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == expectedOffset, 10, 20); TestUtils.assertWithRetry( () -> service.getOffset(new TopicPartition(topic, partition)) == expectedOffset, 10, 20); // assert that one row have values in those columns assertThat( TestUtils.getTableRows(table).stream() .filter( row -> "FEMALE".equals(row.get("GENDER")) && "Region_5".equals(row.get("REGIONID"))) .count()) .isEqualTo(1); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/metrics/MetricsJmxReporterTest.java ================================================ package com.snowflake.kafka.connector.internal.metrics; import static org.junit.Assert.*; import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; import org.junit.Before; import org.junit.Test; public class MetricsJmxReporterTest { private MetricRegistry metricRegistry; private MetricsJmxReporter reporter; @Before public void setUp() { metricRegistry = new MetricRegistry(); reporter = new MetricsJmxReporter(metricRegistry, "testConnector"); } @Test public void testRemoveMetricByExactName() { metricRegistry.register("channel:ch1/offsets/processed-offset", (Gauge) () -> 42L); metricRegistry.register("channel:ch1/offsets/persisted-offset", (Gauge) () -> 10L); metricRegistry.register("channel:ch2/offsets/processed-offset", (Gauge) () -> 99L); assertEquals(3, metricRegistry.getMetrics().size()); reporter.removeMetric("channel:ch1/offsets/processed-offset"); assertEquals(2, metricRegistry.getMetrics().size()); assertNull(metricRegistry.getGauges().get("channel:ch1/offsets/processed-offset")); assertNotNull(metricRegistry.getGauges().get("channel:ch1/offsets/persisted-offset")); assertNotNull(metricRegistry.getGauges().get("channel:ch2/offsets/processed-offset")); } @Test public void testRemoveMetricNonexistentIsNoOp() { metricRegistry.register("channel:ch1/offsets/processed-offset", (Gauge) () -> 42L); 
reporter.removeMetric("channel:nonexistent/offsets/foo"); assertEquals(1, metricRegistry.getMetrics().size()); } @Test public void testRemoveMetricsFromRegistryStillWorks() { metricRegistry.register("channel:ch1/offsets/a", (Gauge) () -> 1L); metricRegistry.register("channel:ch1/offsets/b", (Gauge) () -> 2L); metricRegistry.register("channel:ch2/offsets/a", (Gauge) () -> 3L); reporter.removeMetricsFromRegistry("channel:ch1"); assertEquals(1, metricRegistry.getMetrics().size()); assertNotNull(metricRegistry.getGauges().get("channel:ch2/offsets/a")); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/metrics/SnowflakeSinkTaskMetricsTest.java ================================================ package com.snowflake.kafka.connector.internal.metrics; import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME; import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.taskMetricName; import static com.snowflake.kafka.connector.internal.metrics.SnowflakeSinkTaskMetrics.*; import static org.junit.Assert.*; import com.codahale.metrics.Gauge; import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.Timer; import java.util.concurrent.atomic.AtomicInteger; import org.junit.After; import org.junit.Before; import org.junit.Test; public class SnowflakeSinkTaskMetricsTest { private static final String TASK_ID = "3"; private static final String PREFIX = "task-" + TASK_ID; private MetricRegistry metricRegistry; private MetricsJmxReporter metricsJmxReporter; private SnowflakeSinkTaskMetrics metrics; @Before public void setUp() { metricRegistry = new MetricRegistry(); metricsJmxReporter = new MetricsJmxReporter(metricRegistry, TEST_CONNECTOR_NAME); } @After public void tearDown() { if (metrics != null) { metrics.unregister(); } } private void createMetrics() { metrics = new SnowflakeSinkTaskMetrics(TEST_CONNECTOR_NAME, TASK_ID, metricsJmxReporter); } private void 
createMetricsWithSdkClientCount(int initialCount) { AtomicInteger sdkCount = new AtomicInteger(initialCount); metrics = new SnowflakeSinkTaskMetrics( TEST_CONNECTOR_NAME, TASK_ID, metricsJmxReporter, sdkCount::get); } @Test public void testAllMetricsRegistered() { createMetrics(); // Method duration timers assertNotNull( metricRegistry.getTimers().get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PUT_DURATION))); assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PRECOMMIT_DURATION))); assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PRECOMMIT_OFFSET_FETCH_DURATION))); // Lifecycle duration timers assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, OPEN_DURATION))); assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CLOSE_DURATION))); assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, START_DURATION))); // Channel/SDK timers assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_DURATION))); assertNotNull( metricRegistry .getTimers() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_CREATE_DURATION))); // Meter assertNotNull( metricRegistry.getMeters().get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PUT_RECORDS))); // Counters assertNotNull( metricRegistry .getCounters() .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PRECOMMIT_PARTITIONS_SKIPPED))); assertNotNull( metricRegistry.getCounters().get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, OPEN_COUNT))); assertNotNull( metricRegistry .getCounters() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CLOSE_COUNT))); assertNotNull( metricRegistry .getCounters() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_COUNT))); // Gauges assertNotNull( metricRegistry .getGauges() .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, ASSIGNED_PARTITIONS))); } @Test public void 
testPutDurationTimer() { createMetrics(); Timer.Context ctx = metrics.putDuration().time(); ctx.stop(); assertEquals(1, metrics.putDuration().getCount()); } @Test public void testPreCommitDurationTimer() { createMetrics(); Timer.Context ctx = metrics.preCommitDuration().time(); ctx.stop(); assertEquals(1, metrics.preCommitDuration().getCount()); } @Test public void testLifecycleTimers() { createMetrics(); Timer.Context openCtx = metrics.openDuration().time(); openCtx.stop(); assertEquals(1, metrics.openDuration().getCount()); Timer.Context closeCtx = metrics.closeDuration().time(); closeCtx.stop(); assertEquals(1, metrics.closeDuration().getCount()); Timer.Context startCtx = metrics.startDuration().time(); startCtx.stop(); assertEquals(1, metrics.startDuration().getCount()); } @Test public void testChannelAndSdkTimers() { createMetrics(); Timer.Context channelCtx = metrics.channelOpenDuration().time(); channelCtx.stop(); Timer.Context channelCtx2 = metrics.channelOpenDuration().time(); channelCtx2.stop(); assertEquals(2, metrics.channelOpenDuration().getCount()); Timer.Context sdkCtx = metrics.sdkClientCreateDuration().time(); sdkCtx.stop(); assertEquals(1, metrics.sdkClientCreateDuration().getCount()); Timer.Context fetchCtx = metrics.preCommitOffsetFetchDuration().time(); fetchCtx.stop(); assertEquals(1, metrics.preCommitOffsetFetchDuration().getCount()); } @Test public void testPutRecordsMeter() { createMetrics(); metrics.putRecords().mark(100); metrics.putRecords().mark(50); assertEquals(150, metrics.putRecords().getCount()); } @Test public void testPreCommitPartitionsSkipped() { createMetrics(); metrics.preCommitPartitionsSkipped().inc(3); assertEquals(3, metrics.preCommitPartitionsSkipped().getCount()); } @Test public void testAssignedPartitionsGauge() { createMetrics(); assertEquals(0, metrics.getAssignedPartitions()); metrics.setAssignedPartitions(12); assertEquals(12, metrics.getAssignedPartitions()); @SuppressWarnings("unchecked") Gauge gauge = 
metricRegistry .getGauges() .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, ASSIGNED_PARTITIONS)); assertEquals(Integer.valueOf(12), gauge.getValue()); } @Test public void testLifecycleCounters() { createMetrics(); metrics.openCount().inc(); metrics.openCount().inc(); metrics.closeCount().inc(); assertEquals(2, metrics.openCount().getCount()); assertEquals(1, metrics.closeCount().getCount()); } @Test public void testChannelOpenCount() { createMetrics(); metrics.channelOpenCount().inc(); metrics.channelOpenCount().inc(); metrics.channelOpenCount().inc(); assertEquals(3, metrics.channelOpenCount().getCount()); } @Test public void testSdkClientCountGauge() { createMetricsWithSdkClientCount(5); @SuppressWarnings("unchecked") Gauge gauge = metricRegistry .getGauges() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_COUNT)); assertNotNull(gauge); assertEquals(Integer.valueOf(5), gauge.getValue()); } @Test public void testSdkClientCountGaugeNotRegisteredWithoutSupplier() { createMetrics(); assertNull( metricRegistry .getGauges() .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_COUNT))); } @Test public void testUnregisterRemovesAllMetrics() { createMetrics(); assertFalse(metricRegistry.getMetrics().isEmpty()); metrics.unregister(); assertTrue(metricRegistry.getMetrics().isEmpty()); metrics = null; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/ColumnInfosTest.java ================================================ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Test; class ColumnInfosTest { @Test void getDdlComments_withComment() { ColumnInfos infos = new ColumnInfos("VARCHAR", "user name field"); assertThat(infos.getDdlComments()).isEqualTo(" comment 'user name field' "); } @Test void getDdlComments_withoutComment() { 
ColumnInfos infos = new ColumnInfos("INT"); assertThat(infos.getDdlComments()) .isEqualTo(" comment 'column created by schema evolution from Snowflake Kafka Connector' "); } @Test void getDdlComments_withNullComment() { ColumnInfos infos = new ColumnInfos("INT", null); assertThat(infos.getDdlComments()) .isEqualTo(" comment 'column created by schema evolution from Snowflake Kafka Connector' "); } @Test void getDdlComments_escapeSingleQuotes() { ColumnInfos infos = new ColumnInfos("VARCHAR", "it's a test"); assertThat(infos.getDdlComments()).isEqualTo(" comment 'it''s a test' "); } @Test void constructorRejectsNullColumnType() { assertThrows(NullPointerException.class, () -> new ColumnInfos(null)); assertThrows(NullPointerException.class, () -> new ColumnInfos(null, "comment")); } @Test void equalityAndHashCode() { ColumnInfos a = new ColumnInfos("VARCHAR", "comment"); ColumnInfos b = new ColumnInfos("VARCHAR", "comment"); ColumnInfos c = new ColumnInfos("INT", "comment"); ColumnInfos d = new ColumnInfos("VARCHAR", null); assertEquals(a, b); assertEquals(a.hashCode(), b.hashCode()); assertNotEquals(a, c); assertNotEquals(a, d); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/SchemaEvolutionTargetItemsTest.java ================================================ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.*; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Set; import org.junit.jupiter.api.Test; class SchemaEvolutionTargetItemsTest { @Test void hasDataForSchemaEvolution_withColumnsToAdd() { SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "table", Collections.emptySet(), new HashSet<>(Arrays.asList("COL1"))); assertTrue(items.hasDataForSchemaEvolution()); } @Test void 
hasDataForSchemaEvolution_withColumnsToDropNonNull() { SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "table", new HashSet<>(Arrays.asList("COL1")), Collections.emptySet()); assertTrue(items.hasDataForSchemaEvolution()); } @Test void hasDataForSchemaEvolution_empty() { SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems("table", Collections.emptySet(), Collections.emptySet()); assertFalse(items.hasDataForSchemaEvolution()); } @Test void constructorHandlesNullSets() { SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems("table", null, null); assertThat(items.getColumnsToAdd()).isEmpty(); assertThat(items.getColumnsToDropNonNullability()).isEmpty(); assertFalse(items.hasDataForSchemaEvolution()); } @Test void twoArgConstructorSetsColumnsToAdd() { Set cols = new HashSet<>(Arrays.asList("A", "B")); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems("table", cols); assertThat(items.getColumnsToAdd()).containsExactlyInAnyOrder("A", "B"); assertThat(items.getColumnsToDropNonNullability()).isEmpty(); } @Test void gettersReturnUnmodifiableSets() { SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "table", new HashSet<>(Arrays.asList("DROP1")), new HashSet<>(Arrays.asList("ADD1"))); assertThrows(UnsupportedOperationException.class, () -> items.getColumnsToAdd().add("X")); assertThrows( UnsupportedOperationException.class, () -> items.getColumnsToDropNonNullability().add("X")); } @Test void defensiveCopyPreventsExternalMutation() { Set original = new HashSet<>(Arrays.asList("COL1")); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems("table", original, Collections.emptySet()); original.add("COL2"); assertThat(items.getColumnsToDropNonNullability()).containsExactly("COL1"); } @Test void equalityAndHashCode() { SchemaEvolutionTargetItems a = new SchemaEvolutionTargetItems( "t", new HashSet<>(Arrays.asList("C1")), new HashSet<>(Arrays.asList("C2"))); SchemaEvolutionTargetItems b = new 
SchemaEvolutionTargetItems( "t", new HashSet<>(Arrays.asList("C1")), new HashSet<>(Arrays.asList("C2"))); SchemaEvolutionTargetItems c = new SchemaEvolutionTargetItems("t", Collections.emptySet(), Collections.emptySet()); assertEquals(a, b); assertEquals(a.hashCode(), b.hashCode()); assertNotEquals(a, c); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeColumnTypeMapperTest.java ================================================ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.assertj.core.api.Assertions.assertThat; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import java.util.stream.Stream; import org.apache.kafka.connect.data.Date; import org.apache.kafka.connect.data.Decimal; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.Time; import org.apache.kafka.connect.data.Timestamp; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; class SnowflakeColumnTypeMapperTest { private final SnowflakeColumnTypeMapper mapper = new SnowflakeColumnTypeMapper(); @ParameterizedTest(name = "should map Kafka type {0} to Snowflake column type {2}") @MethodSource("kafkaTypesToMap") void shouldMapKafkaTypeToSnowflakeColumnType( Schema.Type kafkaType, String schemaName, String expectedSnowflakeType) { assertThat(mapper.mapToColumnType(kafkaType, schemaName)).isEqualTo(expectedSnowflakeType); } @ParameterizedTest() @MethodSource("jsonNodeTypesToMap") void shouldMapJsonNodeTypeToKafkaType(JsonNode value, Schema.Type expectedKafkaType) { assertThat(mapper.mapJsonNodeTypeToKafkaType(value)).isEqualTo(expectedKafkaType); } private static Stream kafkaTypesToMap() { return Stream.of( Arguments.of(Schema.Type.INT8, null, "BYTEINT"), Arguments.of(Schema.Type.INT16, null, 
"SMALLINT"), Arguments.of(Schema.Type.INT32, Date.LOGICAL_NAME, "DATE"), Arguments.of(Schema.Type.INT32, Time.LOGICAL_NAME, "TIME(6)"), Arguments.of(Schema.Type.INT32, null, "INT"), Arguments.of(Schema.Type.INT64, Timestamp.LOGICAL_NAME, "TIMESTAMP(6)"), Arguments.of(Schema.Type.INT64, null, "BIGINT"), Arguments.of(Schema.Type.FLOAT32, null, "FLOAT"), Arguments.of(Schema.Type.FLOAT64, null, "DOUBLE"), Arguments.of(Schema.Type.BOOLEAN, null, "BOOLEAN"), Arguments.of(Schema.Type.STRING, null, "VARCHAR"), Arguments.of(Schema.Type.BYTES, Decimal.LOGICAL_NAME, "VARCHAR"), Arguments.of(Schema.Type.BYTES, null, "BINARY"), Arguments.of(Schema.Type.ARRAY, null, "ARRAY"), Arguments.of(Schema.Type.STRUCT, null, "VARIANT"), Arguments.of(Schema.Type.MAP, null, "VARIANT")); } private static Stream jsonNodeTypesToMap() { return Stream.of( Arguments.of(JsonNodeFactory.instance.nullNode(), Schema.Type.STRING), Arguments.of(JsonNodeFactory.instance.numberNode((short) 1), Schema.Type.INT16), Arguments.of(JsonNodeFactory.instance.numberNode(1), Schema.Type.INT32), Arguments.of(JsonNodeFactory.instance.numberNode(1L), Schema.Type.INT64), Arguments.of(JsonNodeFactory.instance.numberNode(1.0), Schema.Type.FLOAT64), Arguments.of(JsonNodeFactory.instance.numberNode(1.0f), Schema.Type.FLOAT32), Arguments.of(JsonNodeFactory.instance.textNode("text"), Schema.Type.STRING), Arguments.of(JsonNodeFactory.instance.booleanNode(true), Schema.Type.BOOLEAN), Arguments.of(JsonNodeFactory.instance.binaryNode(new byte[] {1, 2, 3}), Schema.Type.BYTES), Arguments.of(JsonNodeFactory.instance.arrayNode().add(1).add(2).add(3), Schema.Type.ARRAY), Arguments.of(JsonNodeFactory.instance.objectNode(), Schema.Type.STRUCT), Arguments.of(JsonNodeFactory.instance.pojoNode(new Object()), null)); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeSchemaEvolutionServiceTest.java ================================================ /* * 
Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. * * Tests for schema evolution service and DDL execution (Commit 6). */ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig; import com.snowflake.kafka.connector.records.SnowflakeSinkRecord; import java.util.*; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; import org.mockito.InOrder; /** Tests for SnowflakeSchemaEvolutionService */ public class SnowflakeSchemaEvolutionServiceTest { private static final SnowflakeMetadataConfig METADATA_CONFIG = new SnowflakeMetadataConfig(); private SnowflakeConnectionService mockConn; private SnowflakeSchemaEvolutionService service; @BeforeEach public void setUp() { mockConn = mock(SnowflakeConnectionService.class); service = new SnowflakeSchemaEvolutionService(mockConn); } private static SnowflakeSinkRecord toSinkRecord(SinkRecord kafkaRecord) { return SnowflakeSinkRecord.from(kafkaRecord, METADATA_CONFIG, true, false); } @Test public void testEvolveSchemaAddColumns() { Schema valueSchema = SchemaBuilder.struct() .field("name", Schema.STRING_SCHEMA) .field("new_col", Schema.INT32_SCHEMA) .build(); Struct value = new Struct(valueSchema); value.put("name", "Alice"); value.put("new_col", 42); SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, valueSchema, value, 0); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", Collections.emptySet(), 
new HashSet<>(Arrays.asList("NEW_COL"))); service.evolveSchemaIfNeeded(items, record); verify(mockConn).appendColumnsToTable(eq("test_table"), anyMap()); verify(mockConn, never()).alterNonNullableColumns(anyString(), anyList()); } @Test public void testEvolveSchemaDropNotNull() { SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, null, new HashMap<>(), 0); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", new HashSet<>(Arrays.asList("COL1", "COL2")), Collections.emptySet()); service.evolveSchemaIfNeeded(items, record); ArgumentCaptor> colsCaptor = ArgumentCaptor.forClass(List.class); verify(mockConn).alterNonNullableColumns(eq("test_table"), colsCaptor.capture()); List droppedCols = colsCaptor.getValue(); assertEquals(2, droppedCols.size()); assertTrue(droppedCols.contains("COL1")); assertTrue(droppedCols.contains("COL2")); verify(mockConn, never()).appendColumnsToTable(anyString(), anyMap()); } @Test public void testEvolveSchemaNoDataSkipsExecution() { SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, null, null, 0); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", Collections.emptySet(), Collections.emptySet()); service.evolveSchemaIfNeeded(items, record); verify(mockConn, never()).appendColumnsToTable(anyString(), anyMap()); verify(mockConn, never()).alterNonNullableColumns(anyString(), anyList()); } @Test public void testEvolveSchemaHandlesAddColumnFailure() { Schema valueSchema = SchemaBuilder.struct().field("col1", Schema.STRING_SCHEMA).build(); Struct value = new Struct(valueSchema); value.put("col1", "test"); SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, valueSchema, value, 0); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord); doThrow( new com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException( "race", "2001")) 
.when(mockConn) .appendColumnsToTable(anyString(), anyMap()); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", Collections.emptySet(), new HashSet<>(Arrays.asList("COL1"))); assertDoesNotThrow(() -> service.evolveSchemaIfNeeded(items, record)); } @Test public void testEvolveSchemaHandlesDropNotNullFailure() { SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, null, new HashMap<>(), 0); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord); doThrow( new com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException( "race", "2001")) .when(mockConn) .alterNonNullableColumns(anyString(), anyList()); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", new HashSet<>(Arrays.asList("COL1")), Collections.emptySet()); assertDoesNotThrow(() -> service.evolveSchemaIfNeeded(items, record)); } @Test public void testEvolveSchemaAddColumnsBeforeDropNotNull() { Schema valueSchema = SchemaBuilder.struct() .field("existing_col", Schema.STRING_SCHEMA) .field("new_col", Schema.INT32_SCHEMA) .build(); Struct value = new Struct(valueSchema); value.put("existing_col", "hello"); value.put("new_col", 99); SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, valueSchema, value, 0); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", new HashSet<>(Arrays.asList("EXISTING_COL")), new HashSet<>(Arrays.asList("NEW_COL"))); service.evolveSchemaIfNeeded(items, record); InOrder inOrder = inOrder(mockConn); inOrder.verify(mockConn).appendColumnsToTable(eq("test_table"), anyMap()); inOrder.verify(mockConn).alterNonNullableColumns(eq("test_table"), anyList()); } @Test public void testEvolveSchemaWithTransformedRecordContent() { // Simulates the schematization=off path where the transformed record // contains RECORD_CONTENT (a Map) instead of the original flat fields. 
Map transformedRecord = new HashMap<>(); transformedRecord.put("RECORD_CONTENT", new HashMap<>(Map.of("city", "Hsinchu", "age", 42))); transformedRecord.put("RECORD_METADATA", new HashMap<>(Map.of("offset", 0))); SinkRecord syntheticKafkaRecord = new SinkRecord("topic", 0, null, null, null, transformedRecord, 0); SnowflakeSinkRecord syntheticRecord = toSinkRecord(syntheticKafkaRecord); SchemaEvolutionTargetItems items = new SchemaEvolutionTargetItems( "test_table", Collections.emptySet(), new HashSet<>(Arrays.asList("RECORD_CONTENT"))); service.evolveSchemaIfNeeded(items, syntheticRecord); @SuppressWarnings("unchecked") ArgumentCaptor> schemaCaptor = ArgumentCaptor.forClass(Map.class); verify(mockConn).appendColumnsToTable(eq("test_table"), schemaCaptor.capture()); Map addedColumns = schemaCaptor.getValue(); assertTrue(addedColumns.containsKey("RECORD_CONTENT")); assertEquals("VARIANT", addedColumns.get("RECORD_CONTENT").getColumnType()); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/TableSchemaResolverTest.java ================================================ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.assertj.core.api.Assertions.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig; import com.snowflake.kafka.connector.records.SnowflakeSinkRecord; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.Test; public class TableSchemaResolverTest 
{ private static final SnowflakeMetadataConfig METADATA_CONFIG = new SnowflakeMetadataConfig(); private final TableSchemaResolver schemaResolver = new TableSchemaResolver(); private static SnowflakeSinkRecord toSinkRecord( SinkRecord kafkaRecord, boolean enableColumnIdentifierNormalization) { return SnowflakeSinkRecord.from( kafkaRecord, METADATA_CONFIG, true, enableColumnIdentifierNormalization); } @Test public void testGetColumnTypesWithSchema_TimestampField_JacksonCanSerialize() { // Reproducer for PR review comment: when schematization IS enabled, // content map holds the raw output of convertToMap(). During schema evolution, // OBJECT_MAPPER.valueToTree(record.getContent()) runs on this map. // If the map contains a raw Instant, plain ObjectMapper (no JavaTimeModule) throws // InvalidDefinitionException. java.util.Date nearEpochDate = new java.util.Date(java.time.Instant.parse("1969-04-08T00:00:00Z").toEpochMilli()); org.apache.kafka.connect.data.Schema schema = org.apache.kafka.connect.data.SchemaBuilder.struct() .field("ts", org.apache.kafka.connect.data.Timestamp.SCHEMA) .build(); org.apache.kafka.connect.data.Struct struct = new org.apache.kafka.connect.data.Struct(schema).put("ts", nearEpochDate); SinkRecord kafkaRecord = new SinkRecord( "topic", 0, null, null, schema, struct, 0, System.currentTimeMillis(), TimestampType.CREATE_TIME); // enableSchematization=true so content = convertToMap() result (may contain Instant) SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, false); // This is the call that fails: OBJECT_MAPPER.valueToTree(record.getContent()) // triggers Jackson serialization of the Instant without JavaTimeModule assertThatCode( () -> schemaResolver.resolveTableSchemaFromSnowflakeRecord(record, Arrays.asList("ts"))) .doesNotThrowAnyException(); } @Test public void testGetColumnTypesWithoutSchema_NormalizationEnabled() throws JsonProcessingException { String columnName = "test"; ObjectMapper mapper = new ObjectMapper(); JsonConverter 
jsonConverter = new JsonConverter(); Map config = Collections.singletonMap("schemas.enable", false); jsonConverter.configure(config, false); Map jsonMap = new HashMap<>(); jsonMap.put(columnName, "value"); SchemaAndValue schemaAndValue = jsonConverter.toConnectData("topic", mapper.writeValueAsBytes(jsonMap)); SinkRecord kafkaRecord = new SinkRecord( "topic", 0, null, null, schemaAndValue.schema(), schemaAndValue.value(), 0, System.currentTimeMillis(), TimestampType.CREATE_TIME); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, true); // With normalization=true, "test" normalizes to "TEST" in ColumnValuePair // So columnsToInclude should use raw normalized name "TEST" TableSchema tableSchema = schemaResolver.resolveTableSchemaFromSnowflakeRecord( record, Collections.singletonList("TEST")); assertThat(tableSchema.getColumnInfos()) .containsExactlyInAnyOrderEntriesOf( Collections.singletonMap("TEST", new ColumnInfos("VARCHAR", null))); // Get non-existing column name should return nothing tableSchema = schemaResolver.resolveTableSchemaFromSnowflakeRecord( record, Collections.singletonList("NONEXISTENT")); assertThat(tableSchema.getColumnInfos()).isEmpty(); } @Test public void testGetColumnTypesWithoutSchema_NormalizationDisabled() throws JsonProcessingException { String columnName = "test"; ObjectMapper mapper = new ObjectMapper(); JsonConverter jsonConverter = new JsonConverter(); Map config = Collections.singletonMap("schemas.enable", false); jsonConverter.configure(config, false); Map jsonMap = new HashMap<>(); jsonMap.put(columnName, "value"); SchemaAndValue schemaAndValue = jsonConverter.toConnectData("topic", mapper.writeValueAsBytes(jsonMap)); SinkRecord kafkaRecord = new SinkRecord( "topic", 0, null, null, schemaAndValue.schema(), schemaAndValue.value(), 0, System.currentTimeMillis(), TimestampType.CREATE_TIME); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, false); // With normalization=false, column name stays as-is: "test" TableSchema 
tableSchema = schemaResolver.resolveTableSchemaFromSnowflakeRecord( record, Collections.singletonList("test")); assertThat(tableSchema.getColumnInfos()) .containsExactlyInAnyOrderEntriesOf( Collections.singletonMap("test", new ColumnInfos("VARCHAR", null))); } @Test public void testGetColumnTypesWithSchema_NormalizationEnabled() { JsonConverter converter = new JsonConverter(); Map converterConfig = new HashMap<>(); converterConfig.put("schemas.enable", "true"); converter.configure(converterConfig, false); SchemaAndValue schemaAndValue = converter.toConnectData( "topic", TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8)); // With normalization=true: "regionid" → "REGIONID", "gender" → "GENDER" String columnName1 = "REGIONID"; String columnName2 = "GENDER"; SinkRecord kafkaRecord = new SinkRecord( "topic", 0, null, null, schemaAndValue.schema(), schemaAndValue.value(), 0, System.currentTimeMillis(), TimestampType.CREATE_TIME); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, true); TableSchema tableSchema = schemaResolver.resolveTableSchemaFromSnowflakeRecord( record, Arrays.asList(columnName1, columnName2)); assertThat(tableSchema.getColumnInfos().get(columnName1).getColumnType()).isEqualTo("VARCHAR"); assertThat(tableSchema.getColumnInfos().get(columnName1).getComments()).isEqualTo("doc"); assertThat(tableSchema.getColumnInfos().get(columnName2).getColumnType()).isEqualTo("VARCHAR"); assertThat(tableSchema.getColumnInfos().get(columnName2).getComments()).isNull(); } @Test public void testGetColumnTypesWithSchema_NormalizationDisabled() { JsonConverter converter = new JsonConverter(); Map converterConfig = new HashMap<>(); converterConfig.put("schemas.enable", "true"); converter.configure(converterConfig, false); SchemaAndValue schemaAndValue = converter.toConnectData( "topic", TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8)); // With normalization=false: column names stay as-is String columnName1 = "regionid"; String columnName2 = 
"gender"; SinkRecord kafkaRecord = new SinkRecord( "topic", 0, null, null, schemaAndValue.schema(), schemaAndValue.value(), 0, System.currentTimeMillis(), TimestampType.CREATE_TIME); SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, false); TableSchema tableSchema = schemaResolver.resolveTableSchemaFromSnowflakeRecord( record, Arrays.asList(columnName1, columnName2)); assertThat(tableSchema.getColumnInfos().get(columnName1).getColumnType()).isEqualTo("VARCHAR"); assertThat(tableSchema.getColumnInfos().get(columnName1).getComments()).isEqualTo("doc"); assertThat(tableSchema.getColumnInfos().get(columnName2).getColumnType()).isEqualTo("VARCHAR"); assertThat(tableSchema.getColumnInfos().get(columnName2).getComments()).isNull(); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/ValidationResultMapperTest.java ================================================ package com.snowflake.kafka.connector.internal.schemaevolution; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.*; import com.snowflake.kafka.connector.internal.validation.ValidationResult; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Set; import org.junit.jupiter.api.Test; class ValidationResultMapperTest { @Test void mapWithExtraColumnsAndBothNotNullViolations() { Set extraCols = new HashSet<>(Arrays.asList("NEW_COL1", "NEW_COL2")); Set missingNotNull = new HashSet<>(Arrays.asList("REQUIRED_COL")); Set nullNotNull = new HashSet<>(Arrays.asList("NULLABLE_COL")); ValidationResult result = ValidationResult.structuralError(extraCols, missingNotNull, nullNotNull); SchemaEvolutionTargetItems items = ValidationResultMapper.mapToSchemaEvolutionItems(result, "MY_TABLE"); assertEquals("MY_TABLE", items.getTableName()); assertThat(items.getColumnsToAdd()).containsExactlyInAnyOrder("NEW_COL1", "NEW_COL2"); 
assertThat(items.getColumnsToDropNonNullability())
        .containsExactlyInAnyOrder("REQUIRED_COL", "NULLABLE_COL");
    assertTrue(items.hasDataForSchemaEvolution());
  }

  /** Three empty input sets map to items that carry nothing to evolve. */
  @Test
  void mapWithEmptyResult() {
    SchemaEvolutionTargetItems mapped =
        ValidationResultMapper.mapToSchemaEvolutionItems(
            ValidationResult.structuralError(
                Collections.emptySet(), Collections.emptySet(), Collections.emptySet()),
            "MY_TABLE");

    assertFalse(mapped.hasDataForSchemaEvolution());
    assertThat(mapped.getColumnsToAdd()).isEmpty();
    assertThat(mapped.getColumnsToDropNonNullability()).isEmpty();
  }

  /** The first structuralError argument alone feeds the columns-to-add set. */
  @Test
  void mapWithOnlyExtraColumns() {
    SchemaEvolutionTargetItems mapped =
        ValidationResultMapper.mapToSchemaEvolutionItems(
            ValidationResult.structuralError(
                new HashSet<>(Arrays.asList("COL1")),
                Collections.emptySet(),
                Collections.emptySet()),
            "T");

    assertThat(mapped.getColumnsToAdd()).containsExactly("COL1");
    assertThat(mapped.getColumnsToDropNonNullability()).isEmpty();
  }

  /** The second structuralError argument alone feeds the drop-NOT-NULL set. */
  @Test
  void mapWithOnlyMissingNotNull() {
    SchemaEvolutionTargetItems mapped =
        ValidationResultMapper.mapToSchemaEvolutionItems(
            ValidationResult.structuralError(
                Collections.emptySet(),
                new HashSet<>(Arrays.asList("COL1")),
                Collections.emptySet()),
            "T");

    assertThat(mapped.getColumnsToAdd()).isEmpty();
    assertThat(mapped.getColumnsToDropNonNullability()).containsExactly("COL1");
  }

  /** The third structuralError argument alone also feeds the drop-NOT-NULL set. */
  @Test
  void mapWithOnlyNullValueForNotNull() {
    SchemaEvolutionTargetItems mapped =
        ValidationResultMapper.mapToSchemaEvolutionItems(
            ValidationResult.structuralError(
                Collections.emptySet(),
                Collections.emptySet(),
                new HashSet<>(Arrays.asList("COL1"))),
            "T");

    assertThat(mapped.getColumnsToAdd()).isEmpty();
    assertThat(mapped.getColumnsToDropNonNullability()).containsExactly("COL1");
  }

  /** Names from the second and third arguments end up together in the drop-NOT-NULL set. */
  @Test
  void mapCombinesBothNotNullViolationTypes() {
    SchemaEvolutionTargetItems mapped =
        ValidationResultMapper.mapToSchemaEvolutionItems(
            ValidationResult.structuralError(
                Collections.emptySet(),
                new HashSet<>(Arrays.asList("MISSING1")),
                new HashSet<>(Arrays.asList("NULL1"))),
            "T");

    assertThat(mapped.getColumnsToDropNonNullability())
        .containsExactlyInAnyOrder("MISSING1", "NULL1");
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/BatchOffsetFetcherTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.ChannelStatusBatch;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientSupplier;
import com.snowflake.kafka.connector.internal.streaming.v2.service.BatchOffsetFetcher;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import
java.time.Instant; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import org.apache.kafka.common.TopicPartition; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** Unit tests for {@link BatchOffsetFetcher}. */ class BatchOffsetFetcherTest { private static final String TASK_ID = "0"; private String connectorName; private ExecutorService ioExecutor; private BatchOffsetFetcher fetcher; private Map channels; private CountingClientSupplier clientSupplier; @BeforeEach void setUp() { // Unique name per test to avoid StreamingClientPools caching across tests connectorName = "test_connector_" + UUID.randomUUID().toString().substring(0, 8); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder().connectorName(connectorName).taskId(TASK_ID).build(); ThreadPools.registerTask(connectorName, taskConfig); ioExecutor = ThreadPools.getIoExecutor(connectorName); fetcher = new BatchOffsetFetcher(connectorName, TASK_ID, taskConfig, ioExecutor, TaskMetrics.noop()); channels = new HashMap<>(); clientSupplier = new CountingClientSupplier(); StreamingClientFactory.setStreamingClientSupplier(clientSupplier); } @AfterEach void tearDown() { ThreadPools.closeForTask(connectorName); StreamingClientFactory.resetStreamingClientSupplier(); } @Test void emptyPartitionsReturnsEmptyMap() { assertTrue(fetcher.getCommittedOffsets(Collections.emptySet(), channelLookup()).isEmpty()); } @Test void groupsByPipeAndBatchesCalls() { TopicPartition tp0 = new TopicPartition("topicA", 0); TopicPartition tp1 = new TopicPartition("topicA", 1); TopicPartition tp2 = new TopicPartition("topicB", 0); TopicPartition tp3 = new 
TopicPartition("topicB", 1); registerChannel(tp0, "pipeA", "chA0", 10L); registerChannel(tp1, "pipeA", "chA1", 20L); registerChannel(tp2, "pipeB", "chB0", 30L); registerChannelWithNoOffset(tp3, "pipeB", "chB1"); Map result = fetcher.getCommittedOffsets(Set.of(tp0, tp1, tp2, tp3), channelLookup()); assertEquals(3, result.size()); assertEquals(11L, result.get(tp0)); assertEquals(21L, result.get(tp1)); assertEquals(31L, result.get(tp2)); assertTrue(!result.containsKey(tp3), "Channel with no offset should be excluded"); // Two pipes, so exactly 2 batch calls assertEquals(2, clientSupplier.getBatchCallCount()); } @Test void uninitializedPartitionsAreSkipped() { TopicPartition initialized = new TopicPartition("topicA", 0); TopicPartition uninitialized = new TopicPartition("topicA", 1); registerChannel(initialized, "pipeA", "ch0", 5L); Map result = fetcher.getCommittedOffsets(Set.of(initialized, uninitialized), channelLookup()); assertEquals(1, result.size()); assertEquals(6L, result.get(initialized)); } @Test void sfExceptionForOnePipeDoesNotAffectOthers() { TopicPartition tp0 = new TopicPartition("topicA", 0); TopicPartition tp1 = new TopicPartition("topicB", 0); registerChannel(tp0, "pipeA", "ch0", 10L); registerChannel(tp1, "pipeB", "ch1", 20L); clientSupplier.setFailingPipe("pipeA"); Map result = fetcher.getCommittedOffsets(Set.of(tp0, tp1), channelLookup()); assertEquals(1, result.size()); assertEquals(21L, result.get(tp1)); } @Test void connectorExceptionPropagates() { TopicPartition tp0 = new TopicPartition("topicA", 0); TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class); when(mockChannel.getChannelName()).thenReturn("ch0"); when(mockChannel.getPipeName()).thenReturn("pipeA"); when(mockChannel.processChannelStatus(any(ChannelStatus.class), anyBoolean())) .thenThrow(new SnowflakeKafkaConnectorException("ingestion error", "5030")); channels.put(tp0, mockChannel); clientSupplier.setChannelOffset("ch0", "pipeA", "10"); assertThrows( 
SnowflakeKafkaConnectorException.class, () -> fetcher.getCommittedOffsets(Set.of(tp0), channelLookup())); } @Test void partitionsByTopicGroupsCorrectly() { TopicPartition tpA0 = new TopicPartition("topicA", 0); TopicPartition tpA1 = new TopicPartition("topicA", 1); TopicPartition tpB0 = new TopicPartition("topicB", 0); TopicPartition tpB1 = new TopicPartition("topicB", 1); TopicPartitionChannel chA0 = mockChannel("pipeA"); TopicPartitionChannel chB0 = mockChannel("pipeB"); // tpA0 and tpB0 have channels; tpA1 and tpB1 do not Map lookup = Map.of(tpA0, chA0, tpB0, chB0); BatchOffsetFetcher.PartitionsByTopic result = BatchOffsetFetcher.PartitionsByTopic.groupByTopic( Set.of(tpA0, tpA1, tpB0, tpB1), tp -> Optional.ofNullable(lookup.get(tp))); // Initialized channels grouped by pipe assertEquals(2, result.pipeNameToChannels.size()); assertEquals(Map.of(tpA0, chA0), result.pipeNameToChannels.get("pipeA")); assertEquals(Map.of(tpB0, chB0), result.pipeNameToChannels.get("pipeB")); // Uninitialized partitions grouped by topic assertEquals(2, result.topicToPartitionsWithoutChannels.size()); assertEquals(Set.of(tpA1), result.topicToPartitionsWithoutChannels.get("topicA")); assertEquals(Set.of(tpB1), result.topicToPartitionsWithoutChannels.get("topicB")); } private static TopicPartitionChannel mockChannel(String pipeName) { TopicPartitionChannel ch = mock(TopicPartitionChannel.class); when(ch.getPipeName()).thenReturn(pipeName); return ch; } // -- helpers -- private Function> channelLookup() { return tp -> Optional.ofNullable(channels.get(tp)); } private void registerChannel( TopicPartition topicPartition, String pipeName, String channelName, long committedOffset) { TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class); when(mockChannel.getChannelName()).thenReturn(channelName); when(mockChannel.getPipeName()).thenReturn(pipeName); when(mockChannel.processChannelStatus(any(ChannelStatus.class), anyBoolean())) .thenReturn(committedOffset + 1); 
channels.put(topicPartition, mockChannel); clientSupplier.setChannelOffset(channelName, pipeName, String.valueOf(committedOffset)); } private void registerChannelWithNoOffset( TopicPartition topicPartition, String pipeName, String channelName) { TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class); when(mockChannel.getChannelName()).thenReturn(channelName); when(mockChannel.getPipeName()).thenReturn(pipeName); when(mockChannel.processChannelStatus(any(ChannelStatus.class), anyBoolean())) .thenReturn(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE); channels.put(topicPartition, mockChannel); } /** * A StreamingClientSupplier that creates mock clients with configurable channel statuses and * tracks batch call counts. */ static class CountingClientSupplier implements StreamingClientSupplier { private final AtomicInteger batchCallCount = new AtomicInteger(0); // pipeName -> (channelName -> offsetToken) private final Map> pipeChannelOffsets = new ConcurrentHashMap<>(); private volatile String failingPipe = null; void setChannelOffset(String channelName, String pipeName, String offsetToken) { pipeChannelOffsets .computeIfAbsent(pipeName, k -> new ConcurrentHashMap<>()) .put(channelName, offsetToken); } void setFailingPipe(String pipeName) { this.failingPipe = pipeName; } int getBatchCallCount() { return batchCallCount.get(); } @Override public SnowflakeStreamingIngestClient get( String clientName, String dbName, String schemaName, String pipeName, StreamingClientProperties streamingClientProperties) { SnowflakeStreamingIngestClient client = mock(SnowflakeStreamingIngestClient.class); when(client.getChannelStatus(any())) .thenAnswer( invocation -> { batchCallCount.incrementAndGet(); if (pipeName.equals(failingPipe)) { throw new SFException( "TestError", "Simulated batch failure", 500, "Internal Server Error"); } List names = invocation.getArgument(0); Map statusMap = new HashMap<>(); Map offsets = pipeChannelOffsets.getOrDefault(pipeName, Collections.emptyMap()); 
for (String name : names) { statusMap.put( name, new ChannelStatus( "db", "schema", pipeName, name, "SUCCESS", offsets.get(name), Instant.now(), 0, 0, 0, null, null, null, null, Instant.now())); } return new ChannelStatusBatch(statusMap); }); return client; } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/ChannelStatusCheckIT.java ================================================ package com.snowflake.kafka.connector.internal.streaming; import static org.awaitility.Awaitility.await; import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory; import java.time.Duration; import java.util.HashMap; import java.util.Map; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.runtime.ConnectorConfig; import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo; import org.apache.kafka.connect.sink.SinkConnector; import org.apache.kafka.connect.storage.StringConverter; import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; /** * Integration tests for channel status error handling using an embedded Kafka Connect cluster with * fake streaming ingest clients. 
*/
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class ChannelStatusCheckIT {

  private static final int PARTITIONS_NUMBER = 10;

  private EmbeddedConnectCluster connectCluster;
  // One supplier instance for the whole class (PER_CLASS lifecycle); its state must therefore be
  // reset between tests, see setUp().
  private final FakeIngestClientSupplier fakeClientSupplier = new FakeIngestClientSupplier();
  private final ObjectMapper mapper = new ObjectMapper();

  private String topicName;
  private String connectorName;

  @BeforeAll
  void beforeAll() {
    Map<String, String> workerConfig = new HashMap<>();
    workerConfig.put("plugin.discovery", "hybrid_warn");
    // Set a short offset flush interval for faster preCommit calls
    workerConfig.put("offset.flush.interval.ms", "1000");
    connectCluster =
        new EmbeddedConnectCluster.Builder()
            .name("channel-status-check-cluster")
            .numWorkers(5)
            .workerProps(workerConfig)
            .build();
    connectCluster.start();
  }

  @AfterAll
  void afterAll() {
    if (connectCluster != null) {
      connectCluster.stop();
      connectCluster = null;
    }
  }

  @BeforeEach
  void setUp() {
    // Some tests raise the pre-existing error count on the shared supplier. Without this reset
    // the value would be inherited by clients created for whichever test runs next, making the
    // outcome depend on test execution order.
    fakeClientSupplier.setPreExistingErrorCount(0);

    topicName = TestUtils.randomTableName();
    connectorName = topicName + "_connector";
    connectCluster.kafka().createTopic(topicName, PARTITIONS_NUMBER);
    TestUtils.createTableWithMetadataColumn(topicName);
    StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier);
  }

  @AfterEach
  void tearDown() {
    connectCluster.deleteConnector(connectorName);
    waitForConnectorStopped(connectorName);
    connectCluster.kafka().deleteTopic(topicName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(topicName);
  }

  @Test
  void shouldContinueWorkingWhenNoChannelErrors() throws JsonProcessingException {
    // Given: connector with default config (errors.tolerance=none)
    Map<String, String> config = defaultProperties(topicName, connectorName);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);
    // Resolve the client once; polling a predicate that itself awaits would nest two waits.
    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // When: produce messages
    produceMessages(3000);

    // Then: connector should remain running (no errors to cause failure)
    await("Messages processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 3);
    assertAllTasksRunning("All tasks should be running when there are no channel errors");
  }

  @Test
  void shouldFailConnectorWhenChannelHasErrorsAndToleranceIsNone() throws JsonProcessingException {
    // Given: connector with errors.tolerance=none (default)
    Map<String, String> config = defaultProperties(topicName, connectorName);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);
    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce messages to ensure the channels are set up
    produceMessages(3000);
    await("Initial message processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 1);

    // When: inject errors on all channels
    injectErrorsOnAllChannels(fakeClient, 5);

    // Then: connector task should fail due to channel errors
    awaitAnyTaskFailed("Connector task failed");
  }

  @Test
  void shouldContinueWorkingWhenChannelHasErrorsAndToleranceIsAll() throws JsonProcessingException {
    // Given: connector with errors.tolerance=all
    Map<String, String> config = defaultProperties(topicName, connectorName);
    config.put(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, "all");
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);
    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce initial message
    produceMessages(1);
    await("Initial message processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 1);

    // When: inject errors on all channels
    injectErrorsOnAllChannels(fakeClient, 5);

    // Produce more messages
    produceMessages(2);

    // Then: connector should continue running (errors are tolerated)
    await("Messages processed despite errors")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 3);
    assertAllTasksRunning("All tasks should remain running when errors.tolerance=all");
  }

  @Test
  void shouldContinueWorkingWithPreExistingErrorsAndToleranceIsNone()
      throws JsonProcessingException {
    // Given: Pre-existing errors are set BEFORE the connector starts (simulating channel reopen
    // scenario)
    // This simulates the case where a channel has cumulative errors from a previous connector run
    fakeClientSupplier.setPreExistingErrorCount(5);

    Map<String, String> config = defaultProperties(topicName, connectorName);
    config.put(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, "none");
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);
    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce messages
    produceMessages(5);

    // Then: connector should remain running because pre-existing errors don't count as new errors
    await("Messages processed despite pre-existing errors")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 5);
    assertAllTasksRunning("All tasks should be running when there are only pre-existing errors");
  }

  @Test
  void shouldFailWhenNewErrorsOccurAfterStartupWithPreExistingErrors()
      throws JsonProcessingException {
    // Given: Pre-existing errors are set BEFORE the connector starts
    fakeClientSupplier.setPreExistingErrorCount(5);

    Map<String, String> config = defaultProperties(topicName, connectorName);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);
    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce initial message
    produceMessages(1);
    await("Initial message processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 1);

    // When: NEW errors occur (error count increases from 5 to 10)
    injectErrorsOnAllChannels(fakeClient, 10);

    // Then: connector task should fail due to NEW channel errors
    awaitAnyTaskFailed("Connector task failed due to new errors");
  }

  // Helper methods

  /**
   * Produces {@code count} identical JSON messages, spreading them round-robin over the topic's
   * partitions.
   */
  private void produceMessages(int count) throws JsonProcessingException {
    Map<String, String> payload = Map.of("key1", "value1", "key2", "value2");
    // The payload never changes, so serialize it once instead of once per message.
    String serializedPayload = mapper.writeValueAsString(payload);
    for (int i = 0; i < count; i++) {
      connectCluster
          .kafka()
          .produce(topicName, i % PARTITIONS_NUMBER, "key-" + i, serializedPayload);
    }
  }

  /** Sets the given cumulative error count on every channel the fake client has opened. */
  private void injectErrorsOnAllChannels(
      FakeSnowflakeStreamingIngestClient fakeClient, long errorCount) {
    for (FakeSnowflakeStreamingIngestChannel channel : fakeClient.getOpenedChannels()) {
      channel.updateErrors(errorCount, "Test error message", "95");
    }
  }

  /** Asserts that every task of the connector under test currently reports RUNNING. */
  private void assertAllTasksRunning(String failureMessage) {
    ConnectorStateInfo connectorState = connectCluster.connectorStatus(connectorName);
    assertTrue(
        connectorState.tasks().stream().allMatch(task -> "RUNNING".equals(task.state())),
        failureMessage);
  }

  /** Polls every 4 seconds, for up to 2 minutes, until at least one task reports FAILED. */
  private void awaitAnyTaskFailed(String alias) {
    await(alias)
        .atMost(Duration.ofMinutes(2))
        .pollInterval(Duration.ofSeconds(4))
        .until(
            () -> {
              ConnectorStateInfo state = connectCluster.connectorStatus(connectorName);
              return state.tasks().stream().anyMatch(task -> "FAILED".equals(task.state()));
            });
  }

  /**
   * Waits up to 30 seconds until the connector's fake client has opened at least one channel and
   * returns that client. Lookup failures while the client does not exist yet are ignored.
   */
  private FakeSnowflakeStreamingIngestClient waitForConnectorToOpenChannels(String connectorName) {
    await("channelsCreated")
        .atMost(Duration.ofSeconds(30))
        .ignoreExceptions()
        .until(
            () ->
                !getFakeSnowflakeStreamingIngestClient(connectorName)
                    .getOpenedChannels()
                    .isEmpty());
    return getFakeSnowflakeStreamingIngestClient(connectorName);
  }

  /**
   * Finds the first fake client whose client name contains the sanitized connector name.
   *
   * @throws java.util.NoSuchElementException if no such client has been created yet
   */
  private FakeSnowflakeStreamingIngestClient getFakeSnowflakeStreamingIngestClient(
      String connectorName) {
    // Connector names are sanitized/uppercased by Utils.convertAppName() in the connector
    Map<String, String> config = new HashMap<>();
    config.put(KafkaConnectorConfigParams.NAME, connectorName);
    Utils.convertAppName(config);
    String sanitizedConnectorName = config.get(KafkaConnectorConfigParams.NAME);

    return fakeClientSupplier.getFakeIngestClients().stream()
        .filter((client) -> client.getClientName().contains(sanitizedConnectorName))
        .findFirst()
        .orElseThrow();
  }

  /** Single-task connector configuration built on top of the profile-file configuration. */
  private Map<String, String> defaultProperties(String topicName, String connectorName) {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    config.put(SinkConnector.TOPICS_CONFIG, topicName);
    config.put(
        ConnectorConfig.CONNECTOR_CLASS_CONFIG, SnowflakeStreamingSinkConnector.class.getName());
    config.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
    config.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    config.put(KafkaConnectorConfigParams.NAME, connectorName);
    config.put(KafkaConnectorConfigParams.VALUE_CONVERTER_SCHEMAS_ENABLE, "false");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    return config;
  }

  private void waitForConnectorRunning(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorAndAtLeastNumTasksAreRunning(
              connectorName, 1, "The connector did not start.");
    } catch (InterruptedException e) {
      // Keep the interrupt visible to the test runner and preserve the original cause.
      Thread.currentThread().interrupt();
      throw new IllegalStateException("The connector is not running", e);
    }
  }

  private void waitForConnectorStopped(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorDoesNotExist(connectorName, "Failed to stop the connector");
    } catch (InterruptedException e) {
      // Keep the interrupt visible to the test runner and preserve the original cause.
      Thread.currentThread().interrupt();
      throw new IllegalStateException("Interrupted while waiting for connector to stop", e);
    }
  }
}

================================================
FILE:
src/test/java/com/snowflake/kafka/connector/internal/streaming/CloseTopicPartitionChannelIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.awaitility.Awaitility.await;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.ConnectClusterBaseIT;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.time.Duration;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Checks that deleting a connector closes every channel it opened, one channel per partition of
 * the test topic.
 */
class CloseTopicPartitionChannelIT extends ConnectClusterBaseIT {

  private static final int PARTITIONS_NUMBER = 16;

  private String topicName;
  private String connectorName;
  private ObjectMapper mapper = new ObjectMapper();

  @BeforeEach
  void setUp() throws JsonProcessingException {
    topicName = TestUtils.randomTableName();
    connectorName = topicName + "_connector";
    connectCluster.kafka().createTopic(topicName, PARTITIONS_NUMBER);
    // JVM scoped Ingest client mock
    StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier);
    generateKafkaMessages();
  }

  @AfterEach
  void tearDown() {
    connectCluster.kafka().deleteTopic(topicName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(topicName);
    TestUtils.dropPipe(topicName + "-STREAMING");
  }

  /** Writes exactly one JSON message to each partition of the test topic. */
  private void generateKafkaMessages() throws JsonProcessingException {
    final String json = mapper.writeValueAsString(Map.of("key1", "value1", "key2", "value2"));
    int partition = 0;
    while (partition < PARTITIONS_NUMBER) {
      connectCluster.kafka().produce(topicName, partition, "key-" + partition, json);
      partition++;
    }
  }

  @Test
  void closeChannels() {
    final Duration timeout = Duration.ofSeconds(30);

    // given
    connectCluster.configureConnector(connectorName, defaultProperties(topicName, connectorName));
    waitForConnectorRunning(connectorName);
    waitForOpenedFakeIngestClient(connectorName);

    await("Channels created")
        .atMost(timeout)
        .ignoreExceptions()
        .until(() -> openedChannelCount() == PARTITIONS_NUMBER);

    // when
    connectCluster.deleteConnector(connectorName);
    waitForConnectorDoesNotExist(connectorName);

    // then
    await("Channels closed")
        .atMost(timeout)
        .until(() -> closedChannelCount() == PARTITIONS_NUMBER);
  }

  /** Number of channels the connector's fake client has opened so far. */
  private int openedChannelCount() {
    return getOpenedFakeIngestClient(connectorName).getOpenedChannels().size();
  }

  /** Number of channels of the connector's fake client that are reported as closed. */
  private long closedChannelCount() {
    return getOpenedFakeIngestClient(connectorName).countClosedChannels();
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/DefaultStreamingConfigValidatorTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.google.common.collect.ImmutableMap;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/** Converter/schematization compatibility checks of {@link DefaultStreamingConfigValidator}. */
class DefaultStreamingConfigValidatorTest {

  private static final String VALUE_CONVERTER = "value.converter";
  private static final String SCHEMATIZATION = "snowflake.enable.schematization";

  private static final String STRING_CONVERTER =
      "org.apache.kafka.connect.storage.StringConverter";
  private static final String BYTE_ARRAY_CONVERTER =
      "org.apache.kafka.connect.converters.ByteArrayConverter";
  private static final String JSON_CONVERTER = "org.apache.kafka.connect.json.JsonConverter";

  private final DefaultStreamingConfigValidator validator = new DefaultStreamingConfigValidator();

  /** Minimal configuration shared by every test case. */
  private Map<String, String> validConfig() {
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.role.name", "testrole");
    return config;
  }

  /** Base config plus a value converter; schematization is left unset. */
  private Map<String, String> configWith(String converterClass) {
    Map<String, String> config = validConfig();
    config.put(VALUE_CONVERTER, converterClass);
    return config;
  }

  /** Base config plus a value converter and an explicit schematization flag. */
  private Map<String, String> configWith(String converterClass, String schematization) {
    Map<String, String> config = configWith(converterClass);
    config.put(SCHEMATIZATION, schematization);
    return config;
  }

  /** True when the validator reports no errors for the given configuration. */
  private boolean accepts(Map<String, String> config) {
    return validator.validate(config).isEmpty();
  }

  @Test
  void testStringConverterAllowed_WhenSchematizationDisabled() {
    assertTrue(
        accepts(configWith(STRING_CONVERTER, "false")),
        "StringConverter should be allowed when schematization is disabled");
  }

  @Test
  void testByteArrayConverterAllowed_WhenSchematizationDisabled() {
    assertTrue(
        accepts(configWith(BYTE_ARRAY_CONVERTER, "false")),
        "ByteArrayConverter should be allowed when schematization is disabled");
  }

  @Test
  void testStringConverterBlocked_WhenSchematizationEnabled() {
    assertFalse(
        accepts(configWith(STRING_CONVERTER, "true")),
        "StringConverter should be blocked when schematization is enabled");
  }

  @Test
  void testByteArrayConverterBlocked_WhenSchematizationEnabled() {
    assertFalse(
        accepts(configWith(BYTE_ARRAY_CONVERTER, "true")),
        "ByteArrayConverter should be blocked when schematization is enabled");
  }

  @Test
  void testStringConverterBlocked_WhenSchematizationDefault() {
    assertFalse(
        accepts(configWith(STRING_CONVERTER)),
        "StringConverter should be blocked when schematization defaults to true");
  }

  @Test
  void testJsonConverterAllowed_WhenSchematizationEnabled() {
    assertTrue(
        accepts(configWith(JSON_CONVERTER, "true")),
        "JsonConverter should be allowed regardless of schematization");
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/FakeIngestClientSupplier.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import
com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientSupplier; import java.util.Collection; import java.util.concurrent.ConcurrentHashMap; public class FakeIngestClientSupplier implements StreamingClientSupplier { private final ConcurrentHashMap pipeToIngestClientMap = new ConcurrentHashMap<>(); private long preExistingErrorCount = 0; @Override public SnowflakeStreamingIngestClient get( final String clientName, final String dbName, final String schemaName, final String pipeName, final StreamingClientProperties streamingClientProperties) { return pipeToIngestClientMap.computeIfAbsent( pipeName, (key) -> { final FakeSnowflakeStreamingIngestClient client = new FakeSnowflakeStreamingIngestClient(pipeName, clientName); client.setDefaultErrorCount(preExistingErrorCount); return client; }); } public Collection getFakeIngestClients() { return pipeToIngestClientMap.values(); } /** * Sets the pre-existing error count that will be applied to all channels when they are opened. * This simulates the cumulative error count that persists in Snowflake across connector restarts. 
*/ public void setPreExistingErrorCount(final long errorCount) { this.preExistingErrorCount = errorCount; // Also update existing clients for (final FakeSnowflakeStreamingIngestClient client : pipeToIngestClientMap.values()) { client.setDefaultErrorCount(errorCount); } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/FakeSnowflakeStreamingIngestChannel.java ================================================ package com.snowflake.kafka.connector.internal.streaming; import static java.util.List.copyOf; import com.snowflake.ingest.streaming.ChannelStatus; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeoutException; import java.util.function.Predicate; public class FakeSnowflakeStreamingIngestChannel implements SnowflakeStreamingIngestChannel, Comparable { private final String databaseName; private final String schemaName; private final String pipeName; private final String channelName; private final List> appendedRows; private volatile boolean closed; private String offsetToken; private String statusCode = "SUCCESS"; private long rowsInsertedCount; private long rowsParsedCount; private long rowsErrorCount; private String lastErrorOffsetTokenUpperBound; private String lastErrorMessage; private Instant lastErrorTimestamp; private Duration serverAvgProcessingLatency; public FakeSnowflakeStreamingIngestChannel( final String pipeName, final String channelName, final FakeSnowflakeStreamingIngestClient parentClient) { this("db", "schema", pipeName, channelName); } public FakeSnowflakeStreamingIngestChannel( final String databaseName, final String schemaName, final String pipeName, final String channelName) { this.databaseName = databaseName; this.schemaName = schemaName; 
this.pipeName = pipeName; this.channelName = channelName; this.appendedRows = new ArrayList<>(); } @Override public String getDBName() { return databaseName; } @Override public String getSchemaName() { return schemaName; } @Override public String getPipeName() { return pipeName; } @Override public String getFullyQualifiedPipeName() { throw new UnsupportedOperationException(); } @Override public String getFullyQualifiedChannelName() { return channelName; } @Override public boolean isClosed() { return closed; } @Override public String getChannelName() { return channelName; } @Override public void close() { this.closed = true; } @Override public void close(final boolean waitForFlush, final Duration timeoutDuration) throws TimeoutException { this.close(); } @Override public synchronized void appendRow(final Map row, final String offsetToken) { this.appendedRows.add(row); this.offsetToken = offsetToken; } @Override public synchronized void appendRows( final Iterable> rows, final String startOffsetToken, final String endOffsetToken) { for (Map row : rows) { this.appendedRows.add(row); } this.offsetToken = endOffsetToken; } @Override public synchronized String getLatestCommittedOffsetToken() { return offsetToken; } @Override public ChannelStatus getChannelStatus() { return new ChannelStatus( databaseName, schemaName, pipeName, channelName, statusCode, offsetToken, Instant.now(), rowsInsertedCount, rowsParsedCount, rowsErrorCount, lastErrorOffsetTokenUpperBound, lastErrorMessage, lastErrorTimestamp, serverAvgProcessingLatency, Instant.now()); } public void updateErrors( long errorCount, String lastErrorMessage, String lastErrorOffsetTokenUpperBound) { this.rowsErrorCount = errorCount; this.lastErrorMessage = lastErrorMessage; this.lastErrorOffsetTokenUpperBound = lastErrorOffsetTokenUpperBound; this.lastErrorTimestamp = Instant.now(); } public void setErrorCount(final long errorCount) { this.rowsErrorCount = errorCount; } public void setOffsetToken(final String 
offsetToken) { this.offsetToken = offsetToken; } @Override public CompletableFuture waitForCommit( final Predicate tokenChecker, final Duration timeoutDuration) { throw new UnsupportedOperationException(); } @Override public CompletableFuture waitForFlush(final Duration timeoutDuration) { throw new UnsupportedOperationException(); } @Override public void initiateFlush() { throw new UnsupportedOperationException(); } public synchronized int getAppendedRowsCount() { return this.appendedRows.size(); } public synchronized List> getAppendedRows() { return copyOf(appendedRows); } @Override public int compareTo(final FakeSnowflakeStreamingIngestChannel o) { return this.channelName.compareTo(o.getChannelName()); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/FakeSnowflakeStreamingIngestClient.java ================================================ package com.snowflake.kafka.connector.internal.streaming; import com.snowflake.ingest.streaming.ChannelStatus; import com.snowflake.ingest.streaming.ChannelStatusBatch; import com.snowflake.ingest.streaming.OpenChannelResult; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; public class FakeSnowflakeStreamingIngestClient implements SnowflakeStreamingIngestClient { private final String pipeName; private final String clientName; private final Map openedChannels = new ConcurrentHashMap<>(); private final Map channelNameToOffsetTokens = new ConcurrentHashMap<>(); // Shared error counts per channel name - persists across channel reopens like real Snowflake private final Map channelNameToErrorCount = new ConcurrentHashMap<>(); // Default error count to use when no channel-specific count is set private long defaultErrorCount = 
0;
  // volatile: close() and isClosed() may be called from different threads
  private volatile boolean closed = false;

  /**
   * Creates a fake client.
   *
   * @param pipeName pipe name given to every channel this client opens
   * @param clientName value returned by {@link #getClientName()}
   */
  public FakeSnowflakeStreamingIngestClient(final String pipeName, final String clientName) {
    this.pipeName = pipeName;
    this.clientName = clientName;
  }

  /** Sets the error count used for channels that have no channel-specific count. */
  public void setDefaultErrorCount(final long errorCount) {
    this.defaultErrorCount = errorCount;
  }

  /** Pre-seeds the error count a channel starts with the first time it is opened. */
  public void setInitialErrorCountForChannel(final String channelName, final long errorCount) {
    channelNameToErrorCount.put(channelName, errorCount);
  }

  /** Returns the pre-seeded error count for the channel, or the default if none was seeded. */
  public long getErrorCountForChannel(final String channelName) {
    return channelNameToErrorCount.getOrDefault(channelName, defaultErrorCount);
  }

  /** Marks the client closed; channels already opened are left as they are. */
  @Override
  public void close() {
    this.closed = true;
  }

  @Override
  public CompletableFuture close(final boolean waitForFlush, final Duration timeoutDuration) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void initiateFlush() {
    throw new UnsupportedOperationException();
  }

  @Override
  public OpenChannelResult openChannel(final String channelName) {
    throw new UnsupportedOperationException();
  }

  /**
   * Opens (or re-opens) a fake channel and registers it under its channel name, replacing any
   * channel previously registered under that name.
   *
   * @param channelName name of the channel to open
   * @param offsetToken initial offset token of the new channel; may be null
   * @return the new channel together with its current status
   */
  @Override
  public OpenChannelResult openChannel(final String channelName, final String offsetToken) {
    if (offsetToken != null) {
      // Recorded for bookkeeping; nothing in this class reads the map back.
      channelNameToOffsetTokens.put(channelName, offsetToken);
    }
    // Error counts persist across channel reopens, like real Snowflake.
    // Use the existing channel's count if present, otherwise fall back to pre-seeded or default.
    FakeSnowflakeStreamingIngestChannel previous = openedChannels.get(channelName);
    final long errorCount =
        previous != null
            ? previous.getChannelStatus().getRowsErrorCount()
            : channelNameToErrorCount.getOrDefault(channelName, defaultErrorCount);
    final FakeSnowflakeStreamingIngestChannel channel =
        new FakeSnowflakeStreamingIngestChannel("db", "schema", pipeName, channelName);
    channel.setOffsetToken(offsetToken);
    channel.setErrorCount(errorCount);
    openedChannels.put(channel.getFullyQualifiedChannelName(), channel);
    return new OpenChannelResult(channel, channel.getChannelStatus());
  }

  @Override
  public void dropChannel(final String channelName) {
    throw new UnsupportedOperationException();
  }

  @Override
  public Map getLatestCommittedOffsetTokens(final List channelNames) {
    throw new UnsupportedOperationException();
  }

  /** Returns the status of each requested channel that has been opened; unknown names are omitted. */
  @Override
  public ChannelStatusBatch getChannelStatus(final List channelNames) {
    Map statusMap = new HashMap<>();
    for (String name : channelNames) {
      FakeSnowflakeStreamingIngestChannel channel = openedChannels.get(name);
      if (channel != null) {
        statusMap.put(name, channel.getChannelStatus());
      }
    }
    return new ChannelStatusBatch(statusMap);
  }

  /**
   * Reports whether {@link #close()} has been called.
   *
   * <p>Previously this threw {@link UnsupportedOperationException} even though {@code close()}
   * already tracked the state, which left the {@code closed} flag write-only.
   */
  @Override
  public boolean isClosed() {
    return closed;
  }

  @Override
  public CompletableFuture waitForFlush(final Duration timeoutDuration) {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getDBName() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getSchemaName() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getPipeName() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getClientName() {
    return clientName;
  }

  /** Returns a snapshot list of the channels currently registered with this client. */
  public List getOpenedChannels() {
    return new ArrayList<>(openedChannels.values());
  }

  /** Counts registered channels whose {@code isClosed()} is true. */
  public long countClosedChannels() {
    return openedChannels.values().stream()
        .filter(FakeSnowflakeStreamingIngestChannel::isClosed)
        .count();
  }

  /** Sums the appended-row counts of all registered channels. */
  public int getAppendedRowCount() {
    return openedChannels.values().stream()
        .mapToInt(FakeSnowflakeStreamingIngestChannel::getAppendedRowsCount)
        .sum();
  }
}

================================================
FILE:
src/test/java/com/snowflake/kafka/connector/internal/streaming/InMemorySinkTaskContext.java ================================================ package com.snowflake.kafka.connector.internal.streaming; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.ErrantRecordReporter; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.sink.SinkTaskContext; /* In memory implementation of SinkTaskContext used for testing */ public class InMemorySinkTaskContext implements SinkTaskContext { private final Map offsets = new HashMap(); private long timeoutMs = -1L; private Set assignment; public InMemorySinkTaskContext(Set assignment) { this.assignment = assignment; } public Map configs() { throw new UnsupportedOperationException(); } public void offset(Map offsets) { this.offsets.putAll(offsets); } public void offset(TopicPartition tp, long offset) { this.offsets.put(tp, offset); } /** Returns the last offset set for the given partition, or -1 if not set. */ public long offset(TopicPartition tp) { return this.offsets.getOrDefault(tp, -1L); } public void timeout(long timeoutMs) { this.timeoutMs = timeoutMs; } public Set assignment() { return this.assignment; } public void pause(TopicPartition... partitions) {} public void resume(TopicPartition... 
partitions) {} public void requestCommit() {} public ErrantRecordReporter errantRecordReporter() { return new ErrantRecordReporter() { @Override public Future report(SinkRecord record, Throwable error) { return Executors.newCachedThreadPool().submit(() -> null); } }; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/OpenChannelRetryPolicyTest.java ================================================ package com.snowflake.kafka.connector.internal.streaming; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import com.snowflake.ingest.streaming.SFException; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel; import dev.failsafe.function.CheckedSupplier; import java.util.concurrent.atomic.AtomicInteger; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.mockito.MockitoAnnotations; public class OpenChannelRetryPolicyTest { private static final String EXCEPTION_429_MSG = "Open channel request failed: HTTP Status: 429 ErrorBody: {\n" + "\"status_code\" : 87,\n" + "\"message\" : \"Cannot open channel at this time due to a high number of pending" + " open channel requests on the table.\"\n" + "}."; @Mock private SnowflakeStreamingIngestChannel mockChannel; private final String channelName = "test_channel"; @BeforeEach void setUp() { MockitoAnnotations.initMocks(this); } @Test void shouldReturnChannelOnFirstAttemptSuccess() { // Given CheckedSupplier supplier = () -> mockChannel; // When SnowflakeStreamingIngestChannel result = OpenChannelRetryPolicy.executeWithRetry(supplier, channelName); // Then assertSame(mockChannel, result); } @Test void shouldNotRetryOnNonSFException() { // Given IllegalArgumentException nonRetryableException = new IllegalArgumentException("Non-retryable"); CheckedSupplier 
supplier =
        () -> {
          throw nonRetryableException;
        };
    // When/Then
    // The very same exception instance must surface to the caller.
    IllegalArgumentException thrownException =
        assertThrows(
            IllegalArgumentException.class,
            () -> OpenChannelRetryPolicy.executeWithRetry(supplier, channelName));
    assertSame(nonRetryableException, thrownException);
  }

  /**
   * An SFException built with status 400 must be rethrown as the same instance, and the supplier
   * must have been invoked exactly once.
   */
  @Test
  void shouldNotRetryOnSFExceptionWithout429() {
    // Given
    SFException nonRetryableException =
        new SFException("OPEN_CHANNEL_FAILURE", "Some other error", 400, "BAD_REQUEST");
    // Counts how many times the policy invokes the supplier
    AtomicInteger attemptCount = new AtomicInteger(0);
    CheckedSupplier supplier =
        () -> {
          attemptCount.incrementAndGet();
          throw nonRetryableException;
        };
    // When/Then
    SFException thrownException =
        assertThrows(
            SFException.class,
            () -> OpenChannelRetryPolicy.executeWithRetry(supplier, channelName));
    assertSame(nonRetryableException, thrownException);
    assertEquals(1, attemptCount.get()); // Should only attempt once
  }

  /**
   * The supplier throws an SFException built with status 429 on its first two invocations and
   * returns the mock channel on the third; the call must return that channel after exactly three
   * invocations.
   */
  @Test
  void shouldRetryMultipleTimesOn429Exception() {
    // Given
    SFException exception429 =
        new SFException("INTERNAL_ERROR", EXCEPTION_429_MSG, 429, "TOO_MANY_REQUESTS");
    AtomicInteger attemptCount = new AtomicInteger(0);
    CheckedSupplier supplier =
        () -> {
          int attempt = attemptCount.incrementAndGet();
          if (attempt <= 2) {
            throw exception429; // Fail first 2 attempts with 429
          }
          return mockChannel; // Succeed on 3rd attempt
        };
    // When
    SnowflakeStreamingIngestChannel result =
        OpenChannelRetryPolicy.executeWithRetry(supplier, channelName);
    // Then
    assertSame(mockChannel, result);
    assertEquals(3, attemptCount.get()); // Verify it retried 2 times before succeeding
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2AvroSchematizationIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;
import static com.snowflake.kafka.connector.internal.TestUtils.getTableContentOneRow;

import
com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Round-trips one record through an {@link AvroConverter} backed by a mock schema registry,
 * inserts it into a table created with only the metadata column, and checks the resulting table
 * schema and row content.
 */
public class SnowflakeSinkServiceV2AvroSchematizationIT {
  private static final int PARTITION = 0;
  private static final int START_OFFSET = 0;
  private static final String ID_INT8 = "ID_INT8";
  private static final String ID_INT8_OPTIONAL = "ID_INT8_OPTIONAL";
  private static final String ID_INT16 = "ID_INT16";
  private static final String ID_INT32 = "ID_INT32";
  private static final String ID_INT64 = "ID_INT64";
  private static final String FIRST_NAME = "FIRST_NAME";
  private static final String RATING_FLOAT32 = "RATING_FLOAT32";
  private static final String FLOAT_NAN = "FLOAT_NAN";
  private static final String FLOAT_POSITIVE_INFINITY = "FLOAT_POSITIVE_INFINITY";
  private static final String FLOAT_NEGATIVE_INFINITY = "FLOAT_NEGATIVE_INFINITY";
  private static final String RATING_FLOAT64 = "RATING_FLOAT64";
  private static final String APPROVAL = "APPROVAL";
  private static final String INFO_ARRAY_STRING = "INFO_ARRAY_STRING";
  private static final String INFO_ARRAY_INT = "INFO_ARRAY_INT";
  private static final String INFO_ARRAY_JSON = "INFO_ARRAY_JSON";
  private static final String INFO_MAP = "INFO_MAP";
  private static final String RECORD_METADATA = "RECORD_METADATA";

  // Column name -> type name expected by TestUtils.checkTableSchema after the insert
  private static final Map EXPECTED_AVRO_SCHEMA =
      new HashMap() {
        {
          put(ID_INT8, "NUMBER");
          put(ID_INT8_OPTIONAL, "NUMBER");
          put(ID_INT16, "NUMBER");
          put(ID_INT32, "NUMBER");
          put(ID_INT64, "NUMBER");
          put(FIRST_NAME, "VARCHAR");
          put(RATING_FLOAT32, "FLOAT");
          put(FLOAT_NAN, "FLOAT");
          put(FLOAT_POSITIVE_INFINITY, "FLOAT");
          put(FLOAT_NEGATIVE_INFINITY, "FLOAT");
          put(RATING_FLOAT64, "FLOAT");
          put(APPROVAL, "BOOLEAN");
          put(INFO_ARRAY_STRING, "ARRAY");
          put(INFO_ARRAY_INT, "ARRAY");
          put(INFO_ARRAY_JSON, "ARRAY");
          put(INFO_MAP, "VARIANT");
          put(RECORD_METADATA, "VARIANT");
        }
      };

  private String table;
  private SnowflakeConnectionService conn;
  private String topic;
  private TopicPartition topicPartition;
  private SnowflakeSinkService service;

  @BeforeEach
  void before() {
    table = TestUtils.randomTableName();
    topic = table;
    conn = TestUtils.getConnectionServiceWithEncryptedKey();
    topicPartition = new TopicPartition(topic, PARTITION);
  }

  /**
   * Closes the sink service (if the test got far enough to create one) and drops the test table.
   *
   * <p>{@code service} is only assigned inside the test body, so it is still null when the test
   * fails before {@code createService()}; calling {@code closeAll()} unconditionally would then
   * throw a NullPointerException on top of the real failure. The table is dropped in a {@code
   * finally} block so it is removed even if closing the service throws.
   */
  @AfterEach
  void after() {
    try {
      if (service != null) {
        service.closeAll();
      }
    } finally {
      TestUtils.dropTable(table);
    }
  }

  @Test
  public void testSchematizationWithTableCreationAndAvroInput() throws Exception {
    // given
    conn.createTableWithOnlyMetadataColumn(table);
    SinkRecord avroRecordValue = createSinkRecord();
    service = createService();
    // when
    service.insert(Collections.singletonList(avroRecordValue));
    assertWithRetry(() -> TestUtils.getNumberOfRows(table) == 1);
    // then
    TestUtils.checkTableSchema(table, EXPECTED_AVRO_SCHEMA);
    // topic and table hold the same name (see before()), so this reads the test table
    Map actual = getTableContentOneRow(topic);
    Assertions.assertEquals(0L, actual.get(ID_INT8));
    Assertions.assertNull(actual.get(ID_INT8_OPTIONAL));
    Assertions.assertEquals(42L, actual.get(ID_INT16));
    Assertions.assertEquals(42L, actual.get(ID_INT32));
    Assertions.assertEquals(42L, actual.get(ID_INT64));
    Assertions.assertEquals("zekai", actual.get(FIRST_NAME));
    Assertions.assertEquals(0.99, ((Number) actual.get(RATING_FLOAT32)).doubleValue(), 0.001);
    Assertions.assertTrue(
        Double.isNaN(((Number) actual.get(FLOAT_NAN)).doubleValue()),
        "Expected NaN for " + FLOAT_NAN);
    Assertions.assertTrue(
        Double.isInfinite(((Number) actual.get(FLOAT_POSITIVE_INFINITY)).doubleValue())
            && ((Number) actual.get(FLOAT_POSITIVE_INFINITY)).doubleValue() > 0,
        "Expected +Infinity for " + FLOAT_POSITIVE_INFINITY);
    Assertions.assertTrue(
        Double.isInfinite(((Number) actual.get(FLOAT_NEGATIVE_INFINITY)).doubleValue())
            && ((Number) actual.get(FLOAT_NEGATIVE_INFINITY)).doubleValue() < 0,
        "Expected -Infinity for " + FLOAT_NEGATIVE_INFINITY);
    Assertions.assertEquals(0.99, ((Number) actual.get(RATING_FLOAT64)).doubleValue(), 0.001);
    Assertions.assertEquals(true, actual.get(APPROVAL));
    Assertions.assertEquals(
        "[\"a\",\"b\"]", StringUtils.deleteWhitespace(actual.get(INFO_ARRAY_STRING).toString()));
    Assertions.assertEquals(
        "[1,2]", StringUtils.deleteWhitespace(actual.get(INFO_ARRAY_INT).toString()));
    Assertions.assertEquals(
        "[null,\"{\\\"a\\\":1,\\\"b\\\":null,\\\"c\\\":null,\\\"d\\\":\\\"89asda9s0a\\\"}\"]",
        StringUtils.deleteWhitespace(actual.get(INFO_ARRAY_JSON).toString()));
    Assertions.assertEquals(
        "{\"field\":3}", StringUtils.deleteWhitespace(actual.get(INFO_MAP).toString()));
  }

  /** Builds the sink service, starts the single test partition and waits for initialization. */
  private SnowflakeSinkService createService() {
    Map config = prepareConfig();
    SinkTaskConfig sinkTaskConfig = SinkTaskConfig.from(config);
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, sinkTaskConfig)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartition(new TopicPartition(topic, PARTITION));
    service.awaitInitialization();
    return service;
  }

  /** Serializes the test struct with the Avro converter and reads it back as the record value. */
  private SinkRecord createSinkRecord() {
    Schema schema = prepareSchema();
    Struct data = prepareData(schema);
    AvroConverter avroConverter = prepareAvroConverter();
    byte[] converted = avroConverter.fromConnectData(topic, data.schema(), data);
    SchemaAndValue avroInputValue = avroConverter.toConnectData(topic, converted);
    return new SinkRecord(
        topic,
        PARTITION,
        Schema.STRING_SCHEMA,
        "test",
        avroInputValue.schema(),
        avroInputValue.value(),
        START_OFFSET);
  }

  /** Creates a value-side Avro converter backed by an in-memory mock schema registry. */
  private AvroConverter prepareAvroConverter() {
    SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient();
    AvroConverter avroConverter = new AvroConverter(schemaRegistry);
    avroConverter.configure(
        Collections.singletonMap("schema.registry.url", "http://fake-url"), false);
    return avroConverter;
  }

  private Map prepareConfig() {
    Map config = TestUtils.getConnectorConfigurationForStreaming(false);
    config.put(
        KafkaConnectorConfigParams.VALUE_CONVERTER, "io.confluent.connect.avro.AvroConverter");
    config.put(KafkaConnectorConfigParams.VALUE_CONVERTER_SCHEMA_REGISTRY_URL, "http://fake-url");
    // Schema type inference assertions depend on client-side validation behavior
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "client_side");
    ConnectorConfigTools.setDefaultValues(config);
    return config;
  }

  private Schema prepareSchema() {
    SchemaBuilder schemaBuilder =
        SchemaBuilder.struct()
            .field(ID_INT8, Schema.INT8_SCHEMA)
            .field(ID_INT8_OPTIONAL, Schema.OPTIONAL_INT8_SCHEMA)
            .field(ID_INT16, Schema.INT16_SCHEMA)
            .field(ID_INT32, Schema.INT32_SCHEMA)
            .field(ID_INT64, Schema.INT64_SCHEMA)
            .field(FIRST_NAME, Schema.STRING_SCHEMA)
            .field(RATING_FLOAT32, Schema.FLOAT32_SCHEMA)
            .field(FLOAT_NAN, Schema.FLOAT32_SCHEMA)
            .field(FLOAT_POSITIVE_INFINITY, Schema.FLOAT32_SCHEMA)
            .field(FLOAT_NEGATIVE_INFINITY, Schema.FLOAT32_SCHEMA)
            .field(RATING_FLOAT64, Schema.FLOAT64_SCHEMA)
            .field(APPROVAL, Schema.BOOLEAN_SCHEMA)
            .field(INFO_ARRAY_STRING, SchemaBuilder.array(Schema.STRING_SCHEMA).build())
            .field(INFO_ARRAY_INT, SchemaBuilder.array(Schema.INT32_SCHEMA).build())
            .field(INFO_ARRAY_JSON, SchemaBuilder.array(Schema.OPTIONAL_STRING_SCHEMA).build())
            .field(INFO_MAP, SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build());
    return schemaBuilder.build();
  }

  // ID_INT8_OPTIONAL is deliberately left unset; the test asserts it reads back as null.
  private Struct prepareData(Schema schema) {
    return new Struct(schema)
        .put(ID_INT8, (byte) 0)
        .put(ID_INT16, (short) 42)
        .put(ID_INT32, 42)
        .put(ID_INT64, 42L)
        .put(FIRST_NAME, "zekai")
        .put(RATING_FLOAT32, 0.99f)
        .put(FLOAT_NAN, Float.NaN)
        .put(FLOAT_POSITIVE_INFINITY, Float.POSITIVE_INFINITY)
        .put(FLOAT_NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY)
        .put(RATING_FLOAT64, 0.99d)
        .put(APPROVAL, true)
        .put(INFO_ARRAY_STRING, Arrays.asList("a", "b"))
        .put(INFO_ARRAY_INT, Arrays.asList(1, 2))
        .put(
            INFO_ARRAY_JSON,
            Arrays.asList(null, "{\"a\": 1, \"b\": null, \"c\": null, \"d\": \"89asda9s0a\"}"))
        .put(INFO_MAP, Collections.singletonMap("field", 3));
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2BaseIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.kafka.connector.internal.TestUtils;
import org.apache.kafka.common.TopicPartition;

/** Shared fixture values (random table name, topic, two partitions) for sink-service tests. */
public abstract class SnowflakeSinkServiceV2BaseIT {
  protected final String table = TestUtils.randomTableName();
  protected final int partition = 0;
  protected final int partition2 = 1;
  // Topic name should be same as table name.
  // (Only for testing, not necessarily in real deployment)
  protected String topic = table;
  protected TopicPartition topicPartition = new TopicPartition(topic, partition);
  protected TopicPartition topicPartition2 = new TopicPartition(topic, partition2);
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2IT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static com.snowflake.kafka.connector.internal.streaming.v2.service.PartitionChannelManager.makeChannelName;

import com.codahale.metrics.Gauge;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.metrics.MetricsUtil;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.storage.Converter;
import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

/**
 * Tests for a {@link SnowflakeSinkService} built through StreamingSinkServiceBuilder, using the
 * connection returned by TestUtils.getConnectionServiceWithEncryptedKey().
 */
public class SnowflakeSinkServiceV2IT extends SnowflakeSinkServiceV2BaseIT {
  private final SnowflakeConnectionService conn = TestUtils.getConnectionServiceWithEncryptedKey();
  // Rebuilt before every test; individual tests may customise it before calling build()
  private SinkTaskConfig.Builder configBuilder;
  // Name passed to TestUtils.dropPipe in afterEach; set to the table name in setup()
  private String pipe;

  /** Creates a fresh config builder with server-side validation and sets the pipe name. */
  @BeforeEach
  public void setup() {
    Map config = TestUtils.getConnectorConfigurationForStreaming(true);
    configBuilder = SinkTaskConfig.builderFrom(config).validation(SnowflakeValidation.SERVER_SIDE);
    pipe = table;
  }

  /** Drops the test table and the pipe named after it. */
  @AfterEach
  public void afterEach() {
    TestUtils.dropTable(table);
    TestUtils.dropPipe(pipe);
  }

  /**
   * Closes the partition before any record is inserted, then inserts one record and expects the
   * reported offset to become 1.
   */
  @Test
  public void testChannelCloseIngestion() throws Exception {
    // opens a channel for partition 0, table and topic
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();
    Converter converter = buildJsonConverter();
    SchemaAndValue input =
        converter.toConnectData(topic, "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8));
    long offset = 0;
    SinkRecord record1 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + offset,
            input.schema(),
            input.value(),
            offset);
    // Lets close the service
    // Closing a partition == closing a channel
    service.close(Collections.singletonList(topicPartition));
    // Lets insert a record when partition was closed.
    // It should auto create the channel
    service.insert(record1);
    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 1, 5, 20);
    service.closeAll();
  }

  /**
   * Builds a schemaless JSON converter ("schemas.enable" = "false"). Note that {@code true} is
   * passed as the second argument of configure, which is the isKey flag in Kafka's Converter API.
   */
  private static @NotNull Converter buildJsonConverter() {
    Converter converter = new JsonConverter();
    HashMap converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "false");
    converter.configure(converterConfig, true);
    return converter;
  }

  /**
   * Inserts a record, closes the partition, inserts the same record again and expects the
   * reported offset to be 1.
   */
  @Test
  public void testRebalanceOpenCloseIngestion() throws Exception {
    // opens a channel for partition 0, table and topic
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();
    Converter converter = buildJsonConverter();
    SchemaAndValue input =
        converter.toConnectData(topic, "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8));
    long offset = 0;
    SinkRecord record1 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + offset,
            input.schema(),
            input.value(),
            offset);
    service.insert(record1);
    // Lets close the service
    // Closing a partition == closing a channel
    service.close(Collections.singletonList(topicPartition));
    // it should skip this record1 since it will fetch offset token 0 from Snowflake
    service.insert(record1);
    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 1, 5, 20);
    service.closeAll();
  }

  /**
   * Inserts one record, then a batch of two more, and checks the reported offset after each step
   * (1, then 3).
   */
  @Test
  public void testStreamingIngestion() throws Exception {
    // opens a channel for partition 0, table and topic
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();
    Converter converter = buildJsonConverter();
    SchemaAndValue input =
converter.toConnectData(topic, "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8));
    long offset = 0;
    SinkRecord record1 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + offset,
            input.schema(),
            input.value(),
            offset);
    service.insert(record1);
    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 1, 5, 20);
    // insert another offset and check what we committed
    offset += 1;
    SinkRecord record2 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + offset,
            input.schema(),
            input.value(),
            offset);
    offset += 1;
    SinkRecord record3 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + offset,
            input.schema(),
            input.value(),
            offset);
    service.insert(Arrays.asList(record2, record3));
    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 3, 5, 20);
    service.closeAll();
  }

  /**
   * Ingests 2 records into one partition and 5 into another, then checks table size, per-partition
   * offsets, the registered gauge/counter metrics, and that the spied telemetry service received
   * two partition-start reports and (after closeAll) two partition-usage reports.
   */
  @Test
  public void testStreamingIngest_multipleChannelPartitions_withMetrics() throws Exception {
    // set up telemetry service spy
    SnowflakeConnectionService connectionService = Mockito.spy(this.conn);
    SnowflakeTelemetryService telemetryService = Mockito.spy(this.conn.getTelemetryClient());
    Mockito.when(connectionService.getTelemetryClient()).thenReturn(telemetryService);
    // opens a channel for partition 0, table and topic
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(connectionService, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .withMetricsJmxReporter(
                new com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter(
                    new com.codahale.metrics.MetricRegistry(), TEST_CONNECTOR_NAME))
            .build();
    service.startPartition(topicPartition);
    service.startPartition(new TopicPartition(topic, partition2));
    service.awaitInitialization();
    final int recordsInPartition1 = 2;
    final int recordsInPartition2 = 5;
    List recordsPartition1 =
        TestUtils.createJsonStringSinkRecords(0, recordsInPartition1, topic, partition);
    List recordsPartition2 =
        TestUtils.createJsonStringSinkRecords(0, recordsInPartition2, topic, partition2);
    List records = new ArrayList<>(recordsPartition1);
    records.addAll(recordsPartition2);
    service.insert(records);
    TestUtils.assertWithRetry(
        () -> {
          // This is how we will trigger flush. (Mimicking poll API)
          service.insert(new ArrayList<>()); // trigger time based flush
          return TestUtils.tableSize(table) == recordsInPartition1 + recordsInPartition2;
        },
        10,
        20);
    TestUtils.assertWithRetry(
        () -> service.getOffset(topicPartition) == recordsInPartition1, 5, 20);
    TestUtils.assertWithRetry(
        () -> service.getOffset(new TopicPartition(topic, partition2)) == recordsInPartition2,
        20,
        5);
    // verify all metrics (gauges + counters)
    // NOTE(review): both lookups use the first partition's channel name, yet the total is compared
    // against two partitions' worth of metrics; presumably the returned registry is shared
    // between channels. Confirm against getMetricRegistry.
    Map gaugeMetrics =
        service
            .getMetricRegistry(makeChannelName(TEST_CONNECTOR_NAME, topic, partition))
            .get()
            .getGauges();
    long totalMetrics =
        gaugeMetrics.size()
            + service
                .getMetricRegistry(makeChannelName(TEST_CONNECTOR_NAME, topic, partition))
                .get()
                .getCounters()
                .size();
    assert totalMetrics == SnowflakeTelemetryChannelStatus.NUM_METRICS * 2; // two partitions
    // partition 1
    verifyPartitionMetrics(
        gaugeMetrics,
        makeChannelName(TEST_CONNECTOR_NAME, topic, partition),
        recordsInPartition1 - 1,
        recordsInPartition1 - 1);
    // partition 2
    verifyPartitionMetrics(
        gaugeMetrics,
        makeChannelName(TEST_CONNECTOR_NAME, topic, partition2),
        recordsInPartition2 - 1,
        recordsInPartition2 - 1);
    // verify telemetry
    Mockito.verify(telemetryService, Mockito.times(2))
        .reportKafkaPartitionStart(Mockito.any(SnowflakeTelemetryChannelCreation.class));
    service.closeAll();
    // verify metrics closed
    assert !service
        .getMetricRegistry(makeChannelName(TEST_CONNECTOR_NAME, topic, partition))
        .isPresent();
    Mockito.verify(telemetryService, Mockito.times(2))
        .reportKafkaPartitionUsage(
            Mockito.any(SnowflakeTelemetryChannelStatus.class), Mockito.eq(true));
  }

  /**
   * Asserts (with the Java {@code assert} keyword, so only when assertions are enabled) that the
   * two offset gauges registered for the given channel hold the expected values.
   *
   * @param metricRegistry gauges keyed by metric name
   * @param partitionChannelKey channel name used to build the metric names
   * @param offsetPersistedInSnowflake expected value of the OFFSET_PERSISTED_IN_SNOWFLAKE gauge
   * @param processedOffset expected value of the PROCESSED_OFFSET gauge
   */
  private void verifyPartitionMetrics(
      Map metricRegistry,
      String partitionChannelKey,
      long offsetPersistedInSnowflake,
      long processedOffset) {
    // offsets
    assert (long)
            metricRegistry
                .get(
                    MetricsUtil.channelMetricName(
                        partitionChannelKey,
                        MetricsUtil.OFFSET_SUB_DOMAIN,
                        MetricsUtil.OFFSET_PERSISTED_IN_SNOWFLAKE))
                .getValue()
        == offsetPersistedInSnowflake;
    assert (long)
            metricRegistry
                .get(
                    MetricsUtil.channelMetricName(
                        partitionChannelKey,
                        MetricsUtil.OFFSET_SUB_DOMAIN,
                        MetricsUtil.PROCESSED_OFFSET))
                .getValue()
        == processedOffset;
  }

  /**
   * Maps three randomly named topics onto the single test table, ingests 2 records into each of 3
   * partitions per topic, and checks the table size and every partition's offset.
   */
  @Test
  public void testStreamingIngest_multipleChannelPartitionsWithTopic2Table() throws Exception {
    final int partitionCount = 3;
    final int recordsInEachPartition = 2;
    final int topicCount = 3;
    Map topic2Table = new HashMap<>();
    ArrayList topics = new ArrayList<>();
    // The int loop variables named topic/partition below shadow the inherited fields of the same
    // name.
    for (int topic = 0; topic < topicCount; topic++) {
      final String topicName = TestUtils.randomTableName();
      topics.add(topicName);
      topic2Table.put(topicName, table);
    }
    configBuilder.topicToTableMap(topic2Table);
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    for (int topic = 0; topic < topicCount; topic++) {
      for (int partition = 0; partition < partitionCount; partition++) {
        service.startPartition(new TopicPartition(topics.get(topic), partition));
      }
      service.awaitInitialization();
      List records = new ArrayList<>();
      for (int partition = 0; partition < partitionCount; partition++) {
        records.addAll(
            TestUtils.createJsonStringSinkRecords(
                0, recordsInEachPartition, topics.get(topic), partition));
      }
      service.insert(records);
    }
    TestUtils.assertWithRetry(
        () -> TestUtils.tableSize(table) == recordsInEachPartition * partitionCount * topicCount,
        10,
        20);
    for (int topic = 0; topic < topicCount; topic++) {
      // effectively-final copies so the lambda can capture the loop values
      int finalTopic = topic;
      for (int partition = 0; partition < partitionCount; partition++) {
        int finalPartition = partition;
        TestUtils.assertWithRetry(
            () ->
                service.getOffset(new TopicPartition(topics.get(finalTopic), finalPartition))
                    == recordsInEachPartition,
            20,
            5);
      }
    }
    service.closeAll();
  }

  @Test
public void testStreamingIngest_startPartitionsWithMultipleChannelPartitions() throws Exception {
    // Starts five partitions of one topic in a single startPartitions() call, inserts records for
    // each, then checks the table row count and the offset reported for every partition.
    final int partitionCount = 5;
    final int recordsInEachPartition = 2;

    ArrayList<TopicPartition> topicPartitions = new ArrayList<>();
    for (int partition = 0; partition < partitionCount; partition++) {
      topicPartitions.add(new TopicPartition(topic, partition));
    }
    configBuilder.topicToTableMap(Collections.singletonMap(topic, table));

    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartitions(topicPartitions);
    service.awaitInitialization();

    List<SinkRecord> records = new ArrayList<>();
    for (int partition = 0; partition < partitionCount; partition++) {
      records.addAll(
          TestUtils.createJsonStringSinkRecords(0, recordsInEachPartition, topic, partition));
    }
    service.insert(records);

    TestUtils.assertWithRetry(
        () -> {
          service.insert(new ArrayList<>()); // trigger time based flush
          return TestUtils.tableSize(table) == recordsInEachPartition * partitionCount;
        },
        10,
        20);

    for (int partition = 0; partition < partitionCount; partition++) {
      int finalPartition = partition;
      TestUtils.assertWithRetry(
          () ->
              service.getOffset(new TopicPartition(topic, finalPartition))
                  == recordsInEachPartition,
          20,
          5);
    }

    service.closeAll();
  }

  /**
   * Inserts four records built with {@link JsonConverter}: schemaless and schema-carrying JSON,
   * each used once as record value and once as record key, and waits for the reported offset to
   * pass the last record.
   */
  @Test
  public void testNativeJsonInputIngestion() throws Exception {
    // json without schema
    JsonConverter converter = new JsonConverter();
    HashMap<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "false");
    converter.configure(converterConfig, false);
    SchemaAndValue noSchemaInputValue =
        converter.toConnectData(
            topic, TestUtils.JSON_WITHOUT_SCHEMA.getBytes(StandardCharsets.UTF_8));

    // Same payload, converter configured as a key converter (second argument true).
    converter = new JsonConverter();
    converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "false");
    converter.configure(converterConfig, true);
    SchemaAndValue noSchemaInputKey =
        converter.toConnectData(
            topic, TestUtils.JSON_WITHOUT_SCHEMA.getBytes(StandardCharsets.UTF_8));

    // json with schema
    converter = new JsonConverter();
    converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "true");
    converter.configure(converterConfig, false);
    SchemaAndValue schemaInputValue =
        converter.toConnectData(topic, TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8));

    converter = new JsonConverter();
    converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "true");
    converter.configure(converterConfig, true);
    SchemaAndValue schemaInputKey =
        converter.toConnectData(topic, TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8));

    long startOffset = 0;
    long endOffset = 3;

    SinkRecord noSchemaRecordValue =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            noSchemaInputValue.schema(),
            noSchemaInputValue.value(),
            startOffset);
    SinkRecord schemaRecordValue =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            schemaInputValue.schema(),
            schemaInputValue.value(),
            startOffset + 1);
    SinkRecord noSchemaRecordKey =
        new SinkRecord(
            topic,
            partition,
            noSchemaInputKey.schema(),
            noSchemaInputKey.value(),
            schemaInputValue.schema(),
            schemaInputValue.value(),
            startOffset + 2);
    SinkRecord schemaRecordKey =
        new SinkRecord(
            topic,
            partition,
            schemaInputKey.schema(),
            schemaInputKey.value(),
            schemaInputValue.schema(),
            schemaInputValue.value(),
            startOffset + 3);

    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    service.insert(noSchemaRecordValue);
    service.insert(schemaRecordValue);
    service.insert(noSchemaRecordKey);
    service.insert(schemaRecordKey);

    // The reported offset is expected to be one past the last inserted record.
    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == endOffset + 1, 5, 20);

    service.closeAll();
  }

  @Test
  public void testNativeAvroInputIngestion() throws Exception {
    // avro
SchemaBuilder schemaBuilder = SchemaBuilder.struct() .field("int8", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build()) .field("int16", Schema.INT16_SCHEMA) .field("int32", Schema.INT32_SCHEMA) .field("int64", Schema.INT64_SCHEMA) .field("float32", Schema.FLOAT32_SCHEMA) .field("float64", Schema.FLOAT64_SCHEMA) .field("int8Min", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build()) .field("int16Min", Schema.INT16_SCHEMA) .field("int32Min", Schema.INT32_SCHEMA) .field("int64Min", Schema.INT64_SCHEMA) .field("float32Min", Schema.FLOAT32_SCHEMA) .field("float64Min", Schema.FLOAT64_SCHEMA) .field("int8Max", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build()) .field("int16Max", Schema.INT16_SCHEMA) .field("int32Max", Schema.INT32_SCHEMA) .field("int64Max", Schema.INT64_SCHEMA) .field("float32Max", Schema.FLOAT32_SCHEMA) .field("float64Max", Schema.FLOAT64_SCHEMA) .field("float64HighPrecision", Schema.FLOAT64_SCHEMA) .field("float64TenDigits", Schema.FLOAT64_SCHEMA) .field("float64BigDigits", Schema.FLOAT64_SCHEMA) .field("boolean", Schema.BOOLEAN_SCHEMA) .field("string", Schema.STRING_SCHEMA) .field("bytes", Schema.BYTES_SCHEMA) .field("bytesReadOnly", Schema.BYTES_SCHEMA) .field("int16Optional", Schema.OPTIONAL_INT16_SCHEMA) .field("int32Optional", Schema.OPTIONAL_INT32_SCHEMA) .field("int64Optional", Schema.OPTIONAL_INT64_SCHEMA) .field("float32Optional", Schema.OPTIONAL_FLOAT32_SCHEMA) .field("float64Optional", Schema.OPTIONAL_FLOAT64_SCHEMA) .field("booleanOptional", Schema.OPTIONAL_BOOLEAN_SCHEMA) .field("stringOptional", Schema.OPTIONAL_STRING_SCHEMA) .field("bytesOptional", Schema.OPTIONAL_BYTES_SCHEMA) .field("array", SchemaBuilder.array(Schema.STRING_SCHEMA).build()) .field("map", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build()) .field( "int8Optional", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build()) .field( "mapNonStringKeys", 
SchemaBuilder.map(Schema.INT32_SCHEMA, Schema.INT32_SCHEMA).build()) .field( "mapArrayMapInt", SchemaBuilder.map( Schema.STRING_SCHEMA, SchemaBuilder.array( SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA) .build()) .build()) .build()); Struct original = new Struct(schemaBuilder.build()) .put("int8", (byte) 12) .put("int16", (short) 12) .put("int32", 12) .put("int64", 12L) .put("float32", 12.2f) .put("float64", 12.2) .put("int8Min", Byte.MIN_VALUE) .put("int16Min", Short.MIN_VALUE) .put("int32Min", Integer.MIN_VALUE) .put("int64Min", Long.MIN_VALUE) .put("float32Min", Float.MIN_VALUE) .put("float64Min", Double.MIN_VALUE) .put("int8Max", Byte.MAX_VALUE) .put("int16Max", Short.MAX_VALUE) .put("int32Max", Integer.MAX_VALUE) .put("int64Max", Long.MAX_VALUE) .put("float32Max", Float.MAX_VALUE) .put("float64Max", Double.MAX_VALUE) .put("float64HighPrecision", 2312.4200000000001d) .put("float64TenDigits", 1.0d / 3.0d) .put("float64BigDigits", 2312.42321432655123456d) .put("boolean", true) .put("string", "foo") .put("bytes", ByteBuffer.wrap("foo".getBytes())) .put("bytesReadOnly", ByteBuffer.wrap("foo".getBytes()).asReadOnlyBuffer()) .put("array", Arrays.asList("a", "b", "c")) .put("map", Collections.singletonMap("field", 1)) .put("mapNonStringKeys", Collections.singletonMap(1, 1)) .put( "mapArrayMapInt", Collections.singletonMap( "field", Arrays.asList( Collections.singletonMap("field", 1), Collections.singletonMap("field", 1)))); SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient(); AvroConverter avroConverter = new AvroConverter(schemaRegistry); avroConverter.configure( Collections.singletonMap("schema.registry.url", "http://fake-url"), false); byte[] converted = avroConverter.fromConnectData(topic, original.schema(), original); SchemaAndValue avroInputValue = avroConverter.toConnectData(topic, converted); avroConverter = new AvroConverter(schemaRegistry); avroConverter.configure( Collections.singletonMap("schema.registry.url", 
"http://fake-url"), true); converted = avroConverter.fromConnectData(topic, original.schema(), original); SchemaAndValue avroInputKey = avroConverter.toConnectData(topic, converted); long startOffset = 0; long endOffset = 2; SinkRecord avroRecordValue = new SinkRecord( topic, partition, Schema.STRING_SCHEMA, "test", avroInputValue.schema(), avroInputValue.value(), startOffset); SinkRecord avroRecordKey = new SinkRecord( topic, partition, avroInputKey.schema(), avroInputKey.value(), Schema.STRING_SCHEMA, "test", startOffset + 1); SinkRecord avroRecordKeyValue = new SinkRecord( topic, partition, avroInputKey.schema(), avroInputKey.value(), avroInputKey.schema(), avroInputKey.value(), startOffset + 2); configBuilder.tolerateErrors(true); configBuilder.dlqTopicName("DLQ_TOPIC"); configBuilder.errorsLogEnable(true); SnowflakeSinkService service = StreamingSinkServiceBuilder.builder(conn, configBuilder.build()) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .build(); service.startPartition(topicPartition); service.awaitInitialization(); service.insert(avroRecordValue); service.insert(avroRecordKey); service.insert(avroRecordKeyValue); TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == endOffset + 1, 5, 20); service.closeAll(); } @Test public void testBrokenIngestion() throws Exception { // Mismatched schema and value SchemaAndValue brokenInputValue = new SchemaAndValue(Schema.INT32_SCHEMA, "error"); long startOffset = 0; SinkRecord brokenValue = new SinkRecord( topic, partition, Schema.STRING_SCHEMA, "test", brokenInputValue.schema(), brokenInputValue.value(), startOffset); SinkRecord brokenKey = new SinkRecord( topic, partition, brokenInputValue.schema(), brokenInputValue.value(), Schema.STRING_SCHEMA, "test", startOffset + 1); SinkRecord brokenKeyValue = new SinkRecord( topic, partition, brokenInputValue.schema(), brokenInputValue.value(), brokenInputValue.schema(), brokenInputValue.value(), startOffset + 2); 
configBuilder.tolerateErrors(true); configBuilder.dlqTopicName("DLQ_TOPIC"); configBuilder.errorsLogEnable(true); InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter(); SnowflakeSinkService service = StreamingSinkServiceBuilder.builder(conn, configBuilder.build()) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .withErrorReporter(errorReporter) .build(); service.startPartition(topicPartition); service.awaitInitialization(); service.insert(brokenValue); service.insert(brokenKey); service.insert(brokenKeyValue); TestUtils.assertWithRetry( () -> service.getOffset(topicPartition) == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE, 5, 20); List reportedData = errorReporter.getReportedRecords(); assert reportedData.size() == 3; assert TestUtils.tableSize(table) == 0 : "expected: " + 0 + " actual: " + TestUtils.tableSize(table); } @Test public void testBrokenRecordIngestionFollowedUpByValidRecord() throws Exception { // Mismatched schema and value SchemaAndValue brokenInputValue = new SchemaAndValue(Schema.INT32_SCHEMA, "error"); SinkRecord brokenValue = new SinkRecord( topic, partition, null, null, brokenInputValue.schema(), brokenInputValue.value(), 0); SinkRecord brokenKey = new SinkRecord( topic, partition, brokenInputValue.schema(), brokenInputValue.value(), null, null, 1); SinkRecord correctValue = new SinkRecord(topic, partition, null, "key1", null, Map.of("name", "john"), 2); configBuilder.tolerateErrors(true); configBuilder.dlqTopicName("DLQ_TOPIC"); configBuilder.errorsLogEnable(true); InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter(); SnowflakeSinkService service = StreamingSinkServiceBuilder.builder(conn, configBuilder.build()) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .withErrorReporter(errorReporter) .build(); service.startPartition(topicPartition); service.awaitInitialization(); service.insert(brokenValue); 
service.insert(brokenKey); service.insert(correctValue); TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 3, 5, 20); List reportedData = errorReporter.getReportedRecords(); assert reportedData.size() == 2; assert TestUtils.tableSize(table) == 1 : "expected: " + 1 + " actual: " + TestUtils.tableSize(table); service.closeAll(); } /* Service start -> Insert -> Close. service start -> fetch the offsetToken, compare and ingest check data */ @Test public void testStreamingIngestionWithExactlyOnceSemanticsNoOverlappingOffsets() throws Exception { SnowflakeSinkService service = StreamingSinkServiceBuilder.builder(conn, configBuilder.build()) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .build(); service.startPartition(topicPartition); service.awaitInitialization(); Converter converter = buildJsonConverter(); SchemaAndValue input = converter.toConnectData(topic, "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8)); long offset = 0; // Create sink record SinkRecord record1 = new SinkRecord( topic, partition, Schema.STRING_SCHEMA, "test", input.schema(), input.value(), offset); service.insert(record1); TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 1, 5, 20); // wait for ingest TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == 1, 30, 20); service.closeAll(); // initialize a new sink service SnowflakeSinkService service2 = StreamingSinkServiceBuilder.builder(conn, configBuilder.build()) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .build(); service2.startPartition(topicPartition); service2.awaitInitialization(); offset = 1; // Create sink record SinkRecord record2 = new SinkRecord( topic, partition, Schema.STRING_SCHEMA, "test", input.schema(), input.value(), offset); service2.insert(record2); // wait for ingest TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == 2, 30, 20); assert service2.getOffset(topicPartition) == 
offset + 1;

    service2.closeAll();
  }

  /* Service start -> Insert -> Close. service start -> fetch the offsetToken, compare and ingest check data */
  @Test
  public void testStreamingIngestionWithExactlyOnceSemanticsOverlappingOffsets() throws Exception {
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    final long noOfRecords = 10;

    // send regular data
    List<SinkRecord> records =
        TestUtils.createJsonStringSinkRecords(0, noOfRecords, topic, partition);

    service.insert(records);

    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == noOfRecords, 5, 20);
    // wait for ingest
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == noOfRecords, 30, 20);
    service.closeAll();

    // initialize a new sink service
    SnowflakeSinkService service2 =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    service2.startPartition(topicPartition);
    service2.awaitInitialization();

    // Second batch starts at an offset the first service already ingested.
    final long startOffsetAlreadyInserted = 5;

    records =
        TestUtils.createJsonStringSinkRecords(
            startOffsetAlreadyInserted, noOfRecords, topic, partition);

    service2.insert(records);

    // Overlapping offsets must not be ingested twice: only the new ones add rows.
    final long totalRecordsExpected = noOfRecords + (noOfRecords - startOffsetAlreadyInserted);

    // wait for ingest
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == totalRecordsExpected, 30, 20);

    assert service2.getOffset(topicPartition) == totalRecordsExpected;

    service2.closeAll();
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2SchematizationIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import
com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration test: a record that cannot be schematized must be handed to the error reporter
 * (DLQ) instead of failing the sink service.
 */
public class SnowflakeSinkServiceV2SchematizationIT extends SnowflakeSinkServiceV2BaseIT {

  private final SnowflakeConnectionService conn = TestUtils.getConnectionService();
  private SinkTaskConfig sinkTaskConfig;
  private SnowflakeSinkService service;
  private String pipe;

  /** Builds an error-tolerant sink task config with a DLQ topic; the pipe is named after the table. */
  @BeforeEach
  public void setup() {
    Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(false);
    sinkTaskConfig =
        SinkTaskConfig.builderFrom(config).tolerateErrors(true).dlqTopicName("dlq_topic").build();
    pipe = table;
  }

  /**
   * Closes the service and drops the table and pipe. The service may be null when a test failed
   * before creating it; cleanup of the Snowflake objects must still run in that case.
   */
  @AfterEach
  public void teardown() {
    try {
      if (service != null) {
        service.closeAll();
      }
    } finally {
      TestUtils.dropTable(table);
      TestUtils.dropPipe(pipe);
    }
  }

  @Test
  public void snowflakeSinkTask_put_whenJsonRecordCannotBeSchematized_sendRecordToDLQ() {
    // given
    conn.createTableWithOnlyMetadataColumn(table);
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    service =
        StreamingSinkServiceBuilder.builder(conn, sinkTaskConfig)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .withErrorReporter(errorReporter)
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    // Create a record that cannot be schematized (array at root level)
    String notSchematizeableJsonRecord = "[{\"name\":\"sf\",\"answer\":42}]";
    SinkRecord record = createKafkaRecordWithoutSchema(notSchematizeableJsonRecord, 0);

    // when
    service.insert(record);

    // then
    Assertions.assertEquals(1, errorReporter.getReportedRecords().size());
  }

  /**
   * Helper method to create a Kafka record from a JSON string.
   *
   * @param jsonPayload JSON text used as the record value
   * @param offset offset assigned to the record
   * @param withSchema value for the converter's {@code schemas.enable} setting
   */
  private SinkRecord createKafkaRecord(String jsonPayload, long offset, boolean withSchema) {
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", String.valueOf(withSchema));
    jsonConverter.configure(converterConfig, false);

    byte[] valueBytes = jsonPayload.getBytes(StandardCharsets.UTF_8);
    SchemaAndValue schemaAndValue = jsonConverter.toConnectData(topic, valueBytes);

    return SinkRecordBuilder.forTopicPartition(topic, partition)
        .withSchemaAndValue(schemaAndValue)
        .withOffset(offset)
        .withKey("test")
        .build();
  }

  /**
   * Convenience method to create a Kafka record from JSON without schema (schemas.enable = false)
   */
  private SinkRecord createKafkaRecordWithoutSchema(String jsonPayload, long offset) {
    return createKafkaRecord(jsonPayload, offset, false);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2Test.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.snowflake.ingest.streaming.SFException;
import
com.snowflake.kafka.connector.builder.SinkRecordBuilder; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder; import com.snowflake.kafka.connector.config.SnowflakeValidation; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel; import com.snowflake.kafka.connector.internal.streaming.v2.BackpressureException; import com.snowflake.kafka.connector.internal.streaming.v2.service.BatchOffsetFetcher; import com.snowflake.kafka.connector.internal.streaming.v2.service.PartitionChannelManager; import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools; import java.time.Instant; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Function; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.sink.SinkTaskContext; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; class SnowflakeSinkServiceV2Test { private static final String TOPIC = "test_topic"; private static final String CONNECTOR_NAME = "test_connector"; private PartitionChannelManager mockChannelManager; private BatchOffsetFetcher mockBatchOffsetFetcher; private SinkTaskContext mockSinkTaskContext; private SnowflakeSinkServiceV2 service; @BeforeEach void setUp() { mockChannelManager = mock(PartitionChannelManager.class); mockBatchOffsetFetcher = mock(BatchOffsetFetcher.class); mockSinkTaskContext = mock(SinkTaskContext.class); 
SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.isClosed()).thenReturn(false); service = new SnowflakeSinkServiceV2( mockConn, SinkTaskConfigTestBuilder.builder().connectorName(CONNECTOR_NAME).taskId("0").build(), mockSinkTaskContext, Optional.empty(), () -> mockBatchOffsetFetcher, () -> mockChannelManager, TaskMetrics.noop()); } @AfterEach void tearDown() { ThreadPools.closeForTask(CONNECTOR_NAME); } // --- insert() skip logic --- @Test void insertSkipsRecordsForInitializingPartitions() { TopicPartition tp = new TopicPartition(TOPIC, 0); TopicPartitionChannel channel = mockChannel("ch_0", true); when(mockChannelManager.getChannel(tp)).thenReturn(Optional.of(channel)); SinkRecord record = recordFor(TOPIC, 0, 10); service.insert(Collections.singletonList(record)); verify(channel, never()).insertRecord(any(), anyBoolean()); verify(mockSinkTaskContext).offset(tp, 10); } @Test void insertProcessesRecordsForReadyPartitions() { TopicPartition tp = new TopicPartition(TOPIC, 0); TopicPartitionChannel channel = mockChannel("ch_0", false); when(mockChannelManager.getChannel(tp)).thenReturn(Optional.of(channel)); SinkRecord record = recordFor(TOPIC, 0, 5); service.insert(Collections.singletonList(record)); verify(channel).insertRecord(record, true); verify(mockSinkTaskContext, never()).offset(any(TopicPartition.class), any(Long.class)); } @Test void insertHandlesMixOfInitializingAndReadyPartitions() { TopicPartition tpInit = new TopicPartition(TOPIC, 0); TopicPartition tpReady = new TopicPartition(TOPIC, 1); TopicPartitionChannel initChannel = mockChannel("ch_0", true); TopicPartitionChannel readyChannel = mockChannel("ch_1", false); when(mockChannelManager.getChannel(tpInit)).thenReturn(Optional.of(initChannel)); when(mockChannelManager.getChannel(tpReady)).thenReturn(Optional.of(readyChannel)); List records = Arrays.asList(recordFor(TOPIC, 0, 100), recordFor(TOPIC, 1, 200)); service.insert(records); verify(initChannel, 
never()).insertRecord(any(), anyBoolean()); verify(readyChannel).insertRecord(records.get(1), true); verify(mockSinkTaskContext).offset(tpInit, 100); verify(mockSinkTaskContext, never()).offset(tpReady, 200); } @Test void insertResetsToFirstSkippedOffset() { TopicPartition tp = new TopicPartition(TOPIC, 0); TopicPartitionChannel channel = mockChannel("ch_0", true); when(mockChannelManager.getChannel(tp)).thenReturn(Optional.of(channel)); List records = Arrays.asList(recordFor(TOPIC, 0, 5), recordFor(TOPIC, 0, 6), recordFor(TOPIC, 0, 7)); service.insert(records); verify(mockSinkTaskContext).offset(tp, 5); verify(mockSinkTaskContext, never()).offset(tp, 6); verify(mockSinkTaskContext, never()).offset(tp, 7); } // --- getCommittedOffsets() skip logic --- @Test @SuppressWarnings("unchecked") void getCommittedOffsetsExcludesInitializingPartitions() { TopicPartition tpInit = new TopicPartition(TOPIC, 0); TopicPartition tpReady = new TopicPartition(TOPIC, 1); TopicPartitionChannel initChannel = mockChannel("ch_0", true); TopicPartitionChannel readyChannel = mockChannel("ch_1", false); when(mockChannelManager.getChannel(tpInit)).thenReturn(Optional.of(initChannel)); when(mockChannelManager.getChannel(tpReady)).thenReturn(Optional.of(readyChannel)); Map expectedOffsets = new HashMap<>(); expectedOffsets.put(tpReady, 42L); when(mockBatchOffsetFetcher.getCommittedOffsets(any(), any(Function.class))) .thenReturn(expectedOffsets); Set allPartitions = new HashSet<>(Arrays.asList(tpInit, tpReady)); Map result = service.getCommittedOffsets(allPartitions); assertEquals(expectedOffsets, result); ArgumentCaptor> captor = ArgumentCaptor.forClass(Set.class); verify(mockBatchOffsetFetcher).getCommittedOffsets(captor.capture(), any(Function.class)); Set passedPartitions = captor.getValue(); assertEquals(1, passedPartitions.size()); assertTrue(passedPartitions.contains(tpReady)); } @Test @SuppressWarnings("unchecked") void getCommittedOffsetsReturnsEmptyWhenAllInitializing() { 
TopicPartition tp0 = new TopicPartition(TOPIC, 0); TopicPartition tp1 = new TopicPartition(TOPIC, 1); TopicPartitionChannel ch0 = mockChannel("ch_0", true); TopicPartitionChannel ch1 = mockChannel("ch_1", true); when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(ch0)); when(mockChannelManager.getChannel(tp1)).thenReturn(Optional.of(ch1)); when(mockBatchOffsetFetcher.getCommittedOffsets(any(), any(Function.class))) .thenReturn(Collections.emptyMap()); Set allPartitions = new HashSet<>(Arrays.asList(tp0, tp1)); Map result = service.getCommittedOffsets(allPartitions); assertTrue(result.isEmpty()); ArgumentCaptor> captor = ArgumentCaptor.forClass(Set.class); verify(mockBatchOffsetFetcher).getCommittedOffsets(captor.capture(), any(Function.class)); assertTrue(captor.getValue().isEmpty()); } // --- transition from initializing to ready --- @Test void insertProcessesRecordsAfterChannelTransitionsFromInitializingToReady() { TopicPartition tp = new TopicPartition(TOPIC, 0); TopicPartitionChannel channel = mockChannel("ch_0", true); when(mockChannelManager.getChannel(tp)).thenReturn(Optional.of(channel)); SinkRecord record1 = recordFor(TOPIC, 0, 10); service.insert(Collections.singletonList(record1)); verify(channel, never()).insertRecord(any(), anyBoolean()); verify(mockSinkTaskContext).offset(tp, 10); // Channel finishes initializing — Kafka re-delivers from the rewound offset when(channel.isInitializing()).thenReturn(false); SinkRecord record2 = recordFor(TOPIC, 0, 11); service.insert(List.of(record1, record2)); verify(channel).insertRecord(record1, true); verify(channel).insertRecord(record2, false); } // --- startPartitions() pipe resolution (FR5) --- @Test void startPartitionsThrowsWhenValidationEnabledAndNonDefaultPipeExists() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.isClosed()).thenReturn(false); when(mockConn.tableExist(TOPIC)).thenReturn(true); when(mockConn.pipeExist(TOPIC)).thenReturn(true); 
SnowflakeSinkServiceV2 svc = buildService(mockConn, /* clientValidationEnabled= */ true); TopicPartition tp = new TopicPartition(TOPIC, 0); SnowflakeKafkaConnectorException exception = assertThrows(SnowflakeKafkaConnectorException.class, () -> svc.startPartitions(Set.of(tp))); assertTrue(exception.getMessage().contains("0032")); } @Test @SuppressWarnings("unchecked") void startPartitionsUsesDefaultPipeWhenValidationEnabledAndNoNonDefaultPipe() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.isClosed()).thenReturn(false); when(mockConn.tableExist(TOPIC)).thenReturn(true); when(mockConn.pipeExist(TOPIC)).thenReturn(false); PartitionChannelManager channelMgr = mock(PartitionChannelManager.class); SnowflakeSinkServiceV2 svc = buildService(mockConn, /* clientValidationEnabled= */ true, channelMgr); TopicPartition tp = new TopicPartition(TOPIC, 0); svc.startPartitions(Set.of(tp)); ArgumentCaptor> captor = ArgumentCaptor.forClass(Map.class); verify(channelMgr).startPartitions(any(), captor.capture()); assertEquals(TOPIC + "-STREAMING", captor.getValue().get(TOPIC)); } @Test @SuppressWarnings("unchecked") void startPartitionsUsesNonDefaultPipeWhenValidationDisabled() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.isClosed()).thenReturn(false); when(mockConn.tableExist(TOPIC)).thenReturn(true); when(mockConn.pipeExist(TOPIC)).thenReturn(true); PartitionChannelManager channelMgr = mock(PartitionChannelManager.class); SnowflakeSinkServiceV2 svc = buildService(mockConn, /* clientValidationEnabled= */ false, channelMgr); TopicPartition tp = new TopicPartition(TOPIC, 0); svc.startPartitions(Set.of(tp)); ArgumentCaptor> captor = ArgumentCaptor.forClass(Map.class); verify(channelMgr).startPartitions(any(), captor.capture()); assertEquals(TOPIC, captor.getValue().get(TOPIC)); } // --- backpressure handling --- @Test void insertSkipsAllPartitionsAfterBackpressure() { TopicPartition tp0 = new 
TopicPartition(TOPIC, 0);
    TopicPartition tp1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    TopicPartitionChannel channel1 = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));
    when(mockChannelManager.getChannel(tp1)).thenReturn(Optional.of(channel1));

    // channel0 throws BackpressureException
    doThrow(
            new BackpressureException(
                new SFException("MemoryThresholdExceeded", "backpressure", 0, "")))
        .when(channel0)
        .insertRecord(any(), anyBoolean());

    List records = Arrays.asList(recordFor(TOPIC, 0, 100), recordFor(TOPIC, 1, 200));
    service.insert(records);

    // channel0 threw; channel1 is skipped because backpressure stops all partitions
    verify(channel0).insertRecord(records.get(0), true);
    verify(channel1, never()).insertRecord(any(), anyBoolean());

    // Both partitions rewound
    verify(mockSinkTaskContext).offset(tp0, 100L);
    verify(mockSinkTaskContext).offset(tp1, 200L);
  }

  /**
   * A backpressure failure on one partition must stop processing of every later record in the
   * batch, including records of partitions that were healthy so far.
   */
  @Test
  void insertSkipsRemainingRecordsForAllPartitionsAfterBackpressure() {
    TopicPartition tp0 = new TopicPartition(TOPIC, 0);
    TopicPartition tp1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    TopicPartitionChannel channel1 = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));
    when(mockChannelManager.getChannel(tp1)).thenReturn(Optional.of(channel1));

    // channel1 throws BackpressureException
    doThrow(new BackpressureException(new SFException("ReceiverSaturated", "backpressure", 0, "")))
        .when(channel1)
        .insertRecord(any(), anyBoolean());

    // p0's first record succeeds, p1 throws, p0's second record is skipped
    List<SinkRecord> records =
        Arrays.asList(recordFor(TOPIC, 0, 100), recordFor(TOPIC, 1, 200), recordFor(TOPIC, 0, 101));
    service.insert(records);

    // channel0 first record processed, channel1 threw, channel0 second record skipped
    verify(channel0).insertRecord(records.get(0), true);
    verify(channel1).insertRecord(records.get(1), true);
    verify(channel0, never()).insertRecord(records.get(2), false);

    // p1 rewound to the backpressured record; p0 rewound to the first skipped record
    verify(mockSinkTaskContext).offset(tp1, 200L);
    verify(mockSinkTaskContext).offset(tp0, 101L);
  }

  /**
   * Backpressure on a ready channel while another partition is still initializing: neither
   * partition makes progress and both are rewound to their first record of the batch.
   */
  @Test
  void insertRewindsOnBackpressureWithInitializingPartitions() {
    TopicPartition tpInit = new TopicPartition(TOPIC, 0);
    TopicPartition tpReady = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel initChannel = mockChannel("ch_0", true);
    TopicPartitionChannel readyChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tpInit)).thenReturn(Optional.of(initChannel));
    when(mockChannelManager.getChannel(tpReady)).thenReturn(Optional.of(readyChannel));

    // Ready channel hits backpressure
    doThrow(
            new BackpressureException(
                new SFException("MemoryThresholdExceeded", "backpressure", 0, "")))
        .when(readyChannel)
        .insertRecord(any(), anyBoolean());

    List<SinkRecord> records =
        Arrays.asList(recordFor(TOPIC, 0, 100), recordFor(TOPIC, 1, 200));
    service.insert(records);

    // initChannel skipped (initializing), readyChannel attempted and threw
    verify(initChannel, never()).insertRecord(any(), anyBoolean());
    verify(readyChannel).insertRecord(records.get(1), true);

    // Both partitions rewound via offsetsOfFirstSkippedRecord
    verify(mockSinkTaskContext).offset(tpInit, 100L);
    verify(mockSinkTaskContext).offset(tpReady, 200L);
  }

  /**
   * After a backpressure failure the service must arm a cooldown that lasts at least {@code
   * BACKPRESSURE_COOLDOWN} from the moment the insert was attempted.
   */
  @Test
  void insertSetsCooldownAfterBackpressure() {
    TopicPartition tp0 = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));

    doThrow(
            new BackpressureException(
                new SFException("MemoryThresholdExceeded", "backpressure", 0, "")))
        .when(channel0)
        .insertRecord(any(), anyBoolean());

    // Captured before the call so the lower bound below cannot be affected by test latency.
    Instant beforeInsert = Instant.now();
    service.insert(Collections.singletonList(recordFor(TOPIC, 0, 100)));

    // Cooldown should be set to a future time: at least one full cooldown after the insert
    // started. Comparing against "now - cooldown" would also pass if no cooldown had been added.
    // NOTE(review): assumes the service computes Instant.now().plus(BACKPRESSURE_COOLDOWN).
    Instant earliestExpectedDeadline =
        beforeInsert.plus(SnowflakeSinkServiceV2.BACKPRESSURE_COOLDOWN);
    assertTrue(!service.backpressureUntil.isBefore(earliestExpectedDeadline));
  }

  /** While the cooldown is active no channel is touched and every partition is rewound. */
  @Test
  void insertSkipsEntireBatchDuringCooldown() {
    TopicPartition tp0 = new TopicPartition(TOPIC, 0);
    TopicPartition tp1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    TopicPartitionChannel channel1 = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));
    when(mockChannelManager.getChannel(tp1)).thenReturn(Optional.of(channel1));

    // Set cooldown to a future time
    service.backpressureUntil = Instant.now().plusSeconds(30);

    List<SinkRecord> records =
        Arrays.asList(recordFor(TOPIC, 0, 100), recordFor(TOPIC, 1, 200));
    service.insert(records);

    // No inserts attempted during cooldown
    verify(channel0, never()).insertRecord(any(), anyBoolean());
    verify(channel1, never()).insertRecord(any(), anyBoolean());

    // All partitions rewound
    verify(mockSinkTaskContext).offset(tp0, 100L);
    verify(mockSinkTaskContext).offset(tp1, 200L);
  }

  /** Once the cooldown deadline has passed, records are inserted again and nothing is rewound. */
  @Test
  void insertResumesNormallyAfterCooldownExpires() {
    TopicPartition tp0 = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));

    // Set cooldown to the past (expired)
    service.backpressureUntil = Instant.now().minusSeconds(1);

    service.insert(Collections.singletonList(recordFor(TOPIC, 0, 100)));

    // Normal processing resumes
    verify(channel0).insertRecord(any(), anyBoolean());
    verify(mockSinkTaskContext, never()).offset(any(TopicPartition.class), any(Long.class));
  }

  // --- recovery skip logic ---

  /**
   * When a channel reports recovery (insertRecord returns false), the rest of that partition's
   * records in the batch are skipped while other partitions continue normally.
   */
  @Test
  void insertSkipsRemainingRecordsForPartitionAfterRecovery() {
    TopicPartition tp0 = new TopicPartition(TOPIC, 0);
    TopicPartition tp1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    TopicPartitionChannel channel1 = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));
    when(mockChannelManager.getChannel(tp1)).thenReturn(Optional.of(channel1));

    // channel0 signals recovery on its first record
    when(channel0.insertRecord(any(), anyBoolean())).thenReturn(false);

    List<SinkRecord> records =
        Arrays.asList(
            recordFor(TOPIC, 0, 100),
            recordFor(TOPIC, 1, 200),
            recordFor(TOPIC, 0, 101),
            recordFor(TOPIC, 0, 102));
    service.insert(records);

    // channel0: only the first record was attempted; 101 and 102 were skipped
    verify(channel0).insertRecord(records.get(0), true);
    verify(channel0, never()).insertRecord(records.get(2), false);
    verify(channel0, never()).insertRecord(records.get(3), false);

    // channel1: processed normally
    verify(channel1).insertRecord(records.get(1), true);

    // Only the recovering partition is rewound, to the triggering record's offset
    verify(mockSinkTaskContext).offset(tp0, 100L);
    verify(mockSinkTaskContext, never()).offset(tp1, 200L);
  }

  /** Recovery triggered by a non-first record rewinds to that record, not to the batch start. */
  @Test
  void insertRewindsToFirstSkippedOffsetAfterRecoveryMidPartition() {
    TopicPartition tp = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel channel = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(tp)).thenReturn(Optional.of(channel));

    // First record succeeds, second triggers recovery
    when(channel.insertRecord(any(), anyBoolean())).thenReturn(true).thenReturn(false);

    List<SinkRecord> records =
        Arrays.asList(recordFor(TOPIC, 0, 100), recordFor(TOPIC, 0, 101), recordFor(TOPIC, 0, 102));
    service.insert(records);

    // First two records attempted, third skipped
    verify(channel).insertRecord(records.get(0), true);
    verify(channel).insertRecord(records.get(1), false);
    verify(channel, never()).insertRecord(records.get(2), false);

    // Rewind to the record that triggered recovery
    verify(mockSinkTaskContext).offset(tp, 101L);
  }

  // --- helpers ---

  /** Builds a service backed by a fresh mock {@code PartitionChannelManager}. */
  private SnowflakeSinkServiceV2 buildService(
      SnowflakeConnectionService conn, boolean clientValidationEnabled) {
    return buildService(conn, clientValidationEnabled, mock(PartitionChannelManager.class));
  }

  /**
   * Builds a service for connector {@code CONNECTOR_NAME}, task "0", with sanitization disabled.
   *
   * @param conn connection service handed to the service under test
   * @param clientValidationEnabled selects CLIENT_SIDE validation when true, SERVER_SIDE otherwise
   * @param channelManager channel manager returned by the supplier passed to the constructor
   */
  private SnowflakeSinkServiceV2 buildService(
      SnowflakeConnectionService conn,
      boolean clientValidationEnabled,
      PartitionChannelManager channelManager) {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName(CONNECTOR_NAME)
            .taskId("0")
            .validation(
                clientValidationEnabled
                    ? SnowflakeValidation.CLIENT_SIDE
                    : SnowflakeValidation.SERVER_SIDE)
            .enableSanitization(false)
            .build();
    return new SnowflakeSinkServiceV2(
        conn,
        config,
        mockSinkTaskContext,
        Optional.empty(),
        () -> mock(BatchOffsetFetcher.class),
        () -> channelManager,
        TaskMetrics.noop());
  }

  /**
   * Creates an open mock channel whose {@code insertRecord} returns true unless a test re-stubs it.
   *
   * @param channelName value returned by {@code getChannelName()}
   * @param initializing value returned by {@code isInitializing()}
   */
  private static TopicPartitionChannel mockChannel(String channelName, boolean initializing) {
    TopicPartitionChannel channel = mock(TopicPartitionChannel.class);
    when(channel.getChannelName()).thenReturn(channelName);
    when(channel.isInitializing()).thenReturn(initializing);
    when(channel.isChannelClosed()).thenReturn(false);
    when(channel.insertRecord(any(), anyBoolean())).thenReturn(true);
    return channel;
  }

  /** Builds a sink record for the given topic, partition and offset. */
  private static SinkRecord recordFor(String topic, int partition, long offset) {
    return SinkRecordBuilder.forTopicPartition(topic, partition).withOffset(offset).build();
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2ValidationLoggingTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import
org.apache.log4j.Logger; import org.apache.log4j.spi.LoggingEvent; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** * Tests for SnowflakeSinkServiceV2 pre-flight safety checks. Verifies validation configuration * logging for preventing data loss and task crashes. */ public class SnowflakeSinkServiceV2ValidationLoggingTest { private TestAppender testAppender; private Logger logger; @BeforeEach public void setUp() { // Capture logs from SnowflakeSinkServiceV2 logger = Logger.getLogger(SnowflakeSinkServiceV2.class); testAppender = new TestAppender(); logger.addAppender(testAppender); logger.setLevel(Level.INFO); } @AfterEach public void tearDown() { logger.removeAppender(testAppender); } /** * Test SAFE config: Validation enabled + errors.tolerance=none * *

    Task aborts on validation failure - no data loss */ @Test public void testSafeConfigValidationEnabledWithToleranceNone() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.CLIENT_SIDE) .tolerateErrors(false) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig(config); assertNotNull(service); // Verify INFO log contains expected message assertTrue( testAppender.containsMessage(Level.INFO, "Client-side validation enabled"), "Should log INFO about validation enabled"); assertTrue( testAppender.containsMessage(Level.INFO, "Validation failures will abort the task (safe"), "Should log that task will abort on validation failure"); } /** * Test SAFE config: Validation enabled + errors.tolerance=all + DLQ configured * *

    Validation errors route to DLQ - no data loss */ @Test public void testSafeConfigValidationEnabledWithToleranceAllAndDlq() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.CLIENT_SIDE) .tolerateErrors(true) .dlqTopicName("my-dlq-topic") .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig(config); assertNotNull(service); // Verify INFO log contains expected message with DLQ topic name assertTrue( testAppender.containsMessage(Level.INFO, "Client-side validation enabled"), "Should log INFO about validation enabled"); assertTrue( testAppender.containsMessage(Level.INFO, "Validation failures will route to DLQ topic"), "Should log that failures route to DLQ"); assertTrue( testAppender.containsMessage(Level.INFO, "my-dlq-topic"), "Should log the DLQ topic name"); } /** * Test UNSAFE config: Validation enabled + errors.tolerance=all + NO DLQ * *

    Invalid records silently dropped - DATA LOSS */ @Test public void testUnsafeConfigValidationEnabledWithToleranceAllNoDlq() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.CLIENT_SIDE) .tolerateErrors(true) .dlqTopicName("") .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig(config); assertNotNull(service); // Verify ERROR log about unsafe configuration assertTrue( testAppender.containsMessage(Level.ERROR, "UNSAFE CONFIGURATION"), "Should log ERROR about unsafe configuration"); assertTrue( testAppender.containsMessage(Level.ERROR, "SILENTLY DROPPED"), "Should warn about silent data loss"); assertTrue( testAppender.containsMessage(Level.ERROR, "causing data loss"), "Should explicitly mention data loss"); } /** * Test: Validation disabled with ERROR_LOGGING enabled on existing table. * *

    Should NOT warn about missing error logging when ERROR_LOGGING is present. */ @Test public void testValidationDisabledWithErrorLoggingEnabled() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.SERVER_SIDE) .topicToTableMap(Map.of("topic1", "table1")) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig( config, mockConn -> { when(mockConn.tableExist("table1")).thenReturn(true); when(mockConn.hasErrorLoggingEnabled("table1")).thenReturn(true); }); assertNotNull(service); assertFalse( testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"), "Should NOT warn about missing error logging when it is enabled"); assertTrue( testAppender.containsMessage(Level.INFO, "error table is active"), "Should log INFO confirming error table is active"); } /** * Test: Validation disabled, multiple tables — one enabled, one disabled. * *

    Verifies per-table iteration: only the disabled table gets a warning; the enabled table gets * an INFO confirmation. */ @Test public void testValidationDisabledMultipleTablesPartialErrorLogging() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.SERVER_SIDE) .topicToTableMap(Map.of("topic_ok", "table_ok", "topic_bad", "table_bad")) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig( config, mockConn -> { when(mockConn.tableExist("table_ok")).thenReturn(true); when(mockConn.hasErrorLoggingEnabled("table_ok")).thenReturn(true); when(mockConn.tableExist("table_bad")).thenReturn(true); when(mockConn.hasErrorLoggingEnabled("table_bad")).thenReturn(false); }); assertNotNull(service); assertTrue( testAppender.containsMessage(Level.WARN, "table_bad"), "Should warn about the table missing ERROR_LOGGING"); assertFalse( testAppender.containsMessage(Level.WARN, "table_ok"), "Should NOT warn about the table that has ERROR_LOGGING enabled"); assertTrue( testAppender.containsMessage(Level.INFO, "table_ok"), "Should log INFO confirmation for the table with ERROR_LOGGING enabled"); } /** * Test: Validation disabled WITHOUT ERROR_LOGGING on existing table. * *

    Should warn about the specific table and suggest ALTER TABLE. */ @Test public void testValidationDisabledWithoutErrorLogging() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.SERVER_SIDE) .topicToTableMap(Map.of("topic1", "table1")) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig( config, mockConn -> { when(mockConn.tableExist("table1")).thenReturn(true); when(mockConn.hasErrorLoggingEnabled("table1")).thenReturn(false); }); assertNotNull(service); assertTrue(testAppender.containsMessage(Level.WARN, "table1"), "Should mention the table name"); assertTrue( testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"), "Should warn about missing error logging"); assertTrue( testAppender.containsMessage(Level.WARN, "ALTER TABLE"), "Should suggest ALTER TABLE command"); } /** * Test: Validation disabled, table does not exist yet. * *

    Should NOT warn about error logging — table will be auto-created with ERROR_LOGGING = TRUE. */ @Test public void testValidationDisabledTableNotExists() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.SERVER_SIDE) .topicToTableMap(Map.of("topic1", "table1")) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig( config, mockConn -> { when(mockConn.tableExist("table1")).thenReturn(false); }); assertNotNull(service); assertFalse( testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"), "Should NOT warn about error logging for non-existent table"); } /** * Test: Validation disabled, table is Iceberg. * *

    Should warn that Iceberg tables do not support ERROR_LOGGING and not check * hasErrorLoggingEnabled. */ @Test public void testValidationDisabledIcebergTableWarning() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .validation(SnowflakeValidation.SERVER_SIDE) .topicToTableMap(Map.of("topic1", "iceberg_table")) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig( config, mockConn -> { when(mockConn.tableExist("iceberg_table")).thenReturn(true); when(mockConn.isIcebergTable("iceberg_table")).thenReturn(true); }); assertNotNull(service); assertTrue( testAppender.containsMessage(Level.WARN, "Iceberg table"), "Should warn that the table is Iceberg"); assertTrue( testAppender.containsMessage(Level.WARN, "do not support ERROR_LOGGING"), "Should warn that Iceberg does not support ERROR_LOGGING"); assertFalse( testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"), "Should NOT emit the generic missing-ERROR_LOGGING warning for Iceberg tables"); } /** * Test: Legacy KC v3 config warning * *

    Warns if snowflake.enable.schematization is present (not supported in KC v4) */ @Test public void testLegacySchematizationConfigWarning() { SinkTaskConfig config = SinkTaskConfigTestBuilder.builder() .connectorName("test-connector") .taskId("0") .enableSchematization(true) .build(); SnowflakeSinkServiceV2 service = createServiceWithConfig(config); assertNotNull(service); // Verify WARN log about legacy config assertTrue( testAppender.containsMessage(Level.WARN, "snowflake.enable.schematization"), "Should mention legacy config name"); assertTrue( testAppender.containsMessage(Level.WARN, "not supported in KC v4"), "Should explain config is not supported"); assertTrue( testAppender.containsMessage(Level.WARN, "ENABLE_SCHEMA_EVOLUTION"), "Should mention server-side schema evolution"); } /** Helper to create SnowflakeSinkServiceV2 with minimal mocked dependencies. */ private SnowflakeSinkServiceV2 createServiceWithConfig(SinkTaskConfig config) { return createServiceWithConfig(config, mockConn -> {}); } /** Helper with optional mock setup for connection service. */ private SnowflakeSinkServiceV2 createServiceWithConfig( SinkTaskConfig config, Consumer mockSetup) { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.isClosed()).thenReturn(false); when(mockConn.getTelemetryClient()).thenReturn(null); mockSetup.accept(mockConn); TaskMetrics mockMetrics = mock(TaskMetrics.class); try { return new SnowflakeSinkServiceV2( mockConn, config, null, // recordErrorReporter null, // sinkTaskContext java.util.Optional.empty(), // metricsJmxReporter mockMetrics); } catch (Exception e) { System.err.println("Failed to create service: " + e.getMessage()); e.printStackTrace(); return null; } } /** Test appender that captures log events for verification. 
*/ private static class TestAppender extends AppenderSkeleton { private final List events = new ArrayList<>(); @Override protected void append(LoggingEvent event) { events.add(event); } @Override public void close() { // No-op } @Override public boolean requiresLayout() { return false; } public boolean containsMessage(Level level, String messageFragment) { return events.stream() .anyMatch( event -> event.getLevel().equals(level) && event.getRenderedMessage().contains(messageFragment)); } public List getEvents() { return new ArrayList<>(events); } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingClientPropertiesTest.java ================================================ /* * Copyright (c) 2023 Snowflake Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. 
*/

package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP;
import static com.snowflake.kafka.connector.internal.TestUtils.generatePrivateKey;
import static com.snowflake.kafka.connector.internal.TestUtils.getConnectorConfigurationForStreaming;
import static com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties.STREAMING_CLIENT_V2_PREFIX_NAME;
import static org.assertj.core.api.Assertions.assertThat;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder;
import com.snowflake.kafka.connector.internal.PrivateKeyTool;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.SnowflakeURL;
import java.security.PrivateKey;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.junit.Assert;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for building {@link StreamingClientProperties} from a {@link SinkTaskConfig}: client
 * properties, client name prefix, parameter overrides, and equality.
 */
public class StreamingClientPropertiesTest {

  private static final String EXAMPLE_PARAM1 = "EXAMPLE_PARAM1".toLowerCase();
  private static final String EXAMPLE_PARAM2 = "EXAMPLE_PARAM2".toLowerCase();

  @Test
  public void testGetValidProperties() {
    String privateKeyPem = Base64.getEncoder().encodeToString(generatePrivateKey().getEncoded());
    String testUrl = "https://testaccount.us-east-1.snowflakecomputing.com";

    Map<String, String> connectorConfig = new HashMap<>();
    connectorConfig.put(KafkaConnectorConfigParams.NAME, "testName");
    connectorConfig.put(Utils.TASK_ID, "0");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, testUrl);
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "testRole");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, "testUser");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, privateKeyPem);

    SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
    StreamingClientProperties result = StreamingClientProperties.from(config);

    // verify client properties
    Properties clientProps = result.clientProperties;
    assertThat(clientProps.getProperty("user")).isEqualTo("testUser");
    assertThat(clientProps.getProperty("role")).isEqualTo("testRole");
    assertThat(clientProps.getProperty("account")).isEqualTo("testaccount");
    assertThat(clientProps.getProperty("host"))
        .isEqualTo("testaccount.us-east-1.snowflakecomputing.com");
    assertThat(clientProps.getProperty("private_key")).isEqualTo(privateKeyPem);
    assertThat(clientProps).hasSize(5);

    // verify client name prefix and empty parameter overrides
    assertThat(result.clientNamePrefix).isEqualTo(STREAMING_CLIENT_V2_PREFIX_NAME + "testName");
    assertThat(result.parameterOverrides).isEmpty();
  }

  @Test
  void shouldPropagateStreamingClientPropertiesFromOverrideMap() {
    // GIVEN
    Map<String, String> connectorConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    connectorConfig.put(Utils.TASK_ID, "0");
    connectorConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY,
        Base64.getEncoder().encodeToString(generatePrivateKey().getEncoded()));
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, "EXAMPLE_PARAM1:1,EXAMPLE_PARAM2:2");

    Map<String, String> expectedParameterOverrides = new HashMap<>();
    expectedParameterOverrides.put(EXAMPLE_PARAM1, "1");
    expectedParameterOverrides.put(EXAMPLE_PARAM2, "2");

    // WHEN
    SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
    StreamingClientProperties resultProperties = StreamingClientProperties.from(config);

    // THEN
    assertThat(resultProperties.parameterOverrides).isEqualTo(expectedParameterOverrides);
  }

  // NOTE(review): this body is currently identical to
  // shouldPropagateStreamingClientPropertiesFromOverrideMap and sets no explicit property that
  // could take precedence over the override map — confirm what scenario it is meant to cover.
  @Test
  void explicitStreamingClientPropertiesTakePrecedenceOverOverrideMap_SingleBufferEnabled() {
    // GIVEN
    Map<String, String> connectorConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    connectorConfig.put(Utils.TASK_ID, "0");
    connectorConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY,
        Base64.getEncoder().encodeToString(generatePrivateKey().getEncoded()));
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, "EXAMPLE_PARAM1:1,EXAMPLE_PARAM2:2");

    Map<String, String> expectedParameterOverrides = new HashMap<>();
    expectedParameterOverrides.put(EXAMPLE_PARAM1, "1");
    expectedParameterOverrides.put(EXAMPLE_PARAM2, "2");

    // WHEN
    SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
    StreamingClientProperties resultProperties = StreamingClientProperties.from(config);

    // THEN
    assertThat(resultProperties.parameterOverrides).isEqualTo(expectedParameterOverrides);
  }

  @Test
  public void testValidPropertiesWithOverriddenStreamingPropertiesMap() {
    Map<String, String> connectorConfig = getConnectorConfigurationForStreaming(true);
    connectorConfig.put(KafkaConnectorConfigParams.NAME, "testName");
    String testUrl = "https://testaccount.us-east-1.snowflakecomputing.com";
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, testUrl);
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "testRole");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, "testUser");
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, "EXAMPLE_PARAM2:10000000");

    SnowflakeURL parsedUrl = new SnowflakeURL(testUrl);
    Properties expectedProps = new Properties();
    expectedProps.put("user", "testUser");
    expectedProps.put("role", "testRole");
    expectedProps.put("account", parsedUrl.getAccount());
    expectedProps.put("host", parsedUrl.getUrlWithoutPort());
    String privateKeyStr = connectorConfig.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
    if (privateKeyStr != null) {
      String passphrase =
          connectorConfig.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE);
      PrivateKey privateKey = PrivateKeyTool.parsePrivateKey(privateKeyStr, passphrase);
      expectedProps.put(
          "private_key", Base64.getEncoder().encodeToString(privateKey.getEncoded()));
    }
    String expectedClientName = STREAMING_CLIENT_V2_PREFIX_NAME + "testName";
    Map<String, String> expectedParameterOverrides = new HashMap<>();
    expectedParameterOverrides.put(EXAMPLE_PARAM2, "10000000");

    // test get properties
    SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
    StreamingClientProperties resultProperties = StreamingClientProperties.from(config);

    // verify (AssertJ instead of the `assert` keyword, which is skipped when the JVM runs
    // without -ea)
    assertThat(resultProperties.clientProperties).isEqualTo(expectedProps);
    assertThat(resultProperties.clientNamePrefix).isEqualTo(expectedClientName);
    assertThat(resultProperties.parameterOverrides).isEqualTo(expectedParameterOverrides);
  }

  @Test
  public void testInvalidStreamingClientPropertiesMap() {
    Map<String, String> connectorConfig = getConnectorConfigurationForStreaming(true);
    connectorConfig.put(KafkaConnectorConfigParams.NAME, "testName");
    connectorConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
        "https://testaccount.us-east-1.snowflakecomputing.com");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "testRole");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, "testUser");

    // Two malformed entries: wrong separator, and an entry without any separator.
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "MAX_CHANNEL_SIZE_IN_BYTES->10000000,MAX_CLIENT_LAG100");
    assertOverrideMapRejected(connectorConfig);

    // A single entry with the wrong separator.
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, "MAX_CHANNEL_SIZE_IN_BYTES->10000000");
    assertOverrideMapRejected(connectorConfig);
  }

  /**
   * Asserts that building the properties from {@code connectorConfig} fails with a {@link
   * SnowflakeKafkaConnectorException} whose message names the override-map parameter.
   */
  private static void assertOverrideMapRejected(Map<String, String> connectorConfig) {
    // test get properties
    try {
      SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
      StreamingClientProperties.from(config);
      Assert.fail("Should throw an exception");
    } catch (SnowflakeKafkaConnectorException exception) {
      assertThat(exception.getMessage())
          .contains(KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP);
    }
  }

  @Test
  public void testStreamingClientPropertiesEquality() {
    Map<String, String> config1 = getConnectorConfigurationForStreaming(true);
    config1.put(KafkaConnectorConfigParams.NAME, "catConnector");

    Map<String, String> config2 = getConnectorConfigurationForStreaming(true);
    config2.put(KafkaConnectorConfigParams.NAME, "dogConnector");

    // get properties
    StreamingClientProperties prop1 = StreamingClientProperties.from(SinkTaskConfig.from(config1));
    StreamingClientProperties prop2 = StreamingClientProperties.from(SinkTaskConfig.from(config2));

    // Configs that differ only in the connector name are expected to compare equal.
    assertThat(prop1).isEqualTo(prop2);
    assertThat(prop1.hashCode()).isEqualTo(prop2.hashCode());

    config1.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "max_append_request_buffer_duration_ms:1000");
    config2.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "max_append_request_buffer_duration_ms:10000");

    prop1 = StreamingClientProperties.from(SinkTaskConfig.from(config1));
    prop2 = StreamingClientProperties.from(SinkTaskConfig.from(config2));

    // Different parameter overrides must make the two instances unequal.
    assertThat(prop1).isNotEqualTo(prop2);
    assertThat(prop1.hashCode()).isNotEqualTo(prop2.hashCode());
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingErrorHandlerIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.TestUtils;
import
com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus; import com.snowflake.kafka.connector.internal.streaming.v2.SnowpipeStreamingPartitionChannel; import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.errors.DataException; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** * Integration tests verifying client-side broken record errors are routed through {@link * StreamingErrorHandler} with proper {@code errors.tolerance} semantics. * *

    These tests exercise the full path: {@link SnowpipeStreamingPartitionChannel#insertRecord} →
 * broken record detection → {@link StreamingErrorHandler#handleError} → DLQ / throw.
 */
class StreamingErrorHandlerIT {

  private static final String TOPIC = "test_topic";
  private static final int PARTITION = 0;

  // Kafka Connect error-handling properties toggled by the individual tests.
  private static final String ERRORS_TOLERANCE_KEY = "errors.tolerance";
  private static final String DLQ_TOPIC_KEY = "errors.deadletterqueue.topic.name";
  private static final String DLQ_TOPIC = "my-dlq-topic";

  private String channelName;
  private String pipeName;
  private SnowflakeTelemetryService mockTelemetryService;
  private InMemorySinkTaskContext sinkTaskContext;
  private ExecutorService openChannelIoExecutor;

  /** Creates per-test unique channel/pipe names, a mocked telemetry service and an executor. */
  @BeforeEach
  void setUp() {
    final String uniqueId = UUID.randomUUID().toString().substring(0, 8);
    channelName = "test_channel_" + uniqueId;
    pipeName = "test_pipe_" + uniqueId;
    mockTelemetryService = mock(SnowflakeTelemetryService.class);
    sinkTaskContext =
        new InMemorySinkTaskContext(Collections.singleton(new TopicPartition(TOPIC, PARTITION)));
    openChannelIoExecutor = Executors.newSingleThreadExecutor();
  }

  /** Stops the executor created in {@link #setUp()}. */
  @AfterEach
  void tearDown() {
    openChannelIoExecutor.shutdownNow();
  }

  // ── errors.tolerance = NONE (default) ──────────────────────────────────────

  @Test
  void brokenRecord_toleranceNone_shouldThrowDataException() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    Map<String, String> config = baseConfig();

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);
    SinkRecord brokenSinkRecord = buildBrokenValueRecord(0);

    DataException thrown =
        assertThrows(DataException.class, () -> channel.insertRecord(brokenSinkRecord, true));

    // The cause should be the original SnowflakeKafkaConnectorException from convertToMap
    assertNotNull(thrown.getCause(), "DataException should wrap the original conversion exception");
    assertTrue(
        thrown.getCause() instanceof SnowflakeKafkaConnectorException,
        "Cause should be the original SnowflakeKafkaConnectorException, got: "
            + thrown.getCause().getClass().getName());
    assertEquals(0, errorReporter.getReportedRecords().size());
  }

  @Test
  void brokenKeyRecord_toleranceNone_shouldThrowDataException() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    Map<String, String> config = baseConfig();

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);
    SinkRecord brokenSinkRecord = buildBrokenKeyRecord(0);

    DataException thrown =
        assertThrows(DataException.class, () -> channel.insertRecord(brokenSinkRecord, true));

    assertNotNull(thrown.getCause(), "DataException should wrap the original conversion exception");
    assertEquals(0, errorReporter.getReportedRecords().size());
  }

  // ── errors.tolerance = NONE + DLQ configured ─────────────────────

  @Test
  void brokenRecord_toleranceNone_withDLQ_shouldRouteToDlqThenThrow() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    // errors.tolerance defaults to "none"
    Map<String, String> config = withDlq(baseConfig());

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);
    SinkRecord brokenSinkRecord = buildBrokenValueRecord(0);

    DataException thrown =
        assertThrows(DataException.class, () -> channel.insertRecord(brokenSinkRecord, true));

    assertNotNull(thrown.getCause(), "DataException should wrap the original conversion exception");
    // Record should be preserved in DLQ before task failure
    assertEquals(
        1,
        errorReporter.getReportedRecords().size(),
        "Record should be routed to DLQ even when tolerance=none");
    InMemoryKafkaRecordErrorReporter.ReportedRecord reported =
        errorReporter.getReportedRecords().get(0);
    assertEquals(brokenSinkRecord, reported.getRecord());
    assertTrue(
        reported.getException() instanceof DataException,
        "DLQ should receive DataException wrapper");
  }

  // ── errors.tolerance = ALL + DLQ configured ────────────────────────────────

  @Test
  void brokenRecord_toleranceAll_withDLQ_shouldSendOriginalExceptionToDLQ() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    Map<String, String> config = withDlq(tolerateAll(baseConfig()));

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);
    SinkRecord brokenSinkRecord = buildBrokenValueRecord(0);

    // Should NOT throw
    channel.insertRecord(brokenSinkRecord, true);

    assertEquals(1, errorReporter.getReportedRecords().size());
    InMemoryKafkaRecordErrorReporter.ReportedRecord reported =
        errorReporter.getReportedRecords().get(0);
    assertEquals(brokenSinkRecord, reported.getRecord());
    // DLQ should receive DataException (KCv3-compatible) with original exception as cause
    assertTrue(
        reported.getException() instanceof DataException,
        "DLQ should receive DataException wrapper, got: "
            + reported.getException().getClass().getName());
    assertNotNull(
        reported.getException().getCause(),
        "DataException should have the original exception as cause");
    assertTrue(
        reported.getException().getCause() instanceof SnowflakeKafkaConnectorException,
        "DataException cause should be SnowflakeKafkaConnectorException, got: "
            + reported.getException().getCause().getClass().getName());
  }

  @Test
  void brokenKeyRecord_toleranceAll_withDLQ_shouldSendOriginalExceptionToDLQ() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    Map<String, String> config = withDlq(tolerateAll(baseConfig()));

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);
    SinkRecord brokenSinkRecord = buildBrokenKeyRecord(0);

    channel.insertRecord(brokenSinkRecord, true);

    assertEquals(1, errorReporter.getReportedRecords().size());
    InMemoryKafkaRecordErrorReporter.ReportedRecord reported =
        errorReporter.getReportedRecords().get(0);
    assertEquals(brokenSinkRecord, reported.getRecord());
    // DLQ should receive DataException wrapper with original exception as cause
    assertTrue(
        reported.getException() instanceof DataException,
        "DLQ should receive DataException wrapper, got: "
            + reported.getException().getClass().getName());
    assertNotNull(reported.getException().getCause(), "DataException should have cause");
  }

  @Test
  void multipleBrokenRecords_toleranceAll_withDLQ_shouldSendOnlyBrokenToDLQ() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    Map<String, String> config = withDlq(tolerateAll(baseConfig()));

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);

    // Three broken records (offsets 0, 2, 3) interleaved with one valid record (offset 1).
    channel.insertRecord(buildBrokenValueRecord(0), true);
    channel.insertRecord(buildValidRecord(1), false);
    channel.insertRecord(buildBrokenValueRecord(2), false);
    channel.insertRecord(buildBrokenValueRecord(3), false);

    assertEquals(3, errorReporter.getReportedRecords().size());
  }

  // ── errors.tolerance = ALL + no DLQ → should silently drop ─────────────────

  @Test
  void brokenRecord_toleranceAll_noDLQ_shouldSilentlyDrop() {
    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    // No DLQ topic configured
    Map<String, String> config = tolerateAll(baseConfig());

    SnowpipeStreamingPartitionChannel channel = createChannel(config, errorReporter);
    SinkRecord brokenSinkRecord = buildBrokenValueRecord(0);

    // Should NOT throw - record is silently dropped with a warning log
    channel.insertRecord(brokenSinkRecord, true);

    assertEquals(0, errorReporter.getReportedRecords().size());
  }

  // ── Helpers ────────────────────────────────────────────────────────────────

  /** Returns a fresh mutable copy of the streaming connector test configuration. */
  private Map<String, String> baseConfig() {
    return new HashMap<>(TestUtils.getConnectorConfigurationForStreaming(false));
  }

  /** Sets {@code errors.tolerance=all} on the given config and returns it. */
  private static Map<String, String> tolerateAll(Map<String, String> config) {
    config.put(ERRORS_TOLERANCE_KEY, "all");
    return config;
  }

  /** Sets the dead-letter-queue topic name on the given config and returns it. */
  private static Map<String, String> withDlq(Map<String, String> config) {
    config.put(DLQ_TOPIC_KEY, DLQ_TOPIC);
    return config;
  }

  /**
   * Creates a SinkRecord whose value triggers a broken record (plain String with STRING_SCHEMA).
   */
  private SinkRecord buildBrokenValueRecord(long offset) {
    return SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
        .withValueSchema(Schema.STRING_SCHEMA)
        .withValue("plain string - not a map")
        .withOffset(offset)
        .build();
  }

  /** Creates a SinkRecord whose key triggers a broken record (String with INT32 key schema). */
  private SinkRecord buildBrokenKeyRecord(long offset) {
    return SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
        .withKeySchema(Schema.INT32_SCHEMA)
        .withKey("not an int")
        .withValueSchema(Schema.STRING_SCHEMA)
        .withValue("{}")
        .withOffset(offset)
        .build();
  }

  /** Creates a valid SinkRecord with a schemaless JSON map value. */
  private SinkRecord buildValidRecord(long offset) {
    JsonConverter jsonConverter = new JsonConverter();
    jsonConverter.configure(Collections.singletonMap("schemas.enable", "false"), false);
    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData(TOPIC, "{\"name\": \"test\"}".getBytes(StandardCharsets.UTF_8));
    return SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
        .withSchemaAndValue(schemaAndValue)
        .withOffset(offset)
        .build();
  }

  /**
   * Builds a channel for {@link #TOPIC}/{@link #PARTITION} backed by a fake ingest client, wired
   * to a {@link StreamingErrorHandler} created from the given config and error reporter.
   */
  private SnowpipeStreamingPartitionChannel createChannel(
      Map<String, String> config, InMemoryKafkaRecordErrorReporter errorReporter) {
    SinkTaskConfig taskConfig = SinkTaskConfig.from(config);
    StreamingErrorHandler errorHandler =
        new StreamingErrorHandler(taskConfig, errorReporter, mockTelemetryService);
    final TopicPartition topicPartition = new TopicPartition(TOPIC, PARTITION);
    final PartitionOffsetTracker offsetTracker =
        new PartitionOffsetTracker(topicPartition, sinkTaskContext, channelName);
    final SnowflakeTelemetryChannelStatus telemetryChannelStatus =
        new SnowflakeTelemetryChannelStatus(
            "test_table",
            "test_connector",
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            offsetTracker.persistedOffsetRef(),
            offsetTracker.processedOffsetRef(),
            offsetTracker.consumerGroupOffsetRef());
    return new SnowpipeStreamingPartitionChannel(
        "test_table",
        channelName,
        pipeName,
        new FakeSnowflakeStreamingIngestClient(pipeName, "test_connector"),
        openChannelIoExecutor,
        mockTelemetryService,
        telemetryChannelStatus,
        offsetTracker,
        taskConfig,
        errorHandler,
        TaskMetrics.noop(),
        false,
        null,
        Optional.empty());
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingManualModeIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.TestUtils.assertTableColumnCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertTableHasColumn;
import static com.snowflake.kafka.connector.internal.TestUtils.assertTableRowCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;
import static com.snowflake.kafka.connector.internal.TestUtils.getTableRows;
import static com.snowflake.kafka.connector.internal.TestUtils.tableSize;
import static java.lang.String.format;
import static org.apache.kafka.connect.data.Schema.STRING_SCHEMA;
import static org.junit.jupiter.api.Assertions.assertEquals;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Constants;
import com.snowflake.kafka.connector.InjectQueryRunner;
import com.snowflake.kafka.connector.InjectQueryRunnerExtension;
import com.snowflake.kafka.connector.InjectSnowflakeDataSourceExtension;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.storage.Converter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;

/**
 * Integration tests for "manual mode", i.e. the user creates the table (and optionally the pipe)
 * themselves before the connector starts ingesting.
 */
@ExtendWith({InjectSnowflakeDataSourceExtension.class, InjectQueryRunnerExtension.class})
class StreamingManualModeIT {

  private final ObjectMapper objectMapper = new ObjectMapper();
  private final SnowflakeConnectionService conn = TestUtils.getConnectionServiceWithEncryptedKey();

  private String tableName;
  private String topicName;
  private TopicPartition topicPartition;
  private SnowflakeSinkService snowflakeSinkService;

  @InjectQueryRunner private QueryRunner queryRunner;

  /** Builds the sink service and creates the default target table for a random table name. */
  @BeforeEach
  void beforeEach() throws SQLException {
    final Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(true);
    config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    config.put(
        Constants.KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "false");
    SinkTaskConfig sinkTaskConfig = SinkTaskConfig.from(config);

    tableName = TestUtils.randomTableName();
    topicName = tableName;
    topicPartition = new TopicPartition(topicName, 0);

    snowflakeSinkService =
        StreamingSinkServiceBuilder.builder(conn, sinkTaskConfig)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();

    queryRunner.execute(
        format(
            "create table %s (city varchar, age number, married boolean, has_cat boolean,"
                + " crazy_field_name boolean, skills variant, family variant)",
            tableName));
  }

  /** Drops the test table and the pipe named {@code <table>-STREAMING}. */
  @AfterEach
  void afterEach() {
    TestUtils.dropTable(tableName);
    TestUtils.dropPipe(tableName + "-STREAMING");
  }

  @Nested
  class TableAndPipeDefinedByUser {

    private String pipeName;

    /** Creates a user-defined pipe that maps JSON fields onto the table's columns. */
    @BeforeEach
    void beforeEach() throws SQLException {
      pipeName = tableName;
      queryRunner.execute(
          format(
              "CREATE OR REPLACE PIPE %s AS COPY INTO %s FROM (SELECT $1:city, $1:age, $1:married,"
                  + " $1['has cat'] has_cat, $1['! @&$#* has Łułósżź'] crazy_field_name, $1:skills,"
                  + " $1:family FROM TABLE(DATA_SOURCE(TYPE => 'STREAMING')))",
              pipeName, tableName));
    }

    @AfterEach
    void afterEach() throws SQLException {
      TestUtils.dropPipe(pipeName);
    }

    @Test
    void test_streaming_ingestion_with_user_defined_table_and_pipe() throws Exception {
      ingestAndAwait(buildContentSinkRecords());

      // Assert that the table has exactly 2 rows with the given values
      assertTableRowCount(tableName, 2);

      List<Map<String, Object>> dbRows = getTableRows(tableName);
      final Map<String, Object> firstRow = dbRows.get(0);

      makeCommonAssertions(firstRow);
      assertEquals(true, firstRow.get("HAS_CAT"));
      assertEquals(true, firstRow.get("CRAZY_FIELD_NAME"));
    }
  }

  @Nested
  class DefaultPipe {

    /** Replaces the default table with one whose column names match the raw JSON keys. */
    @BeforeEach
    void beforeEach() throws SQLException {
      queryRunner.execute(
          format(
              "create or replace table %s (record_metadata variant, city varchar, age number,"
                  + " married boolean, \"has cat\" boolean , \"! @&$#* has Łułósżź\" boolean,"
                  + " skills variant, family variant)",
              tableName));
    }

    @Test
    void test_streaming_ingestion_with_user_defined_table_and_default_pipe() throws Exception {
      ingestAndAwait(buildContentSinkRecords());

      // Assert that the table has exactly 2 rows and 8 columns
      assertTableRowCount(tableName, 2);
      assertTableColumnCount(tableName, 8);

      Map<String, Object> firstRow = getTableRows(tableName).get(0);
      assertTableHasColumn(tableName, "record_metadata");
      makeCommonAssertions(firstRow);
      assertEquals(true, firstRow.get("! @&$#* has Łułósżź"));
      assertEquals(true, firstRow.get("has cat"));
    }
  }

  /**
   * Starts the partition, inserts the records and waits until all of them are visible in the
   * table. The sink service is closed in {@code finally} so that it is released even when the
   * wait or the insert fails.
   */
  private void ingestAndAwait(List<SinkRecord> records) throws Exception {
    try {
      snowflakeSinkService.startPartition(topicPartition);
      snowflakeSinkService.awaitInitialization();
      snowflakeSinkService.insert(records);

      // Wait for data to be ingested into the table
      assertWithRetry(() -> tableSize(tableName) == records.size());
    } finally {
      snowflakeSinkService.closeAll();
    }
  }

  /** Builds two sink records (offsets 1 and 2) that carry the same schemaless JSON payload. */
  private List<SinkRecord> buildContentSinkRecords() throws JsonProcessingException {
    // this json row is sent twice to Kafka
    final byte[] jsonPayload =
        objectMapper
            .writeValueAsString(
                Map.of(
                    "city", "Pcim Górny",
                    "age", 30,
                    "married", true,
                    "has cat", true,
                    "! @&$#* has Łułósżź", true,
                    "skills", List.of("sitting", "standing", "eating"),
                    "family", Map.of("son", "Jack", "daughter", "Anna")))
            .getBytes(StandardCharsets.UTF_8);

    Converter converter = new JsonConverter();
    final Map<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "false");
    converter.configure(converterConfig, false);
    SchemaAndValue input = converter.toConnectData(topicName, jsonPayload);

    return List.of(
        new SinkRecord(topicName, 0, STRING_SCHEMA, "test_key1", input.schema(), input.value(), 1),
        new SinkRecord(topicName, 0, STRING_SCHEMA, "test_key2", input.schema(), input.value(), 2));
  }

  /** Parses Strings and byte arrays as JSON; any other value is converted to a JSON tree. */
  private JsonNode toJson(Object value) throws IOException {
    if (value instanceof String) {
      return objectMapper.readTree((String) value);
    }
    if (value instanceof byte[]) {
      return objectMapper.readTree((byte[]) value);
    }
    return objectMapper.valueToTree(value);
  }

  /** Asserts the columns that are named identically in both table layouts. */
  private void makeCommonAssertions(final Map<String, Object> firstRow) throws IOException {
    assertEquals("Pcim Górny", firstRow.get("CITY"));
    assertEquals(30L, firstRow.get("AGE"));
    assertEquals(true, firstRow.get("MARRIED"));
    assertEquals(toJson(List.of("sitting", "standing", "eating")), toJson(firstRow.get("SKILLS")));
    assertEquals(toJson(Map.of("son", "Jack", "daughter", "Anna")), toJson(firstRow.get("FAMILY")));
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingSinkServiceBuilder.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import java.util.Collections;
import java.util.Optional;
import org.apache.kafka.connect.sink.SinkTaskContext;

/**
 * Test-side builder for {@link SnowflakeSinkServiceV2}.
 *
 * <p>Only the connection service and task config are mandatory; every other collaborator has an
 * in-memory / no-op default that can be overridden through the {@code with…} methods.
 */
public class StreamingSinkServiceBuilder {

  private final SnowflakeConnectionService conn;
  private final SinkTaskConfig config;

  private KafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
  private SinkTaskContext sinkTaskContext = new InMemorySinkTaskContext(Collections.emptySet());
  private Optional<MetricsJmxReporter> metricsJmxReporter = Optional.empty();
  private TaskMetrics taskMetrics = TaskMetrics.noop();

  /** Entry point: creates a builder with default collaborators. */
  public static StreamingSinkServiceBuilder builder(
      SnowflakeConnectionService conn, SinkTaskConfig config) {
    return new StreamingSinkServiceBuilder(conn, config);
  }

  private StreamingSinkServiceBuilder(SnowflakeConnectionService conn, SinkTaskConfig config) {
    this.conn = conn;
    this.config = config;
  }

  /** Creates the sink service from the currently configured collaborators. */
  public SnowflakeSinkServiceV2 build() {
    return new SnowflakeSinkServiceV2(
        conn, config, errorReporter, sinkTaskContext, metricsJmxReporter, taskMetrics);
  }

  /** Overrides the default in-memory error reporter. */
  public StreamingSinkServiceBuilder withErrorReporter(KafkaRecordErrorReporter errorReporter) {
    this.errorReporter = errorReporter;
    return this;
  }

  /** Overrides the default sink task context (which has an empty assignment). */
  public StreamingSinkServiceBuilder withSinkTaskContext(SinkTaskContext sinkTaskContext) {
    this.sinkTaskContext = sinkTaskContext;
    return this;
  }

  /**
   * Enables JMX metrics reporting with the given reporter.
   *
   * @param reporter must not be null ({@link Optional#of} rejects null)
   */
  public StreamingSinkServiceBuilder withMetricsJmxReporter(MetricsJmxReporter reporter) {
    this.metricsJmxReporter = Optional.of(reporter);
    return this;
  }

  /** Overrides the default no-op task metrics. */
  public StreamingSinkServiceBuilder withTaskMetrics(TaskMetrics taskMetrics) {
    this.taskMetrics = taskMetrics;
    return this;
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/telemetry/PeriodicTelemetryReporterTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.streaming.telemetry.PeriodicTelemetryReporter.MAX_INITIAL_JITTER_MS;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME;
import static org.assertj.core.api.Assertions.assertThat;
import static
org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import net.snowflake.client.internal.jdbc.telemetry.Telemetry;
import net.snowflake.client.internal.jdbc.telemetry.TelemetryData;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link PeriodicTelemetryReporter} using an in-memory {@link Telemetry} client that
 * records every batch the reporter sends.
 */
class PeriodicTelemetryReporterTest {

  private static final String CONNECTOR_NAME = "test-connector";
  private static final String TASK_ID = "0";
  private static final long SHORT_REPORT_INTERVAL_MS = 100L;
  private static final long MAX_WAIT_FOR_FIRST_REPORT_MS = MAX_INITIAL_JITTER_MS + 2000;

  private MockTelemetryClient mockTelemetryClient;
  private SnowflakeTelemetryService telemetryService;
  private PeriodicTelemetryReporter reporter;

  @BeforeEach
  void setUp() {
    mockTelemetryClient = new MockTelemetryClient();
    telemetryService = new SnowflakeTelemetryService(mockTelemetryClient);
    telemetryService.setAppName(CONNECTOR_NAME);
    telemetryService.setTaskID(TASK_ID);
  }

  /** Stops the reporter (if a test created one) and releases the mock client's worker thread. */
  @AfterEach
  void tearDown() {
    if (reporter != null) {
      reporter.stop();
    }
    mockTelemetryClient.shutdown();
  }

  @Test
  void shouldStartAndStopWithoutErrors() {
    // Given
    Supplier<Map<String, TopicPartitionChannel>> emptySupplier = Collections::emptyMap;
    reporter = newReporter(emptySupplier);

    // When/Then
    assertDoesNotThrow(() -> reporter.start());
    assertDoesNotThrow(() -> reporter.stop());
  }

  @Test
  void shouldNotReportTelemetryWhenNoChannelsExist() throws InterruptedException {
    // Given
    Supplier<Map<String, TopicPartitionChannel>> emptySupplier = Collections::emptyMap;
    reporter = newReporter(emptySupplier);

    // When
    reporter.start();
    awaitReportCycles();

    // Then
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "No telemetry should be sent when there are no channels");
  }

  @Test
  void shouldNotReportTelemetryWhenChannelsSupplierReturnsNull() throws InterruptedException {
    // Given
    Supplier<Map<String, TopicPartitionChannel>> nullSupplier = () -> null;
    reporter = newReporter(nullSupplier);

    // When
    reporter.start();
    awaitReportCycles();

    // Then
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "No telemetry should be sent when supplier returns null");
  }

  @Test
  void shouldReportTelemetryForActiveChannels() throws InterruptedException {
    // Given
    Map<String, TopicPartitionChannel> channels =
        singleChannel("channel1", createMockChannelWithNonEmptyStatus());
    reporter = newReporter(() -> channels);

    // When
    reporter.start();
    // Wait for telemetry to be reported (accounting for jitter)
    waitForTelemetryCount(1, MAX_WAIT_FOR_FIRST_REPORT_MS);

    // Then
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 1,
        "At least one telemetry report should be sent");
  }

  @Test
  void shouldReportTelemetryForMultipleChannels() throws InterruptedException {
    // Given
    final String channelName1 = "testChannel_topic1_partition0";
    final String channelName2 = "testChannel_topic2_partition1";
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", createMockChannelWithNonEmptyStatus(channelName1));
    channels.put("channel2", createMockChannelWithNonEmptyStatus(channelName2));
    reporter = newReporter(() -> channels);

    // When
    reporter.start();
    // Wait for telemetry to be reported (at least 2 reports for 2 channels, accounting for jitter)
    waitForTelemetryCount(2, MAX_WAIT_FOR_FIRST_REPORT_MS);

    // Then
    LinkedList<TelemetryData> sentTelemetry = mockTelemetryClient.getSentTelemetryData();
    assertTrue(sentTelemetry.size() >= 2, "Telemetry should be sent for all channels");

    Set<String> reportedChannelNames =
        sentTelemetry.stream()
            .map(
                telemetryData ->
                    telemetryData
                        .getMessage()
                        .get("data")
                        .get(TOPIC_PARTITION_CHANNEL_NAME)
                        .asText())
            .collect(Collectors.toSet());
    assertEquals(2, reportedChannelNames.size(), "Telemetry should be sent for both channels");
    assertThat(reportedChannelNames).containsExactlyInAnyOrder(channelName1, channelName2);
  }

  @Test
  void shouldNotReportTelemetryForEmptyChannelStatus() throws InterruptedException {
    // Given
    Map<String, TopicPartitionChannel> channels =
        singleChannel("channel1", createMockChannelWithEmptyStatus());
    reporter = newReporter(() -> channels);

    // When
    reporter.start();
    awaitReportCycles();

    // Then - empty status should not be reported
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "Empty channel status should not trigger telemetry");
  }

  @Test
  void shouldNotReportTelemetryWhenChannelStatusIsNull() throws InterruptedException {
    // Given
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    when(mockChannel.getSnowflakeTelemetryChannelStatus()).thenReturn(null);
    Map<String, TopicPartitionChannel> channels = singleChannel("channel1", mockChannel);
    reporter = newReporter(() -> channels);

    // When
    reporter.start();
    awaitReportCycles();

    // Then - null status should not be reported
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "Null channel status should not trigger telemetry");
  }

  @Test
  void shouldContinueReportingAfterExceptionInChannelStatusRetrieval()
      throws InterruptedException {
    // Given
    TopicPartitionChannel failingChannel = mock(TopicPartitionChannel.class);
    when(failingChannel.getSnowflakeTelemetryChannelStatus())
        .thenThrow(new RuntimeException("Test exception"));
    TopicPartitionChannel workingChannel = createMockChannelWithNonEmptyStatus();

    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("failingChannel", failingChannel);
    channels.put("workingChannel", workingChannel);
    reporter = newReporter(() -> channels);

    // When
    reporter.start();
    // Wait for telemetry to be reported (accounting for jitter)
    waitForTelemetryCount(1, MAX_WAIT_FOR_FIRST_REPORT_MS);

    // Then - should still report for the working channel
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 1,
        "Telemetry should be reported despite exception in one channel");
  }

  @Test
  void shouldContinueReportingAfterExceptionInSupplier() throws InterruptedException {
    // Given
    final AtomicLong callCount = new AtomicLong(0);
    Map<String, TopicPartitionChannel> channels =
        singleChannel("channel1", createMockChannelWithNonEmptyStatus());

    // Fails on the first invocation only, then behaves normally.
    Supplier<Map<String, TopicPartitionChannel>> flakySupplier =
        () -> {
          if (callCount.incrementAndGet() == 1) {
            throw new RuntimeException("First call fails");
          }
          return channels;
        };
    reporter = newReporter(flakySupplier);

    // When
    reporter.start();
    // Wait for telemetry to be reported (accounting for jitter + one extra interval after failure)
    waitForTelemetryCount(1, MAX_WAIT_FOR_FIRST_REPORT_MS + SHORT_REPORT_INTERVAL_MS * 2);

    // Then - should eventually report after first failure
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 1,
        "Telemetry should be reported after supplier recovers from exception");
  }

  @Test
  void shouldReportPeriodically() throws InterruptedException {
    // Given
    Map<String, TopicPartitionChannel> channels =
        singleChannel("channel1", createMockChannelWithNonEmptyStatus());
    reporter = newReporter(() -> channels);

    // When
    reporter.start();
    // Wait for multiple report cycles (jitter + at least one more interval)
    waitForTelemetryCount(2, MAX_WAIT_FOR_FIRST_REPORT_MS + SHORT_REPORT_INTERVAL_MS * 2);

    // Then - should report multiple times
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 2,
        "Telemetry should be reported periodically");
  }

  /** Creates a reporter with the shared test service, connector name, task id and interval. */
  private PeriodicTelemetryReporter newReporter(
      Supplier<Map<String, TopicPartitionChannel>> channelsSupplier) {
    return new PeriodicTelemetryReporter(
        telemetryService, channelsSupplier, CONNECTOR_NAME, TASK_ID, SHORT_REPORT_INTERVAL_MS);
  }

  /** Returns a mutable map holding exactly one channel under the given key. */
  private static Map<String, TopicPartitionChannel> singleChannel(
      String key, TopicPartitionChannel channel) {
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put(key, channel);
    return channels;
  }

  /**
   * Sleeps for three report intervals so that at least one report cycle could have run.
   *
   * <p>NOTE(review): if the reporter's initial jitter can exceed this sleep, the "nothing was
   * sent" assertions that follow may pass before the first cycle ever ran - confirm against
   * {@code MAX_INITIAL_JITTER_MS}.
   */
  private static void awaitReportCycles() throws InterruptedException {
    Thread.sleep(SHORT_REPORT_INTERVAL_MS * 3);
  }

  /**
   * Polls every 10 ms until at least {@code minCount} telemetry entries were sent or {@code
   * maxWaitMs} elapsed. Returns silently on timeout; callers assert on the count afterwards.
   */
  private void waitForTelemetryCount(int minCount, long maxWaitMs) throws InterruptedException {
    long startTime = System.currentTimeMillis();
    while (mockTelemetryClient.getSentTelemetryData().size() < minCount) {
      if (System.currentTimeMillis() - startTime > maxWaitMs) {
        break;
      }
      Thread.sleep(10);
    }
  }

  private TopicPartitionChannel createMockChannelWithNonEmptyStatus() {
    return createMockChannelWithNonEmptyStatus("testChannel");
  }

  /** Mocks a channel whose telemetry status carries the offsets 10, 5 and 15. */
  private TopicPartitionChannel createMockChannelWithNonEmptyStatus(final String channelName) {
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    SnowflakeTelemetryChannelStatus mockStatus =
        new SnowflakeTelemetryChannelStatus(
            "testTable",
            CONNECTOR_NAME,
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            new AtomicLong(10L),
            new AtomicLong(5L),
            new AtomicLong(15L));
    when(mockChannel.getSnowflakeTelemetryChannelStatus()).thenReturn(mockStatus);
    return mockChannel;
  }

  /** Mocks a channel whose telemetry status has all three offsets set to -1. */
  private TopicPartitionChannel createMockChannelWithEmptyStatus() {
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    SnowflakeTelemetryChannelStatus emptyStatus =
        new SnowflakeTelemetryChannelStatus(
            "testTable",
            CONNECTOR_NAME,
            "testChannel",
            System.currentTimeMillis(),
            Optional.empty(),
            new AtomicLong(-1L),
            new AtomicLong(-1L),
            new AtomicLong(-1L));
    when(mockChannel.getSnowflakeTelemetryChannelStatus()).thenReturn(emptyStatus);
    return mockChannel;
  }

  /**
   * Mock implementation of Telemetry for testing. Batched entries move to the "sent" list when
   * {@link #sendBatchAsync()} runs on the internal single-thread executor.
   */
  static class MockTelemetryClient implements Telemetry {
    private final LinkedList<TelemetryData> telemetryDataList = new LinkedList<>();
    private final LinkedList<TelemetryData> sentTelemetryData = new LinkedList<>();
    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    @Override
    public void addLogToBatch(TelemetryData telemetryData) {
      synchronized (this) {
        telemetryDataList.add(telemetryData);
      }
    }

    @Override
    public void close() {
      synchronized (this) {
        telemetryDataList.clear();
        sentTelemetryData.clear();
      }
    }

    @Override
    public Future<Boolean> sendBatchAsync() {
      return executor.submit(
          () -> {
            synchronized (MockTelemetryClient.this) {
              sentTelemetryData.addAll(telemetryDataList);
              telemetryDataList.clear();
            }
            return true;
          });
    }

    @Override
    public void postProcess(String s, String s1, int i, Throwable throwable) {}

    /** Returns a snapshot copy of everything sent so far. */
    LinkedList<TelemetryData> getSentTelemetryData() {
      synchronized (this) {
        return new LinkedList<>(sentTelemetryData);
      }
    }

    /**
     * Stops the worker thread once already-submitted sends have finished. Kept separate from
     * {@link #close()} so that a close triggered by production code cannot make later {@link
     * #sendBatchAsync()} calls fail.
     */
    void shutdown() {
      executor.shutdown();
    }
  }
}

================================================
FILE:
src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/AppendRowWithFallbackPolicyTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.ingest.streaming.SFException;
import dev.failsafe.function.CheckedRunnable;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link AppendRowWithFallbackPolicy#executeWithFallback}: success, backpressure on
 * retryable SDK errors, fallback on other SDK errors, and pass-through of non-SDK exceptions.
 */
public class AppendRowWithFallbackPolicyTest {

  private final String channelName = "test_channel";

  @Test
  void shouldReturnChannelOnFirstAttemptSuccess() {
    // Given
    CheckedRunnable supplier = () -> {};

    // When
    boolean succeeded =
        AppendRowWithFallbackPolicy.executeWithFallback(supplier, failingFallback(), channelName);

    // Then
    assertTrue(succeeded, "Should return true on successful append");
  }

  @Test
  void shouldThrowBackpressureExceptionOnRetryableException() {
    // Given
    AtomicInteger attemptCounter = new AtomicInteger(0);
    SFException retryableException =
        new SFException("MemoryThresholdExceeded", "Some Message", 429, "Some Stacktrace");
    CheckedRunnable supplier =
        () -> {
          attemptCounter.getAndIncrement();
          throw retryableException;
        };

    // When/Then
    BackpressureException thrownException =
        assertThrows(
            BackpressureException.class,
            () ->
                AppendRowWithFallbackPolicy.executeWithFallback(
                    supplier, failingFallback(), channelName));

    // Then
    assertEquals(1, attemptCounter.get()); // Should only attempt once (no retry)
    assertSame(retryableException, thrownException.getCause());
    assertEquals("SDK backpressure: MemoryThresholdExceeded", thrownException.getMessage());
  }

  @Test
  void shouldThrowBackpressureExceptionForAllRetryableErrorCodes() {
    // Test ReceiverSaturated
    assertThrowsBackpressureException("ReceiverSaturated");

    // Test MemoryThresholdExceeded
    assertThrowsBackpressureException("MemoryThresholdExceeded");

    // Test MemoryThresholdExceededInContainer
    assertThrowsBackpressureException("MemoryThresholdExceededInContainer");

    // Test HttpRetryableClientError
    assertThrowsBackpressureException("HttpRetryableClientError");
  }

  /** Asserts that an SFException with the given error code surfaces as BackpressureException. */
  private void assertThrowsBackpressureException(String errorCode) {
    // Given
    SFException sfException = new SFException(errorCode, "message", 429, "stack");
    CheckedRunnable supplier =
        () -> {
          throw sfException;
        };

    // When/Then
    BackpressureException exception =
        assertThrows(
            BackpressureException.class,
            () ->
                AppendRowWithFallbackPolicy.executeWithFallback(
                    supplier, failingFallback(), channelName));

    assertSame(sfException, exception.getCause());
    assertEquals("SDK backpressure: " + errorCode, exception.getMessage());
  }

  @Test
  void shouldFallbackOnNonRetryableSFException() {
    // Given
    AtomicInteger attemptCounter = new AtomicInteger(0);
    SFException nonRetryableException =
        new SFException("NonRetryableError", "Some Message", 420, "Some Stacktrace");
    CheckedRunnable supplier =
        () -> {
          if (attemptCounter.getAndIncrement() == 0) {
            throw nonRetryableException;
          }
        };
    AtomicInteger fallbackCallCounter = new AtomicInteger(0);

    // When
    boolean succeeded =
        AppendRowWithFallbackPolicy.executeWithFallback(
            supplier, countingFallbackSupplier(fallbackCallCounter), channelName);

    // Then
    assertEquals(1, attemptCounter.get()); // Should not retry
    assertEquals(1, fallbackCallCounter.get()); // Fallback should be called once
    assertFalse(succeeded, "Should return false when fallback fired");
  }

  @Test
  void shouldNotRetryNorFallbackOnNonSFException() {
    // Given
    AtomicInteger attemptCounter = new AtomicInteger(0);
    IllegalArgumentException nonRetryableException = new IllegalArgumentException("Non-retryable");
    CheckedRunnable supplier =
        () -> {
          attemptCounter.getAndIncrement();
          throw nonRetryableException;
        };

    // When/Then
    IllegalArgumentException thrownException =
        assertThrows(
            IllegalArgumentException.class,
            () ->
                AppendRowWithFallbackPolicy.executeWithFallback(
                    supplier, failingFallback(), channelName));

    assertSame(nonRetryableException, thrownException);
    assertEquals(1, attemptCounter.get()); // Should only attempt once
  }

  /** Fallback that fails the test scenario if it is ever invoked. */
  private AppendRowWithFallbackPolicy.FallbackSupplierWithException failingFallback() {
    return exception -> {
      throw new RuntimeException("Test Scenario Failure");
    };
  }

  /** Fallback that only counts its invocations. */
  private AppendRowWithFallbackPolicy.FallbackSupplierWithException countingFallbackSupplier(
      AtomicInteger callCounter) {
    return exception -> callCounter.getAndIncrement();
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/BackpressureExceptionTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.ingest.streaming.SFException;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link BackpressureException}: message/cause wrapping, the retryable error-code
 * classification done by {@code isRetryableError}, and constructor validation.
 */
public class BackpressureExceptionTest {

  @Test
  void shouldWrapSFExceptionWithCorrectMessage() {
    // Given
    SFException cause = new SFException("ReceiverSaturated", "Server overloaded", 429, "stack");

    // When
    BackpressureException exception = new BackpressureException(cause);

    // Then
    assertEquals("SDK backpressure: ReceiverSaturated", exception.getMessage());
    assertSame(cause, exception.getCause());
  }

  @Test
  void shouldRecognizeReceiverSaturatedAsRetryable() {
    // Given
    SFException sfException = new SFException("ReceiverSaturated", "message", 429, "stack");

    // When/Then
    assertTrue(BackpressureException.isRetryableError(sfException));
  }

  @Test
  void shouldRecognizeMemoryThresholdExceededAsRetryable() {
    // Given
    SFException sfException = new SFException("MemoryThresholdExceeded", "message", 429, "stack");

    // When/Then
    assertTrue(BackpressureException.isRetryableError(sfException));
  }

  @Test
  void shouldRecognizeMemoryThresholdExceededInContainerAsRetryable() {
    // Given
    SFException sfException =
        new SFException("MemoryThresholdExceededInContainer", "message", 429, "stack");

    // When/Then
    assertTrue(BackpressureException.isRetryableError(sfException));
  }

  @Test
  void shouldRecognizeHttpRetryableClientErrorAsRetryable() {
    // Given
    SFException sfException = new SFException("HttpRetryableClientError", "message", 503, "stack");

    // When/Then
    assertTrue(BackpressureException.isRetryableError(sfException));
  }

  @Test
  void shouldRejectNonRetryableSFException() {
    // Given
    SFException sfException = new SFException("SomeOtherError", "message", 500, "stack");

    // When/Then
    assertFalse(BackpressureException.isRetryableError(sfException));
  }

  @Test
  void shouldRejectNonSFException() {
    // Given
    IllegalArgumentException nonSFException = new IllegalArgumentException("not an SFException");

    // When/Then
    assertFalse(BackpressureException.isRetryableError(nonSFException));
  }

  @Test
  void shouldRejectNullException() {
    // When/Then
    assertFalse(BackpressureException.isRetryableError(null));
  }

  @Test
  void shouldRejectConstructionWithNonRetryableSFException() {
    // Given
    SFException nonRetryable = new SFException("SomeOtherError", "message", 500, "stack");

    // When/Then
    assertThrows(IllegalArgumentException.class, () -> new BackpressureException(nonRetryable));
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/ClientRecreationExceptionTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static
org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import com.snowflake.ingest.streaming.SFException; import org.junit.jupiter.api.Test; public class ClientRecreationExceptionTest { @Test void shouldWrapSFExceptionWithCorrectMessage() { SFException cause = new SFException("InvalidClientError", "Client is invalid", 409, "Conflict"); ClientRecreationException exception = new ClientRecreationException(cause); assertEquals("SDK client invalid: InvalidClientError", exception.getMessage()); assertSame(cause, exception.getCause()); } @Test void shouldRecognizeInvalidClientError() { SFException sfException = new SFException("InvalidClientError", "Client is invalid", 409, "Conflict"); assertTrue(ClientRecreationException.isClientInvalidError(sfException)); } @Test void shouldRecognizeSfApiPipeFailedOverError() { SFException sfException = new SFException("SfApiPipeFailedOverError", "HTTP 410 pipe failover", 400, "Bad Request"); assertTrue(ClientRecreationException.isClientInvalidError(sfException)); } @Test void shouldRecognizeClosedClientError() { SFException sfException = new SFException("ClosedClientError", "Client is closed", 409, "Conflict"); assertTrue(ClientRecreationException.isClientInvalidError(sfException)); } @Test void shouldNotRecognizeBackpressureErrors() { assertFalse( ClientRecreationException.isClientInvalidError( new SFException("ReceiverSaturated", "message", 429, "stack"))); assertFalse( ClientRecreationException.isClientInvalidError( new SFException("MemoryThresholdExceeded", "message", 429, "stack"))); } @Test void shouldNotRecognizeChannelLevelErrors() { assertFalse( ClientRecreationException.isClientInvalidError( new SFException("InvalidChannelError", "Channel invalid", 409, "Conflict"))); assertFalse( 
ClientRecreationException.isClientInvalidError( new SFException("ClosedChannelError", "Channel closed", 409, "Conflict"))); } @Test void shouldNotRecognizeOtherSFException() { SFException sfException = new SFException("SomeOtherError", "message", 500, "stack"); assertFalse(ClientRecreationException.isClientInvalidError(sfException)); } @Test void shouldNotRecognizeNonSFException() { IllegalArgumentException nonSFException = new IllegalArgumentException("not an SFException"); assertFalse(ClientRecreationException.isClientInvalidError(nonSFException)); } @Test void shouldNotRecognizeNull() { assertFalse(ClientRecreationException.isClientInvalidError(null)); } @Test void shouldRejectConstructionWithNonClientInvalidSFException() { SFException nonClientInvalid = new SFException("SomeOtherError", "message", 500, "stack"); assertThrows( IllegalArgumentException.class, () -> new ClientRecreationException(nonClientInvalid)); } @Test void shouldRejectConstructionWithBackpressureSFException() { SFException backpressure = new SFException("ReceiverSaturated", "message", 429, "stack"); assertThrows(IllegalArgumentException.class, () -> new ClientRecreationException(backpressure)); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/SnowpipeStreamingPartitionChannelTest.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; import static 
org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.snowflake.ingest.streaming.ChannelStatus; import com.snowflake.ingest.streaming.ChannelStatusBatch; import com.snowflake.ingest.streaming.OpenChannelResult; import com.snowflake.ingest.streaming.SFException; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.builder.SinkRecordBuilder; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder; import com.snowflake.kafka.connector.config.SnowflakeValidation; import com.snowflake.kafka.connector.internal.DescribeTableRow; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext; import com.snowflake.kafka.connector.internal.streaming.StreamingErrorHandler; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus; import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode; import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse; import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService; import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import 
java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class SnowpipeStreamingPartitionChannelTest { private static final String CONNECTOR_NAME = "test_connector"; private static final String TABLE_NAME = "test_table"; private static final String TOPIC_NAME = "test_topic"; private static final int PARTITION = 0; private static final String SSV1_CHANNEL_NAME = TOPIC_NAME + "_" + PARTITION; private String channelName; private String pipeName; private SnowflakeTelemetryService mockTelemetryService; private StreamingErrorHandler mockErrorHandler; private ExecutorService openChannelIoExecutor; private TrackingIngestClientSupplier trackingClientSupplier; private TrackingStreamingIngestClient trackingClient; private InMemorySinkTaskContext sinkTaskContext; @BeforeEach void setUp() { // Generate unique names to avoid StreamingClientPools caching issues between tests final String uniqueId = UUID.randomUUID().toString().substring(0, 8); channelName = "test_channel_" + uniqueId; pipeName = "test_pipe_" + uniqueId; mockTelemetryService = mock(SnowflakeTelemetryService.class); mockErrorHandler = mock(StreamingErrorHandler.class); sinkTaskContext = new InMemorySinkTaskContext( Collections.singleton(new TopicPartition(TOPIC_NAME, PARTITION))); trackingClientSupplier = new TrackingIngestClientSupplier(); trackingClient = new TrackingStreamingIngestClient(pipeName, trackingClientSupplier); openChannelIoExecutor = 
Executors.newSingleThreadExecutor(); } @AfterEach void tearDown() { openChannelIoExecutor.shutdownNow(); } @Test void shouldNotCloseChannelOnFirstOpen() { // When: Creating a new channel (first open) final SnowpipeStreamingPartitionChannel channel = createPartitionChannel(); // Wait for async init to complete channel.getChannel(); // Then: close() should not have been called because channel was null initially assertEquals(0, trackingClientSupplier.getCloseCallCount()); } @Test void shouldCloseOpenChannelBeforeReopening() { // Given: A partition channel is created and its underlying channel is open final SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); // Wait for async init to complete partitionChannel.getChannel(); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); assertTrue(!partitionChannel.isChannelClosed(), "Channel should be open before recovery"); // Record close count before recovery final int closeCountBeforeRecovery = trackingClientSupplier.getCloseCallCount(); // When: appendRow throws SFException once, triggering the fallback that reopens the channel. // After recovery the fallback completes normally — no exception propagates. 
trackingClientSupplier.setNonRetryableAppendRowFailures(1); partitionChannel.insertRecord(buildValidRecord(0), true); // reopenChannel closes the old channel before opening a new one assertEquals(closeCountBeforeRecovery + 1, trackingClientSupplier.getCloseCallCount()); assertEquals(2, trackingClientSupplier.getTotalChannelsCreated()); } @Test void closeChannelAsyncCancelsInitializationBeforeChannelOpens() throws Exception { // Block the single-threaded executor so the channel init task is queued but not started CountDownLatch blockExecutor = new CountDownLatch(1); openChannelIoExecutor.submit( () -> { blockExecutor.await(); return null; }); SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); // closeChannelAsync sets cancelled=true while the init task is still queued CompletableFuture closeFuture = partitionChannel.closeChannelAsync(); // Unblock the executor — init task starts, sees cancelled=true, throws CancellationException blockExecutor.countDown(); // The close future should complete via the exceptionally branch closeFuture.get(5, TimeUnit.SECONDS); // No SDK channel was ever opened or closed assertEquals(0, trackingClientSupplier.getTotalChannelsCreated()); assertEquals(0, trackingClientSupplier.getCloseCallCount()); } @Test void reopenChannelRecoversAfterFailedAsyncInitialization() { // Make the first openChannel call (during async init) throw trackingClientSupplier.setThrowOnOpenChannel(true); SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); // Wait for the async init to complete exceptionally assertThrows(SFException.class, partitionChannel::getChannel); assertEquals( 0, trackingClientSupplier.getTotalChannelsCreated(), "No channels should have been created since openChannel threw"); // Allow subsequent openChannel calls to succeed (simulating a transient failure) trackingClientSupplier.setThrowOnOpenChannel(false); // First insertRecord triggers recovery via the Failsafe fallback. 
reopenChannel handles // the failed init future gracefully (skips close, opens a new channel). After successful // recovery the record is inserted on the new channel — no exception propagates. partitionChannel.insertRecord(buildValidRecord(0), true); assertEquals( 1, trackingClientSupplier.getTotalChannelsCreated(), "reopenChannel should have opened a new channel after transient init failure"); } @Test void reopenChannelClosesOldChannelWhenAsyncInitSucceeded() { SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); partitionChannel.getChannel(); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); assertEquals(0, trackingClientSupplier.getCloseCallCount()); // Trigger reopenChannel via appendRow SFException (throw once, then succeed on new channel) trackingClientSupplier.setNonRetryableAppendRowFailures(1); partitionChannel.insertRecord(buildValidRecord(0), true); // reopenChannel should have closed the old channel BEFORE opening the new one assertEquals( 1, trackingClientSupplier.getCloseCallCount(), "Old channel should have been closed during reopenChannel"); assertEquals( 2, trackingClientSupplier.getTotalChannelsCreated(), "A new channel should have been opened during reopenChannel"); } @Test void insertRecordThrowsBackpressureExceptionOnRetryableError() { SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); partitionChannel.getChannel(); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); // appendRow will throw MemoryThresholdExceeded (retryable error) trackingClientSupplier.setRetryableAppendRowFailures(1); // BackpressureException should propagate up (not caught in this layer) // Task 4 will handle it at the batch-level insert() loop BackpressureException exception = assertThrows( BackpressureException.class, () -> partitionChannel.insertRecord(buildValidRecord(0), true)); assertEquals("SDK backpressure: MemoryThresholdExceeded", exception.getMessage()); // No channel reopening 
should have happened - the exception signals backpressure, not channel // invalidation assertEquals(0, trackingClientSupplier.getCloseCallCount()); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); } @Test void isInitializingReturnsTrueWhileChannelFutureIsPending() throws Exception { // Block the executor so the channel init task is queued but not started CountDownLatch blockExecutor = new CountDownLatch(1); openChannelIoExecutor.submit( () -> { blockExecutor.await(); return null; }); SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); assertTrue(partitionChannel.isInitializing(), "Should be initializing while future is pending"); // Unblock and wait for init to complete blockExecutor.countDown(); partitionChannel.getChannel(); assertFalse( partitionChannel.isInitializing(), "Should not be initializing after future completes"); } @Test void channelInvalidationRecovery_taskSurvivesAndContinuesIngesting() { // This test validates the fix for the channel invalidation recovery bug: // Before the fix, a channel invalidation (SFException on appendRow) would trigger // the fallback to reopen the channel, but then unconditionally re-throw the exception, // causing the KC framework to kill the task as "unrecoverable". // After the fix, the fallback reopens the channel and completes normally, allowing // Failsafe to re-execute appendRow on the new channel. SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); partitionChannel.getChannel(); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); // Insert first record successfully partitionChannel.insertRecord(buildValidRecord(0), true); // Simulate channel invalidation: appendRow throws once (non-retryable SFException), // then succeeds on the reopened channel. 
trackingClientSupplier.setNonRetryableAppendRowFailures(1); partitionChannel.insertRecord(buildValidRecord(1), false); // The channel should have been reopened (old closed, new opened) assertEquals(1, trackingClientSupplier.getCloseCallCount()); assertEquals(2, trackingClientSupplier.getTotalChannelsCreated()); // Subsequent records should continue to be ingested on the new channel partitionChannel.insertRecord(buildValidRecord(2), false); partitionChannel.insertRecord(buildValidRecord(3), false); // No additional channel reopenings assertEquals(1, trackingClientSupplier.getCloseCallCount()); assertEquals(2, trackingClientSupplier.getTotalChannelsCreated()); } @Test void channelInvalidation_stopsReopeningAfterMaxConsecutiveRecoveries() { // If the channel is permanently broken (every appendRow fails), we should not // loop forever reopening channels. After MAX_CONSECUTIVE_RECOVERIES (5) the // fallback stops reopening — no more channel churn. SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel(); partitionChannel.getChannel(); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); // Every appendRow throws — channel is permanently invalid trackingClientSupplier.setThrowOnAppendRow(true); // Send many records. Each triggers the fallback, but only the first // MAX_CONSECUTIVE_RECOVERIES (5) actually reopen the channel. After that // the circuit breaker trips and no more channels are created. for (int i = 0; i < 20; i++) { partitionChannel.insertRecord(buildValidRecord(i), i == 0); } // Verify we didn't create an unbounded number of channels. // 1 initial + at most 5 recoveries = at most 6 channels. 
assertTrue( trackingClientSupplier.getTotalChannelsCreated() <= 6, "Expected at most 6 channels (1 initial + 5 recoveries), got: " + trackingClientSupplier.getTotalChannelsCreated()); } private SinkRecord buildValidRecord(long offset) { JsonConverter jsonConverter = new JsonConverter(); jsonConverter.configure(Collections.singletonMap("schemas.enable", "false"), false); SchemaAndValue schemaAndValue = jsonConverter.toConnectData( TOPIC_NAME, "{\"name\": \"test\"}".getBytes(StandardCharsets.UTF_8)); return SinkRecordBuilder.forTopicPartition(TOPIC_NAME, PARTITION) .withSchemaAndValue(schemaAndValue) .withOffset(offset) .build(); } private SnowpipeStreamingPartitionChannel createPartitionChannel() { final TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, PARTITION); final PartitionOffsetTracker offsetTracker = new PartitionOffsetTracker(topicPartition, sinkTaskContext, channelName); final SnowflakeTelemetryChannelStatus telemetryChannelStatus = new SnowflakeTelemetryChannelStatus( TABLE_NAME, CONNECTOR_NAME, channelName, System.currentTimeMillis(), Optional.empty(), offsetTracker.persistedOffsetRef(), offsetTracker.processedOffsetRef(), offsetTracker.consumerGroupOffsetRef()); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName(CONNECTOR_NAME) .taskId("0") .enableSchematization(false) .enableColumnIdentifierNormalization(true) .validation(SnowflakeValidation.SERVER_SIDE) .build(); return new SnowpipeStreamingPartitionChannel( TABLE_NAME, channelName, pipeName, trackingClient, openChannelIoExecutor, mockTelemetryService, telemetryChannelStatus, offsetTracker, taskConfig, mockErrorHandler, TaskMetrics.noop(), false, null, Optional.empty()); } @Test void parseOffsetToken_nullReturnsNoOffset() { assertEquals( NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE, SnowpipeStreamingPartitionChannel.parseOffsetToken(null, "test_channel")); } @Test void parseOffsetToken_validToken() { assertEquals(42L, 
SnowpipeStreamingPartitionChannel.parseOffsetToken("42", "test_channel")); assertEquals(0L, SnowpipeStreamingPartitionChannel.parseOffsetToken("0", "test_channel")); assertEquals( Long.MAX_VALUE, SnowpipeStreamingPartitionChannel.parseOffsetToken( String.valueOf(Long.MAX_VALUE), "test_channel")); } @Test void parseOffsetToken_invalidTokenThrows() { assertThrows( ConnectException.class, () -> SnowpipeStreamingPartitionChannel.parseOffsetToken("not_a_number", "test_channel")); assertThrows( ConnectException.class, () -> SnowpipeStreamingPartitionChannel.parseOffsetToken("", "test_channel")); assertThrows( ConnectException.class, () -> SnowpipeStreamingPartitionChannel.parseOffsetToken("12.5", "test_channel")); } // --- Validation integration tests --- private SnowflakeConnectionService mockConnService; private SnowpipeStreamingPartitionChannel createValidationEnabledChannel( List describeResult, boolean enableSchematization, boolean shouldEvolveSchema) { mockConnService = mock(SnowflakeConnectionService.class); when(mockConnService.describeTable(TABLE_NAME)).thenReturn(Optional.of(describeResult)); final TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, PARTITION); final PartitionOffsetTracker offsetTracker = new PartitionOffsetTracker(topicPartition, sinkTaskContext, channelName); final SnowflakeTelemetryChannelStatus telemetryChannelStatus = new SnowflakeTelemetryChannelStatus( TABLE_NAME, CONNECTOR_NAME, channelName, System.currentTimeMillis(), Optional.empty(), offsetTracker.persistedOffsetRef(), offsetTracker.processedOffsetRef(), offsetTracker.consumerGroupOffsetRef()); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName(CONNECTOR_NAME) .taskId("0") .enableSchematization(enableSchematization) .enableColumnIdentifierNormalization(true) .validation(SnowflakeValidation.CLIENT_SIDE) .build(); return new SnowpipeStreamingPartitionChannel( TABLE_NAME, channelName, pipeName, trackingClient, openChannelIoExecutor, 
mockTelemetryService, telemetryChannelStatus, offsetTracker, taskConfig, mockErrorHandler, TaskMetrics.noop(), shouldEvolveSchema, mockConnService, Optional.empty()); } private static final List STANDARD_TABLE_SCHEMA = Arrays.asList( new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"), new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y")); @Test void validationEnabled_validRecord_insertsSuccessfully() { // enableSchematization=false so the record is wrapped into RECORD_CONTENT/RECORD_METADATA SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(STANDARD_TABLE_SCHEMA, false, true); SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class)); assertEquals(1, trackingClientSupplier.getTotalChannelsCreated()); } @Test void validationEnabled_extraColumn_triggersSchemaEvolution() { // Table only has RECORD_METADATA — RECORD_CONTENT will be "extra" List schema = Arrays.asList(new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y")); SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(schema, true, true); SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); // Schema evolution attempted, but refreshed schema still missing RECORD_CONTENT -> error verify(mockErrorHandler).handleError(any(Exception.class), eq(record)); } @Test void validationEnabled_schemaEvolutionDisabled_structuralErrorRoutesToDlq() { List schema = Arrays.asList(new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y")); SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(schema, true, false); SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); verify(mockErrorHandler).handleError(any(Exception.class), eq(record)); verify(mockConnService, never()).appendColumnsToTable(any(), any()); verify(mockConnService, never()).alterNonNullableColumns(any(), any()); } 
@Test void validationEnabled_describeTableFails_disablesValidation() { mockConnService = mock(SnowflakeConnectionService.class); when(mockConnService.describeTable(TABLE_NAME)).thenReturn(Optional.empty()); final TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, PARTITION); final PartitionOffsetTracker offsetTracker = new PartitionOffsetTracker(topicPartition, sinkTaskContext, channelName); final SnowflakeTelemetryChannelStatus telemetryChannelStatus = new SnowflakeTelemetryChannelStatus( TABLE_NAME, CONNECTOR_NAME, channelName, System.currentTimeMillis(), Optional.empty(), offsetTracker.persistedOffsetRef(), offsetTracker.processedOffsetRef(), offsetTracker.consumerGroupOffsetRef()); SinkTaskConfig taskConfig = SinkTaskConfigTestBuilder.builder() .connectorName(CONNECTOR_NAME) .taskId("0") .enableSchematization(true) .enableColumnIdentifierNormalization(true) .validation(SnowflakeValidation.CLIENT_SIDE) .build(); SnowpipeStreamingPartitionChannel channel = new SnowpipeStreamingPartitionChannel( TABLE_NAME, channelName, pipeName, trackingClient, openChannelIoExecutor, mockTelemetryService, telemetryChannelStatus, offsetTracker, taskConfig, mockErrorHandler, TaskMetrics.noop(), true, mockConnService, Optional.empty()); SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class)); } @Test void validationEnabled_notNullColumn_detectsMissingValue() { // RECORD_CONTENT and RECORD_METADATA are nullable, but REQUIRED_COL is NOT NULL List schema = Arrays.asList( new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"), new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"), new DescribeTableRow("REQUIRED_COL", "VARCHAR(100)", null, "N")); // shouldEvolveSchema=true so schema evolution is attempted for the missing NOT NULL // col SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(schema, true, true); // Record doesn't 
have REQUIRED_COL — should trigger structural error SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); verify(mockErrorHandler).handleError(any(Exception.class), eq(record)); } @Test void validationEnabled_multipleExtraColumns_passesRawColumnNames() { List schema = Arrays.asList(new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y")); SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(schema, true, true); String json = "{\"city\": \"Hsinchu\", \"age\": 25, \"country\": \"TW\"}"; JsonConverter jsonConverter = new JsonConverter(); jsonConverter.configure(Collections.singletonMap("schemas.enable", "false"), false); SchemaAndValue schemaAndValue = jsonConverter.toConnectData(TOPIC_NAME, json.getBytes(StandardCharsets.UTF_8)); SinkRecord record = SinkRecordBuilder.forTopicPartition(TOPIC_NAME, PARTITION) .withSchemaAndValue(schemaAndValue) .withOffset(0) .build(); channel.insertRecord(record, true); verify(mockConnService) .appendColumnsToTable( eq(TABLE_NAME), argThat( columnInfos -> { if (columnInfos == null) return false; boolean hasCity = columnInfos.containsKey("CITY"); boolean hasAge = columnInfos.containsKey("AGE"); boolean hasCountry = columnInfos.containsKey("COUNTRY"); return hasCity && hasAge && hasCountry; })); } @Test void validationEnabled_identityColumnMissing_insertsSuccessfully() { List schema = Arrays.asList( new DescribeTableRow( "ID", "NUMBER(38,0)", null, "N", null, "IDENTITY START 1 INCREMENT 1"), new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"), new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y")); // enableSchematization=false so the record populates RECORD_CONTENT/RECORD_METADATA only SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(schema, false, true); SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); // Identity column is missing from the row but should not trigger an error verify(mockErrorHandler, 
never()).handleError(any(Exception.class), any(SinkRecord.class)); } @Test void validationEnabled_defaultNotNullColumnMissing_insertsSuccessfully() { List schema = Arrays.asList( new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"), new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"), new DescribeTableRow( "CREATED_AT", "TIMESTAMP_NTZ(9)", null, "N", "CURRENT_TIMESTAMP()", null)); SnowpipeStreamingPartitionChannel channel = createValidationEnabledChannel(schema, false, true); SinkRecord record = buildValidRecord(0); channel.insertRecord(record, true); verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class)); } // --- SSv1 offset migration tests --- private SnowpipeStreamingPartitionChannel createPartitionChannelWithMigration( Ssv1MigrationMode migrationMode, SnowflakeConnectionService mockConn) { final TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, PARTITION); final PartitionOffsetTracker offsetTracker = new PartitionOffsetTracker(topicPartition, sinkTaskContext, channelName); final SnowflakeTelemetryChannelStatus telemetryChannelStatus = new SnowflakeTelemetryChannelStatus( TABLE_NAME, CONNECTOR_NAME, channelName, System.currentTimeMillis(), Optional.empty(), offsetTracker.persistedOffsetRef(), offsetTracker.processedOffsetRef(), offsetTracker.consumerGroupOffsetRef()); SinkTaskConfig migrationTaskConfig = SinkTaskConfigTestBuilder.builder() .connectorName(CONNECTOR_NAME) .taskId("0") .enableSchematization(false) .enableColumnIdentifierNormalization(true) .validation(SnowflakeValidation.SERVER_SIDE) .ssv1MigrationMode(migrationMode) .build(); return new SnowpipeStreamingPartitionChannel( TABLE_NAME, channelName, pipeName, trackingClient, openChannelIoExecutor, mockTelemetryService, telemetryChannelStatus, offsetTracker, migrationTaskConfig, mockErrorHandler, TaskMetrics.noop(), false, mockConn, migrationMode == Ssv1MigrationMode.SKIP ? 
Optional.empty() : Optional.of(SSV1_CHANNEL_NAME)); } @Test void migration_skip_doesNotConsultSsv1() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.SKIP, mockConn); channel.getChannel(); // System function should never be called when mode is SKIP verify(mockConn, never()).migrateSsv1ChannelOffset(any(), any(), any(), any()); } @Test void migration_bestEffort_usesSsv1OffsetWhenSsv2HasNone() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.migrated(100L)); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); channel.getChannel(); // SSv2 has no offset (null from FakeClient), so SSv1 should be consulted verify(mockConn).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName); // Kafka offset should be set to ssv1Offset + 1 (101) assertEquals(101L, sinkTaskContext.offset(new TopicPartition(TOPIC_NAME, PARTITION))); } @Test void migration_bestEffort_proceedsWhenSsv1NotFound() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.channelNotFound()); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); channel.getChannel(); // SSv1 not found — best_effort falls through to consumer group offset verify(mockConn).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName); } @Test void migration_bestEffort_proceedsWhenSsv1HasNoOffset() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); 
when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.channelFoundNoOffset()); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); channel.getChannel(); // SSv1 channel exists but has no committed offset — best_effort falls through verify(mockConn).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName); } @Test void migration_bestEffort_ignoresSsv1WhenSsv2HasOffset() { // Pre-seed an offset in the tracking client so SSv2 openChannel returns a non-null offset trackingClient = new TrackingStreamingIngestClient(pipeName, trackingClientSupplier) { @Override public OpenChannelResult openChannel(String channelNameArg, String offsetToken) { OpenChannelResult result = super.openChannel(channelNameArg, offsetToken); ChannelStatus status = new ChannelStatus( "db", "schema", pipeName, channelNameArg, "SUCCESS", "50", Instant.now(), 0, 0, 0, null, null, null, null, Instant.now()); return new OpenChannelResult(result.getChannel(), status); } }; SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); channel.getChannel(); // SSv2 already has an offset, so system function should NOT be called verify(mockConn, never()).migrateSsv1ChannelOffset(any(), any(), any(), any()); // Kafka offset should be set to ssv2Offset + 1 (51) assertEquals(51L, sinkTaskContext.offset(new TopicPartition(TOPIC_NAME, PARTITION))); } @Test void migration_strict_usesSsv1OffsetWhenFound() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.migrated(100L)); SnowpipeStreamingPartitionChannel channel = 
createPartitionChannelWithMigration(Ssv1MigrationMode.STRICT, mockConn); channel.getChannel(); // SSv1 found — strict mode migrates the offset just like best_effort verify(mockConn).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName); assertEquals(101L, sinkTaskContext.offset(new TopicPartition(TOPIC_NAME, PARTITION))); } @Test void migration_strict_throwsWhenSsv1NotFound() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.channelNotFound()); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.STRICT, mockConn); // SSv1 not found — strict mode fails rather than falling through assertThrows(ConnectException.class, () -> channel.getChannel()); } @Test void migration_strict_proceedsWhenSsv1HasNoOffset() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.channelFoundNoOffset()); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.STRICT, mockConn); channel.getChannel(); // SSv1 channel exists but has no committed offset — strict does NOT throw because the // channel was found (nothing to migrate is different from channel not existing) verify(mockConn).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName); } @Test void migration_ssv2OpenFails_doesNotConsultSsv1() { // Simulate SSv2 openChannel failure trackingClientSupplier.setThrowOnOpenChannel(true); SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); // SSv2 open failed, so the channel init future should fail 
assertThrows(RuntimeException.class, () -> channel.getChannel()); // System function should NOT have been called — SSv2 must open successfully first verify(mockConn, never()).migrateSsv1ChannelOffset(any(), any(), any(), any()); } @Test void migration_systemFunctionFails_propagatesException() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenThrow( new RuntimeException( "SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET failed for ssv1Channel=" + SSV1_CHANNEL_NAME)); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); // The system function failure must propagate, not silently fall through to consumer group // offset. Falling through would risk duplicates if the consumer group offset is behind // the SSv1 offset. RuntimeException exception = assertThrows(RuntimeException.class, () -> channel.getChannel()); assertTrue(exception.getMessage().contains("SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET")); } @Test void migration_bestEffort_consultsSsv1DuringReopenChannel() { SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class); when(mockConn.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName)) .thenReturn(Ssv1MigrationResponse.migrated(100L)); // Fail the initial channel open so no migration fires during construction trackingClientSupplier.setThrowOnOpenChannel(true); SnowpipeStreamingPartitionChannel channel = createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, mockConn); assertThrows(RuntimeException.class, () -> channel.getChannel()); // Initial open failed — system function should not have been called verify(mockConn, never()).migrateSsv1ChannelOffset(any(), any(), any(), any()); // Allow the next openChannel to succeed trackingClientSupplier.setThrowOnOpenChannel(false); // Trigger reopenChannel via insertRecord: getChannel() 
re-throws the SFException from the // failed init future, which AppendRowWithFallbackPolicy catches and invokes reopenChannel. // reopenChannel's .exceptionally() handler handles the failed init, then opens a new channel. channel.insertRecord(buildValidRecord(0), true); // Wait for the async reopen to complete channel.getChannel(); // reopenChannel should have consulted SSv1 exactly once (the initial open never reached it) verify(mockConn, times(1)) .migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName); // Kafka offset should be set to ssv1Offset + 1 (101) assertEquals(101L, sinkTaskContext.offset(new TopicPartition(TOPIC_NAME, PARTITION))); } /** Shared state holder that tracks channel operations for verification in tests. */ static class TrackingIngestClientSupplier { private final AtomicInteger closeCallCount = new AtomicInteger(0); private final AtomicInteger totalChannelsCreated = new AtomicInteger(0); private volatile boolean throwOnOffsetToken; private volatile boolean throwOnAppendRow; private volatile boolean throwOnOpenChannel; private final AtomicInteger retryableAppendRowFailures = new AtomicInteger(0); private final AtomicInteger nonRetryableAppendRowFailures = new AtomicInteger(0); private volatile CountDownLatch blockOnOpenChannel; int getCloseCallCount() { return closeCallCount.get(); } int getTotalChannelsCreated() { return totalChannelsCreated.get(); } void setThrowOnOffsetToken(boolean throwOnOffsetToken) { this.throwOnOffsetToken = throwOnOffsetToken; } void setThrowOnAppendRow(boolean throwOnAppendRow) { this.throwOnAppendRow = throwOnAppendRow; } void setThrowOnOpenChannel(boolean throwOnOpenChannel) { this.throwOnOpenChannel = throwOnOpenChannel; } void setRetryableAppendRowFailures(int count) { this.retryableAppendRowFailures.set(count); } void setNonRetryableAppendRowFailures(int count) { this.nonRetryableAppendRowFailures.set(count); } void setBlockOnOpenChannel(CountDownLatch latch) { this.blockOnOpenChannel = 
latch; } void incrementCloseCallCount() { closeCallCount.incrementAndGet(); } int incrementChannelsCreated() { return totalChannelsCreated.incrementAndGet(); } } /** Streaming ingest client that creates tracking channels. */ static class TrackingStreamingIngestClient implements SnowflakeStreamingIngestClient { private final String pipeName; private final TrackingIngestClientSupplier supplier; private final ConcurrentHashMap channels = new ConcurrentHashMap<>(); TrackingStreamingIngestClient( final String pipeName, final TrackingIngestClientSupplier supplier) { this.pipeName = pipeName; this.supplier = supplier; } @Override public OpenChannelResult openChannel(final String channelName, final String offsetToken) { if (supplier.throwOnOpenChannel) { throw new SFException("OpenChannelFailed", "Test simulated openChannel failure", 0, ""); } CountDownLatch latch = supplier.blockOnOpenChannel; if (latch != null) { try { latch.await(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); } } supplier.incrementChannelsCreated(); final ChannelStatus channelStatus = new ChannelStatus( "db", "schema", pipeName, channelName, "SUCCESS", offsetToken, Instant.now(), 0, 0, 0, null, null, null, null, Instant.now()); final TrackingStreamingIngestChannel channel = new TrackingStreamingIngestChannel(pipeName, channelName, supplier); channels.put(channelName, channel); return new OpenChannelResult(channel, channelStatus); } @Override public OpenChannelResult openChannel(final String channelName) { return openChannel(channelName, null); } @Override public void close() {} @Override public CompletableFuture close( final boolean waitForFlush, final Duration timeoutDuration) { throw new UnsupportedOperationException(); } @Override public void initiateFlush() {} @Override public void dropChannel(final String channelName) { throw new UnsupportedOperationException(); } @Override public Map getLatestCommittedOffsetTokens(final List channelNames) { 
throw new UnsupportedOperationException(); } @Override public ChannelStatusBatch getChannelStatus(final List channelNames) { Map statusMap = new HashMap<>(); for (String name : channelNames) { TrackingStreamingIngestChannel ch = channels.get(name); if (ch != null) { statusMap.put(name, ch.getChannelStatus()); } } return new ChannelStatusBatch(statusMap); } @Override public boolean isClosed() { return false; } @Override public CompletableFuture waitForFlush(final Duration timeoutDuration) { throw new UnsupportedOperationException(); } @Override public String getDBName() { throw new UnsupportedOperationException(); } @Override public String getSchemaName() { throw new UnsupportedOperationException(); } @Override public String getPipeName() { return pipeName; } @Override public String getClientName() { throw new UnsupportedOperationException(); } } /** Streaming ingest channel that tracks close() calls. */ static class TrackingStreamingIngestChannel implements SnowflakeStreamingIngestChannel { private final String pipeName; private final String channelName; private final TrackingIngestClientSupplier supplier; private volatile boolean closed = false; TrackingStreamingIngestChannel( final String pipeName, final String channelName, final TrackingIngestClientSupplier supplier) { this.pipeName = pipeName; this.channelName = channelName; this.supplier = supplier; } @Override public String getDBName() { throw new UnsupportedOperationException(); } @Override public String getSchemaName() { throw new UnsupportedOperationException(); } @Override public String getPipeName() { return pipeName; } @Override public String getFullyQualifiedPipeName() { return pipeName; } @Override public String getFullyQualifiedChannelName() { return channelName; } @Override public boolean isClosed() { return closed; } @Override public String getChannelName() { return channelName; } @Override public void close() { closed = true; supplier.incrementCloseCallCount(); } @Override public void close(final 
boolean waitForFlush, final Duration timeoutDuration) { close(); } @Override public void appendRow(final Map row, final String offsetToken) { if (supplier.retryableAppendRowFailures.getAndUpdate(n -> n > 0 ? n - 1 : 0) > 0) { throw new SFException("MemoryThresholdExceeded", "Test simulated backpressure", 0, ""); } if (supplier.nonRetryableAppendRowFailures.getAndUpdate(n -> n > 0 ? n - 1 : 0) > 0) { throw new SFException("ChannelInvalidated", "Test simulated channel invalidation", 0, ""); } if (supplier.throwOnAppendRow) { throw new SFException("ChannelInvalidated", "Test simulated channel invalidation", 0, ""); } } @Override public void appendRows( final Iterable> rows, final String startOffsetToken, final String endOffsetToken) {} @Override public String getLatestCommittedOffsetToken() { if (supplier.throwOnOffsetToken) { throw new SFException("ChannelInvalidated", "Test simulated channel invalidation", 0, ""); } return null; } @Override public ChannelStatus getChannelStatus() { return new ChannelStatus( "db", "schema", pipeName, channelName, "SUCCESS", null, Instant.now(), 0, 0, 0, null, null, null, null, Instant.now()); } @Override public CompletableFuture waitForCommit( final Predicate tokenChecker, final Duration timeoutDuration) { throw new UnsupportedOperationException(); } @Override public CompletableFuture waitForFlush(final Duration timeoutDuration) { throw new UnsupportedOperationException(); } @Override public void initiateFlush() {} } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/StreamingClientManagerIT.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2; import static com.snowflake.kafka.connector.Constants.DEFAULT_PIPE_NAME_SUFFIX; import static org.assertj.core.api.Assertions.*; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.config.SinkTaskConfig; 
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties; import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools; import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools; import java.util.Map; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; public class StreamingClientManagerIT { private Map connectorConfig; private SinkTaskConfig sinkTaskConfig; private StreamingClientProperties streamingClientProperties; private String testConnectorName; private String pipe1, pipe2; private String task1, task2; private String table1, table2; @BeforeEach public void setUp() { final long salt = System.currentTimeMillis(); final SnowflakeConnectionService connectionService = TestUtils.getConnectionServiceWithEncryptedKey(); connectorConfig = TestUtils.getConnectorConfigurationForStreaming(true); sinkTaskConfig = SinkTaskConfig.from(connectorConfig); streamingClientProperties = StreamingClientProperties.from(sinkTaskConfig); table1 = "TABLE1" + salt; table2 = "TABLE2" + salt; task1 = "task1" + salt; task2 = "task2" + salt; testConnectorName = "TEST_CONNECTOR_" + salt; pipe1 = table1 + DEFAULT_PIPE_NAME_SUFFIX; pipe2 = table2 + DEFAULT_PIPE_NAME_SUFFIX; TestUtils.createTableWithMetadataColumn(table1); TestUtils.createTableWithMetadataColumn(table2); ThreadPools.registerTask(testConnectorName, sinkTaskConfig); ThreadPools.registerTask(testConnectorName, sinkTaskConfig); } @AfterEach public void tearDown() { TestUtils.dropTable(table1); TestUtils.dropTable(table2); closeTaskClients(task1); closeTaskClients(task2); ThreadPools.closeForTask(testConnectorName); ThreadPools.closeForTask(testConnectorName); } @Test public void 
testGetClient_FirstTime_CreatesNewClient() { // When SnowflakeStreamingIngestClient client = getClient(task1, pipe1); // Then assertThat(client).as("Client should not be null").isNotNull(); } @Test public void testGetClient_SamePipeName_ReturnsExistingClient() { // Given SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1); // When SnowflakeStreamingIngestClient client2 = getClient(task1, pipe1); // Then assertThat(client1) .as("Should return the same client instance for same pipe name") .isEqualTo(client2); } @Test public void testGetClient_DifferentPipeNames_CreatesDistinctClients() { // When SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1); SnowflakeStreamingIngestClient client2 = getClient(task2, pipe2); // Then assertThat(client1) .as("Different pipe names should create different clients") .isNotEqualTo(client2); } @Test public void testGetClient_AfterClientClosed_CreatesNewClient() { // Given SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1); // Close the client for this task closeTaskClients(task1); // When SnowflakeStreamingIngestClient client2 = getClient(task1, pipe1); // Then assertThat(client1) .as("Should create a new client when previous task released it") .isNotEqualTo(client2); } @Test public void testClose_ExistingPipe_ClosesAndRemovesClient() { // Given SnowflakeStreamingIngestClient client = getClient(task1, pipe1); // When - Release the task closeTaskClients(task1); // Then - Verify new client is created for same pipe name with different task SnowflakeStreamingIngestClient newClient = getClient(task2, pipe1); assertThat(client).as("Should create new client after close").isNotEqualTo(newClient); } @Test public void testClose_NonExistentPipe_DoesNotThrow() { assertThatCode(() -> closeTaskClients("nonExistentTask")).doesNotThrowAnyException(); } @Test public void testClose_MultipleClients_ClosesAllClients() { // Given getClient(task1, pipe1); getClient(task1, pipe2); 
assertThat(StreamingClientPools.getClientCountForTask(testConnectorName, task1)).isEqualTo(2); closeTaskClients(task1); assertThat(StreamingClientPools.getClientCountForTask(testConnectorName, task1)).isEqualTo(0); } @Test public void testProvider_ReuseAfterPartialClose_WorksCorrectly() { // task 1 uses 2 pipes, so it has 2 ingest clients SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1); SnowflakeStreamingIngestClient client2 = getClient(task1, pipe2); // Task2 also uses pipe1 (shares one client with task1) in total there should only be 2 ingest // clients in the system SnowflakeStreamingIngestClient client3 = getClient(task2, pipe1); assertThat(client1).isEqualTo(client3); // When - task1 stops closeTaskClients(task1); // Then - Client1 should still be open (task2 still using it) SnowflakeStreamingIngestClient client1AfterRelease = getClient(task1, pipe1); // should get the SAME client1 that task2 is still using assertThat(client1AfterRelease) .as("Should reuse client when another task is still using it") .isEqualTo(client1); } private SnowflakeStreamingIngestClient getClient(String task, String pipe) { return StreamingClientPools.getClient( testConnectorName, task, pipe, sinkTaskConfig, streamingClientProperties, TaskMetrics.noop()); } private void closeTaskClients(String task) { StreamingClientPools.closeTaskClients(testConnectorName, task); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPoolTest.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.client; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.verify; import 
com.snowflake.ingest.streaming.SFException; import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties; import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools; import java.io.IOException; import java.net.URLClassLoader; import java.util.Map; import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; class StreamingClientPoolTest { private SinkTaskConfig connectorConfig; private StreamingClientProperties streamingClientProperties; @BeforeEach void setUp() { Map config = TestUtils.getConnectorConfigurationForStreaming(false); connectorConfig = SinkTaskConfig.from(config); streamingClientProperties = StreamingClientProperties.from(connectorConfig); } @AfterEach void tearDown() { StreamingClientFactory.resetStreamingClientSupplier(); } @Nested class RefCountedClientTest { @Test void taskTracking() { RefCountedClientTestHarness harness = new RefCountedClientTestHarness(); // empty initially assertThat(harness.refCountedClient.taskCount()).isEqualTo(0); assertThat(harness.refCountedClient.hasTask("task-0")).isFalse(); // add two tasks (duplicate add is idempotent) harness.refCountedClient.addTask("task-0"); harness.refCountedClient.addTask("task-1"); harness.refCountedClient.addTask("task-0"); 
assertThat(harness.refCountedClient.taskCount()).isEqualTo(2); // removing unknown task is a no-op assertThat(harness.refCountedClient.removeTask("task-unknown")).isFalse(); // removing one of two is not the last assertThat(harness.refCountedClient.removeTask("task-0")).isFalse(); assertThat(harness.refCountedClient.hasTask("task-0")).isFalse(); assertThat(harness.refCountedClient.hasTask("task-1")).isTrue(); // removing the final task signals empty assertThat(harness.refCountedClient.removeTask("task-1")).isTrue(); assertThat(harness.refCountedClient.taskCount()).isEqualTo(0); } @Test void clientFuture_returns_client_on_success() { SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(mockClient); RefCountedClientTestHarness harness = new RefCountedClientTestHarness(); assertThat(harness.refCountedClient.clientFuture.join()).isSameAs(mockClient); } @Test void clientFuture_exposes_original_exception_on_failure() { SnowflakeKafkaConnectorException originalException = new SnowflakeKafkaConnectorException("boom", "TEST_ERROR"); setSupplierThrowing(originalException); RefCountedClientTestHarness failedHarness = new RefCountedClientTestHarness(); assertThatThrownBy(() -> failedHarness.refCountedClient.clientFuture.join()) .isInstanceOf(CompletionException.class) .hasCause(originalException); // a new RefCountedClient with a working supplier succeeds SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(mockClient); RefCountedClientTestHarness successHarness = new RefCountedClientTestHarness(); assertThat(successHarness.refCountedClient.clientFuture.join()).isSameAs(mockClient); } @Test void clientFuture_wraps_checked_exception_in_CompletionException() { IOException checkedException = new IOException("disk error"); setSupplierThrowingChecked(checkedException); RefCountedClientTestHarness harness = new RefCountedClientTestHarness(); assertThatThrownBy(() -> 
harness.refCountedClient.clientFuture.join()) .isInstanceOf(CompletionException.class) .hasCause(checkedException); } @Test void close_calls_close_on_client() { SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(mockClient); RefCountedClientTestHarness harness = new RefCountedClientTestHarness(); harness.refCountedClient.clientFuture.join(); harness.refCountedClient.close("test-pipe", "test-connector"); verify(mockClient).close(); } /** * Helper that creates a RefCountedClient with the currently-installed supplier. Must be called * after configuring the supplier via {@code setSupplier*} methods. */ class RefCountedClientTestHarness { final StreamingClientPool.RefCountedClient refCountedClient; RefCountedClientTestHarness() { this.refCountedClient = new StreamingClientPool.RefCountedClient( "test-pipe", "test-connector", connectorConfig, streamingClientProperties, TaskMetrics.noop(), Executors.newSingleThreadExecutor()); } } } @Nested class PoolTest { private StreamingClientPool pool; private String connectorName; @BeforeEach void setUp() { connectorName = "test-connector-" + UUID.randomUUID().toString().substring(0, 8); ThreadPools.registerTask(connectorName, connectorConfig); pool = new StreamingClientPool(connectorName); } @AfterEach void tearDownPool() { ThreadPools.closeForTask(connectorName); } private SnowflakeStreamingIngestClient getClient(String taskId, String pipeName) { return pool.getClientAsync( taskId, pipeName, connectorConfig, streamingClientProperties, TaskMetrics.noop()) .join(); } @Test void getClient_creates_client_for_new_pipe() { SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(mockClient); SnowflakeStreamingIngestClient result = getClient("task-0", "pipe-A"); assertThat(result).isSameAs(mockClient); } @Test void getClient_reuses_client_for_same_pipe() { AtomicInteger callCount = new AtomicInteger(); 
StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { callCount.incrementAndGet(); return mock(SnowflakeStreamingIngestClient.class); }); getClient("task-0", "pipe-A"); getClient("task-1", "pipe-A"); assertThat(callCount.get()) .as("supplier should only be called once for the same pipe") .isEqualTo(1); } @Test void getClient_returns_different_clients_for_different_pipes() { AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { callCount.incrementAndGet(); return mock(SnowflakeStreamingIngestClient.class); }); SnowflakeStreamingIngestClient clientA = getClient("task-0", "pipe-A"); SnowflakeStreamingIngestClient clientB = getClient("task-0", "pipe-B"); assertThat(clientA).isNotSameAs(clientB); assertThat(callCount.get()).isEqualTo(2); } @Test void getClientCountForTask_counts_only_that_tasks_pipes() { setSupplierReturning(mock(SnowflakeStreamingIngestClient.class)); // initially zero assertThat(pool.getClientCountForTask("task-0")).isEqualTo(0); // task-0 on two pipes, task-1 on one — counts are independent getClient("task-0", "pipe-A"); getClient("task-0", "pipe-B"); getClient("task-1", "pipe-B"); assertThat(pool.getClientCountForTask("task-0")).isEqualTo(2); assertThat(pool.getClientCountForTask("task-1")).isEqualTo(1); } @Test void closeTaskClients_removes_entry_when_last_task_released() { SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(mockClient); getClient("task-0", "pipe-A"); pool.closeTaskClients("task-0"); assertThat(pool.getClientCountForTask("task-0")).isEqualTo(0); verify(mockClient, timeout(5000)).close(); } @Test void closeTaskClients_keeps_client_when_other_tasks_remain() { SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(mockClient); getClient("task-0", "pipe-A"); getClient("task-1", 
"pipe-A"); pool.closeTaskClients("task-0"); assertThat(pool.getClientCountForTask("task-1")).isEqualTo(1); verify(mockClient, never()).close(); } @Test void closeTaskClients_then_getClient_creates_new_client() { AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { callCount.incrementAndGet(); return mock(SnowflakeStreamingIngestClient.class); }); SnowflakeStreamingIngestClient first = getClient("task-0", "pipe-A"); pool.closeTaskClients("task-0"); SnowflakeStreamingIngestClient second = getClient("task-0", "pipe-A"); assertThat(second).isNotSameAs(first); assertThat(callCount.get()).isEqualTo(2); } @Test void closeTaskClients_for_unknown_task_does_not_throw() { pool.closeTaskClients("nonexistent-task"); } @Test void getClient_removes_entry_on_failure_and_rethrows() { SnowflakeKafkaConnectorException originalException = new SnowflakeKafkaConnectorException("creation failed", "TEST_ERROR"); setSupplierThrowing(originalException); assertThatThrownBy(() -> getClient("task-0", "pipe-A")) .isInstanceOf(CompletionException.class) .hasCause(originalException); assertThat(pool.getClientCountForTask("task-0")).isEqualTo(0); } @Test void getClient_after_failure_retries_creation() { AtomicInteger callCount = new AtomicInteger(); SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { if (callCount.incrementAndGet() == 1) { throw new SnowflakeKafkaConnectorException("transient", "TEST_ERROR"); } return mockClient; }); assertThatThrownBy(() -> getClient("task-0", "pipe-A")) .isInstanceOf(CompletionException.class) .hasCauseInstanceOf(SnowflakeKafkaConnectorException.class); SnowflakeStreamingIngestClient result = getClient("task-0", "pipe-A"); assertThat(result).isSameAs(mockClient); assertThat(callCount.get()).isEqualTo(2); } @Test void 
recreateClient_retries_on_client_invalid_error() { SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class); SnowflakeStreamingIngestClient newClient = mock(SnowflakeStreamingIngestClient.class); AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { int count = callCount.incrementAndGet(); if (count == 1) return oldClient; if (count == 2) { // First recreation attempt fails with pipe failover throw new SFException("SfApiPipeFailedOverError", "Pipe failed over", 410, ""); } return newClient; }); // Create initial client getClient("task-0", "pipe-A"); // Recreate — first attempt fails with 410, should retry SnowflakeStreamingIngestClient result = StreamingClientPools.recreateClient( connectorName, "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop()); assertThat(result).isSameAs(newClient); assertThat(callCount.get()) .isEqualTo(3); // original + failed recreation + successful recreation } @Test void pool_threads_inherit_context_classloader_from_pool_creator() { AtomicReference capturedClassLoader = new AtomicReference<>(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { capturedClassLoader.set(Thread.currentThread().getContextClassLoader()); return mock(SnowflakeStreamingIngestClient.class); }); // Simulate Kafka Connect's PluginClassLoader by setting a custom context classloader // before creating the pool — the factory captures it at construction time. 
URLClassLoader fakePluginCL = new URLClassLoader(new java.net.URL[0], null); ClassLoader originalCL = Thread.currentThread().getContextClassLoader(); String clConnectorName = "test-connector-cl-" + UUID.randomUUID().toString().substring(0, 8); Thread.currentThread().setContextClassLoader(fakePluginCL); StreamingClientPool poolWithCustomCL; try { ThreadPools.registerTask(clConnectorName, connectorConfig); poolWithCustomCL = new StreamingClientPool(clConnectorName); } finally { Thread.currentThread().setContextClassLoader(originalCL); } try { poolWithCustomCL .getClientAsync( "task-0", "pipe-A", connectorConfig, streamingClientProperties, TaskMetrics.noop()) .join(); assertThat(capturedClassLoader.get()) .as("Pool thread should have the classloader from the pool creator") .isSameAs(fakePluginCL); } finally { ThreadPools.closeForTask(clConnectorName); } } @Test void recreateClient_replaces_entry_and_preserves_tasks() { SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class); SnowflakeStreamingIngestClient newClient = mock(SnowflakeStreamingIngestClient.class); AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { return callCount.incrementAndGet() == 1 ? 
oldClient : newClient; }); // Two tasks share the same pipe getClient("task-0", "pipe-A"); getClient("task-1", "pipe-A"); assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1); assertThat(pool.getClientCountForTask("task-1")).isEqualTo(1); // Recreate the client SnowflakeStreamingIngestClient result = pool.recreateClient( "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop()); assertThat(result).isSameAs(newClient); // Both tasks should still be registered assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1); assertThat(pool.getClientCountForTask("task-1")).isEqualTo(1); assertThat(callCount.get()).isEqualTo(2); } @Test void recreateClient_closes_old_client() { SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class); SnowflakeStreamingIngestClient newClient = mock(SnowflakeStreamingIngestClient.class); AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { return callCount.incrementAndGet() == 1 ? oldClient : newClient; }); getClient("task-0", "pipe-A"); pool.recreateClient( "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop()); verify(oldClient).close(); } @Test void recreateClient_creates_fresh_entry_and_registers_task_when_no_entry_exists() { // When recreateClient is called for a pipe with no existing entry (e.g. the entry was // already evicted by a failed creation), the fresh entry must have the caller's task // registered so closeTaskClients on a different task doesn't prematurely evict it. SnowflakeStreamingIngestClient freshClient = mock(SnowflakeStreamingIngestClient.class); setSupplierReturning(freshClient); // No prior call — pool is empty for this pipe. 
assertThat(pool.getClientCountForTask("task-0")).isEqualTo(0); SnowflakeStreamingIngestClient result = pool.recreateClient( "task-0", "pipe-A", mock(SnowflakeStreamingIngestClient.class), connectorConfig, streamingClientProperties, TaskMetrics.noop()); assertThat(result).isSameAs(freshClient); // Task must be registered so closeTaskClients on a different task doesn't evict us. assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1); // Simulate cleanup of a different task — the fresh entry must survive. pool.closeTaskClients("some-other-task"); assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1); } @Test void recreateClient_noop_if_client_already_replaced() { SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class); SnowflakeStreamingIngestClient newClient = mock(SnowflakeStreamingIngestClient.class); AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { return callCount.incrementAndGet() == 1 ? 
oldClient : newClient; }); getClient("task-0", "pipe-A"); // First recreation succeeds SnowflakeStreamingIngestClient firstResult = pool.recreateClient( "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop()); assertThat(firstResult).isSameAs(newClient); // Second recreation with the OLD client reference should be a no-op SnowflakeStreamingIngestClient secondResult = pool.recreateClient( "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop()); assertThat(secondResult).isSameAs(newClient); // Supplier should only have been called twice (original + one recreation) assertThat(callCount.get()).isEqualTo(2); } @Test void recreateClient_then_getClient_returns_new_client() { SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class); SnowflakeStreamingIngestClient newClient = mock(SnowflakeStreamingIngestClient.class); AtomicInteger callCount = new AtomicInteger(); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { return callCount.incrementAndGet() == 1 ? 
oldClient : newClient; }); getClient("task-0", "pipe-A"); pool.recreateClient( "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop()); // A subsequent getClient should return the new client (not create a third one) SnowflakeStreamingIngestClient result = getClient("task-0", "pipe-A"); assertThat(result).isSameAs(newClient); assertThat(callCount.get()).isEqualTo(2); } @Test void recreateClient_concurrent_callers_only_creates_once() throws Exception { SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class); AtomicInteger supplierCallCount = new AtomicInteger(); CountDownLatch supplierStarted = new CountDownLatch(1); CountDownLatch supplierProceed = new CountDownLatch(1); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { int count = supplierCallCount.incrementAndGet(); if (count == 1) { // First call returns oldClient immediately return oldClient; } // Second call (recreation) blocks until signaled supplierStarted.countDown(); try { supplierProceed.await(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); } return mock(SnowflakeStreamingIngestClient.class); }); getClient("task-0", "pipe-A"); getClient("task-1", "pipe-A"); // Launch two concurrent recreateClient calls CompletableFuture future1 = CompletableFuture.supplyAsync( () -> pool.recreateClient( "task-0", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop())); CompletableFuture future2 = CompletableFuture.supplyAsync( () -> pool.recreateClient( "task-1", "pipe-A", oldClient, connectorConfig, streamingClientProperties, TaskMetrics.noop())); // Wait for the supplier to start (only one should start) supplierStarted.await(); supplierProceed.countDown(); SnowflakeStreamingIngestClient result1 = future1.join(); SnowflakeStreamingIngestClient result2 = future2.join(); // Both callers should get the same new 
client assertThat(result1).isSameAs(result2); // Supplier should have been called exactly twice (original + one recreation) assertThat(supplierCallCount.get()).isEqualTo(2); } @Test void getClient_parallel_for_different_pipes_creates_concurrently() throws Exception { CountDownLatch bothStarted = new CountDownLatch(2); CountDownLatch proceed = new CountDownLatch(1); StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { bothStarted.countDown(); try { proceed.await(); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException(e); } return mock(SnowflakeStreamingIngestClient.class); }); CompletableFuture futureA = pool.getClientAsync( "task-0", "pipe-A", connectorConfig, streamingClientProperties, TaskMetrics.noop()); CompletableFuture futureB = pool.getClientAsync( "task-1", "pipe-B", connectorConfig, streamingClientProperties, TaskMetrics.noop()); // Both suppliers should have started before either completes bothStarted.await(); proceed.countDown(); SnowflakeStreamingIngestClient clientA = futureA.join(); SnowflakeStreamingIngestClient clientB = futureB.join(); assertThat(clientA).isNotSameAs(clientB); } } private void setSupplierReturning(SnowflakeStreamingIngestClient client) { StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> client); } private void setSupplierThrowing(RuntimeException exception) { StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { throw exception; }); } @SuppressWarnings("unchecked") private void setSupplierThrowingChecked(Exception checkedException) { StreamingClientFactory.setStreamingClientSupplier( (clientName, dbName, schemaName, pipeName, props) -> { sneakyThrow(checkedException); return null; // unreachable }); } /** * Throws a checked exception without declaring it, for testing CompletionException unwrapping. 
*/ @SuppressWarnings("unchecked") private static void sneakyThrow(Exception exception) throws E { throw (E) exception; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPoolsTest.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.client; import static org.assertj.core.api.Assertions.assertThatThrownBy; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.metrics.TaskMetrics; import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties; import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools; import java.util.UUID; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class StreamingClientPoolsTest { private SinkTaskConfig sinkTaskConfig; private StreamingClientProperties streamingClientProperties; private String connectorName; @BeforeEach void setUp() { sinkTaskConfig = SinkTaskConfig.from(TestUtils.getConnectorConfigurationForStreaming(false)); streamingClientProperties = StreamingClientProperties.from(sinkTaskConfig); connectorName = "test-connector-pools-" + UUID.randomUUID().toString().substring(0, 8); ThreadPools.registerTask(connectorName, sinkTaskConfig); } @AfterEach void tearDown() { StreamingClientFactory.resetStreamingClientSupplier(); StreamingClientPools.closeTaskClients(connectorName, "test-task"); ThreadPools.closeForTask(connectorName); } @Test void getClient_unwraps_CompletionException_and_throws_original_RuntimeException() { SnowflakeKafkaConnectorException originalException = new SnowflakeKafkaConnectorException("creation failed", "TEST_ERROR"); StreamingClientFactory.setStreamingClientSupplier( 
(clientName, dbName, schemaName, pipeName, props) -> { throw originalException; }); assertThatThrownBy( () -> StreamingClientPools.getClient( connectorName, "test-task", "pipe-A", sinkTaskConfig, streamingClientProperties, TaskMetrics.noop())) .isSameAs(originalException); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/service/PartitionChannelManagerTest.java ================================================ package com.snowflake.kafka.connector.internal.streaming.v2.service; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.snowflake.kafka.connector.config.SinkTaskConfig; import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder; import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletableFuture; import org.apache.kafka.common.TopicPartition; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; class PartitionChannelManagerTest { private static final String CONNECTOR_NAME = "test_connector"; private static final String TASK_ID = "0"; private static final String TOPIC = "test_topic"; private PartitionChannelManager manager; private Map createdChannels; @BeforeEach void setUp() { createdChannels = new HashMap<>(); PartitionChannelManager.PartitionChannelBuilder trackingBuilder = (topicPartition, tableName, channelName, pipeName) -> { TopicPartitionChannel channel = mock(TopicPartitionChannel.class); 
when(channel.getChannelName()).thenReturn(channelName); when(channel.getPipeName()).thenReturn(pipeName); when(channel.closeChannelAsync()).thenReturn(CompletableFuture.completedFuture(null)); when(channel.waitForLastProcessedRecordCommitted()) .thenReturn(CompletableFuture.completedFuture(null)); createdChannels.put(topicPartition, channel); return channel; }; manager = new PartitionChannelManager(testConfig(Collections.emptyMap()), trackingBuilder); } // --- makeChannelName --- @Test void makeChannelNameConcatenatesWithUnderscores() { assertEquals( "myConnector_myTopic_3", PartitionChannelManager.makeChannelName("myConnector", "myTopic", 3)); } // --- startPartitions --- @Test void startPartitionsRegistersChannelsInMap() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); TopicPartition tp1 = new TopicPartition(TOPIC, 1); Map tableToPipe = new HashMap<>(); tableToPipe.put(TOPIC, "pipe_" + TOPIC); manager.startPartitions(Arrays.asList(tp0, tp1), tableToPipe); assertEquals(2, manager.getPartitionChannels().size()); assertTrue(manager.getChannel(tp0).isPresent()); assertTrue(manager.getChannel(tp1).isPresent()); } @Test void startPartitionsPassesCorrectNamesToBuilder() { Map capturedArgs = new HashMap<>(); PartitionChannelManager.PartitionChannelBuilder capturingBuilder = (topicPartition, tableName, channelName, pipeName) -> { capturedArgs.put("tableName", tableName); capturedArgs.put("channelName", channelName); capturedArgs.put("pipeName", pipeName); TopicPartitionChannel channel = mock(TopicPartitionChannel.class); when(channel.getChannelName()).thenReturn(channelName); return channel; }; PartitionChannelManager capturingManager = new PartitionChannelManager(testConfig(Collections.emptyMap()), capturingBuilder); TopicPartition tp = new TopicPartition(TOPIC, 7); Map tableToPipe = new HashMap<>(); tableToPipe.put(TOPIC, "pipe_" + TOPIC); capturingManager.startPartitions(Collections.singletonList(tp), tableToPipe); String expectedChannelName = 
PartitionChannelManager.makeChannelName(CONNECTOR_NAME, TOPIC, 7); assertEquals(TOPIC, capturedArgs.get("tableName")); assertEquals(expectedChannelName, capturedArgs.get("channelName")); assertEquals("pipe_" + TOPIC, capturedArgs.get("pipeName")); } @Test void startPartitionsUsesTopicToTableMapForTableName() { Map topicToTable = new HashMap<>(); topicToTable.put("raw_topic", "mapped_table"); Map capturedArgs = new HashMap<>(); PartitionChannelManager.PartitionChannelBuilder capturingBuilder = (topicPartition, tableName, channelName, pipeName) -> { capturedArgs.put("tableName", tableName); capturedArgs.put("pipeName", pipeName); TopicPartitionChannel channel = mock(TopicPartitionChannel.class); when(channel.getChannelName()).thenReturn(channelName); return channel; }; PartitionChannelManager managerWithMapping = new PartitionChannelManager(testConfig(topicToTable), capturingBuilder); TopicPartition tp = new TopicPartition("raw_topic", 0); Map tableToPipe = new HashMap<>(); tableToPipe.put("mapped_table", "pipe_mapped_table"); managerWithMapping.startPartitions(Collections.singletonList(tp), tableToPipe); assertEquals("mapped_table", capturedArgs.get("tableName")); assertEquals("pipe_mapped_table", capturedArgs.get("pipeName")); } // --- getChannel --- @Test void getChannelByTopicPartitionReturnsChannel() { TopicPartition tp = new TopicPartition(TOPIC, 0); startSinglePartition(tp); Optional result = manager.getChannel(tp); assertTrue(result.isPresent()); assertSame(createdChannels.get(tp), result.get()); } @Test void getChannelByStringReturnsChannel() { TopicPartition tp = new TopicPartition(TOPIC, 0); startSinglePartition(tp); String channelName = PartitionChannelManager.makeChannelName(CONNECTOR_NAME, TOPIC, 0); Optional result = manager.getChannel(channelName); assertTrue(result.isPresent()); assertSame(createdChannels.get(tp), result.get()); } @Test void getChannelReturnsEmptyForUnknownPartition() { TopicPartition unknown = new TopicPartition("no_such_topic", 
99); assertFalse(manager.getChannel(unknown).isPresent()); } @Test void getChannelByStringReturnsEmptyForUnknownName() { assertFalse(manager.getChannel("nonexistent_channel").isPresent()); } // --- close (subset) --- @Test void closeRemovesOnlyRequestedPartitions() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); TopicPartition tp1 = new TopicPartition(TOPIC, 1); TopicPartition tp2 = new TopicPartition(TOPIC, 2); startPartitions(tp0, tp1, tp2); manager.close(Collections.singletonList(tp1)); assertFalse(manager.getChannel(tp1).isPresent()); assertTrue(manager.getChannel(tp0).isPresent()); assertTrue(manager.getChannel(tp2).isPresent()); assertEquals(2, manager.getPartitionChannels().size()); } @Test void closeCallsCloseChannelAsyncOnRequestedPartitions() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); TopicPartition tp1 = new TopicPartition(TOPIC, 1); startPartitions(tp0, tp1); manager.close(Collections.singletonList(tp0)); verify(createdChannels.get(tp0)).closeChannelAsync(); verify(createdChannels.get(tp1), never()).closeChannelAsync(); } @Test void closeHandlesUnknownPartitionsGracefully() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); startSinglePartition(tp0); TopicPartition unknown = new TopicPartition("unknown", 99); manager.close(Arrays.asList(tp0, unknown)); assertFalse(manager.getChannel(tp0).isPresent()); assertEquals(0, manager.getPartitionChannels().size()); } @Test void closeWithEmptyCollectionIsNoop() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); startSinglePartition(tp0); manager.close(Collections.emptyList()); assertTrue(manager.getChannel(tp0).isPresent()); assertEquals(1, manager.getPartitionChannels().size()); } // --- closeAll --- @Test void closeAllClosesAllChannelsAndClearsMap() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); TopicPartition tp1 = new TopicPartition(TOPIC, 1); startPartitions(tp0, tp1); manager.closeAll(); assertTrue(manager.getPartitionChannels().isEmpty()); 
verify(createdChannels.get(tp0)).closeChannelAsync(); verify(createdChannels.get(tp1)).closeChannelAsync(); } @Test void closeAllOnEmptyManagerIsNoop() { manager.closeAll(); assertTrue(manager.getPartitionChannels().isEmpty()); } // --- waitForAllChannelsToCommitData --- @Test void waitForAllChannelsCallsFlushOnEveryChannel() { TopicPartition tp0 = new TopicPartition(TOPIC, 0); TopicPartition tp1 = new TopicPartition(TOPIC, 1); startPartitions(tp0, tp1); manager.waitForAllChannelsToCommitData(); verify(createdChannels.get(tp0)).waitForLastProcessedRecordCommitted(); verify(createdChannels.get(tp1)).waitForLastProcessedRecordCommitted(); } @Test void waitForAllChannelsOnEmptyManagerIsNoop() { manager.waitForAllChannelsToCommitData(); assertTrue(manager.getPartitionChannels().isEmpty()); } // --- helpers --- private void startSinglePartition(TopicPartition topicPartition) { startPartitions(topicPartition); } private void startPartitions(TopicPartition... partitions) { Map tableToPipe = new HashMap<>(); for (TopicPartition topicPartition : partitions) { String tableName = topicPartition.topic(); tableToPipe.putIfAbsent(tableName, "pipe_" + tableName); } manager.startPartitions(Arrays.asList(partitions), tableToPipe); } private static SinkTaskConfig testConfig(Map topicToTableMap) { return SinkTaskConfigTestBuilder.builder() .connectorName(CONNECTOR_NAME) .taskId(TASK_ID) .topicToTableMap(topicToTableMap) .enableSanitization(false) .build(); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryChannelStatusTest.java ================================================ package com.snowflake.kafka.connector.internal.telemetry; import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME; import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.channelMetricPrefix; import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.times; 
import static org.mockito.Mockito.verify; import com.codahale.metrics.MetricRegistry; import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode; import org.junit.Test; import org.mockito.Mockito; public class SnowflakeTelemetryChannelStatusTest { private final String tableName = "tableName"; private final String connectorName = "connectorName"; private final String channelName = "channelName"; @Test public void testRegisterAndUnregisterJmxMetrics() { MetricRegistry metricRegistry = Mockito.spy(MetricRegistry.class); MetricsJmxReporter metricsJmxReporter = Mockito.spy(new MetricsJmxReporter(metricRegistry, TEST_CONNECTOR_NAME)); SnowflakeTelemetryChannelStatus status = new SnowflakeTelemetryChannelStatus( tableName, connectorName, channelName, 1234, Optional.of(metricsJmxReporter), new AtomicLong(-1), new AtomicLong(-1), new AtomicLong(-1)); // Registration: 4 metrics registered, start() NOT called (handled at task level) verify(metricsJmxReporter, times(0)).start(); verify(metricRegistry, times((int) SnowflakeTelemetryChannelStatus.NUM_METRICS)) .register(Mockito.anyString(), Mockito.any()); // No removeMatching scan should have been called during registration verify(metricsJmxReporter, times(0)).removeMetricsFromRegistry(Mockito.anyString()); // Unregister: uses targeted removal (4 individual remove calls) status.tryUnregisterChannelJMXMetrics(); verify(metricRegistry, times((int) SnowflakeTelemetryChannelStatus.NUM_METRICS)) .remove(Mockito.anyString()); } @Test public void testDisabledJmx() { MetricRegistry metricRegistry = Mockito.spy(MetricRegistry.class); MetricsJmxReporter metricsJmxReporter = 
Mockito.spy(new MetricsJmxReporter(metricRegistry, TEST_CONNECTOR_NAME)); SnowflakeTelemetryChannelStatus snowflakeTelemetryChannelStatus = new SnowflakeTelemetryChannelStatus( tableName, connectorName, channelName, 1234, Optional.empty(), new AtomicLong(-1), new AtomicLong(-1), new AtomicLong(-1)); verify(metricsJmxReporter, times(0)).start(); verify(metricRegistry, times(0)).register(Mockito.anyString(), Mockito.any()); verify(metricsJmxReporter, times(0)) .removeMetricsFromRegistry(channelMetricPrefix(channelName)); snowflakeTelemetryChannelStatus.tryUnregisterChannelJMXMetrics(); verify(metricsJmxReporter, times(0)) .removeMetricsFromRegistry(channelMetricPrefix(channelName)); } @Test public void testValidationFailureCountInDumpTo() { SnowflakeTelemetryChannelStatus status = new SnowflakeTelemetryChannelStatus( tableName, connectorName, channelName, 1234, Optional.empty(), new AtomicLong(-1), new AtomicLong(-1), new AtomicLong(-1)); // Initially zero ObjectNode msg = new ObjectMapper().createObjectNode(); status.dumpTo(msg); assertEquals(0, msg.get(TelemetryConstants.VALIDATION_FAILURE_COUNT).asLong()); // Increment and verify status.incValidationFailureCount(); status.incValidationFailureCount(); status.incValidationFailureCount(); msg = new ObjectMapper().createObjectNode(); status.dumpTo(msg); assertEquals(3, msg.get(TelemetryConstants.VALIDATION_FAILURE_COUNT).asLong()); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryServiceTest.java ================================================ package com.snowflake.kafka.connector.internal.telemetry; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.KEY_CONVERTER; import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.VALUE_CONVERTER; import static com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService.INGESTION_METHOD; import static 
org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import com.snowflake.ingest.streaming.ChannelStatus; import com.snowflake.kafka.connector.ConnectorConfigTools; import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.internal.SnowflakeErrors; import com.snowflake.kafka.connector.internal.TestUtils; import com.snowflake.kafka.connector.internal.streaming.IngestionMethodConfig; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation; import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus; import java.time.Duration; import java.time.Instant; import java.util.LinkedList; import java.util.Map; import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicLong; import net.snowflake.client.internal.jdbc.telemetry.Telemetry; import net.snowflake.client.internal.jdbc.telemetry.TelemetryData; import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.JsonNode; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; public class SnowflakeTelemetryServiceTest { public static final String KAFKA_STRING_CONVERTER = "org.apache.kafka.connect.storage.StringConverter"; public static final String KAFKA_CONFLUENT_AVRO_CONVERTER = "io.confluent.connect.avro.AvroConverter"; private long startTime; private MockTelemetryClient mockTelemetryClient; @BeforeEach void setUp() { this.startTime = System.currentTimeMillis(); this.mockTelemetryClient = new MockTelemetryClient(); } 
@ParameterizedTest @EnumSource(value = IngestionMethodConfig.class) public void testReportKafkaConnectStart(IngestionMethodConfig ingestionMethodConfig) { // given Map connectorConfig = createConnectorConfig(); connectorConfig.put(KEY_CONVERTER, KAFKA_STRING_CONVERTER); connectorConfig.put(KafkaConnectorConfigParams.VALUE_CONVERTER, KAFKA_CONFLUENT_AVRO_CONVERTER); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); // when snowflakeTelemetryService.reportKafkaConnectStart(System.currentTimeMillis(), connectorConfig); // then LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode allNode = sentData.get(0).getMessage(); assertEquals( SnowflakeTelemetryService.TelemetryType.KAFKA_START.toString(), allNode.get("type").asText()); assertEquals("kafka_connector", allNode.get("source").asText()); assertEquals(Utils.VERSION, allNode.get("version").asText()); assertEquals(ingestionMethodConfig.toString(), sentTelemetryDataField(INGESTION_METHOD)); JsonNode dataNode = allNode.get("data"); assertTrue( dataNode.get(TelemetryConstants.START_TIME).asLong() <= System.currentTimeMillis() && dataNode.get(TelemetryConstants.START_TIME).asLong() >= this.startTime); assertNotNull(dataNode.get("jdk_version")); assertNotNull(dataNode.get("jdk_distribution")); validateKeyAndValueConverter(dataNode); // All non-sensitive config keys from the map should be present assertTrue(dataNode.has(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)); assertTrue(dataNode.has(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)); assertTrue(dataNode.has(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME)); assertTrue(dataNode.has(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME)); // Sensitive keys must NOT be present assertFalse(dataNode.has(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY)); assertFalse(dataNode.has(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE)); } @Test public void 
testReportKafkaConnectStart_clientValidationExplicitlySet() { Map connectorConfig = createConnectorConfig(); connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side"); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); snowflakeTelemetryService.reportKafkaConnectStart(System.currentTimeMillis(), connectorConfig); LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode dataNode = sentData.get(0).getMessage().get("data"); assertEquals( "server_side", dataNode.get(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION).asText()); } @ParameterizedTest @EnumSource(value = IngestionMethodConfig.class) public void testReportKafkaConnectStop(IngestionMethodConfig ingestionMethodConfig) { // given Map connectorConfig = createConnectorConfig(); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); // when snowflakeTelemetryService.reportKafkaConnectStop(System.currentTimeMillis()); // then LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode allNode = sentData.get(0).getMessage(); assertEquals( SnowflakeTelemetryService.TelemetryType.KAFKA_STOP.toString(), allNode.get("type").asText()); assertEquals("kafka_connector", allNode.get("source").asText()); assertEquals(Utils.VERSION, allNode.get("version").asText()); JsonNode dataNode = allNode.get("data"); assertNotNull(dataNode); assertTrue(dataNode.has(INGESTION_METHOD)); assertEquals(dataNode.get(INGESTION_METHOD).asInt(), ingestionMethodConfig.ordinal()); assertTrue( dataNode.get(TelemetryConstants.START_TIME).asLong() <= System.currentTimeMillis() && dataNode.get(TelemetryConstants.START_TIME).asLong() >= this.startTime); } @ParameterizedTest @EnumSource(value = IngestionMethodConfig.class) public void testReportKafkaConnectFatalError(IngestionMethodConfig ingestionMethodConfig) { // 
given Map connectorConfig = createConnectorConfig(); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); String expectedException = SnowflakeErrors.ERROR_0003.getException("test exception").getMessage(); // when snowflakeTelemetryService.reportKafkaConnectFatalError(expectedException); // validate data sent LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode allNode = sentData.get(0).getMessage(); assertEquals( SnowflakeTelemetryService.TelemetryType.KAFKA_FATAL_ERROR.toString(), allNode.get("type").asText()); assertEquals("kafka_connector", allNode.get("source").asText()); assertEquals(Utils.VERSION, allNode.get("version").asText()); JsonNode dataNode = allNode.get("data"); assertNotNull(dataNode); assertTrue(dataNode.has(INGESTION_METHOD)); assertEquals(dataNode.get(INGESTION_METHOD).asInt(), ingestionMethodConfig.ordinal()); assertTrue( dataNode.get(TelemetryConstants.UNIX_TIME).asLong() <= System.currentTimeMillis() && dataNode.get(TelemetryConstants.UNIX_TIME).asLong() >= this.startTime); assertEquals(dataNode.get(TelemetryConstants.ERROR_DETAIL).asText(), expectedException); } @Test public void testReportKafkaConnectFatalErrorWithChannelContext() { Map connectorConfig = createConnectorConfig(); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); snowflakeTelemetryService.reportKafkaConnectFatalError( "test error", "myChannel", "myTable", "myPipe"); LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode dataNode = sentData.get(0).getMessage().get("data"); assertEquals("test error", dataNode.get(TelemetryConstants.ERROR_DETAIL).asText()); assertEquals( "myChannel", dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME).asText()); assertEquals("myTable", dataNode.get(TelemetryConstants.TABLE_NAME).asText()); assertEquals("myPipe", 
dataNode.get(TelemetryConstants.PIPE_NAME).asText()); } @ParameterizedTest @EnumSource(value = IngestionMethodConfig.class) public void testReportKafkaPartitionUsage(IngestionMethodConfig ingestionMethodConfig) { // given Map connectorConfig = createConnectorConfig(); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); // expected values final String expectedTableName = "tableName"; final String expectedConnectorName = "connectorName"; final String expectedTpChannelName = "channelName"; final long expectedTpChannelCreationTime = 1234; final long expectedProcessedOffset = 1; final long expectedOffsetPersistedInSnowflake = 4; final long expectedLatestConsumerOffset = 5; SnowflakeTelemetryBasicInfo partitionUsage; SnowflakeTelemetryChannelStatus channelStatus = new SnowflakeTelemetryChannelStatus( expectedTableName, expectedConnectorName, expectedTpChannelName, expectedTpChannelCreationTime, Optional.empty(), new AtomicLong(expectedOffsetPersistedInSnowflake), new AtomicLong(expectedProcessedOffset), new AtomicLong(expectedLatestConsumerOffset)); channelStatus.incErrorToleratedCount(); channelStatus.incErrorToleratedCount(); channelStatus.incErrorToleratedCount(); channelStatus.updateFromChannelStatus( new ChannelStatus( "testDb", "testSchema", "testPipe", expectedTpChannelName, "SUCCESS", "0", Instant.now(), 100, 105, 2, "42", "some error", Instant.parse("2026-03-24T00:00:00Z"), Duration.ofMillis(45), Instant.now())); // Recovery count works without JMX channelStatus.incRecoveryCount(); channelStatus.incRecoveryCount(); partitionUsage = channelStatus; // when snowflakeTelemetryService.reportKafkaPartitionUsage(partitionUsage, false); // then LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode allNode = sentData.get(0).getMessage(); assertEquals("kafka_connector", allNode.get("source").asText()); assertEquals(Utils.VERSION, 
allNode.get("version").asText()); JsonNode dataNode = allNode.get("data"); assertNotNull(dataNode); assertTrue(dataNode.has(INGESTION_METHOD)); assertEquals(dataNode.get(INGESTION_METHOD).asInt(), ingestionMethodConfig.ordinal()); assertEquals( expectedProcessedOffset, dataNode.get(TelemetryConstants.PROCESSED_OFFSET).asLong()); assertEquals(expectedTableName, dataNode.get(TelemetryConstants.TABLE_NAME).asText()); assertEquals( expectedTpChannelCreationTime, dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME).asLong()); assertTrue( dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CLOSE_TIME).asLong() <= System.currentTimeMillis() && dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CLOSE_TIME).asLong() >= this.startTime); assertEquals( SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_USAGE.toString(), allNode.get("type").asText()); assertEquals( expectedLatestConsumerOffset, dataNode.get(TelemetryConstants.LATEST_CONSUMER_OFFSET).asLong()); assertEquals( expectedOffsetPersistedInSnowflake, dataNode.get(TelemetryConstants.OFFSET_PERSISTED_IN_SNOWFLAKE).asLong()); assertEquals( expectedTpChannelName, dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME).asText()); assertEquals(expectedConnectorName, dataNode.get(TelemetryConstants.CONNECTOR_NAME).asText()); // Error-tolerated count assertEquals(3, dataNode.get(TelemetryConstants.ERROR_TOLERATED_COUNT).asLong()); // Channel recovery count (works without JMX) assertEquals(2, dataNode.get(TelemetryConstants.CHANNEL_RECOVERY_COUNT).asLong()); // Validation disabled flag assertFalse(dataNode.get(TelemetryConstants.VALIDATION_DISABLED).asBoolean()); // SDK ChannelStatus fields assertEquals(100, dataNode.get(TelemetryConstants.ROWS_INSERTED_COUNT).asLong()); assertEquals(105, dataNode.get(TelemetryConstants.ROWS_PARSED_COUNT).asLong()); assertEquals(2, dataNode.get(TelemetryConstants.ROWS_ERROR_COUNT).asLong()); assertEquals(45, 
dataNode.get(TelemetryConstants.SERVER_AVG_PROCESSING_LATENCY_MS).asLong()); // SDK ChannelStatus identity and error fields assertEquals("testDb", dataNode.get(TelemetryConstants.DATABASE_NAME).asText()); assertEquals("testSchema", dataNode.get(TelemetryConstants.SCHEMA_NAME).asText()); assertEquals("testPipe", dataNode.get(TelemetryConstants.PIPE_NAME).asText()); assertEquals("SUCCESS", dataNode.get(TelemetryConstants.STATUS_CODE).asText()); assertFalse(dataNode.has("last_error_message")); // omitted for privacy assertEquals( "2026-03-24T00:00:00Z", dataNode.get(TelemetryConstants.LAST_ERROR_TIMESTAMP).asText()); assertEquals( "42", dataNode.get(TelemetryConstants.LAST_ERROR_OFFSET_TOKEN_UPPER_BOUND).asText()); // Backpressure/fallback counts (0 since not incremented in this test) assertEquals(0, dataNode.get(TelemetryConstants.BACKPRESSURE_RETRY_COUNT).asLong()); assertEquals(0, dataNode.get(TelemetryConstants.APPEND_ROW_FALLBACK_COUNT).asLong()); // Schema evolution failure count assertEquals(0, dataNode.get(TelemetryConstants.SCHEMA_EVOLUTION_FAILURE_COUNT).asLong()); } @ParameterizedTest @EnumSource(value = IngestionMethodConfig.class) public void testReportKafkaPartitionStart(IngestionMethodConfig ingestionMethodConfig) { // given Map connectorConfig = createConnectorConfig(); SnowflakeTelemetryService snowflakeTelemetryService = createSnowflakeTelemetryService(connectorConfig); SnowflakeTelemetryBasicInfo partitionCreation; final String expectedTableName = "tableName"; final String expectedChannelName = "channelName"; final long expectedChannelCreationTime = 1234; SnowflakeTelemetryChannelCreation channelCreation = new SnowflakeTelemetryChannelCreation( expectedTableName, expectedChannelName, expectedChannelCreationTime); channelCreation.setReuseTable(true); partitionCreation = channelCreation; // when snowflakeTelemetryService.reportKafkaPartitionStart(partitionCreation); // then LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); 
assertEquals(1, sentData.size()); JsonNode allNode = sentData.get(0).getMessage(); assertEquals("kafka_connector", allNode.get("source").asText()); assertEquals(Utils.VERSION, allNode.get("version").asText()); JsonNode dataNode = allNode.get("data"); assertNotNull(dataNode); assertTrue(dataNode.has(INGESTION_METHOD)); assertEquals(dataNode.get(INGESTION_METHOD).asInt(), ingestionMethodConfig.ordinal()); assertEquals(expectedTableName, dataNode.get(TelemetryConstants.TABLE_NAME).asText()); assertEquals( expectedChannelCreationTime, dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME).asLong()); assertEquals( SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_START.toString(), allNode.get("type").asText()); assertEquals( expectedChannelName, dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME).asText()); } private Map createConnectorConfig() { return TestUtils.getConnectorConfigurationForStreaming(false); } private SnowflakeTelemetryService createSnowflakeTelemetryService( Map connectorConfig) { SnowflakeTelemetryService snowflakeTelemetryService; snowflakeTelemetryService = new SnowflakeTelemetryService(mockTelemetryClient); ConnectorConfigTools.setDefaultValues(connectorConfig); snowflakeTelemetryService.setAppName("TEST_APP"); snowflakeTelemetryService.setTaskID("1"); return snowflakeTelemetryService; } private String sentTelemetryDataField(String field) { LinkedList sentData = this.mockTelemetryClient.getSentTelemetryData(); assertEquals(1, sentData.size()); JsonNode allNode = sentData.get(0).getMessage(); return allNode.get("data").get(field).asText(); } private void validateKeyAndValueConverter(JsonNode dataNode) { assertTrue(dataNode.has(KEY_CONVERTER)); assertTrue(dataNode.get(KEY_CONVERTER).asText().equalsIgnoreCase(KAFKA_STRING_CONVERTER)); assertTrue(dataNode.has(VALUE_CONVERTER)); assertTrue( dataNode.get(VALUE_CONVERTER).asText().equalsIgnoreCase(KAFKA_CONFLUENT_AVRO_CONVERTER)); } public static class MockTelemetryClient 
implements Telemetry { private final LinkedList telemetryDataList; private final LinkedList sentTelemetryData; private final ExecutorService executor = Executors.newSingleThreadExecutor(); public MockTelemetryClient() { this.telemetryDataList = new LinkedList<>(); this.sentTelemetryData = new LinkedList<>(); } @Override public void addLogToBatch(TelemetryData telemetryData) { this.telemetryDataList.add(telemetryData); } @Override public void close() { this.telemetryDataList.clear(); this.sentTelemetryData.clear(); } @Override public Future sendBatchAsync() { return executor.submit(() -> true); } @Override public void postProcess(String s, String s1, int i, Throwable throwable) {} public LinkedList getSentTelemetryData() { this.sentTelemetryData.addAll(telemetryDataList); this.telemetryDataList.clear(); return sentTelemetryData; } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/validation/DataValidationUtilTest.java ================================================ /* * COPIED FROM SNOWFLAKE INGEST SDK V1 * Source: snowflake-ingest-java/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java * * Modifications: * - Iceberg-specific tests removed (5 test methods) * - Package changed to com.snowflake.kafka.connector.internal.validation * - Added buildString() helper method inline (was in TestUtils) * * Copyright (c) 2024 Snowflake Computing Inc. All rights reserved. 
*/ package com.snowflake.kafka.connector.internal.validation; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.BYTES_16_MB; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.BYTES_8_MB; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.isAllowedSemiStructuredType; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseArray; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseArrayNew; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseBigDecimal; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseBinary; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseBoolean; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseDate; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseObject; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseObjectNew; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseReal; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseString; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseTime; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseTimestamp; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseVariant; import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseVariantNew; import static java.time.ZoneOffset.UTC; import static org.junit.Assert.assertArrayEquals; import static 
org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.OffsetTime; import java.time.ZoneId; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.TimeZone; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang3.StringUtils; import org.junit.Assert; import org.junit.Test; public class DataValidationUtilTest { private static final ObjectMapper objectMapper = new ObjectMapper(); /** Helper method to build a string by repeating a character */ private static String buildString(String str, int count) { StringBuilder sb = new StringBuilder(count); for (int i = 0; i < count; i++) { sb.append(str); } return sb.toString(); } private void expectErrorCodeAndMessage( ErrorCode expectedErrorCode, String expectedExceptionMessage, Runnable action) { try { action.run(); Assert.fail("Expected Exception"); } catch (SFExceptionValidation e) { assertEquals(expectedErrorCode.getMessageCode(), e.getVendorCode()); if (expectedExceptionMessage != null) assertEquals(expectedExceptionMessage, e.getMessage()); } catch (Exception e) { e.printStackTrace(); Assert.fail("Invalid error through"); } } private void expectError(ErrorCode expectedErrorCode, Runnable action) { expectErrorCodeAndMessage(expectedErrorCode, null, action); } @Test public void 
testValidateAndParseDate() {
    // Covers validateAndParseDate: java.time inputs, ISO-style strings (with surrounding
    // whitespace, offsets and zone ids), integer-like strings, and rejected inputs.
    assertEquals(9, validateAndParseDate("COL", LocalDate.of(1970, 1, 10), 0));
    assertEquals(9, validateAndParseDate("COL", LocalDateTime.of(1970, 1, 10, 1, 0), 0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            OffsetDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneOffset.of("-07:00")),
            0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            OffsetDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneOffset.of("+07:00")),
            0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            ZonedDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneId.of("America/Los_Angeles")),
            0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            ZonedDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneId.of("Asia/Tokyo")),
            0));
    assertEquals(19380, validateAndParseDate("COL", Instant.ofEpochMilli(1674478926000L), 0));

    assertEquals(-923, validateAndParseDate("COL", "1967-06-23", 0));
    assertEquals(-923, validateAndParseDate("COL", " 1967-06-23 \t\n", 0));
    assertEquals(-923, validateAndParseDate("COL", "1967-06-23T01:01:01", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00+07:00", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00-07:00", 0));
    assertEquals(
        18464, validateAndParseDate("COL", "2020-07-21T23:31:00-07:00[America/Los_Angeles]", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00+09:00[Asia/Tokyo]", 0));

    // Test integer-stored date
    // The same instant written with 10, 13, 16 and 19 digits must give the same day number.
    assertEquals(19380, validateAndParseDate("COL", "1674478926", 0));
    assertEquals(19380, validateAndParseDate("COL", "1674478926000", 0));
    assertEquals(19380, validateAndParseDate("COL", "1674478926000000", 0));
    assertEquals(19380, validateAndParseDate("COL", "1674478926000000000", 0));

    // Time input is not supported
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "20:57:01", 0));

    // Test values out of range
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseDate("COL", LocalDateTime.of(10000, 2, 2, 2, 2), 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseDate("COL", LocalDateTime.of(-10000, 2, 2, 2, 2), 0));

    // Test forbidden values
    // Unsupported Java types are expected to fail with INVALID_FORMAT_ROW, unparseable strings
    // with INVALID_VALUE_ROW.
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", new Object(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", LocalTime.now(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", OffsetTime.now(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", new java.util.Date(), 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", false, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "foo", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "1.0", 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 'c', 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 1, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 1L, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 1.25, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", BigInteger.valueOf(1), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseDate("COL", BigDecimal.valueOf(1.25), 0));
  }

  /**
   * Covers validateAndParseTime for scales 0, 1 and 9: time strings, integer-like strings,
   * {@link LocalTime}/{@link OffsetTime} objects, and rejected inputs.
   */
  @Test
  public void testValidateAndParseTime() {
    // Test local time
    assertEquals(46920, validateAndParseTime("COL", "13:02", 0, 0).longValueExact());
    assertEquals(46920, validateAndParseTime("COL", " 13:02 \t\n", 0, 0).longValueExact());
    assertEquals(46926, validateAndParseTime("COL", "13:02:06", 0, 0).longValueExact());
    assertEquals(469260, validateAndParseTime("COL", "13:02:06", 1, 0).longValueExact());
    assertEquals(46926000000000L, validateAndParseTime("COL", "13:02:06", 9, 0).longValueExact());

    assertEquals(46926, validateAndParseTime("COL", "13:02:06.1234", 0, 0).longValueExact());
    assertEquals(469261, validateAndParseTime("COL", "13:02:06.1234", 1, 0).longValueExact());
    assertEquals(
        46926123400000L, validateAndParseTime("COL", "13:02:06.1234", 9, 0).longValueExact());

    assertEquals(46926, validateAndParseTime("COL", "13:02:06.123456789", 0, 0).longValueExact());
    assertEquals(469261, validateAndParseTime("COL", "13:02:06.123456789", 1, 0).longValueExact());
    assertEquals(
        46926123456789L, validateAndParseTime("COL", "13:02:06.123456789", 9, 0).longValueExact());

    // Test that offset time does not make any difference
    assertEquals(
        46926123456789L,
        validateAndParseTime("COL", "13:02:06.123456789+09:00", 9, 0).longValueExact());
    assertEquals(
        46926123456789L,
        validateAndParseTime("COL", "13:02:06.123456789-09:00", 9, 0).longValueExact());

    // Test integer-stored time and scale guessing
    // 1674478926 s is 13:02:06 into its day (46926 s), matching the string cases above.
    assertEquals(46926L, validateAndParseTime("COL", "1674478926", 0, 0).longValueExact());
    assertEquals(46926L, validateAndParseTime("COL", "1674478926123", 0, 0).longValueExact());
    assertEquals(46926L, validateAndParseTime("COL", "1674478926123456", 0, 0).longValueExact());
    assertEquals(46926L, validateAndParseTime("COL", "1674478926123456789", 0, 0).longValueExact());

    assertEquals(469260L, validateAndParseTime("COL", "1674478926", 1, 0).longValueExact());
    assertEquals(469261L, validateAndParseTime("COL", "1674478926123", 1, 0).longValueExact());
    assertEquals(469261L, validateAndParseTime("COL", "1674478926123456", 1, 0).longValueExact());
    assertEquals(
        469261L, validateAndParseTime("COL", "1674478926123456789", 1, 0).longValueExact());

    assertEquals(46926000000000L, validateAndParseTime("COL", "1674478926", 9, 0).longValueExact());
    assertEquals(
        46926123000000L, validateAndParseTime("COL", "1674478926123", 9, 0).longValueExact());
    assertEquals(
        46926123456000L, validateAndParseTime("COL", "1674478926123456", 9, 0).longValueExact());
    assertEquals(
        46926123456789L, validateAndParseTime("COL", "1674478926123456789", 9, 0).longValueExact());

    // Test Java objects
    assertEquals(
        46926123456789L,
        validateAndParseTime("COL", LocalTime.of(13, 2, 6, 123456789), 9, 0).longValueExact());
    assertEquals(
        46926123456789L,
        validateAndParseTime(
                "COL", OffsetTime.of(13, 2, 6, 123456789, ZoneOffset.of("+09:00")), 9, 0)
            .longValueExact());

    // Dates and timestamps are forbidden
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "2023-01-19", 9, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTime("COL", "2023-01-19T14:23:55.878137", 9, 0));

    // Test forbidden values
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", LocalDate.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", LocalDateTime.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", OffsetDateTime.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", ZonedDateTime.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", Instant.now(), 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", new Date(), 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", 1.5f, 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", 1.5, 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "1.5", 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "1.0", 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", new Object(), 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", false, 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "", 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "foo", 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", java.sql.Time.valueOf("20:57:00"), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", java.sql.Date.valueOf("2010-11-03"), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", java.sql.Timestamp.valueOf("2010-11-03 20:57:00"), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", BigInteger.ZERO, 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", BigDecimal.ZERO, 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", 'c', 3, 0));
  }

  /**
   * Covers validateAndParseTimestamp: offset and local timestamp strings, integer-like strings
   * of different magnitudes, out-of-range years, and rejected inputs.
   *
   * @throws ParseException if one of the expected-value date literals cannot be parsed
   */
  @Test
  public void testValidateAndParseTimestamp() throws ParseException {
    TimestampWrapper wrapper =
        DataValidationUtil.validateAndParseTimestamp(
            "COL", "2021-01-01T01:00:00.123+01:00", 4, UTC, false, 0);
    assertEquals(1609459200, wrapper.getEpochSecond());
    assertEquals(123000000, wrapper.getFraction());
    assertEquals(3600, wrapper.getTimezoneOffsetSeconds());
    assertEquals(1500, wrapper.getTimeZoneIndex());

    wrapper = validateAndParseTimestamp("COL", " 2021-01-01T01:00:00.123 \t\n", 9, UTC, true, 0);
    Assert.assertEquals(1609462800, wrapper.getEpochSecond());
    Assert.assertEquals(123000000, wrapper.getFraction());
    Assert.assertEquals(new BigInteger("1609462800123000000"), wrapper.toBinary(false));

    // Test integer-stored time and scale guessing
    // Per the expected values below, "31535999999999" is read as milliseconds (year 2969)
    // while the next integer "31536000000000" is read as microseconds (year 1971).
    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    df.setTimeZone(TimeZone.getTimeZone("UTC"));
    assertEquals(
        BigInteger.valueOf(df.parse("1971-01-01 00:00:00.001").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31536000001", 9, UTC, true, 0).toBinary(false));

    assertEquals(
        BigInteger.valueOf(df.parse("2969-05-02 23:59:59.999").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31535999999999", 9, UTC, true, 0).toBinary(false));

    assertEquals(
        BigInteger.valueOf(df.parse("1971-01-01 00:00:00.000").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31536000000000", 9, UTC, true, 0).toBinary(false));

    // NOTE(review): this assertion is identical to the "31535999999999" one two statements
    // above; possibly a different boundary value was intended - confirm against upstream.
    assertEquals(
        BigInteger.valueOf(df.parse("2969-05-02 23:59:59.999").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31535999999999", 9, UTC, true, 0).toBinary(false));

    assertEquals(
        BigInteger.valueOf(df.parse("1971-01-01 00:00:00.000").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31536000000000000", 9, UTC, true, 0).toBinary(false));

    // Time input is not supported
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "20:57:01", 3, UTC, false, 0));

    // Test values out of range
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () ->
            validateAndParseTimestamp(
                "COL", LocalDateTime.of(10000, 2, 2, 2, 2), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", LocalDateTime.of(0, 2, 2, 2, 2), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", LocalDateTime.of(-1, 2, 2, 2, 2), 3, UTC, false, 0));

    // Test forbidden values
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", LocalTime.now(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", OffsetTime.now(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", new Date(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", 1.5f, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", 1.5, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "1.5", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "1.0", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", false, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTimestamp("COL", "", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "foo", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseTimestamp("COL", java.sql.Time.valueOf("20:57:00"), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseTimestamp(
                "COL", java.sql.Date.valueOf("2010-11-03"), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseTimestamp(
                "COL", java.sql.Timestamp.valueOf("2010-11-03 20:57:00"), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", BigInteger.ZERO, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", BigDecimal.ZERO, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", 'c', 3, UTC, false, 0));
  }

  /**
   * Integer and Long epoch inputs must parse to the same binary value as the equivalent decimal
   * string, including zero and negative epochs.
   */
  @Test
  public void testValidateAndParseTimestamp_integerEpoch() {
    // Integer epoch (seconds) — same as string "1709712000"
    TimestampWrapper fromInt = validateAndParseTimestamp("COL", 1709712000, 9, UTC, true, 0);
    TimestampWrapper fromStr = validateAndParseTimestamp("COL", "1709712000", 9, UTC, true, 0);
    assertEquals(fromStr.toBinary(false), fromInt.toBinary(false));

    // Long epoch (milliseconds) — same as string "1709712000000"
    TimestampWrapper fromLong = validateAndParseTimestamp("COL", 1709712000000L, 9, UTC, true, 0);
    TimestampWrapper fromStrMs =
        validateAndParseTimestamp("COL", "1709712000000", 9, UTC, true, 0);
    assertEquals(fromStrMs.toBinary(false), fromLong.toBinary(false));

    // Zero epoch
    TimestampWrapper fromZeroInt = validateAndParseTimestamp("COL", 0, 9, UTC, true, 0);
    TimestampWrapper fromZeroStr = validateAndParseTimestamp("COL", "0", 9, UTC, true, 0);
    assertEquals(fromZeroStr.toBinary(false), fromZeroInt.toBinary(false));

    // Negative epoch (before 1970)
    TimestampWrapper fromNeg = validateAndParseTimestamp("COL", -86400, 9, UTC, true, 0);
    TimestampWrapper fromNegStr = validateAndParseTimestamp("COL", "-86400", 9, UTC, true, 0);
    assertEquals(fromNegStr.toBinary(false), fromNeg.toBinary(false));

    // TIMESTAMP_LTZ with integer epoch
    TimestampWrapper ltzFromLong = validateAndParseTimestamp("COL", 1709712000L, 9, UTC, false, 0);
    assertEquals(fromStr.getEpochSecond(), ltzFromLong.getEpochSecond());
  }

  /**
   * Covers validateAndParseBigDecimal: decimal and exponent strings, primitive wrappers,
   * {@link BigDecimal}/{@link BigInteger} inputs, and rejected inputs.
   */
  @Test
  public void testValidateAndParseBigDecimal() {
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", "1", 0));
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", " 1 \t\n ", 0));

    // NOTE(review): the exponent cases compare toBigInteger() on both sides, which discards
    // the fractional part, so they only check the integer portion of the parsed value.
    assertEquals(
        new BigDecimal("1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "1e3", 0).toBigInteger());
    assertEquals(
        new BigDecimal("1000").toBigInteger(),
        validateAndParseBigDecimal("COL", " 1e3 \t\n", 0).toBigInteger());
    // NOTE(review): exact repeat of the first "1e3" assertion above.
    assertEquals(
        new BigDecimal("1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "1e3", 0).toBigInteger());
    assertEquals(
        new BigDecimal("-1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "-1e3", 0).toBigInteger());
    assertEquals(
        new BigDecimal("1").toBigInteger(),
        validateAndParseBigDecimal("COL", "1e0", 0).toBigInteger());
    assertEquals(
        new BigDecimal("-1").toBigInteger(),
        validateAndParseBigDecimal("COL", "-1e0", 0).toBigInteger());
    assertEquals(
        new BigDecimal("123").toBigInteger(),
        validateAndParseBigDecimal("COL", "1.23e2", 0).toBigInteger());
    assertEquals(
        new BigDecimal("123.4").toBigInteger(),
        validateAndParseBigDecimal("COL", "1.234e2", 0).toBigInteger());
    assertEquals(
        new BigDecimal("0.1234").toBigInteger(),
        validateAndParseBigDecimal("COL", "1.234e-1", 0).toBigInteger());
    // NOTE(review): 1234e-5 is 0.01234, not 0.1234; the next two assertions pass only because
    // both sides truncate to 0 - confirm the intended expected value.
    assertEquals(
        new BigDecimal("0.1234").toBigInteger(),
        validateAndParseBigDecimal("COL", "1234e-5", 0).toBigInteger());
    assertEquals(
        new BigDecimal("0.1234").toBigInteger(),
        validateAndParseBigDecimal("COL", "1234E-5", 0).toBigInteger());
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", 1, 0));
    assertEquals(new BigDecimal("1.0"), validateAndParseBigDecimal("COL", 1D, 0));
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", 1L, 0));
    assertEquals(new BigDecimal("1.0"), validateAndParseBigDecimal("COL", 1F, 0));
    assertEquals(
        BigDecimal.valueOf(10).pow(37),
        validateAndParseBigDecimal("COL", BigDecimal.valueOf(10).pow(37), 0));
    assertEquals(
        BigDecimal.valueOf(-1).multiply(BigDecimal.valueOf(10).pow(37)),
        validateAndParseBigDecimal(
            "COL", BigInteger.valueOf(-1).multiply(BigInteger.valueOf(10).pow(37)), 0));

    // Test forbidden values
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBigDecimal("COL", "honk", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBigDecimal("COL", "0x22", 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", true, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", new Object(), 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", 'a', 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", new byte[4], 0));
  }

  @Test
  public void testValidateAndParseString() {
    assertEquals("honk", validateAndParseString("COL", "honk", Optional.empty(), 0));

    // Check max byte length
    String maxString = buildString("a", BYTES_16_MB);
    assertEquals(maxString, validateAndParseString("COL", maxString, Optional.empty(), 0));

    // max byte length - 1 should also succeed
    String maxStringMinusOne = buildString("a", BYTES_16_MB - 1);
    assertEquals(
        maxStringMinusOne, validateAndParseString("COL", maxStringMinusOne, Optional.empty(), 0));
// max byte length + 1 should fail expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", maxString + "a", Optional.empty(), 0)); // Test that max character length validation counts characters and not bytes assertEquals("a", validateAndParseString("COL", "a", Optional.of(1), 0)); assertEquals("č", validateAndParseString("COL", "č", Optional.of(1), 0)); assertEquals("❄", validateAndParseString("COL", "❄", Optional.of(1), 0)); assertEquals("🍞", validateAndParseString("COL", "🍞", Optional.of(1), 0)); // Test max character length rejection expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", "a🍞", Optional.of(1), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", "12345", Optional.of(4), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", false, Optional.of(4), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", 12345, Optional.of(4), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", 1.2345, Optional.of(4), 0)); // Test that invalid UTF-8 strings cannot be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", "foo\uD800bar", Optional.empty(), 0)); // Test unsupported values expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseString("COL", new Object(), Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseString("COL", new byte[] {}, Optional.of(4), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseString("COL", new char[] {}, Optional.of(4), 0)); } @Test public void testValidateAndParseVariant() throws Exception { assertJson("variant", "1", 1); assertJson("variant", "1", "1"); assertJson("variant", "1", " 1 \n"); assertJson("variant", "{\"key\":1}", "{\"key\":1}"); assertJson("variant", "{\"key\":1}", " { \"key\": 1 } "); // Variants should preserve input format of numbers assertJson( "variant", 
"{\"key\":1111111.1111111}", " {\"key\": 1111111.1111111} \t\n", false); assertJson( "variant", "{\"key\":11.111111111111e8}", " {\"key\": 11.111111111111e8 } \t\n", false); assertJson( "variant", "{\"key\":11.111111111111e-8}", " {\"key\": 11.111111111111e-8 } \t\n", false); assertJson( "variant", "{\"key\":11.111111111111E8}", " {\"key\": 11.111111111111E8 } \t\n", false); assertJson( "variant", "{\"key\":11.111111111111E-8}", " {\"key\": 11.111111111111E-8 } \t\n", false); assertJson( "variant", "{\"key\":11111111111111e8}", " {\"key\": 11111111111111e8 } \t\n", false); assertJson( "variant", "{\"key\":11111111111111e-8}", " {\"key\": 11111111111111e-8 } \t\n", false); assertJson( "variant", "{\"key\":11111111111111E8}", " {\"key\": 11111111111111E8 } \t\n", false); assertJson( "variant", "{\"key\":11111111111111E-8}", " {\"key\": 11111111111111E-8 } \t\n", false); // Test custom serializers assertJson("variant", "[-128,0,127]", new byte[] {Byte.MIN_VALUE, 0, Byte.MAX_VALUE}); assertJson( "variant", "\"2022-09-28T03:04:12.123456789-07:00\"", ZonedDateTime.of(2022, 9, 28, 3, 4, 12, 123456789, ZoneId.of("America/Los_Angeles"))); // Test valid JSON tokens assertJson("variant", "null", null); assertJson("variant", "null", "null"); assertJson("variant", "true", true); assertJson("variant", "true", "true"); assertJson("variant", "false", false); assertJson("variant", "false", "false"); assertJson("variant", "[]", "[]"); assertJson("variant", "{}", "{}"); assertJson("variant", "[\"foo\",1,null]", "[\"foo\",1,null]"); assertJson("variant", "\"\"", "\"\""); // Test missing values are null instead of empty string assertNull(validateAndParseVariant("COL", "", 0)); assertNull(validateAndParseVariantNew("COL", "", 0)); assertNull(validateAndParseVariant("COL", " ", 0)); assertNull(validateAndParseVariantNew("COL", " ", 0)); // Test that invalid UTF-8 strings cannot be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", "\"foo\uD800bar\"", 
0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "\"foo\uD800bar\"", 0)); // Test forbidden values expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", "{null}", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "{null}", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", "}{", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "}{", 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", readTree("{}"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", readTree("{}"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", new Object(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", new Object(), 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", "foo", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "foo", 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", new Date(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", new Date(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", Collections.singletonList(new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", Collections.singletonList(new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant( "COL", Collections.singletonList(Collections.singletonMap("foo", new Object())), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew( "COL", Collections.singletonList(Collections.singletonMap("foo", new Object())), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", Collections.singletonMap(new Object(), "foo"), 
0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", Collections.singletonMap("foo", new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", Collections.singletonMap("foo", new Object()), 0)); // Test stripping null terminator assertJson("variant", "{\"key\":0,\"\\u0000key\":1}", "{\"key\":0,\"\\u0000key\":1}", false); assertJson("variant", "{\"key\\u0000\":0}", "{\"key\\u0000\":0}", false); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "{\"key\": 0, \"key\\u0000\": 1}", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "{\"key\": 0, \"key\\u0000\\u0000\": 1}", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew( "COL", "{\"key\": {\"key\": {\"key\": 0, \"key\\u0000\": 1}}}", 0)); assertJson( "variant", "{\"key\":0,\"\\u0000key\":1}", new HashMap() { { put("key", 0); put("\u0000key", 1); } }, false); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew( "COL", new HashMap() { { put("key", 0); put("key\u0000", 1); } }, 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew( "COL", new HashMap() { { put("key", 0); put("key\u0000\u0000", 1); } }, 0)); // Test that invalid UTF-8 map keys or values cannot be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew( "COL", new HashMap() { { put("foo\uD800bar", 1); } }, 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew( "COL", new HashMap() { { put("key", "foo\uD800bar"); } }, 0)); } /** Convenience overload of the four-argument assertJson that passes true for alsoTestOldApproach. */ private void assertJson(String colType, String expectedValue, Object value) { assertJson(colType, expectedValue, value, true); } /** Asserts that the "New" validator selected by colType ("variant", "array" or "object", matched case-insensitively) returns expectedValue for value; when alsoTestOldApproach is true, the corresponding non-"New" validator is checked against the same expected value. Any other colType fails the test. */ private void assertJson( String colType, String expectedValue, Object value, boolean 
alsoTestOldApproach) { if (colType.equalsIgnoreCase("variant")) { assertEquals(expectedValue, validateAndParseVariantNew("COL", value, 0)); if (alsoTestOldApproach) { assertEquals(expectedValue, validateAndParseVariant("COL", value, 0)); } } else if (colType.equalsIgnoreCase("array")) { assertEquals(expectedValue, validateAndParseArrayNew("COL", value, 0)); if (alsoTestOldApproach) { assertEquals(expectedValue, validateAndParseArray("COL", value, 0)); } } else if (colType.equalsIgnoreCase("object")) { assertEquals(expectedValue, validateAndParseObjectNew("COL", value, 0)); if (alsoTestOldApproach) { assertEquals(expectedValue, validateAndParseObject("COL", value, 0)); } } else { Assert.fail("Unexpected colType " + colType); } } @Test public void testValidateAndParseArray() throws Exception { assertJson("array", "[1]", 1); assertJson("array", "[1]", "1"); assertJson("array", "[\"1\"]", "\"1\""); assertJson("array", "[1.1e10]", " 1.1e10 ", false); assertJson("array", "[1,2,3]", " [1, 2, 3] \t\n"); assertJson("array", "[1,2,3]", new int[] {1, 2, 3}); assertJson("array", "[\"a\",\"b\",\"c\"]", new String[] {"a", "b", "c"}); assertJson("array", "[1,2,3]", new Object[] {1, 2, 3}); assertJson("array", "[1,null,3]", new Object[] {1, null, 3}); assertJson("array", "[[1,2,3],null,[4,5,6]]", new Object[][] {{1, 2, 3}, null, {4, 5, 6}}); assertJson("array", "[1,2,3]", Arrays.asList(1, 2, 3)); assertJson("array", "[[1,2,3],2,3]", Arrays.asList(Arrays.asList(1, 2, 3), 2, 3)); // Test null values assertJson("array", "[null]", ""); assertJson("array", "[null]", " "); assertJson("array", "[null]", "null"); assertJson("array", "[null]", null); // Test that invalid UTF-8 strings cannot be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArray("COL", "\"foo\uD800bar\"", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArrayNew("COL", "\"foo\uD800bar\"", 0)); // Test forbidden values expectError( ErrorCode.INVALID_FORMAT_ROW, () -> 
validateAndParseArray("COL", readTree("[]"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", readTree("[]"), 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray("COL", new Object(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", new Object(), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArray("COL", "foo", 0)); // invalid JSON expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArrayNew("COL", "foo", 0)); // invalid JSON expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray("COL", new Date(), 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", new Date(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray("COL", Collections.singletonList(new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", Collections.singletonList(new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray( "COL", Collections.singletonList(Collections.singletonMap("foo", new Object())), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew( "COL", Collections.singletonList(Collections.singletonMap("foo", new Object())), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray("COL", Collections.singletonMap("foo", new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", Collections.singletonMap("foo", new Object()), 0)); } @Test public void testValidateAndParseObject() throws Exception { assertJson("object", "{}", " { } "); assertJson("object", "{\"key\":1}", 
"{\"key\":1}"); assertJson("object", "{\"key\":1}", " { \"key\" : 1 } "); assertJson("object", "{\"key\":111.111}", " { \"key\" : 111.111 } "); assertJson("object", "{\"key\":111.111e6}", " { \"key\" : 111.111e6 } ", false); assertJson("object", "{\"key\":111.111E6}", " { \"key\" : 111.111E6 } ", false); assertJson("object", "{\"key\":111.111e-6}", " { \"key\" : 111.111e-6 } ", false); assertJson("object", "{\"key\":111.111E-6}", " { \"key\" : 111.111E-6 } ", false); final String tooLargeObject = objectMapper.writeValueAsString( Collections.singletonMap("key", StringUtils.repeat('a', 20000000))); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", tooLargeObject, 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", tooLargeObject, 0)); // Test that invalid UTF-8 strings cannot be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "{\"foo\": \"foo\uD800bar\"}", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "{\"foo\": \"foo\uD800bar\"}", 0)); // Test forbidden values expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "", 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", readTree("{}"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", readTree("{}"), 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "[]", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "[]", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "1", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "1", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", 1, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> 
validateAndParseObjectNew("COL", 1, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", 1.5, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", 1.5, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", false, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", false, 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", new Object(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", new Object(), 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "foo", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "foo", 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", new Date(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", new Date(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", Collections.singletonList(new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", Collections.singletonList(new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject( "COL", Collections.singletonList(Collections.singletonMap("foo", new Object())), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew( "COL", Collections.singletonList(Collections.singletonMap("foo", new Object())), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> 
validateAndParseObjectNew("COL", Collections.singletonMap(new Object(), "foo"), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", Collections.singletonMap("foo", new Object()), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", Collections.singletonMap("foo", new Object()), 0)); } /** Verifies that the "New" object, variant and array validators reject JSON text containing duplicate object keys (top-level, nested, and inside an array) with INVALID_VALUE_ROW. */ @Test public void testValidateDuplicateKeys() { // simple JSON object with duplicate keys can not be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "{\"key\":1, \"key\":2}", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "{\"key\":1, \"key\":2}", 0)); // nested JSON object with duplicate keys can not be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}", 0)); // array of objects with duplicate keys can not be ingested expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArrayNew("COL", "[{\"key\":1, \"key\":2}]", 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", "[{\"key\":1, \"key\":2}]", 0)); } /** Verifies that a map holding a string of 16 * 1024 * 1024 - 16 characters next to a small entry is rejected with INVALID_VALUE_ROW by the variant, array and object validators. */ @Test public void testTooLargeVariant() { char[] stringContent = new char[16 * 1024 * 1024 - 16]; // {"a":"11","b":""} Arrays.fill(stringContent, 'c'); // {"a":"11","b":""} Map m = new HashMap<>(); m.put("a", "11"); m.put("b", new String(stringContent)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", m, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArray("COL", m, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", m, 0)); } /** Verifies that the semi-structured size limit is applied to bytes rather than characters: a map holding 9 * 1024 * 1024 copies of a multi-byte character is rejected by the variant, array and object validators, and the exact error messages (including the reported length and maxLength) are checked. */ @Test public void testTooLargeMultiByteSemiStructuredValues() { // Variant max size is not in characters, but in bytes char[] stringContent = new char[9 * 1024 * 1024]; // 8MB 
< value < 16MB Arrays.fill(stringContent, 'Č'); Map m = new HashMap<>(); m.put("a", new String(stringContent)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type VARIANT, rowIndex:0, reason:" + " Variant too long: length=18874376 maxLength=16777152", () -> validateAndParseVariant("COL", m, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type ARRAY, rowIndex:0, reason:" + " Array too large. length=18874378 maxLength=16777152", () -> validateAndParseArray("COL", m, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type OBJECT, rowIndex:0, reason:" + " Object too large. 
length=18874376 maxLength=16777152", () -> validateAndParseObject("COL", m, 0)); } @Test public void testValidVariantType() { // Test primitive types Assert.assertTrue(isAllowedSemiStructuredType((byte) 1)); Assert.assertTrue(isAllowedSemiStructuredType((short) 1)); Assert.assertTrue(isAllowedSemiStructuredType(1)); Assert.assertTrue(isAllowedSemiStructuredType(1L)); Assert.assertTrue(isAllowedSemiStructuredType(1.25f)); Assert.assertTrue(isAllowedSemiStructuredType(1.25d)); Assert.assertTrue(isAllowedSemiStructuredType(false)); Assert.assertTrue(isAllowedSemiStructuredType('c')); // Test boxed primitive types Assert.assertTrue(isAllowedSemiStructuredType(Byte.valueOf((byte) 1))); Assert.assertTrue(isAllowedSemiStructuredType(Short.valueOf((short) 1))); Assert.assertTrue(isAllowedSemiStructuredType(Integer.valueOf(1))); Assert.assertTrue(isAllowedSemiStructuredType(Long.valueOf(1L))); Assert.assertTrue(isAllowedSemiStructuredType(Float.valueOf(1.25f))); Assert.assertTrue(isAllowedSemiStructuredType(Double.valueOf(1.25d))); Assert.assertTrue(isAllowedSemiStructuredType(Boolean.valueOf(false))); Assert.assertTrue(isAllowedSemiStructuredType(Character.valueOf('c'))); // Test primitive arrays Assert.assertTrue(isAllowedSemiStructuredType(new byte[] {1})); Assert.assertTrue(isAllowedSemiStructuredType(new short[] {1})); Assert.assertTrue(isAllowedSemiStructuredType(new int[] {1})); Assert.assertTrue(isAllowedSemiStructuredType(new long[] {1L})); Assert.assertTrue(isAllowedSemiStructuredType(new float[] {1.25f})); Assert.assertTrue(isAllowedSemiStructuredType(new double[] {1.25d})); Assert.assertTrue(isAllowedSemiStructuredType(new boolean[] {false})); Assert.assertTrue(isAllowedSemiStructuredType(new char[] {'c'})); // Test primitive lists Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList((byte) 1))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList((short) 1))); 
Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(1))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(1L))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(1.25f))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(1.25d))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(false))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList('c'))); // Test additional numeric types and their collections Assert.assertTrue(isAllowedSemiStructuredType(new BigInteger("1"))); Assert.assertTrue(isAllowedSemiStructuredType(new BigInteger[] {new BigInteger("1")})); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(new BigInteger("1")))); Assert.assertTrue(isAllowedSemiStructuredType(new BigDecimal("1.25"))); Assert.assertTrue(isAllowedSemiStructuredType(new BigDecimal[] {new BigDecimal("1.25")})); Assert.assertTrue( isAllowedSemiStructuredType(Collections.singletonList(new BigDecimal("1.25")))); // Test strings Assert.assertTrue(isAllowedSemiStructuredType("foo")); Assert.assertTrue(isAllowedSemiStructuredType(new String[] {"foo"})); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList("foo"))); // Test date/time objects and their collections Assert.assertTrue(isAllowedSemiStructuredType(LocalTime.now())); Assert.assertTrue(isAllowedSemiStructuredType(OffsetTime.now())); Assert.assertTrue(isAllowedSemiStructuredType(LocalDate.now())); Assert.assertTrue(isAllowedSemiStructuredType(LocalDateTime.now())); Assert.assertTrue(isAllowedSemiStructuredType(ZonedDateTime.now())); Assert.assertTrue(isAllowedSemiStructuredType(OffsetDateTime.now())); Assert.assertTrue(isAllowedSemiStructuredType(new LocalTime[] {LocalTime.now()})); Assert.assertTrue(isAllowedSemiStructuredType(new OffsetTime[] {OffsetTime.now()})); Assert.assertTrue(isAllowedSemiStructuredType(new LocalDate[] {LocalDate.now()})); 
Assert.assertTrue(isAllowedSemiStructuredType(new LocalDateTime[] {LocalDateTime.now()})); Assert.assertTrue(isAllowedSemiStructuredType(new ZonedDateTime[] {ZonedDateTime.now()})); Assert.assertTrue(isAllowedSemiStructuredType(new OffsetDateTime[] {OffsetDateTime.now()})); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(LocalTime.now()))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(OffsetTime.now()))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(LocalDate.now()))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(LocalDateTime.now()))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(ZonedDateTime.now()))); Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonList(OffsetDateTime.now()))); // Test mixed collections Assert.assertTrue( isAllowedSemiStructuredType( new Object[] { 1, false, new BigInteger("1"), LocalDateTime.now(), new Object[] {new Object[] {new Object[] {LocalDateTime.now(), false}}} })); Assert.assertFalse( isAllowedSemiStructuredType( new Object[] { 1, false, new BigInteger("1"), LocalDateTime.now(), new Object[] {new Object[] {new Object[] {new Object(), false}}} })); Assert.assertTrue( isAllowedSemiStructuredType( Arrays.asList( new BigInteger("1"), "foo", false, Arrays.asList(13, Arrays.asList(Arrays.asList(false, 'c')))))); Assert.assertFalse( isAllowedSemiStructuredType( Arrays.asList( new BigInteger("1"), "foo", false, Arrays.asList(13, Arrays.asList(Arrays.asList(new Object(), 'c')))))); // Test maps Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonMap("foo", "bar"))); Assert.assertFalse(isAllowedSemiStructuredType(Collections.singletonMap(new Object(), "foo"))); Assert.assertFalse(isAllowedSemiStructuredType(Collections.singletonMap("foo", new Object()))); Assert.assertTrue( isAllowedSemiStructuredType( Collections.singletonMap( "foo", new Object[] { 1, false, new BigInteger("1"), 
LocalDateTime.now(), new Object[] {new Object[] {new Object[] {LocalDateTime.now(), false}}} }))); Assert.assertFalse( isAllowedSemiStructuredType( Collections.singletonMap( "foo", new Object[] { 1, false, new BigInteger("1"), LocalDateTime.now(), new Object[] {new Object[] {new Object[] {new Object(), false}}} }))); Assert.assertTrue( isAllowedSemiStructuredType( Collections.singletonMap( "foo", Arrays.asList( new BigInteger("1"), "foo", false, Arrays.asList(13, Arrays.asList(Arrays.asList(false, 'c'))))))); Assert.assertFalse( isAllowedSemiStructuredType( Collections.singletonMap( "foo", Arrays.asList( new BigInteger("1"), "foo", false, Arrays.asList(13, Arrays.asList(Arrays.asList(new Object(), 'c'))))))); } @Test public void testValidateAndParseBinary() throws DecoderException { byte[] maxAllowedArray = new byte[BYTES_8_MB]; byte[] maxAllowedArrayMinusOne = new byte[BYTES_8_MB - 1]; assertArrayEquals( "honk".getBytes(StandardCharsets.UTF_8), validateAndParseBinary( "COL", "honk".getBytes(StandardCharsets.UTF_8), Optional.empty(), 0)); assertArrayEquals( new byte[] {-1, 0, 1}, validateAndParseBinary("COL", new byte[] {-1, 0, 1}, Optional.empty(), 0)); assertArrayEquals( Hex.decodeHex("1234567890abcdef"), // pragma: allowlist secret NOT A SECRET validateAndParseBinary( "COL", "1234567890abcdef", // pragma: allowlist secret NOT A SECRET Optional.empty(), 0)); // pragma: allowlist secret NOT A SECRET assertArrayEquals( Hex.decodeHex("1234567890abcdef"), // pragma: allowlist secret NOT A SECRET validateAndParseBinary( "COL", " 1234567890abcdef \t\n", Optional.empty(), 0)); // pragma: allowlist secret NOT A SECRET assertArrayEquals( maxAllowedArray, validateAndParseBinary("COL", maxAllowedArray, Optional.empty(), 0)); assertArrayEquals( maxAllowedArrayMinusOne, validateAndParseBinary("COL", maxAllowedArrayMinusOne, Optional.empty(), 0)); // Too large arrays should be rejected expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", new byte[1], 
Optional.of(0), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", new byte[BYTES_8_MB + 1], Optional.empty(), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", new byte[8], Optional.of(7), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", "aabb", Optional.of(1), 0)); // unsupported data types should fail expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", "000", Optional.empty(), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", "abcg", Optional.empty(), 0)); expectError( ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBinary("COL", "c", Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary( "COL", Arrays.asList((byte) 1, (byte) 2, (byte) 3), Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary("COL", 1, Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary("COL", 12, Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary("COL", 1.5, Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary("COL", BigInteger.ONE, Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary("COL", false, Optional.empty(), 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBinary("COL", new Object(), Optional.empty(), 0)); } @Test public void testValidateAndParseReal() throws Exception { // From number types assertEquals(1.23d, validateAndParseReal("COL", 1.23f, 0), 0); assertEquals(1.23d, validateAndParseReal("COL", 1.23, 0), 0); assertEquals(1.23d, validateAndParseReal("COL", 1.23d, 0), 0); assertEquals(1.23d, validateAndParseReal("COL", new BigDecimal("1.23"), 0), 0); assertEquals(Double.NaN, validateAndParseReal("COL", "Nan", 0), 0); assertEquals(Double.POSITIVE_INFINITY, 
validateAndParseReal("COL", "inF", 0), 0); assertEquals(Double.NEGATIVE_INFINITY, validateAndParseReal("COL", "-inF", 0), 0); assertEquals(Double.NEGATIVE_INFINITY, validateAndParseReal("COL", " -inF \t\n", 0), 0); // From string assertEquals(1.23d, validateAndParseReal("COL", " 1.23 \t\n", 0), 0); assertEquals(1.23d, validateAndParseReal("COL", "1.23", 0), 0); assertEquals(123d, validateAndParseReal("COL", "1.23E2", 0), 0); assertEquals(123d, validateAndParseReal("COL", "1.23e2", 0), 0); // Test forbidden values expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseReal("COL", "foo", 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseReal("COL", 'c', 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseReal("COL", new Object(), 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseReal("COL", false, 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseReal("COL", true, 0)); } @Test public void testValidateAndParseBoolean() { for (Object input : Arrays.asList( true, "true", "True", "TruE", "t", "yes", "YeS", "y", "on", "1", " true \t\n", 1.1, -1.1, -10, 10)) { assertEquals(1, validateAndParseBoolean("COL", input, 0)); } int rowIndex = 0; for (Object input : Arrays.asList(false, "false", "False", "FalsE", "f", "no", "NO", "n", "off", "0", 0)) { assertEquals(0, validateAndParseBoolean("COL", input, rowIndex)); rowIndex += 1; } // Test forbidden values expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBoolean("COL", new Object(), 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBoolean("COL", 't', 0)); expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBoolean("COL", 'f', 0)); expectError( ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBoolean("COL", new int[] {}, 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBoolean("COL", "foobar", 0)); expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBoolean("COL", "", 
0)); } /** * Tests that exception message are constructed correctly when ingesting forbidden Java type, as * well a value of an allowed type, but in invalid format */ @Test public void testExceptionMessages() { // BOOLEAN expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type BOOLEAN, rowIndex:0. Allowed" + " Java types: boolean, Number, String", () -> validateAndParseBoolean("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type BOOLEAN, rowIndex:0, reason:" + " Not a valid boolean, see" + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean" + " for the list of supported formats", () -> validateAndParseBoolean("COL", "abc", 0)); // TIME expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type TIME, rowIndex:0. 
Allowed" + " Java types: String, LocalTime, OffsetTime", () -> validateAndParseTime("COL", new Object(), 10, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type TIME, rowIndex:0, reason:" + " Not a valid time, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", () -> validateAndParseTime("COL", "abc", 10, 0)); // DATE expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type DATE, rowIndex:0. Allowed" + " Java types: String, LocalDate, LocalDateTime, ZonedDateTime, OffsetDateTime", () -> validateAndParseDate("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type DATE, rowIndex:0, reason:" + " Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", () -> validateAndParseDate("COL", "abc", 0)); // TIMESTAMP_NTZ expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0." 
+ " Allowed Java types: String, LocalDate, LocalDateTime, ZonedDateTime," + " OffsetDateTime", () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, true, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", () -> validateAndParseTimestamp("COL", "abc", 3, UTC, true, 0)); // TIMESTAMP_LTZ expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0." + " Allowed Java types: String, LocalDate, LocalDateTime, ZonedDateTime," + " OffsetDateTime", () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, false, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", () -> validateAndParseTimestamp("COL", "abc", 3, UTC, false, 0)); // TIMESTAMP_TZ expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0." 
+ " Allowed Java types: String, LocalDate, LocalDateTime, ZonedDateTime," + " OffsetDateTime", () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, false, 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type TIMESTAMP, rowIndex:0," + " reason: Not a valid value, see" + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview for" + " the list of supported formats", () -> validateAndParseTimestamp("COL", "abc", 3, UTC, false, 0)); // NUMBER expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type NUMBER, rowIndex:0. Allowed" + " Java types: int, long, byte, short, float, double, BigDecimal, BigInteger, String", () -> validateAndParseBigDecimal("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type NUMBER, rowIndex:0, reason:" + " Not a valid number", () -> validateAndParseBigDecimal("COL", "abc", 0)); // REAL expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type REAL, rowIndex:0. 
Allowed" + " Java types: Number, String", () -> validateAndParseReal("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type REAL, rowIndex:0, reason:" + " Not a valid decimal number", () -> validateAndParseReal("COL", "abc", 0)); // STRING expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type STRING, rowIndex:0. Allowed" + " Java types: String, Number, boolean, char", () -> validateAndParseString("COL", new Object(), Optional.empty(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type STRING, rowIndex:0, reason:" + " String too long: length=3 characters maxLength=2 characters", () -> validateAndParseString("COL", "abc", Optional.of(2), 0)); // BINARY expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type BINARY, rowIndex:0. 
Allowed" + " Java types: byte[], String", () -> validateAndParseBinary("COL", new Object(), Optional.empty(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type BINARY, rowIndex:0, reason:" + " Binary too long: length=2 maxLength=1", () -> validateAndParseBinary("COL", new byte[] {1, 2}, Optional.of(1), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type BINARY, rowIndex:0, reason:" + " Not a valid hex string", () -> validateAndParseBinary("COL", "ghi", Optional.empty(), 0)); // VARIANT expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type VARIANT, rowIndex:0. Allowed" + " Java types: String, Primitive data types and their arrays, java.time.*, List," + " Map, T[]", () -> validateAndParseVariant("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type VARIANT, rowIndex:0, reason:" + " Not a valid JSON", () -> validateAndParseVariant("COL", "][", 0)); // ARRAY expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type ARRAY, rowIndex:0. 
Allowed" + " Java types: String, Primitive data types and their arrays, java.time.*, List," + " Map, T[]", () -> validateAndParseArray("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type ARRAY, rowIndex:0, reason:" + " Not a valid JSON", () -> validateAndParseArray("COL", "][", 0)); // OBJECT expectErrorCodeAndMessage( ErrorCode.INVALID_FORMAT_ROW, "The given row cannot be converted to the internal format: Object of type java.lang.Object" + " cannot be ingested into Snowflake column COL of type OBJECT, rowIndex:0. Allowed" + " Java types: String, Primitive data types and their arrays, java.time.*, List," + " Map, T[]", () -> validateAndParseObject("COL", new Object(), 0)); expectErrorCodeAndMessage( ErrorCode.INVALID_VALUE_ROW, "The given row cannot be converted to the internal format due to invalid value: Value" + " cannot be ingested into Snowflake column COL of type OBJECT, rowIndex:0, reason:" + " Not a valid JSON", () -> validateAndParseObject("COL", "}{", 0)); } // ================ validateAndParseVariantAsObject ================ @Test public void testValidateAndParseVariantAsObject_jsonObject() { Object result = DataValidationUtil.validateAndParseVariantAsObject("COL", "{\"a\":1,\"b\":true}", 0); Assert.assertTrue(result instanceof Map); Map map = (Map) result; assertEquals(1, map.get("a")); assertEquals(true, map.get("b")); } @Test public void testValidateAndParseVariantAsObject_jsonArray() { Object result = DataValidationUtil.validateAndParseVariantAsObject("COL", "[1,2,3]", 0); Assert.assertTrue(result instanceof java.util.List); assertEquals(Arrays.asList(1, 2, 3), result); } @Test public void testValidateAndParseVariantAsObject_primitive() { assertEquals(42, DataValidationUtil.validateAndParseVariantAsObject("COL", "42", 0)); assertEquals(true, 
DataValidationUtil.validateAndParseVariantAsObject("COL", "true", 0)); assertEquals( "hello", DataValidationUtil.validateAndParseVariantAsObject("COL", "\"hello\"", 0)); } @Test public void testValidateAndParseVariantAsObject_missingNode() { assertNull(DataValidationUtil.validateAndParseVariantAsObject("COL", "", 0)); assertNull(DataValidationUtil.validateAndParseVariantAsObject("COL", " ", 0)); } @Test public void testValidateAndParseVariantAsObject_invalidJson() { expectError( ErrorCode.INVALID_VALUE_ROW, () -> DataValidationUtil.validateAndParseVariantAsObject("COL", "not_json", 0)); } @Test public void testValidateAndParseVariantAsObject_nativePassthrough() { Map nativeMap = Collections.singletonMap("key", "value"); Object result = DataValidationUtil.validateAndParseVariantAsObject("COL", nativeMap, 0); Assert.assertTrue(result instanceof Map); assertEquals("value", ((Map) result).get("key")); } // ================ validateAndParseArrayAsList ================ @Test public void testValidateAndParseArrayAsList_jsonArray() { java.util.List result = DataValidationUtil.validateAndParseArrayAsList("COL", "[1,2,3]", 0); assertEquals(Arrays.asList(1, 2, 3), result); } @Test public void testValidateAndParseArrayAsList_nonArrayWrapped() { java.util.List result = DataValidationUtil.validateAndParseArrayAsList("COL", "\"hello\"", 0); assertEquals(Collections.singletonList("hello"), result); } @Test public void testValidateAndParseArrayAsList_nativeList() { java.util.List result = DataValidationUtil.validateAndParseArrayAsList("COL", Arrays.asList(10, 20), 0); assertEquals(Arrays.asList(10, 20), result); } @Test public void testValidateAndParseArrayAsList_invalidJson() { expectError( ErrorCode.INVALID_VALUE_ROW, () -> DataValidationUtil.validateAndParseArrayAsList("COL", "not_json", 0)); } // ================ validateAndFormatTimestamp ================ @Test public void testValidateAndFormatTimestamp_integerEpochNtz() { // 1705312800 seconds = 2024-01-15T10:00:00 UTC String 
result = DataValidationUtil.validateAndFormatTimestamp("COL", 1705312800, UTC, true, 0); assertEquals("2024-01-15T10:00", result); } @Test public void testValidateAndFormatTimestamp_longEpochNtz() { String result = DataValidationUtil.validateAndFormatTimestamp("COL", 1705312800L, UTC, true, 0); assertEquals("2024-01-15T10:00", result); } @Test public void testValidateAndFormatTimestamp_integerEpochLtz() { String result = DataValidationUtil.validateAndFormatTimestamp("COL", 1705312800, UTC, false, 0); assertEquals("2024-01-15T10:00Z", result); } @Test public void testValidateAndFormatTimestamp_stringPassthrough() { // String input with explicit timezone String result = DataValidationUtil.validateAndFormatTimestamp( "COL", "2024-01-15T13:45:30+05:00", UTC, false, 0); assertEquals("2024-01-15T13:45:30+05:00", result); } @Test public void testValidateAndFormatTimestamp_invalidString() { expectError( ErrorCode.INVALID_VALUE_ROW, () -> DataValidationUtil.validateAndFormatTimestamp("COL", "not_a_timestamp", UTC, true, 0)); } private JsonNode readTree(String value) { try { return objectMapper.readTree(value); } catch (JsonProcessingException e) { throw new RuntimeException(e); } } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/validation/RowValidatorTest.java ================================================ /* * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved. * * Tests for the validation integration layer (Commit 4). 
*/ package com.snowflake.kafka.connector.internal.validation; import static org.junit.jupiter.api.Assertions.*; import java.sql.ResultSet; import java.sql.SQLException; import java.util.*; import org.junit.jupiter.api.Test; import org.mockito.Mockito; /** Tests for RowValidator, ColumnSchema, and ValidationResult */ public class RowValidatorTest { // ================ ColumnSchema Tests ================ @Test public void testColumnSchemaParseNumber() throws SQLException { ResultSet rs = mockDescribeTableRow("COL1", "NUMBER(38,0)", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals("COL1", schema.getName()); assertEquals(ColumnLogicalType.FIXED, schema.getLogicalType()); assertEquals(ColumnPhysicalType.SB16, schema.getPhysicalType()); assertTrue(schema.isNullable()); assertEquals(38, schema.getPrecision()); assertEquals(0, schema.getScale()); } @Test public void testColumnSchemaParseVarchar() throws SQLException { ResultSet rs = mockDescribeTableRow("COL2", "VARCHAR(16777216)", "N"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals("COL2", schema.getName()); assertEquals(ColumnLogicalType.TEXT, schema.getLogicalType()); assertEquals(ColumnPhysicalType.LOB, schema.getPhysicalType()); assertFalse(schema.isNullable()); assertEquals(16777216, schema.getLength()); // byteLength capped at 16MB (SSv1 SDK limit), not 16777216 * 4 = 64MB assertEquals(16777216, schema.getByteLength()); } @Test public void testColumnSchemaParseVarcharSmall() throws SQLException { // For small VARCHAR, byteLength = length * 4 (no capping needed) ResultSet rs = mockDescribeTableRow("COL3", "VARCHAR(1000)", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals("COL3", schema.getName()); assertEquals(ColumnLogicalType.TEXT, schema.getLogicalType()); assertEquals(1000, schema.getLength()); assertEquals(4000, schema.getByteLength()); // 1000 * 4, no capping } @Test public void testColumnSchemaParseTimestampNtz() throws 
SQLException { ResultSet rs = mockDescribeTableRow("COL3", "TIMESTAMP_NTZ(9)", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals("COL3", schema.getName()); assertEquals(ColumnLogicalType.TIMESTAMP_NTZ, schema.getLogicalType()); assertEquals(ColumnPhysicalType.SB8, schema.getPhysicalType()); assertEquals(9, schema.getScale()); } @Test public void testColumnSchemaParseBinary() throws SQLException { ResultSet rs = mockDescribeTableRow("COL4", "BINARY(8388608)", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals("COL4", schema.getName()); assertEquals(ColumnLogicalType.BINARY, schema.getLogicalType()); assertEquals(ColumnPhysicalType.BINARY, schema.getPhysicalType()); assertEquals(8388608, schema.getByteLength()); } @Test public void testColumnSchemaParseVariant() throws SQLException { ResultSet rs = mockDescribeTableRow("COL5", "VARIANT", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals("COL5", schema.getName()); assertEquals(ColumnLogicalType.VARIANT, schema.getLogicalType()); assertEquals(ColumnPhysicalType.LOB, schema.getPhysicalType()); } @Test public void testColumnSchemaParseArray() throws SQLException { ResultSet rs = mockDescribeTableRow("COL6", "ARRAY", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals(ColumnLogicalType.ARRAY, schema.getLogicalType()); assertEquals(ColumnPhysicalType.LOB, schema.getPhysicalType()); } @Test public void testColumnSchemaParseBoolean() throws SQLException { ResultSet rs = mockDescribeTableRow("COL7", "BOOLEAN", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals(ColumnLogicalType.BOOLEAN, schema.getLogicalType()); assertEquals(ColumnPhysicalType.SB1, schema.getPhysicalType()); } @Test public void testColumnSchemaParseUnknownType() throws SQLException { ResultSet rs = mockDescribeTableRow("COL8", "GEOGRAPHY", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); 
assertNull(schema.getLogicalType()); // Unknown types return null assertNull(schema.getPhysicalType()); } // ================ ValidationResult Tests ================ @Test public void testValidationResultValid() { ValidationResult result = ValidationResult.valid(); assertTrue(result.isValid()); assertFalse(result.hasTypeError()); assertFalse(result.hasStructuralError()); assertFalse(result.needsSchemaEvolution()); } @Test public void testValidationResultTypeError() { ValidationResult result = ValidationResult.typeError("COL1", "Invalid type"); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertFalse(result.hasStructuralError()); assertEquals("COL1", result.getColumnName()); assertEquals("Invalid type", result.getValueError()); assertEquals("type_error", result.getErrorType()); } @Test public void testValidationResultStructuralError() { Set extraCols = new HashSet<>(Arrays.asList("EXTRA1", "EXTRA2")); Set missingNotNull = new HashSet<>(Arrays.asList("REQUIRED1")); Set nullNotNull = new HashSet<>(Arrays.asList("COL2")); ValidationResult result = ValidationResult.structuralError(extraCols, missingNotNull, nullNotNull); assertFalse(result.isValid()); assertFalse(result.hasTypeError()); assertTrue(result.hasStructuralError()); assertTrue(result.needsSchemaEvolution()); assertEquals(2, result.getExtraColNames().size()); assertEquals(1, result.getMissingNotNullColNames().size()); assertEquals(1, result.getNullValueForNotNullColNames().size()); assertEquals("structural_error", result.getErrorType()); } @Test public void testValidationResultEmptyStructuralError() { ValidationResult result = ValidationResult.structuralError( Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); assertFalse(result.isValid()); assertTrue(result.hasStructuralError()); assertFalse(result.needsSchemaEvolution()); // No actual errors } // ================ RowValidator Tests ================ @Test public void testValidateRowValid() { Map schema = new 
HashMap<>(); schema.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100)); schema.put("COL2", createColumnSchema("COL2", ColumnLogicalType.FIXED, true, 38, 0, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("COL1", "test value"); row.put("COL2", 123); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); } @Test public void testValidateRowExtraColumn() { Map schema = new HashMap<>(); schema.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("COL1", "test value"); row.put("COL2", "extra column"); // Extra column not in schema ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasStructuralError()); assertTrue(result.getExtraColNames().contains("COL2")); } @Test public void testValidateRowMissingNotNull() { Map schema = new HashMap<>(); schema.put( "COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, false, null, null, 100)); // NOT NULL RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); // COL1 is missing ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasStructuralError()); assertTrue(result.getMissingNotNullColNames().contains("COL1")); } @Test public void testValidateRowNullInNotNull() { Map schema = new HashMap<>(); schema.put( "COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, false, null, null, 100)); // NOT NULL RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("COL1", null); // Null value in NOT NULL column ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasStructuralError()); assertTrue(result.getNullValueForNotNullColNames().contains("COL1")); } @Test public void testValidateRowInvalidType() { 
Map schema = new HashMap<>(); schema.put("COL1", createColumnSchema("COL1", ColumnLogicalType.FIXED, true, 38, 0, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("COL1", "not a number"); // String in numeric column ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("COL1", result.getColumnName()); assertNotNull(result.getValueError()); } @Test public void testValidateRowMatchingColumnName() { // Column names are expected to be already normalized by the caller (SnowflakeSinkRecord). // RowValidator just does direct comparison against raw column names. Map schema = new HashMap<>(); schema.put( "COL NAME", createColumnSchema("COL NAME", ColumnLogicalType.TEXT, true, null, null, 100)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("COL NAME", "test value"); // Raw column name (already normalized) ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); } @Test public void testValidateSchemaUnsupportedType() { Map schema = new HashMap<>(); ColumnSchema unknownCol = createColumnSchema("COL1", null, true, null, null, null); // null logicalType schema.put("COL1", unknownCol); assertThrows(SFExceptionValidation.class, () -> RowValidator.validateSchema(schema)); } @Test public void testValidateSchemaCollatedColumn() { Map schema = new HashMap<>(); ColumnSchema collatedCol = new ColumnSchema( "COL1", ColumnLogicalType.TEXT, ColumnPhysicalType.LOB, true, null, null, 100, 400, "en-ci"); // Collated column schema.put("COL1", collatedCol); assertThrows(SFExceptionValidation.class, () -> RowValidator.validateSchema(schema)); } @Test public void testValidateSchemaValid() { Map schema = new HashMap<>(); schema.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100)); schema.put("COL2", createColumnSchema("COL2", ColumnLogicalType.FIXED, true, 38, 0, null)); 
schema.put( "COL3", createColumnSchema("COL3", ColumnLogicalType.VARIANT, true, null, null, null)); assertDoesNotThrow(() -> RowValidator.validateSchema(schema)); } @Test public void testValidateRowEmptyColumnName() { Map schema = new HashMap<>(); schema.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("", "value"); // Empty column name row.put("COL1", "test value"); // Empty column name should be caught - it becomes empty after unquoting ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); // Empty column will be treated as extra column or skipped with warning } @Test public void testValidateRowWhitespaceColumnName() { Map schema = new HashMap<>(); schema.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put(" ", "value"); // Whitespace-only column name row.put("\t\n", "value2"); // Control characters row.put("COL1", "test value"); // Whitespace column names should be caught ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); // Whitespace columns will be treated as extra columns or skipped with warning } // ================ Code Review Fix Tests ================ /** * Test that structured OBJECT types are rejected (Issue #1 from code review). SSv1 SDK doesn't * support structured OBJECT types like OBJECT(a INT, b TEXT). 
*/ @Test public void testStructuredObjectTypeRejected() throws SQLException { ResultSet rs = mockDescribeTableRow("COL1", "OBJECT(a NUMBER(38,0), b VARCHAR(16777216))", "Y"); IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> ColumnSchema.fromDescribeTableRow(rs)); assertTrue(exception.getMessage().contains("Structured OBJECT types are not supported")); assertTrue(exception.getMessage().contains("unstructured OBJECT")); } /** * Test that structured ARRAY types are rejected (Issue #1 from code review). SSv1 SDK doesn't * support structured ARRAY types like ARRAY(INT). */ @Test public void testStructuredArrayTypeRejected() throws SQLException { ResultSet rs = mockDescribeTableRow("COL1", "ARRAY(INT)", "Y"); IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> ColumnSchema.fromDescribeTableRow(rs)); assertTrue(exception.getMessage().contains("Structured ARRAY types are not supported")); assertTrue(exception.getMessage().contains("unstructured ARRAY")); } /** Test that unstructured OBJECT types are accepted. */ @Test public void testUnstructuredObjectTypeAccepted() throws SQLException { ResultSet rs = mockDescribeTableRow("COL1", "OBJECT", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals(ColumnLogicalType.OBJECT, schema.getLogicalType()); assertEquals(ColumnPhysicalType.LOB, schema.getPhysicalType()); } /** Test that unstructured ARRAY types are accepted. */ @Test public void testUnstructuredArrayTypeAccepted() throws SQLException { ResultSet rs = mockDescribeTableRow("COL1", "ARRAY", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); assertEquals(ColumnLogicalType.ARRAY, schema.getLogicalType()); assertEquals(ColumnPhysicalType.LOB, schema.getPhysicalType()); } /** * Test that nested type parsing uses lastIndexOf for correct parameter extraction (Issue #1). 
* Without lastIndexOf, "OBJECT(a NUMBER(38,0), b TEXT)" would incorrectly extract params as "a * NUMBER(38,0" instead of the full parameter list. */ @Test public void testNestedTypeParsingWithLastIndexOf() throws SQLException { // This should fail with structured type error, not parsing error ResultSet rs = mockDescribeTableRow("COL1", "OBJECT(a NUMBER(38,0), b VARCHAR(100))", "Y"); IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> ColumnSchema.fromDescribeTableRow(rs)); // Should get structured type error, not malformed type string error assertTrue(exception.getMessage().contains("Structured OBJECT types are not supported")); assertFalse(exception.getMessage().contains("Malformed type string")); } /** * Test that missing NOT NULL columns trigger schema evolution (Issue #3 from code review). KC v3 * treated missing and null NOT NULL columns identically - both drop NOT NULL. */ @Test public void testMissingNotNullColumnTriggersSchemaEvolution() { Map schemaMap = new HashMap<>(); schemaMap.put( "COL1", createColumnSchema("COL1", ColumnLogicalType.FIXED, false, 38, 0, null)); // NOT NULL RowValidator validator = new RowValidator(schemaMap); // Missing COL1 entirely (not in row) Map row = new HashMap<>(); // Empty row - missing NOT NULL column ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasStructuralError()); assertEquals(1, result.getMissingNotNullColNames().size()); assertTrue(result.getMissingNotNullColNames().contains("COL1")); // Should trigger schema evolution (matches KC v3 behavior) assertTrue(result.needsSchemaEvolution()); assertFalse(result.hasUnresolvableError()); // NOT unresolvable anymore } /** * Test that null NOT NULL columns trigger schema evolution (Issue #3 from code review). This was * already working, but verify it still works after fix. 
*/ @Test public void testNullNotNullColumnTriggersSchemaEvolution() { Map schemaMap = new HashMap<>(); schemaMap.put( "COL1", createColumnSchema("COL1", ColumnLogicalType.FIXED, false, 38, 0, null)); // NOT NULL RowValidator validator = new RowValidator(schemaMap); // COL1 present but null Map row = new HashMap<>(); row.put("COL1", null); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasStructuralError()); assertEquals(1, result.getNullValueForNotNullColNames().size()); assertTrue(result.getNullValueForNotNullColNames().contains("COL1")); // Should trigger schema evolution assertTrue(result.needsSchemaEvolution()); assertFalse(result.hasUnresolvableError()); } /** * Test that null values in nullable columns are valid (Graphite bot feedback). When a nullable * column has a null value, it should pass validation. */ @Test public void testNullValueInNullableColumnIsValid() { Map schemaMap = new HashMap<>(); schemaMap.put( "COL1", createColumnSchema("COL1", ColumnLogicalType.FIXED, true, 38, 0, null)); // NULLABLE schemaMap.put( "COL2", createColumnSchema("COL2", ColumnLogicalType.TEXT, true, null, null, 100)); // NULLABLE RowValidator validator = new RowValidator(schemaMap); // Both columns present with null values (valid for nullable columns) Map row = new HashMap<>(); row.put("COL1", null); row.put("COL2", null); ValidationResult result = validator.validateRow(row); // Should be valid - null is allowed for nullable columns assertTrue(result.isValid()); assertFalse(result.hasStructuralError()); assertFalse(result.hasTypeError()); } /** Test that nullable column with actual value also validates correctly. 
*/ @Test public void testNullableColumnWithValue() { Map schemaMap = new HashMap<>(); schemaMap.put( "COL1", createColumnSchema("COL1", ColumnLogicalType.FIXED, true, 38, 0, null)); // NULLABLE RowValidator validator = new RowValidator(schemaMap); // Nullable column with actual value Map row = new HashMap<>(); row.put("COL1", 42); ValidationResult result = validator.validateRow(row); // Should be valid assertTrue(result.isValid()); } /** * Test that large VARCHAR lengths don't cause integer overflow (Graphite security issue). Without * long cast, info.length * 4 can overflow for corrupted/malformed lengths. */ @Test public void testVarcharLargeValueNoOverflow() throws SQLException { // Test with a value that would overflow if multiplied as int: Integer.MAX_VALUE / 2 // This simulates corrupted DESCRIBE TABLE result int largeLength = Integer.MAX_VALUE / 2; // ~1 billion ResultSet rs = mockDescribeTableRow("COL1", "VARCHAR(" + largeLength + ")", "Y"); ColumnSchema schema = ColumnSchema.fromDescribeTableRow(rs); // Should not overflow - byteLength should be capped at MAX_LOB_SIZE_BYTES (16MB) assertEquals(16777216, schema.getByteLength()); // 16MB cap assertEquals(largeLength, schema.getLength()); // Original length preserved } // ================ Server-Filled Column Tests (FR7) ================ @Test public void testValidateRow_missingIdentityColumn_passes() { Map schema = new HashMap<>(); schema.put( "ID", new ColumnSchema( "ID", ColumnLogicalType.FIXED, ColumnPhysicalType.SB16, false, 38, 0, null, null, null, false, true)); // NOT NULL, autoincrement=true schema.put("DATA", createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("DATA", "hello"); // ID is missing — server fills it ValidationResult result = validator.validateRow(row); assertTrue(result.isValid(), "Record should be valid when identity column is omitted"); } @Test public void 
testValidateRow_missingDefaultNotNullColumn_passes() { Map schema = new HashMap<>(); schema.put("DATA", createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100)); schema.put( "CREATED_AT", new ColumnSchema( "CREATED_AT", ColumnLogicalType.TIMESTAMP_NTZ, ColumnPhysicalType.SB8, false, null, 9, null, null, null, true, false)); // NOT NULL, hasDefault=true RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("DATA", "hello"); // CREATED_AT is missing — server fills it ValidationResult result = validator.validateRow(row); assertTrue(result.isValid(), "Record should be valid when default NOT NULL column is omitted"); } @Test public void testValidateRow_missingRegularNotNullColumn_stillFails() { Map schema = new HashMap<>(); schema.put( "REQUIRED", new ColumnSchema( "REQUIRED", ColumnLogicalType.TEXT, ColumnPhysicalType.LOB, false, null, null, 100, 400, null, false, false)); // NOT NULL, no default, no autoincrement RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); // REQUIRED is missing — no server default, should fail ValidationResult result = validator.validateRow(row); assertFalse( result.isValid(), "Record should be invalid when regular NOT NULL column is missing"); assertTrue(result.getMissingNotNullColNames().contains("REQUIRED")); } @Test public void testValidateRow_mixedServerFilledAndRegularColumns() { Map schema = new HashMap<>(); schema.put( "ID", new ColumnSchema( "ID", ColumnLogicalType.FIXED, ColumnPhysicalType.SB16, false, 38, 0, null, null, null, false, true)); // autoincrement schema.put("DATA", createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100)); schema.put( "CREATED_AT", new ColumnSchema( "CREATED_AT", ColumnLogicalType.TIMESTAMP_NTZ, ColumnPhysicalType.SB8, false, null, 9, null, null, null, true, false)); // default schema.put( "STATUS", new ColumnSchema( "STATUS", ColumnLogicalType.FIXED, ColumnPhysicalType.SB16, false, 38, 0, null, null, null, true, 
false)); // default RowValidator validator = new RowValidator(schema); // Only DATA provided — ID, CREATED_AT, STATUS are server-filled Map row = new HashMap<>(); row.put("DATA", "hello"); ValidationResult result = validator.validateRow(row); assertTrue( result.isValid(), "Record should be valid when only server-filled NOT NULL columns are missing"); } @Test public void testValidateRow_explicitValueForIdentityColumn_passes() { Map schema = new HashMap<>(); schema.put( "ID", new ColumnSchema( "ID", ColumnLogicalType.FIXED, ColumnPhysicalType.SB16, false, 38, 0, null, null, null, false, true)); // autoincrement schema.put("DATA", createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100)); RowValidator validator = new RowValidator(schema); // User explicitly provides a value for the identity column — should still be accepted Map row = new HashMap<>(); row.put("ID", 42); row.put("DATA", "hello"); ValidationResult result = validator.validateRow(row); assertTrue( result.isValid(), "Record should be valid when identity column is explicitly provided"); } @Test public void testColumnSchema_isServerFilled() { ColumnSchema autoincCol = new ColumnSchema( "ID", ColumnLogicalType.FIXED, ColumnPhysicalType.SB16, false, 38, 0, null, null, null, false, true); assertTrue(autoincCol.isServerFilled()); assertTrue(autoincCol.isAutoincrement()); assertFalse(autoincCol.hasDefault()); ColumnSchema defaultCol = new ColumnSchema( "TS", ColumnLogicalType.TIMESTAMP_NTZ, ColumnPhysicalType.SB8, false, null, 9, null, null, null, true, false); assertTrue(defaultCol.isServerFilled()); assertFalse(defaultCol.isAutoincrement()); assertTrue(defaultCol.hasDefault()); ColumnSchema regularCol = createColumnSchema("REG", ColumnLogicalType.TEXT, false, null, null, 100); assertFalse(regularCol.isServerFilled()); assertFalse(regularCol.isAutoincrement()); assertFalse(regularCol.hasDefault()); } /** Hex string for a BINARY column is converted to byte[] in-place during validation. 
*/ @Test public void testValidateRowBinaryHexStringConvertedToByteArray() { Map schema = new HashMap<>(); schema.put( "BIN_COL", new ColumnSchema( "BIN_COL", ColumnLogicalType.BINARY, ColumnPhysicalType.BINARY, true, null, null, null, 8388608, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BIN_COL", "FFFFFFFF"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); // Row map must now contain byte[] instead of the original hex string assertInstanceOf(byte[].class, row.get("BIN_COL")); assertArrayEquals( new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}, (byte[]) row.get("BIN_COL")); } /** byte[] input for a BINARY column is preserved as-is. */ @Test public void testValidateRowBinaryByteArrayPassthrough() { Map schema = new HashMap<>(); schema.put( "BIN_COL", new ColumnSchema( "BIN_COL", ColumnLogicalType.BINARY, ColumnPhysicalType.BINARY, true, null, null, null, 8388608, null)); RowValidator validator = new RowValidator(schema); byte[] input = new byte[] {0x01, 0x02, 0x03}; Map row = new HashMap<>(); row.put("BIN_COL", input); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertInstanceOf(byte[].class, row.get("BIN_COL")); assertArrayEquals(input, (byte[]) row.get("BIN_COL")); } /** Empty hex string ("") for a BINARY column is decoded to byte[0]. 
*/ @Test public void testValidateRowBinaryEmptyHexString() { Map schema = new HashMap<>(); schema.put( "BIN_COL", new ColumnSchema( "BIN_COL", ColumnLogicalType.BINARY, ColumnPhysicalType.BINARY, true, null, null, null, 8388608, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BIN_COL", ""); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertInstanceOf(byte[].class, row.get("BIN_COL")); assertArrayEquals(new byte[0], (byte[]) row.get("BIN_COL")); } /** Odd-length hex string for a BINARY column produces a type error. */ @Test public void testValidateRowBinaryOddLengthHexStringFails() { Map schema = new HashMap<>(); schema.put( "BIN_COL", new ColumnSchema( "BIN_COL", ColumnLogicalType.BINARY, ColumnPhysicalType.BINARY, true, null, null, null, 8388608, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BIN_COL", "FFF"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("BIN_COL", result.getColumnName()); } /** Lowercase hex string for a BINARY column is decoded case-insensitively. */ @Test public void testValidateRowBinaryLowercaseHexString() { Map schema = new HashMap<>(); schema.put( "BIN_COL", new ColumnSchema( "BIN_COL", ColumnLogicalType.BINARY, ColumnPhysicalType.BINARY, true, null, null, null, 8388608, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BIN_COL", "ffffffff"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertInstanceOf(byte[].class, row.get("BIN_COL")); assertArrayEquals( new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}, (byte[]) row.get("BIN_COL")); } // ================ VARCHAR Map/List serialization Tests ================ /** * Map sent to a VARCHAR column is serialized to JSON string, matching SSv1/SSv2 SDK behavior. 
* Both SDKs serialize complex objects via Jackson inside appendRow(); RowValidator must * replicate. */ @Test public void testValidateRowVarcharMapSerializedToJson() { Map schema = new HashMap<>(); schema.put( "STR_COL", createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map inputMap = new LinkedHashMap<>(); inputMap.put("key", "value"); Map row = new HashMap<>(); row.put("STR_COL", inputMap); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertEquals("{\"key\":\"value\"}", row.get("STR_COL")); } /** List sent to a VARCHAR column is serialized to JSON array string. */ @Test public void testValidateRowVarcharListSerializedToJson() { Map schema = new HashMap<>(); schema.put( "STR_COL", createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("STR_COL", Arrays.asList(1, 2, 3)); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertEquals("[1,2,3]", row.get("STR_COL")); } /** Nested Map sent to VARCHAR is serialized recursively. */ @Test public void testValidateRowVarcharNestedMapSerializedToJson() { Map schema = new HashMap<>(); schema.put( "STR_COL", createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map nested = new LinkedHashMap<>(); nested.put("b", 1); Map inputMap = new LinkedHashMap<>(); inputMap.put("a", nested); Map row = new HashMap<>(); row.put("STR_COL", inputMap); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertEquals("{\"a\":{\"b\":1}}", row.get("STR_COL")); } /** Map serialized to JSON that exceeds VARCHAR(N) length limit produces a type error. 
*/ @Test public void testValidateRowVarcharMapExceedsLengthLimit() { Map schema = new HashMap<>(); schema.put( "STR_COL", createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, 5)); RowValidator validator = new RowValidator(schema); Map inputMap = new LinkedHashMap<>(); inputMap.put("key", "value"); // {"key":"value"} = 15 chars, exceeds 5 Map row = new HashMap<>(); row.put("STR_COL", inputMap); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); } // ================ Boolean Normalization Tests ================ /** * Integer 0/1 must be normalized to Boolean before reaching the SSv2 SDK. The SDK only accepts * Boolean for BOOLEAN columns — Integer inputs are silently dropped without this normalization. */ @Test public void testValidateRowBooleanIntegerZeroNormalizedToFalse() { Map schema = new HashMap<>(); schema.put( "BOOL_COL", createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BOOL_COL", 0); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertEquals(Boolean.FALSE, row.get("BOOL_COL")); } @Test public void testValidateRowBooleanIntegerOneNormalizedToTrue() { Map schema = new HashMap<>(); schema.put( "BOOL_COL", createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BOOL_COL", 1); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertEquals(Boolean.TRUE, row.get("BOOL_COL")); } /** Native Boolean values must also be normalized (no-op in effect, but consistent). 
*/ @Test public void testValidateRowBooleanNativeBooleanPassthrough() { Map schema = new HashMap<>(); schema.put( "BOOL_COL", createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null)); RowValidator validator = new RowValidator(schema); for (Object input : Arrays.asList(Boolean.TRUE, Boolean.FALSE)) { Map row = new HashMap<>(); row.put("BOOL_COL", input); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertInstanceOf(Boolean.class, row.get("BOOL_COL")); assertEquals(input, row.get("BOOL_COL")); } } /** String tokens are normalized to Boolean (previously accepted as String by SDK). */ @Test public void testValidateRowBooleanStringTokensNormalizedToBoolean() { Map schema = new HashMap<>(); schema.put( "BOOL_COL", createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map trueInputs = new LinkedHashMap<>(); trueInputs.put("true", Boolean.TRUE); trueInputs.put("yes", Boolean.TRUE); trueInputs.put("on", Boolean.TRUE); Map falseInputs = new LinkedHashMap<>(); falseInputs.put("false", Boolean.FALSE); falseInputs.put("no", Boolean.FALSE); falseInputs.put("off", Boolean.FALSE); for (Map.Entry entry : trueInputs.entrySet()) { Map row = new HashMap<>(); row.put("BOOL_COL", entry.getKey()); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid(), "Expected valid for input: " + entry.getKey()); assertEquals(entry.getValue(), row.get("BOOL_COL"), "Expected TRUE for: " + entry.getKey()); } for (Map.Entry entry : falseInputs.entrySet()) { Map row = new HashMap<>(); row.put("BOOL_COL", entry.getKey()); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid(), "Expected valid for input: " + entry.getKey()); assertEquals(entry.getValue(), row.get("BOOL_COL"), "Expected FALSE for: " + entry.getKey()); } } /** Invalid inputs for BOOLEAN still produce a type error. 
*/
  @Test
  public void testValidateRowBooleanInvalidInputProducesTypeError() {
    Map schema = new HashMap<>();
    schema.put(
        "BOOL_COL",
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null));
    RowValidator validator = new RowValidator(schema);

    // An empty map, an empty list and an unrecognized string token are each rejected
    for (Object invalid : Arrays.asList(new HashMap<>(), new ArrayList<>(), "not_a_bool")) {
      Map row = new HashMap<>();
      row.put("BOOL_COL", invalid);

      ValidationResult result = validator.validateRow(row);

      assertFalse(result.isValid(), "Expected type error for input: " + invalid);
      assertTrue(result.hasTypeError(), "Expected type error for input: " + invalid);
      assertEquals("BOOL_COL", result.getColumnName());
    }
  }

  /**
   * Non-0/1 numeric values for BOOLEAN produce a type error. Although SSv1 SDK's
   * DataValidationUtil.validateAndParseBoolean accepts any Number directly, in KC v3 the record
   * mapper converts all values to Strings first — and SSv1's convertStringToBoolean only accepts
   * "0"/"1"/"true"/"false"/"yes"/"no"/"on"/"off". "42" is not in that set, so it's rejected.
   * RowValidator pre-rejects non-0/1 Numbers to match end-to-end KC v3 behavior.
*/ @Test public void testValidateRowBooleanNonZeroOneIntegerProducesTypeError() { Map schema = new HashMap<>(); schema.put( "BOOL_COL", createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null)); RowValidator validator = new RowValidator(schema); for (Object input : Arrays.asList(42, -1, 999, 2L, -100L)) { Map row = new HashMap<>(); row.put("BOOL_COL", input); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid(), "Expected type error for numeric input: " + input); assertTrue(result.hasTypeError(), "Expected type error for numeric input: " + input); assertEquals("BOOL_COL", result.getColumnName()); } } // ================ VARIANT normalization (String → native object) ================ /** * JSON object string sent to VARIANT is parsed back to a Map so the SSv2 SDK stores it as a * native VARIANT object, not a JSON-quoted string. */ @Test public void testValidateRowVariantJsonObjectStringNormalizedToMap() { Map schema = new HashMap<>(); schema.put("V", createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("V", "{\"a\":1}"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("V"); assertTrue(normalized instanceof Map, "Expected Map but got: " + normalized.getClass()); assertEquals(1, ((Map) normalized).size()); assertEquals(1, ((Map) normalized).get("a")); } /** * JSON array string sent to VARIANT is parsed back to a List so the SSv2 SDK stores it as a * native array. 
*/ @Test public void testValidateRowVariantJsonArrayStringNormalizedToList() { Map schema = new HashMap<>(); schema.put("V", createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("V", "[1,2,3]"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("V"); assertTrue(normalized instanceof List, "Expected List but got: " + normalized.getClass()); assertEquals(Arrays.asList(1, 2, 3), normalized); } /** Non-String native objects passed to VARIANT are returned unchanged. */ @Test public void testValidateRowVariantNativeObjectPassthrough() { Map schema = new HashMap<>(); schema.put("V", createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map nativeMap = new HashMap<>(); nativeMap.put("key", "value"); Map row = new HashMap<>(); row.put("V", nativeMap); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertSame(nativeMap, row.get("V"), "Native Map should not be replaced"); } /** Invalid (non-JSON) string sent to VARIANT produces a type error. */ @Test public void testValidateRowVariantInvalidJsonStringProducesTypeError() { Map schema = new HashMap<>(); schema.put("V", createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("V", "not valid json"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("V", result.getColumnName()); } // ================ ARRAY normalization (String → List) ================ /** * JSON array string sent to ARRAY is parsed back to a List so the SSv2 SDK stores it as a proper * array, not a single-element array wrapping the literal string. 
*/ @Test public void testValidateRowArrayJsonStringNormalizedToList() { Map schema = new HashMap<>(); schema.put("A", createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("A", "[1,2,3]"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("A"); assertTrue(normalized instanceof List, "Expected List but got: " + normalized.getClass()); assertEquals(Arrays.asList(1, 2, 3), normalized); } /** * Non-array JSON string sent to ARRAY is wrapped in a single-element List (matching * validateAndParseArray behavior which wraps non-arrays into single-element arrays). */ @Test public void testValidateRowArrayNonArrayJsonStringWrappedInList() { Map schema = new HashMap<>(); schema.put("A", createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("A", "\"hello\""); // JSON string (not an array) ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("A"); assertTrue(normalized instanceof List, "Expected List but got: " + normalized.getClass()); assertEquals(Arrays.asList("hello"), normalized); } /** Native List passed to ARRAY is returned unchanged. */ @Test public void testValidateRowArrayNativeListPassthrough() { Map schema = new HashMap<>(); schema.put("A", createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null)); RowValidator validator = new RowValidator(schema); List nativeList = Arrays.asList(10, 20, 30); Map row = new HashMap<>(); row.put("A", nativeList); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertSame(nativeList, row.get("A"), "Native List should not be replaced"); } /** Invalid (non-JSON) string sent to ARRAY produces a type error. 
*/ @Test public void testValidateRowArrayInvalidJsonStringProducesTypeError() { Map schema = new HashMap<>(); schema.put("A", createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("A", "not_json"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("A", result.getColumnName()); } // ================ OBJECT validation Tests ================ /** Invalid (non-JSON) string sent to OBJECT produces a type error. */ @Test public void testValidateRowObjectInvalidJsonStringProducesTypeError() { Map schema = new HashMap<>(); schema.put("O", createColumnSchema("O", ColumnLogicalType.OBJECT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("O", "not_json"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("O", result.getColumnName()); } /** Valid JSON array string sent to OBJECT is rejected (not an object). */ @Test public void testValidateRowObjectArrayJsonStringProducesTypeError() { Map schema = new HashMap<>(); schema.put("O", createColumnSchema("O", ColumnLogicalType.OBJECT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("O", "[1,2,3]"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("O", result.getColumnName()); } /** Valid JSON object string sent to OBJECT is accepted. 
*/ @Test public void testValidateRowObjectValidJsonStringAccepted() { Map schema = new HashMap<>(); schema.put("O", createColumnSchema("O", ColumnLogicalType.OBJECT, true, null, null, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("O", "{\"key\":\"value\"}"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); } /** Invalid hex string for a BINARY column produces a type error. */ @Test public void testValidateRowBinaryInvalidHexStringFails() { Map schema = new HashMap<>(); schema.put( "BIN_COL", new ColumnSchema( "BIN_COL", ColumnLogicalType.BINARY, ColumnPhysicalType.BINARY, true, null, null, null, 8388608, null)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("BIN_COL", "not-valid-hex!"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("BIN_COL", result.getColumnName()); } // ================ Timestamp normalization Tests ================ /** * Integer epoch for TIMESTAMP_NTZ must be normalized to an ISO timestamp string. The SSv2 SDK * passes raw integers to the Snowflake backend which interprets them using the channel's default * timezone (America/Los_Angeles) instead of UTC. SSv1 SDK converts epochs to UTC client-side. 
*/ @Test public void testValidateRowTimestampNtzIntegerEpochNormalized() { Map schema = new HashMap<>(); schema.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ)); RowValidator validator = new RowValidator(schema); // 1705312800 = 2024-01-15T10:00:00Z Map row = new HashMap<>(); row.put("TS", 1705312800); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("TS"); assertInstanceOf(String.class, normalized, "Integer epoch should be normalized to String"); assertEquals("2024-01-15T10:00", normalized); } /** Long epoch for TIMESTAMP_NTZ is also normalized (same as Integer). */ @Test public void testValidateRowTimestampNtzLongEpochNormalized() { Map schema = new HashMap<>(); schema.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("TS", 1705312800L); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("TS"); assertInstanceOf(String.class, normalized, "Long epoch should be normalized to String"); assertEquals("2024-01-15T10:00", normalized); } /** String timestamp for TIMESTAMP_NTZ is validated but returned unchanged. */ @Test public void testValidateRowTimestampNtzStringPassthrough() { Map schema = new HashMap<>(); schema.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("TS", "2024-01-15T13:45:30"); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); assertEquals("2024-01-15T13:45:30", row.get("TS")); } /** Integer epoch for TIMESTAMP_LTZ is normalized to ISO string with UTC offset. 
*/ @Test public void testValidateRowTimestampLtzIntegerEpochNormalized() { Map schema = new HashMap<>(); schema.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_LTZ)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("TS", 1705312800); ValidationResult result = validator.validateRow(row); assertTrue(result.isValid()); Object normalized = row.get("TS"); assertInstanceOf(String.class, normalized, "Integer epoch should be normalized to String"); assertEquals("2024-01-15T10:00Z", normalized); } /** Invalid string for TIMESTAMP_NTZ produces a type error. */ @Test public void testValidateRowTimestampNtzInvalidStringRejects() { Map schema = new HashMap<>(); schema.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ)); RowValidator validator = new RowValidator(schema); Map row = new HashMap<>(); row.put("TS", "not_a_timestamp"); ValidationResult result = validator.validateRow(row); assertFalse(result.isValid()); assertTrue(result.hasTypeError()); assertEquals("TS", result.getColumnName()); } // ================ Helper Methods ================ private ResultSet mockDescribeTableRow(String name, String type, String nullable) throws SQLException { ResultSet rs = Mockito.mock(ResultSet.class); Mockito.when(rs.getString("name")).thenReturn(name); Mockito.when(rs.getString("type")).thenReturn(type); Mockito.when(rs.getString("null?")).thenReturn(nullable); return rs; } private ColumnSchema createColumnSchema( String name, ColumnLogicalType logicalType, boolean nullable, Integer precision, Integer scale, Integer length) { ColumnPhysicalType physicalType = logicalType != null ? (logicalType == ColumnLogicalType.FIXED ? ColumnPhysicalType.SB16 : logicalType == ColumnLogicalType.TEXT ? ColumnPhysicalType.LOB : logicalType == ColumnLogicalType.BOOLEAN ? ColumnPhysicalType.SB1 : ColumnPhysicalType.LOB) : null; Integer byteLength = length != null ? 
length * 4 : null; return new ColumnSchema( name, logicalType, physicalType, nullable, precision, scale, length, byteLength, null); } private ColumnSchema createTimestampColumnSchema(String name, ColumnLogicalType logicalType) { return new ColumnSchema( name, logicalType, ColumnPhysicalType.SB8, true, null, 9, null, null, null); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/internal/validation/SqlIdentifierNormalizerTest.java ================================================ package com.snowflake.kafka.connector.internal.validation; import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; public class SqlIdentifierNormalizerTest { @ParameterizedTest @CsvSource({ // Unquoted → uppercased "city, CITY", "myCol, MYCOL", "ABC, ABC", "a_b_c, A_B_C", // Unquoted with escaped spaces "col\\ name, COL NAME", }) public void testUnquotedIdentifiers(String input, String expected) { assertEquals(expected, SqlIdentifierNormalizer.normalizeSqlIdentifier(input)); } @ParameterizedTest @CsvSource({ // Quoted → strip quotes, preserve case "'\"city\"', city", "'\"MyCol\"', MyCol", "'\"ABC\"', ABC", "'\"col name\"', col name", // Quoted with escaped double-quotes "'\"col\"\"name\"', col\"name", "'\"\"\"city\"\"\"', \"city\"", }) public void testQuotedIdentifiers(String input, String expected) { assertEquals(expected, SqlIdentifierNormalizer.normalizeSqlIdentifier(input)); } @Test public void testEmptyString() { assertEquals("", SqlIdentifierNormalizer.normalizeSqlIdentifier("")); } @Test public void testSingleChar() { assertEquals("A", SqlIdentifierNormalizer.normalizeSqlIdentifier("a")); } @Test public void testSingleQuote() { // A single double-quote char is not a valid quoted identifier — treated as unquoted assertEquals("\"", SqlIdentifierNormalizer.normalizeSqlIdentifier("\"")); } @Test 
public void testEmptyQuotedIdentifier() { assertEquals("", SqlIdentifierNormalizer.normalizeSqlIdentifier("\"\"")); } @Test public void testCacheReturnsSameResult() { String first = SqlIdentifierNormalizer.normalizeSqlIdentifier("cached_test"); String second = SqlIdentifierNormalizer.normalizeSqlIdentifier("cached_test"); assertEquals(first, second); assertEquals("CACHED_TEST", first); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/mock/MockResultSetForSizeTest.java ================================================ package com.snowflake.kafka.connector.mock; import java.io.InputStream; import java.io.Reader; import java.math.BigDecimal; import java.net.URL; import java.sql.*; import java.util.Calendar; import java.util.Map; public class MockResultSetForSizeTest implements ResultSet { private final int size; private int currentIndex; public MockResultSetForSizeTest(int size) { this.size = size; currentIndex = 0; } @Override public boolean next() throws SQLException { if (currentIndex < size) { currentIndex++; return true; } return false; } @Override public void close() throws SQLException {} @Override public boolean wasNull() throws SQLException { return false; } @Override public String getString(final int columnIndex) throws SQLException { return null; } @Override public boolean getBoolean(final int columnIndex) throws SQLException { return false; } @Override public byte getByte(final int columnIndex) throws SQLException { return 0; } @Override public short getShort(final int columnIndex) throws SQLException { return 0; } @Override public int getInt(final int columnIndex) throws SQLException { return 0; } @Override public long getLong(final int columnIndex) throws SQLException { return 0; } @Override public float getFloat(final int columnIndex) throws SQLException { return 0; } @Override public double getDouble(final int columnIndex) throws SQLException { return 0; } @Override public BigDecimal 
getBigDecimal(final int columnIndex, final int scale) throws SQLException { return null; } @Override public byte[] getBytes(final int columnIndex) throws SQLException { return new byte[0]; } @Override public Date getDate(final int columnIndex) throws SQLException { return null; } @Override public Time getTime(final int columnIndex) throws SQLException { return null; } @Override public Timestamp getTimestamp(final int columnIndex) throws SQLException { return null; } @Override public InputStream getAsciiStream(final int columnIndex) throws SQLException { return null; } @Override public InputStream getUnicodeStream(final int columnIndex) throws SQLException { return null; } @Override public InputStream getBinaryStream(final int columnIndex) throws SQLException { return null; } @Override public String getString(final String columnLabel) throws SQLException { return null; } @Override public boolean getBoolean(final String columnLabel) throws SQLException { return false; } @Override public byte getByte(final String columnLabel) throws SQLException { return 0; } @Override public short getShort(final String columnLabel) throws SQLException { return 0; } @Override public int getInt(final String columnLabel) throws SQLException { return 0; } @Override public long getLong(final String columnLabel) throws SQLException { return 0; } @Override public float getFloat(final String columnLabel) throws SQLException { return 0; } @Override public double getDouble(final String columnLabel) throws SQLException { return 0; } @Override public BigDecimal getBigDecimal(final String columnLabel, final int scale) throws SQLException { return null; } @Override public byte[] getBytes(final String columnLabel) throws SQLException { return new byte[0]; } @Override public Date getDate(final String columnLabel) throws SQLException { return null; } @Override public Time getTime(final String columnLabel) throws SQLException { return null; } @Override public Timestamp getTimestamp(final String 
columnLabel) throws SQLException { return null; } @Override public InputStream getAsciiStream(final String columnLabel) throws SQLException { return null; } @Override public InputStream getUnicodeStream(final String columnLabel) throws SQLException { return null; } @Override public InputStream getBinaryStream(final String columnLabel) throws SQLException { return null; } @Override public SQLWarning getWarnings() throws SQLException { return null; } @Override public void clearWarnings() throws SQLException {} @Override public String getCursorName() throws SQLException { return null; } @Override public ResultSetMetaData getMetaData() throws SQLException { return null; } @Override public Object getObject(final int columnIndex) throws SQLException { return null; } @Override public Object getObject(final String columnLabel) throws SQLException { return null; } @Override public int findColumn(final String columnLabel) throws SQLException { return 0; } @Override public Reader getCharacterStream(final int columnIndex) throws SQLException { return null; } @Override public Reader getCharacterStream(final String columnLabel) throws SQLException { return null; } @Override public BigDecimal getBigDecimal(final int columnIndex) throws SQLException { return null; } @Override public BigDecimal getBigDecimal(final String columnLabel) throws SQLException { return null; } @Override public boolean isBeforeFirst() throws SQLException { return false; } @Override public boolean isAfterLast() throws SQLException { return false; } @Override public boolean isFirst() throws SQLException { return false; } @Override public boolean isLast() throws SQLException { return false; } @Override public void beforeFirst() throws SQLException {} @Override public void afterLast() throws SQLException {} @Override public boolean first() throws SQLException { return false; } @Override public boolean last() throws SQLException { return false; } @Override public int getRow() throws SQLException { return 0; } 
@Override public boolean absolute(final int row) throws SQLException { return false; } @Override public boolean relative(final int rows) throws SQLException { return false; } @Override public boolean previous() throws SQLException { return false; } @Override public void setFetchDirection(final int direction) throws SQLException {} @Override public int getFetchDirection() throws SQLException { return 0; } @Override public void setFetchSize(final int rows) throws SQLException {} @Override public int getFetchSize() throws SQLException { return 0; } @Override public int getType() throws SQLException { return 0; } @Override public int getConcurrency() throws SQLException { return 0; } @Override public boolean rowUpdated() throws SQLException { return false; } @Override public boolean rowInserted() throws SQLException { return false; } @Override public boolean rowDeleted() throws SQLException { return false; } @Override public void updateNull(final int columnIndex) throws SQLException {} @Override public void updateBoolean(final int columnIndex, final boolean x) throws SQLException {} @Override public void updateByte(final int columnIndex, final byte x) throws SQLException {} @Override public void updateShort(final int columnIndex, final short x) throws SQLException {} @Override public void updateInt(final int columnIndex, final int x) throws SQLException {} @Override public void updateLong(final int columnIndex, final long x) throws SQLException {} @Override public void updateFloat(final int columnIndex, final float x) throws SQLException {} @Override public void updateDouble(final int columnIndex, final double x) throws SQLException {} @Override public void updateBigDecimal(final int columnIndex, final BigDecimal x) throws SQLException {} @Override public void updateString(final int columnIndex, final String x) throws SQLException {} @Override public void updateBytes(final int columnIndex, final byte[] x) throws SQLException {} @Override public void updateDate(final 
int columnIndex, final Date x) throws SQLException {} @Override public void updateTime(final int columnIndex, final Time x) throws SQLException {} @Override public void updateTimestamp(final int columnIndex, final Timestamp x) throws SQLException {} @Override public void updateAsciiStream(final int columnIndex, final InputStream x, final int length) throws SQLException {} @Override public void updateBinaryStream(final int columnIndex, final InputStream x, final int length) throws SQLException {} @Override public void updateCharacterStream(final int columnIndex, final Reader x, final int length) throws SQLException {} @Override public void updateObject(final int columnIndex, final Object x, final int scaleOrLength) throws SQLException {} @Override public void updateObject(final int columnIndex, final Object x) throws SQLException {} @Override public void updateNull(final String columnLabel) throws SQLException {} @Override public void updateBoolean(final String columnLabel, final boolean x) throws SQLException {} @Override public void updateByte(final String columnLabel, final byte x) throws SQLException {} @Override public void updateShort(final String columnLabel, final short x) throws SQLException {} @Override public void updateInt(final String columnLabel, final int x) throws SQLException {} @Override public void updateLong(final String columnLabel, final long x) throws SQLException {} @Override public void updateFloat(final String columnLabel, final float x) throws SQLException {} @Override public void updateDouble(final String columnLabel, final double x) throws SQLException {} @Override public void updateBigDecimal(final String columnLabel, final BigDecimal x) throws SQLException {} @Override public void updateString(final String columnLabel, final String x) throws SQLException {} @Override public void updateBytes(final String columnLabel, final byte[] x) throws SQLException {} @Override public void updateDate(final String columnLabel, final Date x) throws 
SQLException {} @Override public void updateTime(final String columnLabel, final Time x) throws SQLException {} @Override public void updateTimestamp(final String columnLabel, final Timestamp x) throws SQLException {} @Override public void updateAsciiStream(final String columnLabel, final InputStream x, final int length) throws SQLException {} @Override public void updateBinaryStream(final String columnLabel, final InputStream x, final int length) throws SQLException {} @Override public void updateCharacterStream(final String columnLabel, final Reader reader, final int length) throws SQLException {} @Override public void updateObject(final String columnLabel, final Object x, final int scaleOrLength) throws SQLException {} @Override public void updateObject(final String columnLabel, final Object x) throws SQLException {} @Override public void insertRow() throws SQLException {} @Override public void updateRow() throws SQLException {} @Override public void deleteRow() throws SQLException {} @Override public void refreshRow() throws SQLException {} @Override public void cancelRowUpdates() throws SQLException {} @Override public void moveToInsertRow() throws SQLException {} @Override public void moveToCurrentRow() throws SQLException {} @Override public Statement getStatement() throws SQLException { return null; } @Override public Object getObject(final int columnIndex, final Map> map) throws SQLException { return null; } @Override public Ref getRef(final int columnIndex) throws SQLException { return null; } @Override public Blob getBlob(final int columnIndex) throws SQLException { return null; } @Override public Clob getClob(final int columnIndex) throws SQLException { return null; } @Override public Array getArray(final int columnIndex) throws SQLException { return null; } @Override public Object getObject(final String columnLabel, final Map> map) throws SQLException { return null; } @Override public Ref getRef(final String columnLabel) throws SQLException { return 
null; } @Override public Blob getBlob(final String columnLabel) throws SQLException { return null; } @Override public Clob getClob(final String columnLabel) throws SQLException { return null; } @Override public Array getArray(final String columnLabel) throws SQLException { return null; } @Override public Date getDate(final int columnIndex, final Calendar cal) throws SQLException { return null; } @Override public Date getDate(final String columnLabel, final Calendar cal) throws SQLException { return null; } @Override public Time getTime(final int columnIndex, final Calendar cal) throws SQLException { return null; } @Override public Time getTime(final String columnLabel, final Calendar cal) throws SQLException { return null; } @Override public Timestamp getTimestamp(final int columnIndex, final Calendar cal) throws SQLException { return null; } @Override public Timestamp getTimestamp(final String columnLabel, final Calendar cal) throws SQLException { return null; } @Override public URL getURL(final int columnIndex) throws SQLException { return null; } @Override public URL getURL(final String columnLabel) throws SQLException { return null; } @Override public void updateRef(final int columnIndex, final Ref x) throws SQLException {} @Override public void updateRef(final String columnLabel, final Ref x) throws SQLException {} @Override public void updateBlob(final int columnIndex, final Blob x) throws SQLException {} @Override public void updateBlob(final String columnLabel, final Blob x) throws SQLException {} @Override public void updateClob(final int columnIndex, final Clob x) throws SQLException {} @Override public void updateClob(final String columnLabel, final Clob x) throws SQLException {} @Override public void updateArray(final int columnIndex, final Array x) throws SQLException {} @Override public void updateArray(final String columnLabel, final Array x) throws SQLException {} @Override public RowId getRowId(final int columnIndex) throws SQLException { return 
null; } @Override public RowId getRowId(final String columnLabel) throws SQLException { return null; } @Override public void updateRowId(final int columnIndex, final RowId x) throws SQLException {} @Override public void updateRowId(final String columnLabel, final RowId x) throws SQLException {} @Override public int getHoldability() throws SQLException { return 0; } @Override public boolean isClosed() throws SQLException { return false; } @Override public void updateNString(final int columnIndex, final String nString) throws SQLException {} @Override public void updateNString(final String columnLabel, final String nString) throws SQLException {} @Override public void updateNClob(final int columnIndex, final NClob nClob) throws SQLException {} @Override public void updateNClob(final String columnLabel, final NClob nClob) throws SQLException {} @Override public NClob getNClob(final int columnIndex) throws SQLException { return null; } @Override public NClob getNClob(final String columnLabel) throws SQLException { return null; } @Override public SQLXML getSQLXML(final int columnIndex) throws SQLException { return null; } @Override public SQLXML getSQLXML(final String columnLabel) throws SQLException { return null; } @Override public void updateSQLXML(final int columnIndex, final SQLXML xmlObject) throws SQLException {} @Override public void updateSQLXML(final String columnLabel, final SQLXML xmlObject) throws SQLException {} @Override public String getNString(final int columnIndex) throws SQLException { return null; } @Override public String getNString(final String columnLabel) throws SQLException { return null; } @Override public Reader getNCharacterStream(final int columnIndex) throws SQLException { return null; } @Override public Reader getNCharacterStream(final String columnLabel) throws SQLException { return null; } @Override public void updateNCharacterStream(final int columnIndex, final Reader x, final long length) throws SQLException {} @Override public void 
updateNCharacterStream( final String columnLabel, final Reader reader, final long length) throws SQLException {} @Override public void updateAsciiStream(final int columnIndex, final InputStream x, final long length) throws SQLException {} @Override public void updateBinaryStream(final int columnIndex, final InputStream x, final long length) throws SQLException {} @Override public void updateCharacterStream(final int columnIndex, final Reader x, final long length) throws SQLException {} @Override public void updateAsciiStream(final String columnLabel, final InputStream x, final long length) throws SQLException {} @Override public void updateBinaryStream(final String columnLabel, final InputStream x, final long length) throws SQLException {} @Override public void updateCharacterStream( final String columnLabel, final Reader reader, final long length) throws SQLException {} @Override public void updateBlob(final int columnIndex, final InputStream inputStream, final long length) throws SQLException {} @Override public void updateBlob(final String columnLabel, final InputStream inputStream, final long length) throws SQLException {} @Override public void updateClob(final int columnIndex, final Reader reader, final long length) throws SQLException {} @Override public void updateClob(final String columnLabel, final Reader reader, final long length) throws SQLException {} @Override public void updateNClob(final int columnIndex, final Reader reader, final long length) throws SQLException {} @Override public void updateNClob(final String columnLabel, final Reader reader, final long length) throws SQLException {} @Override public void updateNCharacterStream(final int columnIndex, final Reader x) throws SQLException {} @Override public void updateNCharacterStream(final String columnLabel, final Reader reader) throws SQLException {} @Override public void updateAsciiStream(final int columnIndex, final InputStream x) throws SQLException {} @Override public void 
updateBinaryStream(final int columnIndex, final InputStream x) throws SQLException {}

  @Override
  public void updateCharacterStream(final int columnIndex, final Reader x) throws SQLException {}

  @Override
  public void updateAsciiStream(final String columnLabel, final InputStream x)
      throws SQLException {}

  @Override
  public void updateBinaryStream(final String columnLabel, final InputStream x)
      throws SQLException {}

  @Override
  public void updateCharacterStream(final String columnLabel, final Reader reader)
      throws SQLException {}

  @Override
  public void updateBlob(final int columnIndex, final InputStream inputStream)
      throws SQLException {}

  @Override
  public void updateBlob(final String columnLabel, final InputStream inputStream)
      throws SQLException {}

  @Override
  public void updateClob(final int columnIndex, final Reader reader) throws SQLException {}

  @Override
  public void updateClob(final String columnLabel, final Reader reader) throws SQLException {}

  @Override
  public void updateNClob(final int columnIndex, final Reader reader) throws SQLException {}

  @Override
  public void updateNClob(final String columnLabel, final Reader reader) throws SQLException {}

  // The typed getObject overloads are generic methods in java.sql.ResultSet; the <T> type
  // parameter has to be declared on the method for the override to compile.
  @Override
  public <T> T getObject(final int columnIndex, final Class<T> type) throws SQLException {
    return null;
  }

  @Override
  public <T> T getObject(final String columnLabel, final Class<T> type) throws SQLException {
    return null;
  }

  // java.sql.Wrapper methods: this mock wraps nothing, so it returns null / false.
  @Override
  public <T> T unwrap(final Class<T> iface) throws SQLException {
    return null;
  }

  @Override
  public boolean isWrapperFor(final Class<?> iface) throws SQLException {
    return false;
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/records/ConverterTest.java
================================================
package com.snowflake.kafka.connector.records;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static
org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.kafka.connect.data.Date; import org.apache.kafka.connect.data.Decimal; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.data.Time; import org.apache.kafka.connect.data.Timestamp; import org.apache.kafka.connect.header.ConnectHeaders; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.storage.SimpleHeaderConverter; import org.junit.jupiter.api.Test; class ConverterTest { private static final ObjectMapper mapper = new ObjectMapper(); @Test void testConnectJsonConverter_MapInt64() throws JsonProcessingException { JsonConverter jsonConverter = new JsonConverter(); Map config = Collections.singletonMap("schemas.enable", false); jsonConverter.configure(config, false); Map jsonMap = new HashMap<>(); // Value will map to int64. 
jsonMap.put("test", Integer.MAX_VALUE); SchemaAndValue schemaAndValue = jsonConverter.toConnectData("test", mapper.writeValueAsBytes(jsonMap)); Map result = KafkaRecordConverter.convertToMap(schemaAndValue.schema(), schemaAndValue.value()); Map expected = new HashMap<>(); expected.put("test", (long) Integer.MAX_VALUE); assertEquals(expected, result); } @Test void testConnectJsonConverter_MapBigDecimal() throws JsonProcessingException { JsonConverter jsonConverter = new JsonConverter(); Map config = Collections.singletonMap("schemas.enable", false); jsonConverter.configure(config, false); // Use a BigDecimal that fits within precision limits Map jsonMap = new HashMap<>(); jsonMap.put("test", new BigDecimal("12345678901234567890")); SchemaAndValue schemaAndValue = jsonConverter.toConnectData("test", mapper.writeValueAsBytes(jsonMap)); Map result = KafkaRecordConverter.convertToMap(schemaAndValue.schema(), schemaAndValue.value()); // BigDecimal gets converted through JSON which treats it as a number // JSON doesn't preserve BigDecimal - large numbers become scientific notation or lose precision // The important thing is the value is preserved as a numeric type assertNotNull(result.get("test")); assertInstanceOf( Number.class, result.get("test"), "Expected Number but got: " + result.get("test").getClass()); } @Test void testConvertMapWithNestedValues() throws JsonProcessingException { JsonConverter jsonConverter = new JsonConverter(); Map config = Collections.singletonMap("schemas.enable", false); jsonConverter.configure(config, false); Map nestedMap = new HashMap<>(); nestedMap.put("nested", "value"); Map jsonMap = new HashMap<>(); jsonMap.put("outer", nestedMap); jsonMap.put("simple", "text"); SchemaAndValue schemaAndValue = jsonConverter.toConnectData("test", mapper.writeValueAsBytes(jsonMap)); Map result = KafkaRecordConverter.convertToMap(schemaAndValue.schema(), schemaAndValue.value()); assertEquals("text", result.get("simple")); assertInstanceOf(Map.class, 
result.get("outer")); @SuppressWarnings("unchecked") Map outerMap = (Map) result.get("outer"); assertEquals("value", outerMap.get("nested")); } @Test void testConvertHeaders() { org.apache.kafka.connect.header.Headers headers = new org.apache.kafka.connect.header.ConnectHeaders(); headers.addString("stringHeader", "value"); headers.addInt("intHeader", 42); headers.addBoolean("boolHeader", true); Map result = KafkaRecordConverter.convertHeaders(headers); assertEquals("value", result.get("stringHeader")); assertEquals("42", result.get("intHeader")); assertEquals("true", result.get("boolHeader")); } @Test void testConvertKey() { // Test string key Object stringKeyResult = KafkaRecordConverter.convertKey(Schema.STRING_SCHEMA, "testKey"); assertEquals("testKey", stringKeyResult); // Test int key Object intKeyResult = KafkaRecordConverter.convertKey(Schema.INT32_SCHEMA, 123); assertEquals(123, intKeyResult); // Test null key Object nullKeyResult = KafkaRecordConverter.convertKey(Schema.OPTIONAL_STRING_SCHEMA, null); assertNull(nullKeyResult); } @SuppressWarnings("resource") @Test void testConvertHeaders_WithSimpleHeaderConverter() { // Test that headers converted by SimpleHeaderConverter are properly handled // This covers the scenario where raw JSON header bytes are first converted by // SimpleHeaderConverter SimpleHeaderConverter headerConverter = new SimpleHeaderConverter(); String rawHeader = "{\"f1\": \"1970-03-22T00:00:00.000Z\", \"f2\": true}"; SchemaAndValue schemaAndValue = headerConverter.toConnectHeader( "test", "h1", rawHeader.getBytes(StandardCharsets.US_ASCII)); // SimpleHeaderConverter returns String schema with the raw string value for complex JSON ConnectHeaders headers = new ConnectHeaders(); headers.add("h1", schemaAndValue); Map result = KafkaRecordConverter.convertHeaders(headers); // The header value should contain the JSON structure as a string assertNotNull(result.get("h1")); assertTrue(result.get("h1").contains("f1")); 
assertTrue(result.get("h1").contains("f2")); } @Test void testConvertHeaders_WithTimestampLogicalType() { // Test headers with Timestamp logical type ConnectHeaders headers = new ConnectHeaders(); java.util.Date timestampValue = new java.util.Date(80 * 24 * 60 * 60 * 1000L); // 80 days from epoch headers.add("timestampHeader", timestampValue, Timestamp.SCHEMA); headers.add("boolHeader", true, Schema.BOOLEAN_SCHEMA); Map result = KafkaRecordConverter.convertHeaders(headers); // Timestamp is formatted as ISO-8601 with millisecond precision via ISO_DATE_TIME_FORMAT assertEquals("1970-03-22T00:00:00.000Z", result.get("timestampHeader")); assertEquals("true", result.get("boolHeader")); } @Test void testConvertHeaders_WithDateLogicalType() { // Test headers with Date logical type ConnectHeaders headers = new ConnectHeaders(); // Create a date value (80 days from epoch = 1970-03-22) java.util.Date dateValue = new java.util.Date(80 * 24 * 60 * 60 * 1000L); headers.add("dateHeader", dateValue, Date.SCHEMA); Map result = KafkaRecordConverter.convertHeaders(headers); // Date should be formatted as ISO date-time string assertEquals("1970-03-22T00:00:00.000Z", result.get("dateHeader")); } @Test void testConvertStructWithAllTypes() { // Test conversion of Struct with all supported types (equivalent to old RecordContentTest) Schema schema = SchemaBuilder.struct() .field("int8", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build()) .field("int16", Schema.INT16_SCHEMA) .field("int32", Schema.INT32_SCHEMA) .field("int64", Schema.INT64_SCHEMA) .field("float32", Schema.FLOAT32_SCHEMA) .field("float64", Schema.FLOAT64_SCHEMA) .field("boolean", Schema.BOOLEAN_SCHEMA) .field("string", Schema.STRING_SCHEMA) .field("bytes", Schema.BYTES_SCHEMA) .field("array", SchemaBuilder.array(Schema.STRING_SCHEMA).build()) .field("map", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build()) .field( "mapNonStringKeys", SchemaBuilder.map(Schema.INT32_SCHEMA, 
Schema.INT32_SCHEMA).build()) .build(); Struct original = new Struct(schema) .put("int8", (byte) 12) .put("int16", (short) 12) .put("int32", 12) .put("int64", 12L) .put("float32", 12.2f) .put("float64", 12.2) .put("boolean", true) .put("string", "foo") .put("bytes", ByteBuffer.wrap("foo".getBytes())) .put("array", Arrays.asList("a", "b", "c")) .put("map", Collections.singletonMap("field", 1)) .put("mapNonStringKeys", Collections.singletonMap(1, 1)); Map result = KafkaRecordConverter.convertToMap(schema, original); assertEquals((byte) 12, result.get("int8")); assertEquals((short) 12, result.get("int16")); assertEquals(12, result.get("int32")); assertEquals(12L, result.get("int64")); assertEquals(12.2f, result.get("float32")); assertEquals(12.2, result.get("float64")); assertEquals(true, result.get("boolean")); assertEquals("foo", result.get("string")); assertArrayEquals("foo".getBytes(), (byte[]) result.get("bytes")); assertEquals(Arrays.asList("a", "b", "c"), result.get("array")); @SuppressWarnings("unchecked") Map mapResult = (Map) result.get("map"); assertEquals(1, mapResult.get("field")); // Non-string keys are encoded as [[key, value], ...] 
@SuppressWarnings("unchecked") List> mapNonStringKeysResult = (List>) result.get("mapNonStringKeys"); assertEquals(1, mapNonStringKeysResult.size()); assertEquals(Arrays.asList(1, 1), mapNonStringKeysResult.get(0)); } @Test void testConvertValue_WithDefaultValue() { // Test that default values are returned when struct field value is null Schema fieldSchema = SchemaBuilder.int32().optional().defaultValue(123).build(); Schema schema = SchemaBuilder.struct().field("field", fieldSchema).build(); Struct struct = new Struct(schema); struct.put("field", null); Map result = KafkaRecordConverter.convertToMap(schema, struct); assertEquals(123, result.get("field")); } @Test void testConvertReadOnlyByteBuffer() { // Test conversion of read-only ByteBuffer byte[] original = "bytes".getBytes(); ByteBuffer buffer = ByteBuffer.wrap(original).asReadOnlyBuffer(); Schema schema = SchemaBuilder.struct().field("bytesField", Schema.BYTES_SCHEMA).build(); Struct struct = new Struct(schema).put("bytesField", buffer); Map result = KafkaRecordConverter.convertToMap(schema, struct); assertArrayEquals(original, (byte[]) result.get("bytesField")); } @Test void testConvertToMap_WithInvalidInput_ThrowsException() { // Test that invalid inputs throw exceptions assertThrows( SnowflakeKafkaConnectorException.class, () -> KafkaRecordConverter.convertToMap(Schema.STRING_SCHEMA, "not a map or struct")); } @Test void testConvertKey_WithTypeMismatch_ThrowsException() { // Test that type mismatch throws exception assertThrows( SnowflakeKafkaConnectorException.class, () -> KafkaRecordConverter.convertKey(Schema.INT32_SCHEMA, "not an int")); } @Test void testConvertDecimal() { // Test Decimal logical type conversion Schema decimalSchema = Decimal.schema(2); BigDecimal value = new BigDecimal("123.45"); Schema schema = SchemaBuilder.struct().field("decimal", decimalSchema).build(); Struct struct = new Struct(schema).put("decimal", value); Map result = KafkaRecordConverter.convertToMap(schema, struct); 
assertEquals(value, result.get("decimal"));
  }

  /** A BigDecimal exceeding the maximum precision is returned as its string form. */
  @Test
  void testConvertDecimal_ExceedsPrecision_ReturnsString() {
    BigDecimal oversized = new BigDecimal("999999999999999999999999999999999999999");
    Schema structSchema = SchemaBuilder.struct().field("decimal", Decimal.schema(0)).build();
    Struct input = new Struct(structSchema).put("decimal", oversized);

    Map converted = KafkaRecordConverter.convertToMap(structSchema, input);

    assertEquals(oversized.toString(), converted.get("decimal"));
  }

  /**
   * Time logical type conversion. Only the presence of a colon-separated string is checked; the
   * format is described as HH:mm:ss.SSSXXX.
   */
  @Test
  void testConvertTime() {
    // Midnight UTC: a fixed input that works regardless of the local timezone. The Time logical
    // type represents milliseconds since midnight.
    java.util.Date midnightUtc = new java.util.Date(0L);
    Schema structSchema = SchemaBuilder.struct().field("time", Time.SCHEMA).build();
    Struct input = new Struct(structSchema).put("time", midnightUtc);

    Map converted = KafkaRecordConverter.convertToMap(structSchema, input);

    assertNotNull(converted.get("time"));
    // The result should be a time string in format HH:mm:ss.SSSXXX
    String rendered = converted.get("time").toString();
    assertTrue(rendered.contains(":"), "Time should contain colons: " + rendered);
  }

  /** Float NaN and infinities are converted to the strings "NaN", "Inf" and "-Inf". */
  @Test
  void testConvertFloatSpecialValues() {
    Schema structSchema =
        SchemaBuilder.struct()
            .field("nan", Schema.FLOAT32_SCHEMA)
            .field("posInf", Schema.FLOAT32_SCHEMA)
            .field("negInf", Schema.FLOAT32_SCHEMA)
            .build();
    Struct input =
        new Struct(structSchema)
            .put("nan", Float.NaN)
            .put("posInf", Float.POSITIVE_INFINITY)
            .put("negInf", Float.NEGATIVE_INFINITY);

    Map converted = KafkaRecordConverter.convertToMap(structSchema, input);

    assertEquals("NaN", converted.get("nan"));
    assertEquals("Inf", converted.get("posInf"));
    assertEquals("-Inf", converted.get("negInf"));
  }

  /** Double NaN and infinities are converted to the strings "NaN", "Inf" and "-Inf". */
  @Test
  void testConvertDoubleSpecialValues() {
    Schema structSchema =
        SchemaBuilder.struct()
            .field("nan", Schema.FLOAT64_SCHEMA)
            .field("posInf", Schema.FLOAT64_SCHEMA)
            .field("negInf", Schema.FLOAT64_SCHEMA)
            .build();
    Struct input =
        new Struct(structSchema)
            .put("nan", Double.NaN)
            .put("posInf", Double.POSITIVE_INFINITY)
            .put("negInf", Double.NEGATIVE_INFINITY);

    Map converted = KafkaRecordConverter.convertToMap(structSchema, input);

    assertEquals("NaN", converted.get("nan"));
    assertEquals("Inf", converted.get("posInf"));
    assertEquals("-Inf", converted.get("negInf"));
  }

  /**
   * Regression test for issue #1334.
   *
   *

    <p>Timestamps whose epoch-millisecond value has fewer than 13 digits (i.e. dates roughly within
   * ±1 year of 1970-01-01) were corrupted when ingested into Snowflake. The old code serialised
   * them as an epoch-millisecond string (e.g. "-23068800000"). Snowflake's integer-stored date
   * auto-detection mistook that 11-digit value for epoch seconds, shifting the stored
   * timestamp by three orders of magnitude (~1239 AD instead of 1969).
   *
   *

    The fix formats the {@code java.util.Date} as an ISO-8601 string, so Snowflake never * triggers its numeric epoch-length heuristic, and the value is safe for Jackson serialization in * all downstream paths (including legacy mode and schema evolution). */ @Test void testConvertToMap_TimestampNearEpoch_ReturnsIsoString() { // 1969-04-08 is ~267 days before the Unix epoch; its epoch-ms value is -23068800000, // an 11-digit number that Snowflake's auto-detection misreads as epoch seconds. java.util.Date nearEpochDate = new java.util.Date(Instant.parse("1969-04-08T00:00:00Z").toEpochMilli()); Schema schema = SchemaBuilder.struct().field("ts", Timestamp.SCHEMA).build(); Struct struct = new Struct(schema).put("ts", nearEpochDate); Map result = KafkaRecordConverter.convertToMap(schema, struct); // Must be an ISO-8601 string — not the epoch-ms string that triggers Snowflake's // auto-detection bug, and not a raw Instant that breaks plain Jackson ObjectMapper. assertInstanceOf( String.class, result.get("ts"), "Timestamp near epoch must be an ISO-8601 string. 
Got: " + result.get("ts")); assertEquals("1969-04-08T00:00:00.000Z", result.get("ts")); } @Test void testConvertNullValue() { // Test null value handling with optional schema Schema schema = SchemaBuilder.struct().field("optional", Schema.OPTIONAL_STRING_SCHEMA).build(); Struct struct = new Struct(schema).put("optional", null); Map result = KafkaRecordConverter.convertToMap(schema, struct); assertNull(result.get("optional")); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/records/SnowflakeSinkRecordTest.java ================================================ package com.snowflake.kafka.connector.records; import static com.snowflake.kafka.connector.Utils.TABLE_COLUMN_METADATA; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import com.snowflake.kafka.connector.builder.SinkRecordBuilder; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Stream; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.data.SchemaBuilder; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.header.ConnectHeaders; import org.apache.kafka.connect.header.Headers; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import 
org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; class SnowflakeSinkRecordTest { private static final String TOPIC = "test"; private static final int PARTITION = 0; private final SnowflakeMetadataConfig metadataConfig = new SnowflakeMetadataConfig(); private final JsonConverter jsonConverter = createJsonConverter(); @Test void testValidRecord_WithJsonMap() { // Test creating a valid record from JSON map SchemaAndValue schemaAndValue = toConnectData("{\"name\": \"test\", \"value\": 123}"); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false); assertTrue(record.isValid()); assertFalse(record.isBroken()); assertFalse(record.isTombstone()); assertNull(record.getBrokenReason()); assertEquals(SnowflakeSinkRecord.RecordState.VALID, record.getState()); Map content = record.getContent(); assertEquals("test", content.get("name")); assertEquals(123L, content.get("value")); } @Test void testValidRecord_WithStruct() { // Test creating a valid record from Struct with multiple types Schema schema = SchemaBuilder.struct() .field("int8", Schema.INT8_SCHEMA) .field("int16", Schema.INT16_SCHEMA) .field("int32", Schema.INT32_SCHEMA) .field("int64", Schema.INT64_SCHEMA) .field("float32", Schema.FLOAT32_SCHEMA) .field("float64", Schema.FLOAT64_SCHEMA) .field("boolean", Schema.BOOLEAN_SCHEMA) .field("string", Schema.STRING_SCHEMA) .field("bytes", Schema.BYTES_SCHEMA) .field("array", SchemaBuilder.array(Schema.STRING_SCHEMA).build()) .field("map", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build()) .build(); Struct struct = new Struct(schema) .put("int8", (byte) 12) .put("int16", (short) 12) .put("int32", 12) .put("int64", 12L) .put("float32", 12.2f) .put("float64", 12.2) .put("boolean", true) .put("string", "foo") .put("bytes", 
ByteBuffer.wrap("foo".getBytes())) .put("array", Arrays.asList("a", "b", "c")) .put("map", Collections.singletonMap("field", 1)); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(schema) .withValue(struct) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false); assertTrue(record.isValid()); assertFalse(record.isBroken()); assertFalse(record.isTombstone()); Map content = record.getContent(); assertEquals((byte) 12, content.get("int8")); assertEquals((short) 12, content.get("int16")); assertEquals(12, content.get("int32")); assertEquals(12L, content.get("int64")); assertEquals(12.2f, content.get("float32")); assertEquals(12.2, content.get("float64")); assertEquals(true, content.get("boolean")); assertEquals("foo", content.get("string")); assertArrayEquals("foo".getBytes(), (byte[]) content.get("bytes")); assertEquals(Arrays.asList("a", "b", "c"), content.get("array")); } @Test void testTombstoneRecord() { // Test creating a tombstone record (null value) SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(null) .withValue(null) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false); assertFalse(record.isValid()); assertFalse(record.isBroken()); assertTrue(record.isTombstone()); assertNull(record.getBrokenReason()); assertEquals(SnowflakeSinkRecord.RecordState.TOMBSTONE, record.getState()); assertTrue(record.getContent().isEmpty()); } @Test void testBrokenRecord_WithInvalidKeySchema() { // Test creating a broken record when key doesn't match schema SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withKeySchema(Schema.INT32_SCHEMA) .withKey("not an int") // String doesn't match INT32_SCHEMA .withValueSchema(Schema.STRING_SCHEMA) .withValue("{}") .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false); 
assertFalse(record.isValid()); assertTrue(record.isBroken()); assertFalse(record.isTombstone()); assertNotNull(record.getBrokenReason()); assertEquals(SnowflakeSinkRecord.RecordState.BROKEN, record.getState()); } @Test void testBrokenRecord_WithInvalidValue() { // Test creating a broken record when value cannot be converted // Using a String value with STRING_SCHEMA but convertToMap expects Map or Struct SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(Schema.STRING_SCHEMA) .withValue("just a plain string") .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false); // Record should be broken because convertToMap cannot handle plain String assertTrue(record.isBroken()); assertFalse(record.isValid()); assertFalse(record.isTombstone()); assertNotNull(record.getBrokenReason()); assertEquals(SnowflakeSinkRecord.RecordState.BROKEN, record.getState()); } @Test void testGetContentWithMetadata_WhenIncludeMetadataTrue() { // Test that metadata is included when flag is true SnowflakeSinkRecord record = createRecordFromJson("{\"name\": \"test\"}", createMetadataConfigWithAll()); Map contentWithMetadata = record.getContentWithMetadata(true); assertNotNull(contentWithMetadata.get(TABLE_COLUMN_METADATA)); assertEquals("test", contentWithMetadata.get("name")); } @Test void testGetContentWithMetadata_WhenIncludeMetadataFalse() { // Test that metadata is NOT included when flag is false SnowflakeSinkRecord record = createRecordFromJson("{\"name\": \"test\"}", metadataConfig); Map contentWithMetadata = record.getContentWithMetadata(false); assertFalse(contentWithMetadata.containsKey(TABLE_COLUMN_METADATA)); assertEquals("test", contentWithMetadata.get("name")); } @Test void testMetadataContainsKey() { // Test that metadata contains the key SchemaAndValue schemaAndValue = toConnectData("{\"name\": \"test\"}"); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) 
.withKeySchema(Schema.STRING_SCHEMA) .withKey("myKey") .withSchemaAndValue(schemaAndValue) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, createMetadataConfigWithAll(), true, false); Map metadata = record.getMetadata(); assertEquals("myKey", metadata.get("key")); } @Test void testFullMetadataFields() { // Test that all metadata fields are present when configured Map config = new HashMap<>(); config.put("snowflake.metadata.all", "true"); config.put("snowflake.metadata.createtime", "true"); config.put("snowflake.metadata.topic", "true"); config.put("snowflake.metadata.offset.and.partition", "true"); config.put("snowflake.streaming.metadata.connectorPushTime", "true"); SnowflakeMetadataConfig fullMetadataConfig = new SnowflakeMetadataConfig(config); SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); long createTime = 1234567890L; long offset = 10L; SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withKeySchema(Schema.STRING_SCHEMA) .withKey("testKey") .withSchemaAndValue(schemaAndValue) .withOffset(offset) .withTimestamp(createTime, TimestampType.CREATE_TIME) .build(); Instant connectorPushTime = Instant.ofEpochMilli(9876543210L); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, fullMetadataConfig, connectorPushTime, true, false); Map metadata = record.getMetadata(); // Verify all metadata fields assertEquals(TOPIC, metadata.get("topic")); assertEquals(offset, metadata.get("offset")); assertEquals(PARTITION, metadata.get("partition")); assertEquals("testKey", metadata.get("key")); assertEquals(createTime, metadata.get("CreateTime")); assertEquals(connectorPushTime.toEpochMilli(), metadata.get("SnowflakeConnectorPushTime")); } @ParameterizedTest(name = "timestamp type {0} should produce metadata key {1}") @MethodSource("timestampTypeTestCases") void testMetadataWithTimestampType(TimestampType timestampType, String expectedMetadataKey) { Map config = new HashMap<>(); 
config.put("snowflake.metadata.createtime", "true"); SnowflakeMetadataConfig timestampConfig = new SnowflakeMetadataConfig(config); SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); long timestamp = 1609459200000L; // 2021-01-01 00:00:00 UTC SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .withTimestamp(timestamp, timestampType) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, timestampConfig, true, false); Map metadata = record.getMetadata(); assertEquals(timestamp, metadata.get(expectedMetadataKey)); } @Test void testMetadataWithHeaders() { // Test metadata includes headers with various types SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); Headers headers = new ConnectHeaders(); headers.addString("stringHeader", "testHeaderValue"); headers.addInt("intHeader", 42); headers.addBoolean("boolHeader", true); SinkRecord kafkaRecord = createSinkRecordWithHeaders(schemaAndValue, headers, "key"); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, createMetadataConfigWithAll(), true, false); Map metadata = record.getMetadata(); assertNotNull(metadata.get("headers")); @SuppressWarnings("unchecked") Map headersMap = (Map) metadata.get("headers"); assertEquals("testHeaderValue", headersMap.get("stringHeader")); assertEquals("42", headersMap.get("intHeader")); assertEquals("true", headersMap.get("boolHeader")); } @Test void testMetadataWithComplexHeaders() { // Test metadata with headers containing JSON-like complex values SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); Headers headers = new ConnectHeaders(); headers.addString("objectAsJsonStringHeader", "{\"key1\":\"value1\",\"key2\":\"value2\"}"); headers.addString("header2", "testheaderstring"); SinkRecord kafkaRecord = createSinkRecordWithHeaders(schemaAndValue, headers, "key"); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, 
createMetadataConfigWithAll(), true, false); Map metadata = record.getMetadata(); @SuppressWarnings("unchecked") Map headersMap = (Map) metadata.get("headers"); assertEquals( "{\"key1\":\"value1\",\"key2\":\"value2\"}", headersMap.get("objectAsJsonStringHeader")); assertEquals("testheaderstring", headersMap.get("header2")); } @Test void testContentWithArray() { SnowflakeSinkRecord record = createRecordFromJson("{\"key\": [\"a\", \"b\", \"c\"]}", metadataConfig); assertTrue(record.isValid()); Map content = record.getContent(); @SuppressWarnings("unchecked") List arrayValue = (List) content.get("key"); assertEquals(Arrays.asList("a", "b", "c"), arrayValue); } @Test void testContentWithEmptyArray() { SnowflakeSinkRecord record = createRecordFromJson("{\"key\": []}", metadataConfig); assertTrue(record.isValid()); Map content = record.getContent(); @SuppressWarnings("unchecked") List arrayValue = (List) content.get("key"); assertTrue(arrayValue.isEmpty()); } @Test void testEmptyContentWithMetadata() { SnowflakeSinkRecord record = createRecordFromJson("{}", createMetadataConfigWithAll()); assertTrue(record.isValid()); // Content should be empty (no data fields) assertTrue(record.getContent().isEmpty()); // But metadata should still be present Map contentWithMetadata = record.getContentWithMetadata(true); assertNotNull(contentWithMetadata.get(TABLE_COLUMN_METADATA)); } @Test void testContentWithKeyValue() { SnowflakeSinkRecord record = createRecordFromJson("{\"key\": \"value\"}", createMetadataConfigWithAll()); assertTrue(record.isValid()); Map content = record.getContent(); assertEquals("value", content.get("key")); Map contentWithMetadata = record.getContentWithMetadata(true); assertEquals("value", contentWithMetadata.get("key")); assertNotNull(contentWithMetadata.get(TABLE_COLUMN_METADATA)); } @Test void testConnectorPushTime_WhenDisabled_NotPresent() { // Test that SnowflakeConnectorPushTime is NOT present when disabled Map config = new HashMap<>(); 
config.put("snowflake.metadata.all", "true"); config.put("snowflake.streaming.metadata.connectorPushTime", "false"); SnowflakeMetadataConfig disabledPushTimeConfig = new SnowflakeMetadataConfig(config); SnowflakeSinkRecord record = createRecordFromJson("{\"data\": \"value\"}", disabledPushTimeConfig); Map metadata = record.getMetadata(); assertFalse(metadata.containsKey("SnowflakeConnectorPushTime")); } @Test void testMetadata_WhenCreateTimeDisabled_NotPresent() { // Test that CreateTime is NOT present when snowflake.metadata.createtime=false Map config = new HashMap<>(); config.put("snowflake.metadata.createtime", "false"); config.put("snowflake.metadata.topic", "true"); config.put("snowflake.metadata.offset.and.partition", "true"); SnowflakeMetadataConfig noCreateTimeConfig = new SnowflakeMetadataConfig(config); SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); long createTime = 1234567890L; SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .withTimestamp(createTime, TimestampType.CREATE_TIME) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, noCreateTimeConfig, true, false); Map metadata = record.getMetadata(); assertFalse(metadata.containsKey("CreateTime")); assertTrue(metadata.containsKey("topic")); assertTrue(metadata.containsKey("offset")); assertTrue(metadata.containsKey("partition")); } @Test void testMetadata_WhenTopicDisabled_NotPresent() { // Test that topic is NOT present when snowflake.metadata.topic=false Map config = new HashMap<>(); config.put("snowflake.metadata.createtime", "true"); config.put("snowflake.metadata.topic", "false"); config.put("snowflake.metadata.offset.and.partition", "true"); SnowflakeMetadataConfig noTopicConfig = new SnowflakeMetadataConfig(config); SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) 
.withSchemaAndValue(schemaAndValue) .withTimestamp(System.currentTimeMillis(), TimestampType.CREATE_TIME) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, noTopicConfig, true, false); Map metadata = record.getMetadata(); assertFalse(metadata.containsKey("topic")); assertTrue(metadata.containsKey("CreateTime")); assertTrue(metadata.containsKey("offset")); assertTrue(metadata.containsKey("partition")); } @Test void testMetadata_WhenOffsetAndPartitionDisabled_NotPresent() { // Test that offset/partition are NOT present when snowflake.metadata.offset.and.partition=false Map config = new HashMap<>(); config.put("snowflake.metadata.createtime", "true"); config.put("snowflake.metadata.topic", "true"); config.put("snowflake.metadata.offset.and.partition", "false"); SnowflakeMetadataConfig noOffsetPartitionConfig = new SnowflakeMetadataConfig(config); SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .withTimestamp(System.currentTimeMillis(), TimestampType.CREATE_TIME) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, noOffsetPartitionConfig, true, false); Map metadata = record.getMetadata(); assertFalse(metadata.containsKey("offset")); assertFalse(metadata.containsKey("partition")); assertTrue(metadata.containsKey("topic")); assertTrue(metadata.containsKey("CreateTime")); } @Test void testMetadata_WhenAllFieldsDisabled_EmptyMetadata() { // Test that when all metadata fields are disabled, metadata has minimal content Map config = new HashMap<>(); config.put("snowflake.metadata.createtime", "false"); config.put("snowflake.metadata.topic", "false"); config.put("snowflake.metadata.offset.and.partition", "false"); config.put("snowflake.streaming.metadata.connectorPushTime", "false"); SnowflakeMetadataConfig allDisabledConfig = new SnowflakeMetadataConfig(config); SchemaAndValue 
schemaAndValue = toConnectData("{\"data\": \"value\"}"); // Create SinkRecord directly without key to avoid SinkRecordBuilder's default key SinkRecord kafkaRecord = new SinkRecord( TOPIC, PARTITION, null, // keySchema null, // key schemaAndValue.schema(), schemaAndValue.value(), 0, System.currentTimeMillis(), TimestampType.CREATE_TIME); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, allDisabledConfig, true, false); Map metadata = record.getMetadata(); assertFalse(metadata.containsKey("offset")); assertFalse(metadata.containsKey("partition")); assertFalse(metadata.containsKey("topic")); assertFalse(metadata.containsKey("CreateTime")); assertFalse(metadata.containsKey("SnowflakeConnectorPushTime")); assertFalse(metadata.containsKey("key")); } @Test void testMetadata_WhenAllFieldsExplicitlyDisabled_ContentWithMetadataHasNoMetadataColumn() { // Test that when ALL individual metadata fields are disabled AND there's no key/headers, // the metadata map is empty and not added to content Map config = new HashMap<>(); config.put("snowflake.metadata.all", "false"); config.put("snowflake.metadata.createtime", "false"); config.put("snowflake.metadata.topic", "false"); config.put("snowflake.metadata.offset.and.partition", "false"); config.put("snowflake.streaming.metadata.connectorPushTime", "false"); SnowflakeMetadataConfig allDisabledConfig = new SnowflakeMetadataConfig(config); SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}"); // Create SinkRecord without key and without timestamp to ensure metadata is truly empty SinkRecord kafkaRecord = new SinkRecord( TOPIC, PARTITION, null, // keySchema null, // key schemaAndValue.schema(), schemaAndValue.value(), 0, null, // no timestamp TimestampType.NO_TIMESTAMP_TYPE); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, allDisabledConfig, true, false); // When ALL individual metadata fields are disabled and no key present, metadata should be empty 
assertTrue(record.getMetadata().isEmpty()); // Even when includeAllMetadata is true, no metadata column should be added because metadata is // empty Map contentWithMetadata = record.getContentWithMetadata(true); assertFalse(contentWithMetadata.containsKey(TABLE_COLUMN_METADATA)); assertEquals("value", contentWithMetadata.get("data")); } @Test void testTimestamp_WhenNoTimestampType_NotPresent() { // Test that timestamp is NOT present when TimestampType is NO_TIMESTAMP_TYPE Map config = new HashMap<>(); config.put("snowflake.metadata.createtime", "true"); SnowflakeMetadataConfig timestampConfig = new SnowflakeMetadataConfig(config); // Create record without timestamp (NO_TIMESTAMP_TYPE is default in builder) SnowflakeSinkRecord record = createRecordFromJson("{\"data\": \"value\"}", timestampConfig); Map metadata = record.getMetadata(); assertFalse(metadata.containsKey("CreateTime")); assertFalse(metadata.containsKey("LogAppendTime")); } @Test void testLegacyMode_WithTimestampStruct_JacksonCanSerialize() { // Reproducer for PR review comment: when enableSchematization=false, // wrapAsRecordContent() serializes via plain ObjectMapper (no JavaTimeModule). // If convertToMap returns a raw Instant, MAPPER.writeValueAsString() will throw // InvalidDefinitionException. 
java.util.Date nearEpochDate = new java.util.Date(java.time.Instant.parse("1969-04-08T00:00:00Z").toEpochMilli()); Schema schema = SchemaBuilder.struct().field("ts", org.apache.kafka.connect.data.Timestamp.SCHEMA).build(); Struct struct = new Struct(schema).put("ts", nearEpochDate); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(schema) .withValue(struct) .build(); // enableSchematization=false triggers wrapAsRecordContent → Jackson serialization SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, false); // Must not be broken — Jackson must be able to serialize the converted value assertFalse( record.isBroken(), "Record should not be broken but was: " + record.getBrokenReason()); assertTrue(record.isValid()); assertTrue(record.getContent().containsKey("RECORD_CONTENT")); } @Test void testLegacyMode_WithJsonMap_WrapsInRecordContent() { SchemaAndValue schemaAndValue = toConnectData("{\"name\": \"test\", \"value\": 123}"); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true); assertTrue(record.isValid()); Map content = record.getContent(); assertTrue(content.containsKey("RECORD_CONTENT")); assertEquals(1, content.size()); @SuppressWarnings("unchecked") Map recordContent = (Map) content.get("RECORD_CONTENT"); assertEquals("test", recordContent.get("name")); assertEquals(123L, recordContent.get("value")); } @Test void testLegacyMode_WithPlainString_WrapsInRecordContent() { SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(Schema.STRING_SCHEMA) .withValue("just a plain string") .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true); assertTrue(record.isValid()); Map content = record.getContent(); 
assertTrue(content.containsKey("RECORD_CONTENT")); assertEquals("just a plain string", content.get("RECORD_CONTENT")); } @Test void testLegacyMode_WithByteArray_WrapsInRecordContent() { byte[] bytes = "hello".getBytes(java.nio.charset.StandardCharsets.UTF_8); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(Schema.BYTES_SCHEMA) .withValue(bytes) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true); assertTrue(record.isValid()); Map content = record.getContent(); assertTrue(content.containsKey("RECORD_CONTENT")); assertArrayEquals(bytes, (byte[]) content.get("RECORD_CONTENT")); } @Test void testLegacyMode_TombstoneStillWorks() { SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(null) .withValue(null) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true); assertTrue(record.isTombstone()); } @Test void testSchematizedMode_WithPlainString_StillBroken() { SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withValueSchema(Schema.STRING_SCHEMA) .withValue("just a plain string") .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false); assertTrue(record.isBroken()); } @Test void testNormalizationEnabled_UppercasesColumnNames() { SchemaAndValue schemaAndValue = toConnectData("{\"city\": \"Hsinchu\", \"Country\": \"TW\"}"); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .build(); SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, true); assertTrue(record.isValid()); Map content = record.getContent(); // Unquoted identifiers are uppercased assertTrue(content.containsKey("CITY")); assertTrue(content.containsKey("COUNTRY")); assertFalse(content.containsKey("city")); 
assertFalse(content.containsKey("Country"));
  }

  /** With normalization off, column names keep the exact case found in the payload. */
  @Test
  void testNormalizationDisabled_PreservesColumnNames() {
    SchemaAndValue jsonPayload = toConnectData("{\"city\": \"Hsinchu\", \"Country\": \"TW\"}");
    SinkRecord sinkRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(jsonPayload)
            .build();

    SnowflakeSinkRecord converted =
        SnowflakeSinkRecord.from(sinkRecord, metadataConfig, true, false);

    assertTrue(converted.isValid());
    Map columns = converted.getContent();
    // Names are kept as-is
    assertTrue(columns.containsKey("city"));
    assertTrue(columns.containsKey("Country"));
    assertFalse(columns.containsKey("CITY"));
    assertFalse(columns.containsKey("COUNTRY"));
  }

  /**
   * With normalization on, a double-quoted field name loses its quotes and keeps its case, while
   * an unquoted name is uppercased; the record's schema is renamed the same way.
   */
  @Test
  void testNormalizationEnabled_QuotedIdentifierPreservesCase() {
    Schema structSchema =
        SchemaBuilder.struct()
            .field("\"MyCol\"", Schema.STRING_SCHEMA)
            .field("simple", Schema.STRING_SCHEMA)
            .build();
    Struct payload = new Struct(structSchema).put("\"MyCol\"", "value1").put("simple", "value2");
    SinkRecord sinkRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(structSchema)
            .withValue(payload)
            .build();

    SnowflakeSinkRecord converted =
        SnowflakeSinkRecord.from(sinkRecord, metadataConfig, true, true);

    assertTrue(converted.isValid());
    Map columns = converted.getContent();
    // Quoted "MyCol" → quotes stripped, case preserved
    assertTrue(columns.containsKey("MyCol"));
    // Unquoted simple → uppercased
    assertTrue(columns.containsKey("SIMPLE"));

    // The schema's field names follow the same normalization
    Schema renamedSchema = converted.getSchema();
    assertNotNull(renamedSchema);
    assertNotNull(renamedSchema.field("MyCol"));
    assertNotNull(renamedSchema.field("SIMPLE"));
    assertNull(renamedSchema.field("\"MyCol\""));
    assertNull(renamedSchema.field("simple"));
  }

  private static JsonConverter createJsonConverter() {
    JsonConverter converter = new JsonConverter();
converter.configure(Collections.singletonMap("schemas.enable", false), false); return converter; } private static Stream timestampTypeTestCases() { return Stream.of( Arguments.of(TimestampType.CREATE_TIME, "CreateTime"), Arguments.of(TimestampType.LOG_APPEND_TIME, "LogAppendTime")); } private SchemaAndValue toConnectData(String jsonPayload) { return jsonConverter.toConnectData(TOPIC, jsonPayload.getBytes(StandardCharsets.UTF_8)); } private SnowflakeMetadataConfig createMetadataConfigWithAll() { Map config = new HashMap<>(); config.put("snowflake.metadata.all", "true"); return new SnowflakeMetadataConfig(config); } private SnowflakeSinkRecord createRecordFromJson(String json, SnowflakeMetadataConfig config) { SchemaAndValue schemaAndValue = toConnectData(json); SinkRecord kafkaRecord = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION) .withSchemaAndValue(schemaAndValue) .build(); return SnowflakeSinkRecord.from(kafkaRecord, config, true, false); } private SinkRecord createSinkRecordWithHeaders( SchemaAndValue schemaAndValue, Headers headers, String key) { return new SinkRecord( TOPIC, PARTITION, Schema.STRING_SCHEMA, key, schemaAndValue.schema(), schemaAndValue.value(), 0, null, TimestampType.NO_TIMESTAMP_TYPE, headers); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/BaseIcebergIT.java ================================================ package com.snowflake.kafka.connector.streaming.iceberg; import static com.snowflake.kafka.connector.internal.TestUtils.executeQueryAndCollectResult; import static com.snowflake.kafka.connector.internal.TestUtils.executeQueryWithParameter; import com.snowflake.kafka.connector.internal.SnowflakeConnectionService; import com.snowflake.kafka.connector.internal.TestUtils; import java.sql.ResultSet; import java.util.function.Function; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; public class BaseIcebergIT { protected static 
SnowflakeConnectionService snowflakeDatabase;

  /** Opens the connection that every test of the class shares. */
  @BeforeAll
  public static void setup() {
    snowflakeDatabase = TestUtils.getConnectionServiceWithEncryptedKey();
  }

  /**
   * Closes the shared connection. Does nothing when {@link #setup()} failed before assigning it,
   * so the original setup failure is not followed by a NullPointerException from here.
   */
  @AfterAll
  public static void teardown() {
    if (snowflakeDatabase != null) {
      snowflakeDatabase.close();
    }
  }

  /**
   * Creates (or replaces) an Iceberg table.
   *
   * @param tableName table name, bound to the {@code identifier(?)} placeholder
   * @param columnClause column definitions, concatenated verbatim into the DDL - callers must pass
   *     trusted text only
   * @param icebergVersion format version; emitted as {@code ordinal() + 1}, so the enum constants
   *     must stay declared in version order
   */
  protected static void createIcebergTableWithColumnClause(
      String tableName, String columnClause, IcebergVersion icebergVersion) {
    String query =
        "create or replace iceberg table identifier(?) ("
            + columnClause
            + ") "
            + "external_volume = 'test_exvol' "
            + "catalog = 'SNOWFLAKE' "
            + "base_location = 'it' iceberg_version = "
            + (icebergVersion.ordinal() + 1)
            + ";";
    doExecuteQueryWithParameter(query, tableName);
  }

  /** Executes {@code query} on the shared connection with {@code tableName} as its parameter. */
  private static void doExecuteQueryWithParameter(String query, String tableName) {
    executeQueryWithParameter(snowflakeDatabase.getConnection(), query, tableName);
  }

  /** Drops the Iceberg table if it exists. */
  protected static void dropIcebergTable(String tableName) {
    String query = "drop iceberg table if exists identifier(?)";
    doExecuteQueryWithParameter(query, tableName);
  }

  /**
   * Runs {@code query} with {@code tableName} as its parameter and hands the result set to {@code
   * resultCollector}.
   *
   * @param <T> type produced by the collector
   * @return whatever {@code resultCollector} builds from the result set
   */
  protected static <T> T select(
      String tableName, String query, Function<ResultSet, T> resultCollector) {
    return executeQueryAndCollectResult(
        snowflakeDatabase.getConnection(), query, tableName, resultCollector);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergIngestionIT.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

import static com.snowflake.kafka.connector.internal.TestUtils.getConnectorConfigurationForStreaming;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext;
import
com.snowflake.kafka.connector.internal.streaming.StreamingSinkServiceBuilder; import com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord; import com.snowflake.kafka.connector.streaming.iceberg.sql.RecordWithMetadata; import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.List; import java.util.Map; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.record.TimestampType; import org.apache.kafka.connect.data.Schema; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.header.ConnectHeaders; import org.apache.kafka.connect.header.Headers; import org.apache.kafka.connect.json.JsonConverter; import org.apache.kafka.connect.sink.SinkRecord; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; public abstract class IcebergIngestionIT extends BaseIcebergIT { private static final int PARTITION = 0; private String topic; protected String tableName; protected TopicPartition topicPartition; protected SnowflakeSinkService service; protected InMemoryKafkaRecordErrorReporter kafkaRecordErrorReporter; /** * Override in subclasses to create the target Iceberg table before the service starts. KCv4 * requires pre-created Iceberg tables; auto-creation is not supported for them. 
*/ protected void createIcebergTable() {} @BeforeEach public void setUp() { tableName = TestUtils.randomTableName(); topic = tableName; topicPartition = new TopicPartition(topic, PARTITION); createIcebergTable(); Map config = getConnectorConfigurationForStreaming(false); ConnectorConfigTools.setDefaultValues(config); SinkTaskConfig sinkTaskConfig = SinkTaskConfig.builderFrom(config) .tolerateErrors(false) .dlqTopicName("test_DLQ") .topicToTableMap(Collections.singletonMap(topic, tableName)) .build(); kafkaRecordErrorReporter = new InMemoryKafkaRecordErrorReporter(); service = StreamingSinkServiceBuilder.builder(snowflakeDatabase, sinkTaskConfig) .withErrorReporter(kafkaRecordErrorReporter) .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition))) .build(); } @AfterEach public void tearDown() { if (service != null) { service.closeAll(); } dropIcebergTable(tableName); } protected void waitForOffset(long targetOffset) throws Exception { TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == targetOffset); } protected SinkRecord createKafkaRecord(String jsonString, long offset, boolean withSchema) { JsonConverter converter = new JsonConverter(); converter.configure( Collections.singletonMap("schemas.enable", Boolean.toString(withSchema)), false); SchemaAndValue inputValue = converter.toConnectData(topic, jsonString.getBytes(StandardCharsets.UTF_8)); Headers headers = new ConnectHeaders(); headers.addBoolean("booleanHeader", true); headers.addString("stringHeader", "test"); headers.addInt("intHeader", 123); headers.addDouble("doubleHeader", 1.234); headers.addFloat("floatHeader", 1.234f); headers.addLong("longHeader", 123L); headers.addShort("shortHeader", (short) 123); return new SinkRecord( topic, PARTITION, Schema.STRING_SCHEMA, "test", inputValue.schema(), inputValue.value(), offset, System.currentTimeMillis(), TimestampType.CREATE_TIME, headers); } private final String selectAllSortByOffset = "WITH extracted_data AS (" + 
"SELECT *, RECORD_METADATA:\"offset\"::number AS offset_extracted " + "FROM identifier(?) " + ") " + "SELECT * FROM extracted_data " + "ORDER BY offset_extracted asc;"; protected List> selectAllComplexJsonRecordFromRecordContent() { return select(tableName, selectAllSortByOffset, ComplexJsonRecord::fromRecordContentColumn); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergIngestionIntoVariantIT.java ================================================ package com.snowflake.kafka.connector.streaming.iceberg; import static com.snowflake.kafka.connector.streaming.iceberg.IcebergVersion.V3; import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonPayload; import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonRecordValueExample; import static org.assertj.core.api.Assertions.assertThat; import com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord; import com.snowflake.kafka.connector.streaming.iceberg.sql.MetadataRecord; import com.snowflake.kafka.connector.streaming.iceberg.sql.RecordWithMetadata; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import org.junit.jupiter.api.Test; public class IcebergIngestionIntoVariantIT extends IcebergIngestionIT { @Override protected void createIcebergTable() { createIcebergTableWithColumnClause( tableName, "RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT", V3); } @Test void shouldInsertRecordsLegacyBagOfBits() throws Exception { final long overMaxIntOffset = (long) Integer.MAX_VALUE + 1; final boolean withSchema = false; final String message = complexJsonPayload; service.insert( Arrays.asList( createKafkaRecord(message, 0, withSchema), createKafkaRecord(message, 1, withSchema))); waitForOffset(2); service.insert(Collections.singletonList(createKafkaRecord(message, 2, withSchema))); waitForOffset(3); 
service.insert( Collections.singletonList(createKafkaRecord(message, overMaxIntOffset, withSchema))); waitForOffset(overMaxIntOffset + 1); assertRecordsInTable(Arrays.asList(0L, 1L, 2L, overMaxIntOffset)); } private void assertRecordsInTable(List expectedOffsets) { List> recordsWithMetadata = selectAllComplexJsonRecordFromRecordContent(); assertThat(recordsWithMetadata) .hasSize(expectedOffsets.size()) .extracting(RecordWithMetadata::getRecord) .containsOnly(complexJsonRecordValueExample); List metadataRecords = recordsWithMetadata.stream() .map(RecordWithMetadata::getMetadata) .collect(Collectors.toList()); assertThat(metadataRecords) .extracting(MetadataRecord::getOffset) .containsExactlyElementsOf(expectedOffsets); assertThat(metadataRecords) .hasSize(expectedOffsets.size()) .allMatch( record -> record.getTopic().equals(topicPartition.topic()) && record.getPartition().equals(topicPartition.partition()) && record.getKey().equals("test") && record.getSnowflakeConnectorPushTime() != null); } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergIngestionNoSchemaEvolutionIT.java ================================================ package com.snowflake.kafka.connector.streaming.iceberg; import static com.snowflake.kafka.connector.streaming.iceberg.IcebergVersion.V2; import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonPayload; import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonRecordValueExample; import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonWithSchema; import static org.assertj.core.api.Assertions.assertThat; import com.snowflake.kafka.connector.Utils; import com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord; import com.snowflake.kafka.connector.streaming.iceberg.sql.MetadataRecord; import 
com.snowflake.kafka.connector.streaming.iceberg.sql.RecordWithMetadata;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Ingests the complex JSON payload, with and without an embedded schema, into an Iceberg table
 * created with {@code IcebergVersion.V2} whose metadata and content columns are structured
 * objects.
 */
public class IcebergIngestionNoSchemaEvolutionIT extends IcebergIngestionIT {

  /** Structured-object type holding the eight primitive fields of the test payload. */
  private static final String PRIMITIVE_JSON_RECORD_CONTENT_OBJECT_SCHEMA =
      "object("
          + "id_int8 NUMBER(10,0),"
          + "id_int16 NUMBER(10,0),"
          + "id_int32 NUMBER(10,0),"
          + "id_int64 NUMBER(19,0),"
          + "description STRING,"
          + "rating_float32 FLOAT,"
          + "rating_float64 FLOAT,"
          + "approval BOOLEAN"
          + ")";

  /** The primitive fields plus five arrays and two nested primitive objects. */
  private static final String COMPLEX_JSON_RECORD_CONTENT_OBJECT_SCHEMA =
      "object("
          + "id_int8 NUMBER(10,0),"
          + "id_int16 NUMBER(10,0),"
          + "id_int32 NUMBER(10,0),"
          + "id_int64 NUMBER(19,0),"
          + "description STRING,"
          + "rating_float32 FLOAT,"
          + "rating_float64 FLOAT,"
          + "approval BOOLEAN,"
          + "array1 ARRAY(LONG),"
          + "array2 ARRAY(STRING),"
          + "array3 ARRAY(BOOLEAN),"
          + "array4 ARRAY(LONG),"
          + "array5 ARRAY(ARRAY(LONG)),"
          + "nestedRecord "
          + PRIMITIVE_JSON_RECORD_CONTENT_OBJECT_SCHEMA
          + ","
          + "nestedRecord2 "
          + PRIMITIVE_JSON_RECORD_CONTENT_OBJECT_SCHEMA
          + ")";

  @Override
  protected void createIcebergTable() {
    createIcebergTableWithColumnClause(
        tableName,
        Utils.TABLE_COLUMN_METADATA
            + " "
            + IcebergDDLTypes.ICEBERG_METADATA_OBJECT_SCHEMA
            + ", "
            + Utils.TABLE_COLUMN_CONTENT
            + " "
            + COMPLEX_JSON_RECORD_CONTENT_OBJECT_SCHEMA,
        V2);
  }

  /** Test cases as (display name, JSON message, whether the message embeds its schema). */
  private static Stream<Arguments> prepareData() {
    return Stream.of(
        Arguments.of("Complex JSON with schema", complexJsonWithSchema, true),
        Arguments.of("Complex JSON without schema", complexJsonPayload, false));
  }

  /**
   * Inserts {@code message} at offsets 0, 1, 2 and {@code Integer.MAX_VALUE + 1}, waiting after
   * each batch for the service to report the next offset, then verifies the table contents.
   */
  @ParameterizedTest(name = "{0}")
  @MethodSource("prepareData")
  void shouldInsertRecords(String description, String message, boolean withSchema)
      throws Exception {
    long overMaxIntOffset = (long) Integer.MAX_VALUE + 1;

    service.insert(
        Arrays.asList(
            createKafkaRecord(message, 0, withSchema), createKafkaRecord(message, 1, withSchema)));
    waitForOffset(2);
    service.insert(Collections.singletonList(createKafkaRecord(message, 2, withSchema)));
    waitForOffset(3);
    service.insert(
        Collections.singletonList(createKafkaRecord(message, overMaxIntOffset, withSchema)));
    waitForOffset(overMaxIntOffset + 1);

    assertRecordsInTable(Arrays.asList(0L, 1L, 2L, overMaxIntOffset));
  }

  /**
   * Verifies the table holds exactly one row per expected offset, in that order, each with the
   * example content and with metadata naming the test topic, partition and key {@code "test"} plus
   * a non-null connector push time.
   *
   * @param expectedOffsets offsets expected in the table, in ascending order
   */
  private void assertRecordsInTable(List<Long> expectedOffsets) {
    List<RecordWithMetadata<ComplexJsonRecord>> recordsWithMetadata =
        selectAllComplexJsonRecordFromRecordContent();

    assertThat(recordsWithMetadata)
        .hasSize(expectedOffsets.size())
        .extracting(RecordWithMetadata::getRecord)
        .containsOnly(complexJsonRecordValueExample);

    List<MetadataRecord> metadataRecords =
        recordsWithMetadata.stream()
            .map(RecordWithMetadata::getMetadata)
            .collect(Collectors.toList());

    assertThat(metadataRecords)
        .extracting(MetadataRecord::getOffset)
        .containsExactlyElementsOf(expectedOffsets);

    // Known-non-null values are the equals() receivers, so a missing metadata field fails the
    // assertion instead of throwing a NullPointerException inside the predicate.
    assertThat(metadataRecords)
        .hasSize(expectedOffsets.size())
        .allMatch(
            record ->
                topicPartition.topic().equals(record.getTopic())
                    && Integer.valueOf(topicPartition.partition()).equals(record.getPartition())
                    && "test".equals(record.getKey())
                    && record.getSnowflakeConnectorPushTime() != null);
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergVersion.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

/**
 * Iceberg table format versions used by the tests. {@code BaseIcebergIT} emits {@code ordinal() +
 * 1} as the {@code iceberg_version} value, so the constants must stay in ascending version order
 * with no gaps.
 */
public enum IcebergVersion {
  V1,
  V2,
  V3
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/ComplexJsonRecord.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import
com.fasterxml.jackson.databind.ObjectMapper; import com.snowflake.kafka.connector.Utils; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.Objects; import org.assertj.core.api.Assertions; public class ComplexJsonRecord { public static final String complexJsonPayload = loadJsonResource("/com/snowflake/kafka/connector/complexJsonPayload.json"); public static final String complexJsonWithSchema = loadJsonResource("/com/snowflake/kafka/connector/complexJsonWithSchema.json"); private static final ObjectMapper MAPPER = new ObjectMapper().configure(FAIL_ON_UNKNOWN_PROPERTIES, false); public static final ComplexJsonRecord complexJsonRecordValueExample = new ComplexJsonRecord( 8L, 16L, 32L, 64L, "dogs are the best", 0.5, 0.25, true, List.of(1, 2, 3), List.of("a", "b", "c"), List.of(true), List.of(1, 4), List.of(List.of(7, 8, 9), List.of(10, 11, 12)), PrimitiveJsonRecord.primitiveJsonRecordValueExample, PrimitiveJsonRecord.primitiveJsonRecordValueExample); private static String loadJsonResource(final String resourcePath) { try (InputStream is = ComplexJsonRecord.class.getResourceAsStream(resourcePath)) { if (is == null) { throw new RuntimeException("Resource not found: " + resourcePath); } return new String(is.readAllBytes(), StandardCharsets.UTF_8); } catch (IOException e) { throw new RuntimeException("Failed to load resource: " + resourcePath, e); } } private final Long idInt8; private final Long idInt16; private final Long idInt32; private final Long idInt64; private final String description; private final Double ratingFloat32; private final Double ratingFloat64; private final Boolean approval; private final List array1; private final List array2; private final List array3; private final List array4; private final List> array5; private final PrimitiveJsonRecord nestedRecord; private final 
PrimitiveJsonRecord nestedRecord2; @JsonCreator public ComplexJsonRecord( @JsonProperty("id_int8") Long idInt8, @JsonProperty("id_int16") Long idInt16, @JsonProperty("id_int32") Long idInt32, @JsonProperty("id_int64") Long idInt64, @JsonProperty("description") String description, @JsonProperty("rating_float32") Double ratingFloat32, @JsonProperty("rating_float64") Double ratingFloat64, @JsonProperty("approval") Boolean approval, @JsonProperty("array1") List array1, @JsonProperty("array2") List array2, @JsonProperty("array3") List array3, @JsonProperty("array4") List array4, @JsonProperty("array5") List> array5, @JsonProperty("nestedRecord") PrimitiveJsonRecord nestedRecord, @JsonProperty("nestedRecord2") PrimitiveJsonRecord nestedRecord2) { this.idInt8 = idInt8; this.idInt16 = idInt16; this.idInt32 = idInt32; this.idInt64 = idInt64; this.description = description; this.ratingFloat32 = ratingFloat32; this.ratingFloat64 = ratingFloat64; this.approval = approval; this.array1 = array1; this.array2 = array2; this.array3 = array3; this.array4 = array4; this.array5 = array5; this.nestedRecord = nestedRecord; this.nestedRecord2 = nestedRecord2; } public static List> fromRecordContentColumn( ResultSet resultSet) { List> records = new ArrayList<>(); try { while (resultSet.next()) { String jsonString = resultSet.getString(Utils.TABLE_COLUMN_CONTENT); ComplexJsonRecord record = MAPPER.readValue(jsonString, ComplexJsonRecord.class); MetadataRecord metadata = PrimitiveJsonRecord.fromMetadataSingleRow(resultSet); records.add(RecordWithMetadata.of(metadata, record)); } } catch (SQLException | IOException e) { Assertions.fail("Couldn't map ResultSet to ComplexJsonRecord: " + e.getMessage()); } return records; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; ComplexJsonRecord that = (ComplexJsonRecord) o; return Objects.equals(idInt8, that.idInt8) && Objects.equals(idInt16, that.idInt16) && 
Objects.equals(idInt32, that.idInt32)
        && Objects.equals(idInt64, that.idInt64)
        && Objects.equals(description, that.description)
        && Objects.equals(ratingFloat32, that.ratingFloat32)
        && Objects.equals(ratingFloat64, that.ratingFloat64)
        && Objects.equals(approval, that.approval)
        && Objects.equals(array1, that.array1)
        && Objects.equals(array2, that.array2)
        && Objects.equals(array3, that.array3)
        && Objects.equals(array4, that.array4)
        && Objects.equals(array5, that.array5)
        && Objects.equals(nestedRecord, that.nestedRecord)
        && Objects.equals(nestedRecord2, that.nestedRecord2);
  }

  /** Hashes the same fifteen fields that {@code equals} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(
        idInt8,
        idInt16,
        idInt32,
        idInt64,
        description,
        ratingFloat32,
        ratingFloat64,
        approval,
        array1,
        array2,
        array3,
        array4,
        array5,
        nestedRecord,
        nestedRecord2);
  }

  /** Lists every field as {@code name=value}; only the description is wrapped in single quotes. */
  @Override
  public String toString() {
    return "ComplexJsonRecord{"
        + "idInt8="
        + idInt8
        + ", idInt16="
        + idInt16
        + ", idInt32="
        + idInt32
        + ", idInt64="
        + idInt64
        + ", description='"
        + description
        + '\''
        + ", ratingFloat32="
        + ratingFloat32
        + ", ratingFloat64="
        + ratingFloat64
        + ", approval="
        + approval
        + ", array1="
        + array1
        + ", array2="
        + array2
        + ", array3="
        + array3
        + ", array4="
        + array4
        + ", array5="
        + array5
        + ", nestedRecord="
        + nestedRecord
        + ", nestedRecord2="
        + nestedRecord2
        + '}';
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/MetadataRecord.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Map;
import java.util.Objects;

/**
 * Immutable value object for the JSON stored in the metadata column, deserialized by Jackson
 * through the annotated constructor. All fields are boxed types and are stored as passed in, so
 * any of them may be null.
 */
public class MetadataRecord {
  private final Long offset;
  private final String topic;
  private final Integer partition;
  private final String key;
  private final Integer schemaId;
  private final Integer keySchemaId;
  private final Long createTime;
  private final Long logAppendTime;
  private final Long snowflakeConnectorPushTime;

  // Raw Map as it appears in this excerpt; key/value types are not visible here.
  private final Map headers;

  /**
   * Jackson creator. Note the mixed naming of the JSON properties: {@code schema_id} and {@code
   * key_schema_id} are snake_case, while {@code CreateTime}, {@code LogAppendTime} and {@code
   * SnowflakeConnectorPushTime} are PascalCase.
   */
  @JsonCreator
  public MetadataRecord(
      @JsonProperty("offset") Long offset,
      @JsonProperty("topic") String topic,
      @JsonProperty("partition") Integer partition,
      @JsonProperty("key") String key,
      @JsonProperty("schema_id") Integer schemaId,
      @JsonProperty("key_schema_id") Integer keySchemaId,
      @JsonProperty("CreateTime") Long createTime,
      @JsonProperty("LogAppendTime") Long logAppendTime,
      @JsonProperty("SnowflakeConnectorPushTime") Long snowflakeConnectorPushTime,
      @JsonProperty("headers") Map headers) {
    this.offset = offset;
    this.topic = topic;
    this.partition = partition;
    this.key = key;
    this.schemaId = schemaId;
    this.keySchemaId = keySchemaId;
    this.createTime = createTime;
    this.logAppendTime = logAppendTime;
    this.snowflakeConnectorPushTime = snowflakeConnectorPushTime;
    this.headers = headers;
  }

  // Getters for each field
  public Long getOffset() {
    return offset;
  }

  public String getTopic() {
    return topic;
  }

  public Integer getPartition() {
    return partition;
  }

  public String getKey() {
    return key;
  }

  public Integer getSchemaId() {
    return schemaId;
  }

  public Integer getKeySchemaId() {
    return keySchemaId;
  }

  public Long getCreateTime() {
    return createTime;
  }

  public Long getLogAppendTime() {
    return logAppendTime;
  }

  public Long getSnowflakeConnectorPushTime() {
    return snowflakeConnectorPushTime;
  }

  public Map getHeaders() {
    return headers;
  }

  /** Two records are equal when they have the same class and all ten fields are equal. */
  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    MetadataRecord that = (MetadataRecord) o;
    return Objects.equals(offset, that.offset)
        && Objects.equals(topic, that.topic)
        && Objects.equals(partition, that.partition)
        && Objects.equals(key, that.key)
        && Objects.equals(schemaId, that.schemaId)
        && Objects.equals(keySchemaId, that.keySchemaId)
        && Objects.equals(createTime, that.createTime)
        && Objects.equals(logAppendTime, that.logAppendTime)
        && Objects.equals(snowflakeConnectorPushTime, that.snowflakeConnectorPushTime)
        && Objects.equals(headers, that.headers);
  }

  /** Hashes the same ten fields that {@code equals} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(
        offset,
        topic,
        partition,
        key,
        schemaId,
        keySchemaId,
        createTime,
        logAppendTime,
        snowflakeConnectorPushTime,
        headers);
  }

  /** Lists every field as {@code name=value}; topic and key are wrapped in single quotes. */
  @Override
  public String toString() {
    return "MetadataRecord{"
        + "offset="
        + offset
        + ", topic='"
        + topic
        + '\''
        + ", partition="
        + partition
        + ", key='"
        + key
        + '\''
        + ", schemaId="
        + schemaId
        + ", keySchemaId="
        + keySchemaId
        + ", createTime="
        + createTime
        + ", logAppendTime="
        + logAppendTime
        + ", snowflakeConnectorPushTime="
        + snowflakeConnectorPushTime
        + ", headers="
        + headers
        + '}';
  }
}

================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/PrimitiveJsonRecord.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Utils;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Objects;
import org.assertj.core.api.Assertions;

/**
 * Value object for the eight primitive fields of the JSON test payload; also used for the nested
 * records of {@code ComplexJsonRecord}.
 */
public class PrimitiveJsonRecord {

  /** Expected values of the primitive fields in the test fixtures. */
  public static final PrimitiveJsonRecord primitiveJsonRecordValueExample =
      new PrimitiveJsonRecord(8L, 16L, 32L, 64L, "dogs are the best", 0.5, 0.25, true);

  /** Mapper that ignores JSON properties the target class does not declare. */
  private static final ObjectMapper MAPPER =
      new ObjectMapper().configure(FAIL_ON_UNKNOWN_PROPERTIES, false);

  private final Long idInt8;

  private final Long idInt16;

  private final Long idInt32;

  private final Long idInt64;

  private final String description;

  private final Double ratingFloat32;

  private final Double ratingFloat64;

  private final Boolean approval;

  /** Jackson creator; every argument maps to the JSON property named in its annotation. */
  @JsonCreator
  public PrimitiveJsonRecord(
      @JsonProperty("id_int8") Long idInt8,
      @JsonProperty("id_int16") Long idInt16,
@JsonProperty("id_int32") Long idInt32, @JsonProperty("id_int64") Long idInt64, @JsonProperty("description") String description, @JsonProperty("rating_float32") Double ratingFloat32, @JsonProperty("rating_float64") Double ratingFloat64, @JsonProperty("approval") Boolean approval) { this.idInt8 = idInt8; this.idInt16 = idInt16; this.idInt32 = idInt32; this.idInt64 = idInt64; this.description = description; this.ratingFloat32 = ratingFloat32; this.ratingFloat64 = ratingFloat64; this.approval = approval; } public static MetadataRecord fromMetadataSingleRow(ResultSet resultSet) { try { String jsonString = resultSet.getString(Utils.TABLE_COLUMN_METADATA); return MAPPER.readValue(jsonString, MetadataRecord.class); } catch (SQLException | IOException e) { Assertions.fail("Couldn't map ResultSet to MetadataRecord: " + e.getMessage()); } return null; } public Long getIdInt8() { return idInt8; } public Long getIdInt16() { return idInt16; } public Long getIdInt32() { return idInt32; } public Long getIdInt64() { return idInt64; } public String getDescription() { return description; } public Double getRatingFloat32() { return ratingFloat32; } public Double getRatingFloat64() { return ratingFloat64; } public Boolean isApproval() { return approval; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; PrimitiveJsonRecord that = (PrimitiveJsonRecord) o; return Objects.equals(idInt8, that.idInt8) && Objects.equals(idInt16, that.idInt16) && Objects.equals(idInt32, that.idInt32) && Objects.equals(idInt64, that.idInt64) && Objects.equals(description, that.description) && Objects.equals(ratingFloat32, that.ratingFloat32) && Objects.equals(ratingFloat64, that.ratingFloat64) && Objects.equals(approval, that.approval); } @Override public int hashCode() { return Objects.hash( idInt8, idInt16, idInt32, idInt64, description, ratingFloat32, ratingFloat64, approval); } @Override public String toString() { return 
"PrimitiveJsonRecord{" + "idInt8=" + idInt8 + ", idInt16=" + idInt16 + ", idInt32=" + idInt32 + ", idInt64=" + idInt64 + ", description='" + description + '\'' + ", ratingFloat32=" + ratingFloat32 + ", ratingFloat64=" + ratingFloat64 + ", approval=" + approval + '}'; } } ================================================ FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/RecordWithMetadata.java ================================================ package com.snowflake.kafka.connector.streaming.iceberg.sql; public class RecordWithMetadata { private final T record; private final MetadataRecord metadata; private RecordWithMetadata(MetadataRecord metadata, T record) { this.record = record; this.metadata = metadata; } public static RecordWithMetadata of(MetadataRecord metadata, T record) { return new RecordWithMetadata<>(metadata, record); } public T getRecord() { return record; } public MetadataRecord getMetadata() { return metadata; } } ================================================ FILE: src/test/resources/com/snowflake/kafka/connector/complexJsonPayload.json ================================================ { "record_content": { "id_int8": 8, "id_int16": 16, "id_int32": 32, "id_int64": 64, "description": "dogs are the best", "rating_float32": 0.5, "rating_float64": 0.25, "approval": true, "array1": [ 1, 2, 3 ], "array2": [ "a", "b", "c" ], "array3": [ true ], "array4": [ 1, 4 ], "array5": [ [ 7, 8, 9 ], [ 10, 11, 12 ] ], "nestedRecord": { "id_int8": 8, "id_int16": 16, "id_int32": 32, "id_int64": 64, "description": "dogs are the best", "rating_float32": 0.5, "rating_float64": 0.25, "approval": true }, "nestedRecord2": { "id_int8": 8, "id_int16": 16, "id_int32": 32, "id_int64": 64, "description": "dogs are the best", "rating_float32": 0.5, "rating_float64": 0.25, "approval": true } } } ================================================ FILE: src/test/resources/com/snowflake/kafka/connector/complexJsonWithSchema.json 
================================================ { "schema": { "type": "struct", "fields": [ { "field": "record_content", "type": "struct", "fields": [ { "field": "id_int8", "type": "int8" }, { "field": "id_int16", "type": "int16" }, { "field": "id_int32", "type": "int32" }, { "field": "id_int64", "type": "int64" }, { "field": "description", "type": "string" }, { "field": "rating_float32", "type": "float" }, { "field": "rating_float64", "type": "double" }, { "field": "approval", "type": "boolean" }, { "field": "array1", "type": "array", "items": { "type": "int32" } }, { "field": "array2", "type": "array", "items": { "type": "string" } }, { "field": "array3", "type": "array", "items": { "type": "boolean" } }, { "field": "array4", "type": "array", "items": { "type": "int32" }, "optional": true }, { "field": "array5", "type": "array", "items": { "type": "array", "items": { "type": "int32" } } }, { "field": "nestedRecord", "type": "struct", "fields": [ { "field": "id_int8", "type": "int8" }, { "field": "id_int16", "type": "int16" }, { "field": "id_int32", "type": "int32" }, { "field": "id_int64", "type": "int64" }, { "field": "description", "type": "string" }, { "field": "rating_float32", "type": "float" }, { "field": "rating_float64", "type": "double" }, { "field": "approval", "type": "boolean" } ], "optional": true, "name": "sf.kc.test" }, { "field": "nestedRecord2", "type": "struct", "fields": [ { "field": "id_int8", "type": "int8" }, { "field": "id_int16", "type": "int16" }, { "field": "id_int32", "type": "int32" }, { "field": "id_int64", "type": "int64" }, { "field": "description", "type": "string" }, { "field": "rating_float32", "type": "float" }, { "field": "rating_float64", "type": "double" }, { "field": "approval", "type": "boolean" } ], "optional": true, "name": "sf.kc.test" } ] } ], "optional": false, "name": "sf.kc.test" }, "payload": { "record_content": { "id_int8": 8, "id_int16": 16, "id_int32": 32, "id_int64": 64, "description": "dogs are the best", 
"rating_float32": 0.5, "rating_float64": 0.25, "approval": true, "array1": [ 1, 2, 3 ], "array2": [ "a", "b", "c" ], "array3": [ true ], "array4": [ 1, 4 ], "array5": [ [ 7, 8, 9 ], [ 10, 11, 12 ] ], "nestedRecord": { "id_int8": 8, "id_int16": 16, "id_int32": 32, "id_int64": 64, "description": "dogs are the best", "rating_float32": 0.5, "rating_float64": 0.25, "approval": true }, "nestedRecord2": { "id_int8": 8, "id_int16": 16, "id_int32": 32, "id_int64": 64, "description": "dogs are the best", "rating_float32": 0.5, "rating_float64": 0.25, "approval": true } } } } ================================================ FILE: src/test/resources/log4j.properties ================================================ log4j.rootLogger=INFO, STDOUT, file log4j.logger.deng=INFO log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout log4j.appender.STDOUT.layout.ConversionPattern=%d{dd-MM-yyyy HH:mm:ss} %t %-5p %m %c{1}:%L%n log4j.appender.file=org.apache.log4j.RollingFileAppender log4j.appender.file.File=sf.log log4j.appender.file.layout=org.apache.log4j.PatternLayout # date with format, Thread name, log severity, print only class name({1}) with line number, "-" message to print and platform dependent "\n" # https://logging.apache.org/log4j/1.2/apidocs/org/apache/log4j/PatternLayout.html # Example: 26-04-2021 16:42:16 main DEBUG SnowflakeInternalStage:95 - () log4j.appender.file.layout.ConversionPattern=%d{dd-MM-yyyy HH:mm:ss} %t %-5p %m %c{1}:%L%n log4j.logger.com.snowflake.kafka.connector=TRACE # Avoid httpClient flooding the log log4j.logger.net.snowflake.client.jdbc.internal.apache.http.wire=WARN log4j.logger.net.snowflake.client.jdbc.internal.apache.http.headers=WARN ================================================ FILE: src/test/resources/squid.conf ================================================ acl SSL_ports port 443 acl Safe_ports port 80 # http acl Safe_ports port 21 # ftp acl Safe_ports port 443 # https acl 
Safe_ports port 70 # gopher acl Safe_ports port 210 # wais acl Safe_ports port 1025-65535 # unregistered ports acl Safe_ports port 280 # http-mgmt acl Safe_ports port 488 # gss-http acl Safe_ports port 591 # filemaker acl Safe_ports port 777 # multiling http acl CONNECT method CONNECT http_access deny !Safe_ports http_access deny CONNECT !SSL_ports http_port 3128 coredump_dir /var/spool/squid refresh_pattern ^ftp: 1440 20% 10080 refresh_pattern ^gopher: 1440 0% 1440 refresh_pattern -i (/cgi-bin/|\?) 0 0% 0 refresh_pattern (Release|Packages(.gz)*)$ 0 20% 2880 refresh_pattern . 0 20% 4320 auth_param basic program /usr/lib/squid/basic_ncsa_auth /etc/squid/passwords auth_param basic realm proxy acl authenticated proxy_auth REQUIRED http_access allow authenticated http_access allow localhost ident_lookup_access deny all http_access deny all ================================================ FILE: test/.gitignore ================================================ apache.tgz apache_log kafka_* confluent-* rest_request_generated/ ================================================ FILE: test/E2E_TEST_PLAN.md ================================================ # E2E Test Plan: Kafka Connector v4 (SSv2) ## Table of Contents - [1. Overview](#1-overview) - [2. Test Dimensions](#2-test-dimensions) - [3. Test Categories](#3-test-categories) - [3.1 Data Ingestion](#31-data-ingestion) - [3.2 Error Handling](#32-error-handling) - [3.3 Schema Evolution](#33-schema-evolution) - [3.4 RECORD_CONTENT Mode](#34-record_content-mode) - [3.5 Connector Lifecycle & Resilience](#35-connector-lifecycle--resilience) - [3.6 Default Pipe Features](#36-default-pipe-features) - [3.7 Load & Stress](#37-load--stress) - [4. 
Data Type Compatibility](#4-data-type-compatibility-strategy) ### Status Legend | Icon | Meaning | |------|---------| | 🟢 | Done -- test exists and passes | | 🟡 | Known divergence -- test documents behavioral difference between v3 and v4, or has a known gap | | 🔴 | Missing -- test must be written | ### Priority (for 🔴 items) | Tier | Meaning | Criteria | |------|---------|----------| | **P0** | GA blocker | Data correctness risk, no existing coverage, explicit FR requirement | | **P1** | Should have for GA | Important for launch or fast-follow, partial coverage exists | | **P2** | Deferred post-GA | Lower risk, high implementation cost, or substantial existing coverage | --- ## 1. Overview ### Background Kafka Connector v4 replaces v3's dual ingestion engines (file-based Snowpipe + SSv1 Streaming) with SSv2 exclusively. The GA strategy requires **functional parity** with v3 in compatibility mode, plus a new high-throughput mode. ### Connector Operating Modes | Mode | Config | Validation | Schema Evolution | Error Handling | Target Use Case | |------|--------|-----------|-----------------|----------------|-----------------| | **Compatibility** | `snowflake.validation=client_side` | Client-side | Client-side `ALTER TABLE` | Sync DLQ / Sync Abort | v3 migration, parity | | **High-Throughput** | `snowflake.validation=server_side` (default) | None (server) | Server-side (table `ENABLE_SCHEMA_EVOLUTION`) | Async Error Tables | Max throughput (10 GB/s target) | ### PRD Functional Requirements | FR | Name | Scope | |----|------|-------| | FR1 | Client-Side DLQ (`errors.tolerance=all`) -- includes data type validation parity | Compatibility mode | | FR2 | Client-Side Abort (`errors.tolerance=none`) -- includes data type validation parity | Compatibility mode | | FR3 | Validation Toggle (`snowflake.validation`) | Mode switch | | FR4 | Legacy Schema Toggle (`snowflake.enable.schematization`) | Both modes | | FR5 | Default Pipes Only (`MATCH_BY_COLUMN_NAME`) | Both modes | | 
FR6 | Schema Evolution | Both modes (different paths) | | FR7 | Default Pipe Improvements (Identity, Defaults, Clustering) | Both modes | | FR8 | Performance & Stability Baselines | High-throughput mode | | FR9 | v3/v4 DLQ Parity | Compatibility mode -- requires that DLQ error messages be byte-for-byte identical between v3 and v4 | | FR10 | Telemetry & Usage Tracking | Both modes | | FR11 | Pre-Flight Safety Check | High-throughput mode | ### When Dual Testing is Required Running every test in dual mode (v3 + v4) doubles CI time. Dual is justified only when **v3 and v4 can produce different results** for the same input. There are two root causes of behavioral divergence: 1. **Data type handling differences between SDKs**: SSv1 and SSv2 may serialize, validate, or store values differently for the same Snowflake type. Known example: SSv1 parses JSON-like strings into native JSON objects in VARIANT columns, while SSv2 stores them as string literals. Similar differences may exist for BINARY encoding, TIMESTAMP precision, or other types. See [Section 4: Data Type Compatibility Strategy](#4-data-type-compatibility-strategy) for the full analysis. 2. **Client-side validation lifecycle**: SSv1 always validates (built into the SDK, cannot be disabled). V4 has a separate `RowValidator` (copied from SSv1's `DataValidationUtil`) that can be toggled. This affects DLQ routing, abort behavior, and schema evolution triggering. Additionally, v3 requires `schematization=true` for schema evolution; v4 does not. --- ## 2. Test Dimensions Every test scenario can be classified across these independent dimensions: ### Data Format Only formats used in the **Snowpipe Streaming** ingestion path are in scope. Legacy Snowpipe-only converters (`SnowflakeJsonConverter`, `SnowflakeAvroConverter`) are excluded -- we are migrating `SNOWPIPE_STREAMING` mode, not file-based `SNOWPIPE`.
| Format | Key Converter | Value Converter | Schema Registry | Platform | |--------|--------------|-----------------|-----------------|----------| | **JSON (native)** | StringConverter | JsonConverter | No | Any | | **Avro SR** | StringConverter | AvroConverter (Confluent) | Yes | Confluent | | **Avro SR (keys+values)** | AvroConverter | AvroConverter | Yes | Confluent | | **Protobuf SR** | StringConverter | ProtobufConverter (Confluent) | Yes | Confluent | | **Protobuf (native)** | StringConverter | Custom (raw bytes) | No | Any | | **String (raw)** | StringConverter | StringConverter | No | Any | | **Bytes (raw)** | ByteArrayConverter | ByteArrayConverter | No | Any | ### Architecture | Value | Config | Behavior | |-------|--------|----------| | `server_side` (default) | `snowflake.validation=server_side` | Server-side only, Error Tables | | `client_side` | `snowflake.validation=client_side` | Client-side validation, DLQ, abort | ### Schematization Mode | Value | Config | Table Layout | |-------|--------|-------------| | `on` (default in v4) | `snowflake.enable.schematization=true` | Flat columns + `RECORD_METADATA` | | `off` | `snowflake.enable.schematization=false` | `RECORD_CONTENT` + `RECORD_METADATA` (VARIANT) | ### Platform | Platform | Schema Registry | Notes | |----------|----------------|-------| | Apache Kafka | Embedded (limited) | No Confluent SR converters | | Confluent Platform | Full SR support | Required for Avro SR, Protobuf SR tests | --- ## 3. Test Categories ### 3.1 Data Ingestion Basic data lands correctly in Snowflake for each format. This is the foundation -- every other category builds on it. 
#### 3.1.1 JSON (Compatibility Mode) | Status | Test | Version | Rationale | File | |:------:|------|---------|-----------|------| | 🟢 | String keys + JSON values | dual | Asserts data values in VARIANT header column (confirmed v3/v4 difference) | `test_string_json.py` | | 🟢 | JSON keys + JSON values | v4 | Row count + metadata only; no data value assertions | `test_json_json.py` | | 🟢 | JSON without schema + ReplaceField SMT | v4 | SMT runs in KC framework, not SDK-dependent | `test_native_string_json_without_schema.py` | | 🟢 | Complex SMT chain (ValueToKey + ExtractField + ReplaceField) | v4 | Same -- KC framework SMT processing | `test_native_complex_smt.py` | | 🟢 | Nullable values after ExtractField SMT | v4 | SMT + tombstone handling, KC framework level | `test_nullable_values_after_smt.py` | | 🟢 | Snowpipe Streaming multi-partition (3p x 1000) | v4 | Row count + offset uniqueness check only | `test_snowpipe_streaming_string_json.py` | | 🟢 | Multiple topics -> one table (3 topics x 3 partitions) | v4 | Row count + topic distribution check only | `test_multiple_topic_to_one_table_snowpipe_streaming.py` | | 🟢 | Tombstone handling (`behavior.on.null.values=IGNORE`) | v4 | v4-only; dual coverage pending. Asserts data values in VARIANT header column. | `test_snowpipe_streaming_string_json_ignore_tombstone.py` | | 🔴 | Large blob ingestion (20 MiB JSON) | v4 | **P2.** Row count check; tests SDK buffer limits, not validation. v3 equivalent: `TestLargeBlobSnowpipe` | -- | #### 3.1.2 Avro (Compatibility Mode) These tests were originally ported from v3 with identical assertions. v4 was run against those assertions on Confluent 7.8.0 (2026-03-31) and all passed — confirming v4 produces identical results. v3 itself cannot run in the current infrastructure due to the SR classloader conflict, but parity is indirectly verified through the captured reference assertions. 
| Status | Test | Version | Rationale | File | |:------:|------|---------|-----------|------| | 🟢 | String keys + Avro SR values | v4 | Assertions capture v3 reference behavior (ported from v3). v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict. | `test_string_avrosr.py` | | 🟢 | Avro SR keys + Avro SR values (NaN, Inf) | v4 | Same -- v4 parity confirmed 2026-03-31. | `test_avrosr_avrosr.py` | | 🟢 | Snowpipe Streaming + Avro SR (3p x 1000) | v4 | Same -- v4 parity confirmed 2026-03-31. | `test_snowpipe_streaming_string_avro_sr.py` | #### 3.1.3 Protobuf (Compatibility Mode) | Status | Test | Version | Rationale | File | |:------:|------|---------|-----------|------| | 🟢 | Confluent Protobuf SR (nested types, special floats) | v4 | Assertions capture v3 reference behavior (ported from v3). v4 parity confirmed 2026-03-31. V3 cannot run due to SR classloader conflict. | `test_confluent_protobuf_protobuf.py` | | 🟢 | Native Protobuf (raw bytes, no SR) | v4 | Protobuf deserialization is converter-level, not SDK | `test_native_string_protobuf.py` | #### 3.1.4 Schema & Type Mapping (Compatibility Mode) The existing `test_schema_mapping.py` is the beginning of type compatibility testing but has significant gaps. It will be **subsumed by the comprehensive `test_type_compatibility.py`** proposed in [Section 4](#4-data-type-compatibility-strategy). The new test file extends coverage to all Snowflake types, adds negative cases, and runs in dual mode. | Status | Test | Version | Rationale | File | |:------:|------|---------|-----------|------| | 🟢 | Type mapping (JSON): 10 types, positive only | v4 | Superseded by `test_type_compatibility.py` for comprehensive dual-mode coverage. 
| `test_schema_mapping.py` | | 🟢 | Unsupported converter rejection | v4 | Converter rejection is KC framework level | `test_schema_not_supported_converter.py` | #### 3.1.5 Table Creation Auto table creation requires the connector to infer column types from the incoming data schema. Table creation itself is converter-independent — testing with a single converter (Avro SR, which provides an explicit schema) is sufficient. > **Note on v3 reference capture**: The v3-first reference capture technique (used for SR tests in PR #1398) is not feasible here because these tests require Confluent Schema Registry, which triggers the v3 classloader conflict. However, these tests only assert table schema and row counts — not data values — so parity risk is low. The table creation DDL is generated by the same converter code in both versions. | Status | Test | Version | Rationale | Format | File | |:------:|------|---------|-----------|--------|------| | 🟢 | Auto table creation from Avro SR schema | v4 | v4-only; v3 blocked by SR classloader. Asserts table schema and row counts only. | Avro SR | `test_auto_table_creation.py` | | 🟢 | Auto table creation with topic2table mapping | v4 | v4-only; v3 blocked by SR classloader. Asserts table schema and row counts only. | Avro SR | `test_auto_table_creation_topic2table.py` | #### 3.1.6 High-Throughput Mode Ingestion `snowflake.validation=server_side`. 
| Status | Test | Version | Format | Notes | |:------:|------|---------|--------|-------| | 🔴 | Valid JSON records land correctly; verify toggle default is `server_side` when config omitted | v4 | JSON | **P0.** FR3 -- verify data arrives without client-side RowValidator + default toggle behavior | | 🔴 | Valid Avro SR records land correctly | v4 | Avro SR | **P1.** FR3 | #### 3.1.7 Iceberg Tables > **V3 scope note**: V3 (3.2.x) has iceberg support via `snowflake.streaming.iceberg.enabled=true` > but it was experimental and used custom connector-side code (`IcebergInitService`, > `IcebergTableStreamingRecordMapper`, `IcebergSchemaEvolutionService`) that was removed in v4. > V4 delegates iceberg entirely to SSv2 which handles it transparently. The `v4_config_to_v3` > migration does not inject `snowflake.streaming.iceberg.enabled=true`, so running these tests > against v3 would silently write to regular (non-iceberg) tables rather than fail loudly. > All iceberg tests are therefore v4-only. > > **External volume prerequisite**: tests require an AWS external volume named > `kafka_push_e2e_volume_aws` (override with env var `ICEBERG_EXTERNAL_VOLUME`). 
| Status | Test | Version | Rationale | Format | Cloud | File | |:------:|------|---------|-----------|--------|-------|------| | 🟢 | Iceberg JSON ingestion (2x2: validation x schematization) | v4 | schema=off: VARIANT bag-of-bits; schema=on: mixed VARIANT+typed table (BIGINT/DOUBLE/TEXT pre-declared, no SE needed); all 4 combos pass | JSON | AWS | `iceberg/test_iceberg_json.py::test_iceberg_json_ingestion` | | 🟢 | Iceberg Avro ingestion (2x2: validation x schematization) | v4 | Same matrix as JSON but with Avro SR + AvroConverter; verifies typed columns and RECORD_METADATA | Avro SR | AWS | `iceberg/test_iceberg_avro.py::test_iceberg_avro_ingestion` | | 🟢 | Iceberg SE JSON — add column (client-side) | v4 | Connector issues `ALTER ICEBERG TABLE ADD COLUMN` when RowValidator detects new columns; table starts with CITY+RECORD_METADATA, wave 1 adds AGE, wave 2 adds COUNTRY | JSON | AWS | `iceberg/test_iceberg_se_json.py::test_iceberg_se_add_column` | | 🟢 | Iceberg SE JSON — multi-wave (client-side) | v4 | Three-wave evolution: city-only → city+age → city+age+country; verifies NULL backfill for pre-existing rows | JSON | AWS | `iceberg/test_iceberg_se_json.py::test_iceberg_se_multi_wave` | | 🟡 | Iceberg SE JSON — server-side (xfail -- known limitation) | v4 | `ENABLE_SCHEMA_EVOLUTION=TRUE` + `validation=server_side` (HT mode) silently discards typed column additions on ICEBERG_VERSION=3 tables; client-side SE via `ALTER ICEBERG TABLE ADD COLUMN` works correctly; remove xfail once Snowflake server-side SE supports typed columns on iceberg | JSON | AWS | `iceberg/test_iceberg_se_json.py::test_iceberg_se_json_server_side` | | 🟢 | Iceberg SE Avro — add column (client-side) | v4 | Same as JSON SE but with Avro SR; verifies column additions from evolving Avro schemas | Avro SR | AWS | `iceberg/test_iceberg_se_avro.py::test_iceberg_se_avro_add_column` | #### 3.1.8 Pre-Flight Check (FR11) | Status | Test | Scenario | Notes | |:------:|------|----------|-------| | 🟢 | No Error 
Table configured -> startup warning logged, connector runs | `snowflake.validation=server_side`, no Error Table | `test_error_table_without_error_logging` | | 🟢 | Error Table configured -> startup succeeds, errors captured | `snowflake.validation=server_side`, Error Table present | `test_error_table_with_error_logging` | #### 3.1.9 Case Sensitivity | Status | Test | Version | Rationale | File | |:------:|------|---------|-----------|------| | 🟢 | Case-sensitive table name handling | dual | Verifies table name case sensitivity across v3/v4 | `compatibility/test_compatibility_case_sensitivity.py::test_compatibility_case_sensitivity_table_name` | | 🟢 | Case-sensitive ingestion column names | dual | Verifies column name case handling across v3/v4 | `compatibility/test_compatibility_case_sensitivity.py::test_compatibility_case_sensitivity_ingestion_columns` | | 🟢 | Case sensitivity in schema evolution | dual | Verifies SE handles case-sensitive column names | `compatibility/test_compatibility_case_sensitivity.py::test_case_sensitivity_schema_evolution` | #### 3.1.10 Migration | Status | Test | Version | Rationale | File | |:------:|------|---------|-----------|------| | 🟢 | v3→v4 migration without duplicates | dual | Verifies seamless migration path | `compatibility/test_migration.py::test_migration_without_duplicates` | | 🟢 | v3→v4 migration with possible duplicates | dual | Verifies migration handles at-least-once delivery | `compatibility/test_migration.py::test_migration_with_possible_duplicates` | --- ### 3.2 Error Handling Error handling is the highest-risk area for v3/v4 parity. In v3, SSv1 always validates and errors are deterministic. In v4, the `RowValidator` (copied from SSv1's `DataValidationUtil`) is a separate layer that can be toggled. **All compatibility-mode error handling tests must be dual** because they directly exercise client-side validation.
#### 3.2.1 Dead Letter Queue -- `errors.tolerance=all` (FR1, Compatibility Mode) | Status | Test | Version | Format | Error Type | File | |:------:|------|---------|--------|-----------|------| | 🟢 | Invalid JSON -> DLQ | v4 | JSON | Deserialization | `test_snowpipe_streaming_string_json_dlq.py` -- v4-only; dual conversion pending | | 🔴 | Schema mapping error -> DLQ | v4 | JSON | Type mismatch | `test_snowpipe_streaming_schema_mapping_dlq.py` -- `@pytest.mark.skip`; broken, not divergent | | 🔴 | DLQ Kafka headers preserved (v3/v4 byte-for-byte comparison) | dual | JSON | Any | **P0.** FR9: DLQ error messages must be identical between v3 and v4 | | 🔴 | DLQ with Avro data | dual | Avro SR | Deserialization | **P2.** FR1 -- v3 parity blocked by SR classloader. DLQ routing is format-independent (KC framework level); format-specific differences unlikely. | | 🔴 | DLQ with Protobuf data | dual | Protobuf SR | Deserialization | **P2.** FR1 -- same reasoning as Avro DLQ. | | 🔴 | DLQ with multi-partition topics | dual | JSON | Mixed | **P1.** FR1 -- only one test (`test_snowpipe_streaming_string_json_ignore_tombstone.py`) currently exercises multi-partition. | #### 3.2.2 Abort -- `errors.tolerance=none` (FR2, Compatibility Mode) | Status | Test | Version | Format | Error Type | Notes | |:------:|------|---------|--------|-----------|-------| | 🔴 | Deserialization error -> task FAILED | dual | JSON | Bad payload | **P1.** FR2. Verify v4 aborts identically to v3 on bad payload. Abort mechanism already verified by `ingest_one_type_abort` fixture; gap is v3/v4 parity for this error type. | | 🔴 | Schema mismatch -> task FAILED | dual | JSON | Type mismatch | **P1.** FR2. Verify v4 aborts identically to v3 on type mismatch. Same — mechanism works, parity not yet verified. | #### 3.2.3 Error Table Routing (High-Throughput Mode) When `snowflake.validation=server_side`, invalid records route to SSv2 Error Tables instead of DLQ. 
| Status | Test | Version | Format | Notes | |:------:|------|---------|--------|-------| | 🟢 | Invalid records -> SSv2 Error Table (not DLQ) | v4 | JSON | `test_error_table_with_error_logging`, `test_error_table_accounting[v4-ht]` | | 🟢 | Error Table + value validation (VARCHAR overflow) and schema mismatch (missing NOT NULL column) | v4 | JSON | `test_error_table_schema_mismatch` | | 🟢 | Compat routes to DLQ while HT routes to Error Table (same bad record, both modes) | v4 | JSON | `test_error_table_vs_dlq_routing` | --- ### 3.3 Schema Evolution Schema evolution has two code paths: - **Compatibility mode** (`snowflake.validation=client_side`): Client-side `ALTER TABLE ADD COLUMN` / `ALTER TABLE DROP NOT NULL`. The connector's `RowValidator` detects structural mismatches (extra columns, missing NOT NULL) and the `SnowflakeSchemaEvolutionService` issues DDL. - **High-throughput mode** (`snowflake.validation=server_side`): Records go directly to the SSv2 SDK. Schema evolution depends on the Snowflake table's `ENABLE_SCHEMA_EVOLUTION = TRUE` property -- the server handles it. #### 3.3.1 Client-Side Schema Evolution (Compatibility Mode) **Analysis notes:** - All `se_*.json` config templates set `snowflake.validation=client_side` but do NOT explicitly set `snowflake.enable.schematization`. The v4 default is `true`, so schematization is implicitly on. - `test_schema_evolution_streaming.py` uses `snowpipe_streaming_schema_evolution.json` which also does not set validation or schematization explicitly (relying on defaults). - **Overlap detected**: `test_se_nonnullable_json` and `test_schema_evolution_drop_not_null` test the same scenario (NOT NULL column dropped by SE). `test_se_auto_table_creation_json` and `test_schema_evolution_add_columns` partially overlap (new columns added via SE). These should be deduplicated when the SE test branches are merged. - **Config_variants gap**: `evo=True, schema=False` combos are all skipped with a TODO.
`evo=False, schema=True, valid=False` returns early with no assertions. Tests are dual when they exercise the client-side validation path (structural error detection triggers SE). Tests that only check row counts after SE can be v4-only. | Status | Test | Version | Rationale | Format | File | |:------:|------|---------|-----------|--------|------| | 🟢 | Add columns (JSON, `{city, age}`) | dual | SE triggers via RowValidator structural error detection | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_add_columns` | | 🟢 | Multi-wave evolution (wave 1 -> wave 2) | dual | Same path -- structural error triggers ADD COLUMN | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_multi_wave` | | 🟢 | Happy path (schema matches table) | v4 | No SE triggered, no validation-dependent behavior | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_happy_path` | | 🟢 | Drop NOT NULL constraint | dual | SE triggers via RowValidator null-in-NOT-NULL detection | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_drop_not_null` | | 🟢 | Disabled mid-stream (toggle SE off) | v4 | Tests DDL privilege, not validation path | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_disabled_mid_stream` | | 🟢 | Config matrix (8 combos: `evo x schematization x validation`) | dual | Core validation/SE interaction test. Has internal `pytest.skip()` for certain v3/v4 combos. `evo=True, schema=False` combos skipped with TODO. 
| JSON | `test_schema_evolution_streaming.py::test_schema_evolution_config_variants` | | 🟢 | Avro SR with 2 topics, different schemas | v4 | v3 can't auto-create tables for Avro SR with topic2table.map; pre-created tables cause pipe invalidation on ALTER TABLE | Avro SR | `schema_evolution/test_se_avro_sr.py` | | 🟢 | Auto table creation + SE (JSON, 2 topics) | dual | SE + auto-create triggers via structural error | JSON | `schema_evolution/test_se_auto_table_creation_json.py` | | 🟢 | Auto table creation + SE (Avro SR, 2 topics) | v4 | Auto table creation is v4-only; v3 requires pre-existing tables | Avro SR | `schema_evolution/test_se_auto_table_creation_avro_sr.py` | | 🟢 | Non-nullable columns + SE | dual | SE triggers via null-in-NOT-NULL path | JSON | `schema_evolution/test_se_nonnullable_json.py` | | 🟢 | Tombstone handling during SE | dual | Asserts data values with SE | JSON | `schema_evolution/test_se_json_ignore_tombstone.py` | | 🟢 | Random batch sizes (flush timing) | dual | Tests timing-sensitive SE path | JSON | `schema_evolution/test_se_random_row_count.py` | | 🟢 | Nullable values after SMT + SE | dual | SE structural error path | JSON + SMT | `schema_evolution/test_se_nullable_values_after_smt.py` | #### 3.3.2 Server-Side Schema Evolution (High-Throughput Mode) When `snowflake.validation=server_side`, the connector does not perform client-side validation or DDL. Records go directly to the SSv2 SDK's `channel.appendRow()`. Schema evolution depends entirely on the Snowflake table property `ENABLE_SCHEMA_EVOLUTION = TRUE`. Note: The connector source has no `MATCH_BY_COLUMN_NAME` or FDN-specific logic. "Server-side SE" means the Snowflake service handles schema mismatches for tables with `ENABLE_SCHEMA_EVOLUTION = TRUE`. The `test_schema_evolution_config_variants` test already covers `evo=True, schema=True, valid=False` for v4 (server-side SE with schematization on). 
However, important gaps remain: | Status | Test | Version | Format | Notes | Suggested File | |:------:|------|---------|--------|-------|----------------| | 🟢 | Server-side SE: new columns added (validation off, SE on) | v4 | JSON | Minimal coverage; config_variants covers this combo | `test_schema_evolution_ht.py` | | 🔴 | Server-side SE: NOT NULL dropped + schematization off (parametrized) | v4 | JSON | **P1.** Two scenarios in one parametrized test: NOT NULL drop and schematization=off. config_variants skips the latter (TODO). | `test_schema_evolution_ht.py` | | 🔴 | Server-side SE with Avro SR | v4 | Avro SR | **P2.** FR6. Avro provides explicit schema; server-side SE may behave differently than JSON. | `test_schema_evolution_ht.py` | | 🔴 | Concurrent SE from multiple partitions | v4 | JSON | **P1.** Race condition in ALTER TABLE from multiple tasks. Cannot be caught by unit tests. | `test_schema_evolution_ht.py` | --- ### 3.4 RECORD_CONTENT Mode `snowflake.enable.schematization=false` -- data lands in `RECORD_CONTENT` + `RECORD_METADATA` VARIANT columns (FR4). `RECORD_CONTENT` is a VARIANT column. Validation mode (`snowflake.validation`) is irrelevant here — the entire payload goes into VARIANT with no type checking. The `snowflake.validation` config was removed from these test templates. V3 parity was verified by running JSON, String, and ByteArray tests in dual mode (v3 + v4) on Confluent 7.8.0 (2026-03-31) — both versions produce identical results. Tests are now v4-only. Note that v3's own E2E tests (`SnowflakeSinkTaskForStreamingIT.java`) had **no RECORD_CONTENT value assertions** — they only checked row counts and RECORD_METADATA key presence (`offset`, `partition`). Our v4 tests are net-new coverage: field-level content verification, base64 encoding for bytes, and double-encoding edge case handling. 
| Status | Test | Version | Rationale | Format | File | |:------:|------|---------|-----------|--------|------| | 🟢 | RECORD_CONTENT JSON (StringConverter key, JsonConverter value) | v4 | v3 parity confirmed (dual run 2026-03-31). Assertions capture v3 reference behavior. | JSON (native) | `test_snowpipe_streaming_legacy_string_json.py` | | 🟢 | RECORD_CONTENT StringConverter (raw string payload) | v4 | v3 parity confirmed (dual run 2026-03-31). Assertions capture v3 reference behavior. | String | `test_snowpipe_streaming_legacy_string_converter.py` | | 🟢 | RECORD_CONTENT ByteArrayConverter (base64 payload) | v4 | v3 parity confirmed (dual run 2026-03-31). Assertions capture v3 reference behavior. | Bytes | `test_snowpipe_streaming_legacy_byte_array_converter.py` | | 🟢 | RECORD_CONTENT + Avro SR | v4 | v4 confirmed 2026-03-31. v3 parity cannot be verified: v3's bundled SR classes clash with Confluent 7.8.0 platform SR classes (ServiceConfigurationError). Assertions reflect expected Avro deserialization behavior. | Avro SR | `test_snowpipe_streaming_legacy_avro_sr.py` | | 🔴 | RECORD_CONTENT + SMT (nullable values, ExtractField) | v4 | **P2.** Data values in VARIANT + SMT interaction. v3 equivalent: `TestSnowpipeStreamingNullableValuesAfterSmt` | JSON + SMT | -- | --- ### 3.5 Connector Lifecycle & Resilience All tests send data in phases, performing disruptive operations between sends. These are v4-only: lifecycle operations (pause/resume/restart/delete) are Kafka Connect framework behavior, not SDK-dependent. The connector's interaction with the KC REST API is identical regardless of SSv1 vs SSv2. > **Note on ingestion pattern**: Existing lifecycle tests send a batch, perform the disruptive operation, then send another batch. This "phase-based" approach may not sufficiently exercise interleaving — if all data lands within a single flush cycle, the disruption happens in a quiet window. 
New resilience tests (channel invalidation, backend errors, network partitions) should use a **continuous ingestion** pattern: a background producer sends records throughout the test while disruptions occur, ensuring the connector handles mid-flight interruptions. #### 3.5.1 Lifecycle Tests (existing) | Status | Test | Operation Sequence | Version | File | |:------:|------|-------------------|---------|------| | 🟢 | Restart (task + connector) | send -> restart -> send -> restart -> send | v4 | `test_kc_restart.py` | | 🟢 | Delete -> Create (new connector, same name) | send -> delete -> create -> send | v4 | `test_kc_delete_create.py` | | 🟢 | Delete -> Create + Chaos | send -> delete -> create (with failures) -> send | v4 | `test_kc_delete_create_chaos.py` | | 🟢 | Delete -> Resume (new connector, inherits offsets) | send -> delete -> resume -> send | v4 | `test_kc_delete_resume.py` | | 🟢 | Delete -> Resume + Chaos | send -> delete -> resume (with failures) -> send | v4 | `test_kc_delete_resume_chaos.py` | | 🟢 | Pause -> Create (new connector while paused) | send -> pause -> create -> send | v4 | `test_kc_pause_create.py` | | 🟢 | Pause -> Create + Chaos | send -> pause -> create (with failures) -> send | v4 | `test_kc_pause_create_chaos.py` | | 🟢 | Pause -> Resume (same connector) | send -> pause -> resume -> send | v4 | `test_kc_pause_resume.py` | | 🟢 | Pause -> Resume + Chaos | send -> pause -> resume (with failures) -> send | v4 | `test_kc_pause_resume_chaos.py` | | 🟢 | Recreate (multiple delete/create cycles) | send -> delete -> recreate -> send x2 | v4 | `test_kc_recreate.py` | | 🟢 | Recreate + Chaos | multiple cycles with failures | v4 | `test_kc_recreate_chaos.py` | #### 3.5.2 CREATE OR REPLACE TABLE Recovery `CREATE OR REPLACE TABLE` mid-stream causes v4 to silently lose data. v3 recovers because SSv1's `isClosed()` detects pipe invalidation. 
v4's SSv2 SDK does not surface the invalidation — `isClosed()` returns `false` and `appendRow()` succeeds (buffers locally), so the existing recovery path never triggers. **Root cause under investigation.** | Status | Test | Version | Notes | File | |:------:|------|---------|-------|------| | 🔴 | Table replacement recovery (single topic, SE re-evolve) | v4 (xfail) | **P1.** Requires connector fix. Currently v3-only. | `schema_evolution/test_se_replace_table.py` | | 🔴 | Table replacement recovery (multi-topic, SE re-evolve) | v4 (xfail) | **P1.** Same issue. Currently v3-only. | `schema_evolution/test_se_multi_topic_replace_table.py` | #### 3.5.3 Fault Injection & Recovery (missing) These tests should use continuous ingestion (background producer) to exercise mid-flight fault handling. | Status | Test | Fault Type | Version | Notes | |:------:|------|-----------|---------|-------| | 🔴 | Channel invalidation recovery | Server-side channel drop; client must detect and re-open | v4 | **P2.** Verify no data loss after channel re-open under continuous load. Requires SSv2 server-side channel drop simulation -- hard to reproduce in Docker. | | 🔴 | Transient server errors (5xx + 429) | Simulated 5xx errors and 429 throttling during ingestion | v4 | **P2.** Verify backoff/retry and eventual recovery. PR #1386 implemented offset-based backoff for 429; unit tests cover retry logic. E2E requires mock proxy. | | 🔴 | Network partition tolerance | Temporary connectivity loss between KC worker and Snowflake | v4 | **P2.** Verify connector recovers after partition heals, no duplicate/lost records. Requires Docker network manipulation during continuous ingestion -- hard to make deterministic in CI. | --- ### 3.6 Default Pipe Features FR5 (Default Pipes only) + FR7 (Default Pipe Improvements). Must be tested in both compatibility and high-throughput modes. 
| Status | Test | Feature | Mode | Version | Suggested File | |:------:|------|---------|------|---------|----------------| | 🟢 | Auto-Increment (Identity) columns | FR7 | Compatibility | v4 | `test_default_pipe_features.py` | | 🟢 | Auto-Increment (Identity) columns | FR7 | High-Throughput | v4 | `test_default_pipe_features.py` | | 🟢 | Default timestamp properties | FR7 | Compatibility | v4 | `test_default_pipe_features.py` | | 🟢 | Default timestamp properties | FR7 | High-Throughput | v4 | `test_default_pipe_features.py` | | 🔴 | Pre-clustered tables | FR7 | Compatibility | v4 | `test_default_pipe_features.py` | | 🔴 | Pre-clustered tables | FR7 | High-Throughput | v4 | `test_default_pipe_features.py` | --- ### 3.7 Load & Stress > **Scope**: These are CI-level smoke/pressure tests that run in pre-commit. They verify the connector handles moderate load without failures but are not intended to represent production-scale benchmarking. Dedicated load and benchmarking tests exist separately for validating throughput at scale (e.g., 10 GB/s target for high-throughput mode). | Status | Test | Scale | Version | File | |:------:|------|-------|---------|------| | 🟢 | Pressure: 200 topics x 12 partitions x 10K records (24M total) | High | v4 | `pressure/test_pressure_init.py` | | 🟢 | Pressure + Restart: 10 topics x 3 partitions x 200K records with chaos ops | High | v4 | `pressure/test_pressure_restart.py` | --- ## 4. Data Type Compatibility **Does v4 compatibility mode handle every Snowflake data type the same way v3 does?** V4 client-side validation (`RowValidator` + `DataValidationUtil`, code copied from SSv1 SDK) runs before the SSv2 SDK. Server-side mode bypasses client validation entirely. Divergences occur when SSv2 handles a value differently than SSv1 did, and client-side normalization doesn't compensate. Tests: `test_type_compatibility.py` (JSON, dual mode). Each test covers positive (valid values land correctly) and negative (invalid values routed to DLQ). 
| Target Data Type | v3 | v4 Client | v4 Server | Notes | |---|:---:|:---:|:---:|---| | NUMBER | 🟢 | 🟢 | 🟢 | | | FLOAT | 🟢 | 🟢 | 🟢 | | | VARCHAR | 🟢 | 🟢 | 🟢 | | | BINARY (hex String input) | 🟢 | 🟢 | 🟡 | Server-side may interpret hex as base64, producing incorrect bytes | | BOOLEAN | 🟢 | 🟢 | 🟢 | | | BOOLEAN (Integer 0/1 input) | 🟢 | 🟢 | 🟡 | Server-side rejects Integer boolean values; rows not ingested | | DATE | 🟢 | 🟢 | 🟢 | | | TIME | 🟢 | 🟢 | 🟢 | | | TIMESTAMP_NTZ | 🟢 | 🟢 | 🟢 | | | TIMESTAMP_NTZ (Integer epoch) | 🟢 | 🟢 | 🟡 | Server-side shifts stored value by default timezone offset (~8h) | | TIMESTAMP_LTZ | 🟢 | 🟢 | 🟢 | | | TIMESTAMP_TZ | 🟢 | 🟢 | 🟢 | | | VARIANT | 🟢 | 🟢 | 🟢 | | | VARIANT (JSON String input) | 🟢 | 🟢 | 🟢 | | | VARIANT (bare String input) | 🟢 | 🟢 | 🟡 | Server-side accepts invalid JSON scalars; client-side correctly rejects to DLQ | | OBJECT | 🟢 | 🟢 | 🟢 | | | ARRAY | 🟢 | 🟢 | 🟢 | | | ARRAY (JSON String input) | 🟢 | 🟢 | 🟡 | Server-side wraps string as single-element array instead of parsing | | NULL | 🟢 | 🟢 | 🟢 | | | NULL (VARIANT column) | 🟢 | 🟡 | 🟡 | Stored as text `'null'` instead of SQL NULL | | Cross-type mismatch | 🟢 | 🟢 | 🟢 | | | GEOGRAPHY, GEOMETRY | 🟢 | 🟢 | 🟢 | Unsupported in Streaming; rejected in all modes | | VECTOR | 🟡 | 🟢 | 🟢 | New in v4. Not supported in v3. | | Structured OBJECT/ARRAY | 🟡 | 🟢 | 🟢 | New in v4. Not supported in v3. | | Collated VARCHAR | 🔴 | 🔴 | 🔴 | Not tested. Unit-level coverage only. | ### Avro-Specific Type Mapping Tests: `test_type_compatibility_avro.py` (Avro SR, v4-compat + v4-ht). V3 parity testing is blocked by the SR classloader conflict. Avro provides typed values (native int, float, boolean, bytes, logical types) unlike schemaless JSON. The AvroConverter produces Kafka Connect Structs with schemas, testing a different pipeline path than JSON. 
| Avro Type | Target Column | v4 Client | v4 Server | Notes | |---|:---:|:---:|:---:|---| | `int` | NUMBER | 🟢 | 🟢 | 32-bit typed integer | | `long` | NUMBER | 🟢 | 🟢 | 64-bit typed integer | | `float` | FLOAT | 🟢 | 🟢 | 32-bit; incl. NaN, Inf, -Inf as native floats | | `double` | FLOAT | 🟢 | 🟢 | 64-bit; incl. NaN, Inf, -Inf | | `string` | VARCHAR | 🟢 | 🟢 | | | `boolean` | BOOLEAN | 🟢 | 🟢 | Native bool (no 0/1 coercion path) | | `bytes` | BINARY | 🟢 | 🟢 | Raw bytes; RowValidator unwraps ByteBuffer to byte[] | | `date` logical | DATE | 🟢 | 🟢 | Days-from-epoch via Avro logical type | | `timestamp-millis` logical | TIMESTAMP_NTZ | 🟢 | 🟢 | Millis-from-epoch via logical type | | `array` | ARRAY | 🟢 | 🟢 | Native Avro array | | `map` | VARIANT | 🟢 | 🟢 | Avro map → VARIANT | | null unions | various | 🟢 | 🟢 | Nullable union handling | | `bytes` → VARCHAR | divergence | 🟢 | 🟡 | v4-compat rejects byte[]; v4-ht coerces to base64 | | `bytes` → NUMBER | error | 🟢 | 🟢 | Cross-type: byte[] rejected (Avro-specific) | | `float` NaN/Inf → NUMBER | error | 🟢 | 🟢 | Cross-type: native float NaN (Avro-specific) | | `map`/`array` → BOOLEAN | error | 🟢 | 🟢 | Cross-type: typed complex → primitive | ================================================ FILE: test/README.md ================================================ # End-to-End Tests for Snowflake Kafka Connector E2E tests spin up a Kafka cluster in Docker, send records, and verify they appear in Snowflake. CI workflows: **`end-to-end.yaml`** (E2E) and **`end-to-end-stress.yml`** (stress tests) in `.github/workflows/`. ### Prerequisites - Docker (with Docker Compose v2) - Snowflake credentials (`profile.json`) - Built connector plugin (run `build_runtime_jar.sh` first) ## Quick Start ```bash # 1. Build the connector (from project root) export SNOWFLAKE_CREDENTIAL_FILE=/path/to/profile.json ./test/build_runtime_jar.sh . package confluent # or 'apache' # 2. 
Run tests cd test ./run_tests.sh --platform=confluent --platform-version=7.8.0 ``` ### Usage ```bash ./run_tests.sh --platform= --platform-version= [options] # Confluent examples: ./run_tests.sh --platform=confluent --platform-version=7.8.0 ./run_tests.sh --platform=confluent --platform-version=6.2.15 ./run_tests.sh --platform=confluent --platform-version=7.8.0 --tests=test_string_json # Apache Kafka examples: ./run_tests.sh --platform=apache --platform-version=3.7.2 ./run_tests.sh --platform=apache --platform-version=2.8.2 --java-version=11 # Options: ./run_tests.sh --platform=confluent --platform-version=7.8.0 -- -m pressure # Stress tests ./run_tests.sh --platform=apache --platform-version=3.7.2 --keep # Keep containers after tests ./run_tests.sh --platform=confluent --platform-version=7.8.0 --rebuild # Force rebuild images ./run_tests.sh --platform=confluent --platform-version=7.8.0 --logs # Show logs on failure ./run_tests.sh --platform=apache --platform-version=3.7.2 --cloud=AWS # Target specific Snowflake cloud ``` ### Supported Versions **Confluent Platform:** - `6.2.x` (e.g., 6.2.15) - `7.x` (e.g., 7.6.0, 7.8.2) **Apache Kafka:** Any version available as an official tarball (e.g., 2.8.2, 3.7.2) ## Architecture The test environment uses layered Docker Compose files in `docker/`: - `docker-compose.base.yml` -- test-runner container (shared by all platforms) - `docker-compose.confluent.yml` -- Confluent Platform (Zookeeper, Kafka, Schema Registry, Kafka Connect as separate containers) - `docker-compose.apache.yml` -- Apache Kafka (single container with embedded services) - `docker-compose.amd64.yml` -- forces linux/amd64 emulation (Confluent 6.2.x on ARM) ### Confluent Platform ``` ┌────────────────────────────────────────────────────────────┐ │ zookeeper │ kafka │schema-registry│kafka-connect │ │ :2181 │ :9092 │ :8081 │ :8083 │ ├──────────────┴──────────────┴───────────────┴──────────────┤ │ test-runner │ │ (Python + protobuf + tests) │ 
└────────────────────────────────────────────────────────────┘ ``` ### Apache Kafka ``` ┌────────────────────────────────────────────────────────────┐ │ kafka │ │ (Zookeeper + Kafka + Connect + Schema Registry) │ │ :2181 :9092 :8083 :8081 │ ├────────────────────────────────────────────────────────────┤ │ test-runner │ │ (Python + protobuf + tests) │ └────────────────────────────────────────────────────────────┘ ``` ### How E2E Tests Work There are two test infrastructures that run side by side: **Pytest (primary)** -- New tests live in `tests/` as standard pytest modules. Fixtures in `conftest.py` handle connector lifecycle, table creation, and cleanup (including Kafka topic deletion). The runner script passes connection addresses and platform version as CLI options; see `conftest.py` for the full list. **Legacy infra** -- Older tests live in `test_suit/` as classes with `send`/`verify`/`clean` methods. They are orchestrated by `test_verify.py` via `test_executor.py`. Connector config templates in `rest_request_template/` have a one-to-one correspondence with these classes (e.g., `travis_correct_json_json.json` maps to `test_suit/test_json_json.py`). The driver replaces placeholder values (e.g., `SNOWFLAKE_TEST_TOPIC`, `CONFLUENT_SCHEMA_REGISTRY`) with runtime values. Both share the same `KafkaDriver` (`lib/driver.py`) and connector config templates. `run_tests.sh` passes all pytest CLI options explicitly when launching the test-runner container. ## Stress Tests Stress tests use the same Docker infrastructure but with `-m pressure` passed to pytest: ```bash ./run_tests.sh --platform=confluent --platform-version=7.6.0 -- -m pressure ``` CI stress (`.github/workflows/end-to-end-stress.yml`) sets `CONNECT_OFFSET_FLUSH_INTERVAL_MS=10000` on the `run-e2e-tests` step so Kafka Connect uses a **10 second** `offset.flush.interval.ms` (less frequent `preCommit` than the default 1 s E2E setting). 
For local pressure runs, export the same variable before `./run_tests.sh` if you want that behavior. When `-m pressure` is set, pytest selects only the pressure-marked tests: 1. **test_pressure_restart** (`tests/pressure/test_pressure_restart.py`) -- Creates 10 topics with 3 partitions each and sends 200,000 records per partition. During verification, the connector is periodically restarted, paused, resumed, and deleted/recreated to test resilience under load. 2. **test_pressure_init** (`tests/pressure/test_pressure_init.py`) -- Creates 200 topics with 12 partitions each and sends 10,000 records per partition (2,400 partitions, 24M records total). Sends are parallelized across 10 threads. Both tests verify that the exact expected row count appears in Snowflake. ## Debugging ### View logs ```bash docker logs -f test-kafka-connect docker logs -f test-kafka ``` ### Check connector status ```bash curl http://localhost:8083/connectors curl http://localhost:8083/connectors//status ``` ### Manual cleanup ```bash cd test/docker docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml down -v --remove-orphans ``` ## Directory Structure ``` test/ tests/ Pytest test modules (new tests go here) conftest.py Pytest fixtures and CLI options pyproject.toml Pytest configuration lib/ Shared helpers (KafkaDriver, config, crypto) rest_request_template/ Connector config templates (one per test case) test_suit/ Legacy E2E test classes (send/verify/clean) test_data/ Protobuf schema and generated code docker/ Docker Compose files, Dockerfiles, and test runner scripts apache_properties/ Kafka/Zookeeper/Connect config (used by Apache Docker image) build_runtime_jar.sh Builds connector JAR/ZIP test_verify.py Legacy E2E test entry point test_suites.py Legacy test suite registry test_selector.py Legacy test filtering logic test_executor.py Legacy test execution engine ``` ================================================ FILE: test/__init__.py 
================================================ ================================================ FILE: test/apache_properties/connect-distributed.properties ================================================ ## # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ## # This file contains some of the configurations for the Kafka Connect distributed worker. This file is intended # to be used with the examples, and some settings may differ from those used in a production system, especially # the `bootstrap.servers` and those specifying replication factors. # A list of host/port pairs to use for establishing the initial connection to the Kafka cluster. bootstrap.servers=localhost:9092 # by default this value is 300000 ( 5minutes) # this is the max threshold of two consecutive polling times. If poll() is not invoked within this time, a rebalance can occur max.poll.interval.ms=360000 # default is 500 records max.poll.records=100 # unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs group.id=connect-cluster # The converters specify the format of data in Kafka and how to translate it into Connect data. 
Every Connect user will # need to configure these based on the format they want their data in when loaded from or stored into Kafka key.converter=org.apache.kafka.connect.json.JsonConverter value.converter=org.apache.kafka.connect.json.JsonConverter # Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply # it to key.converter.schemas.enable=true value.converter.schemas.enable=true # Topic to use for storing offsets. This topic should have many partitions and be replicated and compacted. # Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create # the topic before starting Kafka Connect if a specific topic configuration is needed. # Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value. # Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able # to run this example on a single-broker cluster and so here we instead set the replication factor to 1. offset.storage.topic=connect-offsets offset.storage.replication.factor=1 #offset.storage.partitions=25 # Topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated, # and compacted topic. Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create # the topic before starting Kafka Connect if a specific topic configuration is needed. # Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value. # Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able # to run this example on a single-broker cluster and so here we instead set the replication factor to 1. 
config.storage.topic=connect-configs config.storage.replication.factor=1 # Topic to use for storing statuses. This topic can have multiple partitions and should be replicated and compacted. # Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create # the topic before starting Kafka Connect if a specific topic configuration is needed. # Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value. # Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able # to run this example on a single-broker cluster and so here we instead set the replication factor to 1. status.storage.topic=connect-status status.storage.replication.factor=1 #status.storage.partitions=5 # Flush much faster than normal, which is useful for testing/debugging offset.flush.interval.ms=1000 # These are provided to inform the user about the presence of the REST host and port configs # Hostname & Port for the REST API to listen on. If this is set, it will bind to the interface used to listen to requests. #rest.host.name= rest.port=8083 # The Hostname & Port that will be given out to other workers to connect to i.e. URLs that are routable from other servers. #rest.advertised.host.name= #rest.advertised.port= # Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins # (connectors, converters, transformations). 
The list should consist of top level directories that include # any combination of: # a) directories immediately containing jars with plugins and their dependencies # b) uber-jars with plugins and their dependencies # c) directories immediately containing the package directory structure of classes of plugins and their dependencies # Examples: # plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors, plugin.path=/usr/local/share/kafka/plugins config.providers=file config.providers.file.class=org.apache.kafka.common.config.provider.FileConfigProvider # Allow connectors to override consumer/producer configs (e.g. consumer.override.auto.offset.reset) connector.client.config.override.policy=All ================================================ FILE: test/apache_properties/file-secrets.txt ================================================ PASSPHRASE=test ================================================ FILE: test/apache_properties/kraft-server.properties ================================================ # KRaft mode server configuration for Apache Kafka 4.x+ # Combined broker + controller on a single node (no ZooKeeper) ############################# KRaft Settings ############################# process.roles=broker,controller node.id=1 controller.quorum.voters=1@localhost:9093 controller.listener.names=CONTROLLER listener.security.protocol.map=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT ############################# Socket Server Settings ############################# listeners=PLAINTEXT://:9092,CONTROLLER://:9093 inter.broker.listener.name=PLAINTEXT num.network.threads=10 num.io.threads=8 socket.send.buffer.bytes=102400 socket.receive.buffer.bytes=102400 socket.request.max.bytes=104857600 ############################# Message Size Settings ############################# # 30 MiB - matches ZK-based server.properties message.max.bytes=31457280 replica.fetch.max.bytes=31457280 ############################# Log Basics ############################# 
log.dirs=/tmp/kraft-combined-logs num.partitions=1 num.recovery.threads.per.data.dir=1 ############################# Internal Topic Settings ############################# offsets.topic.replication.factor=1 transaction.state.log.replication.factor=1 transaction.state.log.min.isr=1 ############################# Log Retention Policy ############################# log.retention.hours=168 log.segment.bytes=1073741824 log.retention.check.interval.ms=300000 ############################# Group Coordinator Settings ############################# group.initial.rebalance.delay.ms=0 ================================================ FILE: test/apache_properties/schema-registry.properties ================================================ # # Copyright 2018 Confluent Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # The address the socket server listens on. # FORMAT: # listeners = listener_name://host_name:port # EXAMPLE: # listeners = PLAINTEXT://your.host.name:9092 listeners=http://0.0.0.0:8081 # Zookeeper connection string for the Zookeeper cluster used by your Kafka cluster # (see zookeeper docs for details). # This is a comma separated host:port pairs, each corresponding to a zk # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". #kafkastore.connection.url=localhost:2181 # Alternatively, Schema Registry can now operate without Zookeeper, handling all coordination via # Kafka brokers. 
Use this setting to specify the bootstrap servers for your Kafka cluster and it # will be used both for selecting the master schema registry instance and for storing the data for # registered schemas. # (Note that you cannot mix the two modes; use this mode only on new deployments or by shutting down # all instances, switching to the new configuration, and then starting the schema registry # instances again.) kafkastore.bootstrap.servers=PLAINTEXT://localhost:9092 # The name of the topic to store schemas in kafkastore.topic=_schemas # If true, API requests that fail will include extra debugging information, including stack traces debug=false ================================================ FILE: test/apache_properties/server.properties ================================================ # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # see kafka.server.KafkaConfig for additional details and defaults ############################# Server Basics ############################# # The id of the broker. This must be set to a unique integer for each broker. broker.id=0 ############################# Socket Server Settings ############################# # The address the socket server listens on. 
It will get the value returned from # java.net.InetAddress.getCanonicalHostName() if not configured. # FORMAT: # listeners = listener_name://host_name:port # EXAMPLE: # listeners = PLAINTEXT://your.host.name:9092 # Uncomment the following line to run tests on local Mac #listeners=PLAINTEXT://localhost:9092 # Hostname and port the broker will advertise to producers and consumers. If not set, # it uses the value for "listeners" if configured. Otherwise, it will use the value # returned from java.net.InetAddress.getCanonicalHostName(). #advertised.listeners=PLAINTEXT://your.host.name:9092 # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL # The number of threads that the server uses for receiving requests from the network and sending responses to the network num.network.threads=10 # The number of threads that the server uses for processing requests, which may include disk I/O num.io.threads=8 # The send buffer (SO_SNDBUF) used by the socket server socket.send.buffer.bytes=102400 # The receive buffer (SO_RCVBUF) used by the socket server socket.receive.buffer.bytes=102400 # The maximum size of a request that the socket server will accept (protection against OOM) socket.request.max.bytes=104857600 ############################# Message Size Settings ############################# # Increasing max message size to test large messages with Kafka Connector. # 30 MiB message.max.bytes=31457280 replica.fetch.max.bytes=31457280 ############################# Log Basics ############################# # A comma separated list of directories under which to store log files log.dirs=/tmp/kafka-logs # The default number of log partitions per topic. More partitions allow greater # parallelism for consumption, but this will also result in more files across # the brokers. 
num.partitions=1 # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. # This value is recommended to be increased for installations with data dirs located in RAID array. num.recovery.threads.per.data.dir=1 ############################# Internal Topic Settings ############################# # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. offsets.topic.replication.factor=1 transaction.state.log.replication.factor=1 transaction.state.log.min.isr=1 ############################# Log Flush Policy ############################# # Messages are immediately written to the filesystem but by default we only fsync() to sync # the OS cache lazily. The following configurations control the flush of data to disk. # There are a few important trade-offs here: # 1. Durability: Unflushed data may be lost if you are not using replication. # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. # The settings below allow one to configure the flush policy to flush data after a period of time or # every N messages (or both). This can be done globally and overridden on a per-topic basis. # The number of messages to accept before forcing a flush of data to disk #log.flush.interval.messages=10000 # The maximum amount of time a message can sit in a log before we force a flush #log.flush.interval.ms=1000 ############################# Log Retention Policy ############################# # The following configurations control the disposal of log segments. The policy can # be set to delete segments after a period of time, or after a given size has accumulated. 
# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens # from the end of the log. # The minimum age of a log file to be eligible for deletion due to age log.retention.hours=168 # A size-based retention policy for logs. Segments are pruned from the log unless the remaining # segments drop below log.retention.bytes. Functions independently of log.retention.hours. #log.retention.bytes=1073741824 # The maximum size of a log segment file. When this size is reached a new log segment will be created. log.segment.bytes=1073741824 # The interval at which log segments are checked to see if they can be deleted according # to the retention policies log.retention.check.interval.ms=300000 ############################# Zookeeper ############################# # Zookeeper connection string (see zookeeper docs for details). # This is a comma separated host:port pairs, each corresponding to a zk # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". # You can also append an optional chroot string to the urls to specify the # root directory for all kafka znodes. zookeeper.connect=localhost:2181 # Timeout in ms for connecting to zookeeper zookeeper.connection.timeout.ms=18000 ############################# Group Coordinator Settings ############################# # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. # The default value for this is 3 seconds. # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 
group.initial.rebalance.delay.ms=0 ================================================ FILE: test/apache_properties/zookeeper.properties ================================================ # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # the directory where the snapshot is stored. dataDir=/tmp/zookeeper # the port at which the clients will connect clientPort=2181 # disable the per-ip limit on the number of connections since this is a non-production config maxClientCnxns=0 # Disable the adminserver by default to avoid port conflicts. # Set the port to something non-conflicting if choosing to enable this admin.enableServer=false # admin.serverPort=8080 ================================================ FILE: test/build_image.sh ================================================ #!/bin/bash # exit on error set -e # error printing function function error_exit() { echo >&2 $1 exit 1 } # check argument number is 1 or 2 or 3 if [ $# -gt 3 ] || [ $# -lt 1 ]; then error_exit "Usage: ./build_image.sh [] [verify/package/none] . Aborting." 
fi KAFKA_CONNECT_TAG=$1 SNOWFLAKE_CONNECTOR_PATH=$2 BUILD_METHOD=$3 if [[ -z "${BUILD_METHOD}" ]]; then # Default build method verify BUILD_METHOD="verify" fi # check if connector path is set or checkout from github master if [[ -z "${SNOWFLAKE_CONNECTOR_PATH}" ]]; then # Always re-pull code from github, no one should develop under the test_script folder echo -e "\n=== path to snowflake connector repo is not set, clone snowflake-kafka-connector from github and build ===" SNOWFLAKE_CONNECTOR_PATH="./snowflake-kafka-connector" echo -e "\n=== $SNOWFLAKE_CONNECTOR_PATH will be force deleted ===" rm -rf $SNOWFLAKE_CONNECTOR_PATH mkdir $SNOWFLAKE_CONNECTOR_PATH git clone https://github.com/snowflakedb/snowflake-kafka-connector $SNOWFLAKE_CONNECTOR_PATH fi # check if the provided snowflake connector folder exist if [ ! -d $SNOWFLAKE_CONNECTOR_PATH ]; then error_exit "Provided path to snowflake connector repo $SNOWFLAKE_CONNECTOR_PATH does not exist. Aborting." fi # require the environment variable for credentials if [[ -z "${SNOWFLAKE_CREDENTIAL_FILE}" ]]; then error_exit "Require environment variable SNOWFLAKE_CREDENTIAL_FILE but it's not set. Aborting." fi if [ ! -f "$SNOWFLAKE_CREDENTIAL_FILE" ]; then error_exit "Provided SNOWFLAKE_CREDENTIAL_FILE $SNOWFLAKE_CREDENTIAL_FILE does not exist. Aborting." fi # check required commands command -v docker >/dev/null 2>&1 || error_exit "Require docker but it's not installed. Aborting." command -v minikube >/dev/null 2>&1 || error_exit "Require minikube but it's not installed. Aborting." command -v mvn >/dev/null 2>&1 || error_exit "Require mvn but it's not installed. Aborting." 
# match all versions of built SF connector (including release candidates like rc1) SNOWFLAKE_PLUGIN_NAME_REGEX="snowflake-kafka-connector-[0-9]*\.[0-9]*\.[0-9]*(-rc[0-9]+)?\.jar$" SNOWFLAKE_PLUGIN_PATH="$SNOWFLAKE_CONNECTOR_PATH/target" SNOWFLAKE_DOCKER_IMAGE="snowflakedb/kc-dev-build" SNOWFLAKE_TAG="dev" KAFKA_CONNECT_DOCKER_IMAGE="confluentinc/cp-kafka-connect" KAFKA_CONNECT_PLUGIN_PATH="/usr/share/confluent-hub-components" KAFKA_CONNECT_PLUGIN_PATH_5_0_0="/usr/share/java" DEV_CONTAINER_NAME="snow-dev-build" # bind minikube to local docker image repo if ! minikube status; then echo -e "\n=== minikube not running, try to start ===" minikube config set memory 8192 minikube config set cpus 4 minikube config set disk-size 20000MB minikube start fi eval $(minikube docker-env) # copy credential to SNOWFLAKE_CONNECTOR_PATH cp -rf $SNOWFLAKE_CREDENTIAL_FILE $SNOWFLAKE_CONNECTOR_PATH || true # build and test the local repo pushd $SNOWFLAKE_CONNECTOR_PATH case $BUILD_METHOD in verify) mvn clean mvn verify -Dgpg.skip=true ;; package) mvn clean mvn package -Dgpg.skip=true ;; none) echo -e "\n=== skip building, please make sure built connector exist ===" ;; *) error_exit "Usage: ./build_image.sh [] [verify/package/none] . Unknown build method $BUILD_METHOD. Aborting." 
esac popd # get built image name # only match the first line SNOWFLAKE_PLUGIN_NAME=$(ls $SNOWFLAKE_PLUGIN_PATH | grep -E "$SNOWFLAKE_PLUGIN_NAME_REGEX" | head -n 1) echo -e "\n=== built connector name: $SNOWFLAKE_PLUGIN_NAME ===" # download Kafka connect docker image echo -e "\n=== pull image from $KAFKA_CONNECT_DOCKER_IMAGE:$KAFKA_CONNECT_TAG ===" docker pull $KAFKA_CONNECT_DOCKER_IMAGE:$KAFKA_CONNECT_TAG # clean up echo -e "\n=== try to delete container $DEV_CONTAINER_NAME if it exist ===" $(docker rm $DEV_CONTAINER_NAME) || true # copy built jar file to kafka connect image echo -e "\n=== create docker container ===" docker create --name $DEV_CONTAINER_NAME $KAFKA_CONNECT_DOCKER_IMAGE:$KAFKA_CONNECT_TAG echo -e "\n=== copy built snowflake plugin into container ===" docker cp $SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME $DEV_CONTAINER_NAME:$KAFKA_CONNECT_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME || \ docker cp $SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME $DEV_CONTAINER_NAME:$KAFKA_CONNECT_PLUGIN_PATH_5_0_0/$SNOWFLAKE_PLUGIN_NAME echo -e "\n=== commit the mocified container to snowflake image ===" docker commit $DEV_CONTAINER_NAME $SNOWFLAKE_DOCKER_IMAGE:$SNOWFLAKE_TAG # no need to push to docker hub since k8s can use local image # push the image to our docker hub # echo -e "\n=== push snowflake image to docker hub ===" # docker push $SNOWFLAKE_DOCKER_IMAGE:$SNOWFLAKE_TAG # clean up echo -e "\n=== delete container $DEV_CONTAINER_NAME ===" docker rm $DEV_CONTAINER_NAME # copy the jar to plugin path for apache kafka APACHE_KAFKA_CONNECT_PLUGIN_PATH="/usr/local/share/kafka/plugins" mkdir -m 777 -p $APACHE_KAFKA_CONNECT_PLUGIN_PATH || \ sudo mkdir -m 777 -p $APACHE_KAFKA_CONNECT_PLUGIN_PATH cp $SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME $APACHE_KAFKA_CONNECT_PLUGIN_PATH || true echo -e "\n=== copied connector to $APACHE_KAFKA_CONNECT_PLUGIN_PATH ===" ================================================ FILE: test/build_runtime_jar.sh 
================================================ #!/bin/bash # exit on error set -e set -x # error printing function function error_exit() { echo >&2 $1 exit 1 } # check argument number is between 1 and 4 if [ $# -gt 4 ] || [ $# -lt 1 ]; then error_exit "Usage: ./build_runtime_jar.sh [] [verify/package/none] [apache/confluent] [AWS/AZURE/GCP]. Default values are: verify, apache, AWS. Exiting script" fi SNOWFLAKE_CONNECTOR_PATH=$1 BUILD_METHOD=$2 BUILD_FOR_RUNTIME=$3 BUILD_FOR_CLOUD=$4 if [[ -z "${BUILD_METHOD}" ]]; then # Default build method verify BUILD_METHOD="verify" fi if [[ $BUILD_FOR_RUNTIME == "confluent" ]]; then POM_FILE_NAME="pom_confluent.xml" else # Default build target is for Apache BUILD_FOR_RUNTIME="apache" POM_FILE_NAME="pom.xml" fi # Some of the integration tests use cloud vendor specific resources if [[ -z "${BUILD_FOR_CLOUD}" ]]; then # Default BUILD_FOR_CLOUD="AWS" fi # check if connector path is set or checkout from github master if [[ -z "${SNOWFLAKE_CONNECTOR_PATH}" ]]; then # Always re-pull code from github, no one should develop under the test_script folder echo -e "\n=== path to snowflake connector repo is not set, clone snowflake-kafka-connector from github and build ===" SNOWFLAKE_CONNECTOR_PATH="./snowflake-kafka-connector" echo -e "\n=== $SNOWFLAKE_CONNECTOR_PATH will be force deleted ===" rm -rf $SNOWFLAKE_CONNECTOR_PATH mkdir $SNOWFLAKE_CONNECTOR_PATH git clone https://github.com/snowflakedb/snowflake-kafka-connector $SNOWFLAKE_CONNECTOR_PATH fi # check if the provided snowflake connector folder exists if [ ! -d $SNOWFLAKE_CONNECTOR_PATH ]; then error_exit "Provided path to snowflake connector repo $SNOWFLAKE_CONNECTOR_PATH does not exist. Aborting." fi # require the environment variable for credentials if [[ -z "${SNOWFLAKE_CREDENTIAL_FILE}" ]]; then error_exit "Require environment variable SNOWFLAKE_CREDENTIAL_FILE but it's not set. Aborting." fi if [ !
-f "$SNOWFLAKE_CREDENTIAL_FILE" ]; then error_exit "Provided SNOWFLAKE_CREDENTIAL_FILE $SNOWFLAKE_CREDENTIAL_FILE does not exist. Aborting." fi # check required commands command -v mvn >/dev/null 2>&1 || error_exit "Require mvn but it's not installed. Aborting." # match all versions of built SF connector (including release candidates like rc1) SNOWFLAKE_PLUGIN_NAME_REGEX="snowflake-kafka-connector-[0-9]*\.[0-9]*\.[0-9]*(-rc[0-9]+)?\.jar$" SNOWFLAKE_PLUGIN_PATH="$SNOWFLAKE_CONNECTOR_PATH/target" KAFKA_CONNECT_PLUGIN_PATH="/usr/local/share/kafka/plugins" # copy credential to SNOWFLAKE_CONNECTOR_PATH cp -rf $SNOWFLAKE_CREDENTIAL_FILE $SNOWFLAKE_CONNECTOR_PATH || true echo "Building Jar for Runtime: $BUILD_FOR_RUNTIME" # build and test the local repo pushd $SNOWFLAKE_CONNECTOR_PATH case $BUILD_METHOD in verify) # mvn clean should clean the target directory, hence using default pom.xml mvn -f $POM_FILE_NAME clean # skip Iceberg tests outside of AWS if [[ $BUILD_FOR_CLOUD == "AWS" ]]; then echo "Running integration tests against AWS cloud" mvn -f $POM_FILE_NAME verify -Dgpg.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 -P aws else echo "Running integration tests against non-AWS cloud" mvn -f $POM_FILE_NAME verify -Dgpg.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 -P non-aws fi ;; package) # mvn clean should clean the target directory, hence using default pom.xml mvn -f $POM_FILE_NAME clean # mvn package with pom_confluent runs the kafka-connect-maven-plugin which creates a zip file # More information: https://docs.confluent.io/platform/current/connect/kafka-connect-maven-plugin/site/plugin-info.html mvn -f $POM_FILE_NAME package -Dgpg.skip=true -DskipTests -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 ;; none) echo -e "\n=== skip building, please make sure built connector exist 
===" ;; *) # unknown build method: report this script's own usage (same argument list as the check at the top) error_exit "Usage: ./build_runtime_jar.sh [] [verify/package/none] [apache/confluent] [AWS/AZURE/GCP]. Unknown build method $BUILD_METHOD. Aborting." esac popd # get built connector jar name # only match the first line SNOWFLAKE_PLUGIN_NAME=$(ls $SNOWFLAKE_PLUGIN_PATH | grep -E "$SNOWFLAKE_PLUGIN_NAME_REGEX" | head -n 1) echo -e "\nbuilt connector name: $SNOWFLAKE_PLUGIN_NAME" mkdir -m 777 -p $KAFKA_CONNECT_PLUGIN_PATH || \ sudo mkdir -m 777 -p $KAFKA_CONNECT_PLUGIN_PATH if [[ $BUILD_FOR_RUNTIME == "confluent" ]]; then # For confluent, copy the zip file and unzip it later echo "For confluent RUNTIME: Copying Kafka Connect Maven Generated Zip file to a temporary location" cp $SNOWFLAKE_PLUGIN_PATH/components/packages/snowflakeinc-snowflake-kafka-connector-*.zip /tmp/sf-kafka-connect-plugin.zip ls /tmp/sf-kafka-connect-plugin* else # Apache Kafka # Only copy built connector to plugin path cp $SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME $KAFKA_CONNECT_PLUGIN_PATH || true echo -e "copied SF Plugin Connector to $KAFKA_CONNECT_PLUGIN_PATH" fi KAFKA_CONNECT_DOCKER_JAR_PATH="$SNOWFLAKE_CONNECTOR_PATH/docker-setup/snowflake-kafka-docker/jars" mkdir -m 777 -p $KAFKA_CONNECT_DOCKER_JAR_PATH cp $SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME $KAFKA_CONNECT_DOCKER_JAR_PATH || true echo -e "copied connector to $KAFKA_CONNECT_DOCKER_JAR_PATH for docker" ================================================ FILE: test/conftest.py ================================================ import logging import os import sys import time from typing import Dict, List import pytest from _pytest.reports import TestReport from lib.config_migration import v4_config_to_v3 from lib.driver import KafkaDriver from lib.fixtures.session import ( # noqa: F401 — re-exported for pytest discovery sensor_pb2, credentials_unsalted, session_name_salt, test_schema, credentials, driver, ) from lib.fixtures.connector import ( # noqa: F401 create_topics, create_connector, create_custom_connector, ) from lib.fixtures.table import ( # noqa: F401
create_table, snowflake_table, create_iceberg_table, iceberg_external_volume, ) from lib.fixtures.function import connector_version, name_salt # noqa: F401 logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Custom CLI options # --------------------------------------------------------------------------- def pytest_addoption(parser): """Register CLI options. Every option falls back to an environment variable so that tests can be launched inside a container where the compose file already sets the values -- no long CLI arg lists needed. """ group = parser.getgroup("kafka-e2e", "Kafka connector end-to-end test options") group.addoption( "--kafka-address", default=os.environ.get("KAFKA_BOOTSTRAP_SERVERS"), help="Kafka bootstrap server address (env: KAFKA_BOOTSTRAP_SERVERS)", ) group.addoption( "--schema-registry-address", default=os.environ.get("SCHEMA_REGISTRY_URL", ""), help="Schema registry URL (env: SCHEMA_REGISTRY_URL)", ) group.addoption( "--kafka-connect-address", default=os.environ.get("KAFKA_CONNECT_ADDRESS"), help="Kafka Connect REST address (env: KAFKA_CONNECT_ADDRESS)", ) group.addoption( "--platform", choices=["confluent", "apache"], default=os.environ.get("KAFKA_PLATFORM"), help="Kafka platform: 'confluent' or 'apache' (env: KAFKA_PLATFORM)", ) group.addoption( "--platform-version", default=os.environ.get("KAFKA_PLATFORM_VERSION"), help="Kafka / Confluent platform version (env: KAFKA_PLATFORM_VERSION)", ) group.addoption( "--name-salt", default=os.environ.get("TEST_NAME_SALT"), help="Unique salt appended to connector and topic names (env: TEST_NAME_SALT, auto-generated if omitted)", ) # currently unused, all tests run on all clouds group.addoption( "--cloud", choices=["AWS", "GCP", "AZURE"], default=os.environ.get("SF_CLOUD_PLATFORM"), help="Snowflake cloud platform: AWS, GCP, or AZURE (env: SF_CLOUD_PLATFORM)", ) group.addoption( "--enable-ssl", action="store_true", 
default=os.environ.get("ENABLE_SSL", "").lower() in ("true", "1", "yes"), help="Enable SSL for Kafka connections (env: ENABLE_SSL)", ) _REQUIRED_OPTIONS = { "--kafka-address": "KAFKA_BOOTSTRAP_SERVERS", "--kafka-connect-address": "KAFKA_CONNECT_ADDRESS", "--platform": "KAFKA_PLATFORM", "--platform-version": "KAFKA_PLATFORM_VERSION", } def pytest_configure(config): # An empty salt silently resolves to the unsalted schema name, which is # shared with Java integration tests. Dropping it would break those runs. name_salt_value = config.getoption("--name-salt") if name_salt_value is not None and name_salt_value == "": raise pytest.UsageError( "--name-salt / TEST_NAME_SALT must not be empty " "(omit it entirely to auto-generate, or provide a non-empty value)" ) config.addinivalue_line( "markers", "schema_evolution: schema evolution e2e tests (FR6)" ) config.addinivalue_line( "markers", "compatibility: v3/v4 dual-version compatibility tests" ) config.addinivalue_line( "markers", "correctness: connector correctness tests (schema mapping, DLQ, multi-topic)", ) config.addinivalue_line( "markers", "confluent_only: requires Confluent platform (schema registry)" ) config.addinivalue_line("markers", "pressure: load / stress tests") config.addinivalue_line( "markers", "iceberg: iceberg table tests (requires ICEBERG_EXTERNAL_VOLUME)", ) # Validate required options (set via CLI or env var) missing = [] for opt, env in _REQUIRED_OPTIONS.items(): if config.getoption(opt) is None: missing.append(f" {opt} (or env {env})") if missing: raise pytest.UsageError( "Missing required configuration:\n" + "\n".join(missing) ) def pytest_collection_modifyitems(config, items): if config.getoption("--platform") == "confluent": return skip = pytest.mark.skip(reason="requires Confluent platform (schema registry)") for item in items: if "confluent_only" in item.keywords: item.add_marker(skip) @pytest.fixture() def create_connector_from_file( driver: KafkaDriver, # noqa: F811 name_salt: str, # noqa: 
F811 connector_version: str, # noqa: F811 ): """DEPRECATED Factory fixture: call to register a connector for the current version. All connectors created during the test are torn down automatically. Args (of the returned factory): v4_config_filename: Config template for the v4 connector. When the connector version is v3, the config is auto-migrated via v4_config_to_v3. config_overrides: Optional config entries applied on top of the (possibly migrated) config. """ created = [] def _create( v4_config_filename: str, *, config_overrides: Dict[str, str] = None ) -> dict: def try_convert_and_apply_overrides(config: Dict[str, str]) -> Dict[str, str]: match connector_version: case "v3": logger.info(f"Will transform {v4_config_filename} to KC v3 config") config = v4_config_to_v3(config) case "v4": pass if config_overrides: config.update(config_overrides) return config rest_request = driver.createConnector( name_salt=name_salt, rest_request_template_filename=v4_config_filename, config_transform=try_convert_and_apply_overrides, ) created.append(rest_request["name"]) return rest_request try: yield _create finally: for connector_name in reversed(created): driver.closeConnector(connector_name) @pytest.fixture(scope="session") def wait_for_rows(driver: KafkaDriver): # noqa: F811 — pytest fixture injection, not a true redefinition """Returns a polling helper that waits until a Snowflake table reaches the expected row count. Supports an optional ``connector_name`` parameter: when provided, each poll iteration also checks the Kafka Connect task status via the REST API. If failed tasks are reported on ``max_consecutive_failures`` consecutive polls the helper raises instead of waiting for the full timeout; fewer consecutive failures only log a warning so the connector has a chance to recover. Default timeout/interval can be overridden globally via environment variables ``E2E_WAIT_TIMEOUT`` and ``E2E_WAIT_INTERVAL``.
""" default_timeout = int(os.environ.get("E2E_WAIT_TIMEOUT", "300")) default_interval = int(os.environ.get("E2E_WAIT_INTERVAL", "5")) def _wait( table_name: str, expected: int, *, timeout: int | None = None, interval: int | None = None, at_least: bool = False, connector_name: str | None = None, max_consecutive_failures: int = 6, ): timeout = timeout or default_timeout interval = interval or default_interval deadline = time.monotonic() + timeout consecutive_failures = 0 while True: count = driver.select_number_of_records(table_name) if count is not None: if count == expected: return count if at_least and count > expected: return count if not at_least and count > expected: raise AssertionError( f"Found more than {expected} rows in {table_name} (got {count})" ) if time.monotonic() >= deadline: raise AssertionError( f"Timed out waiting for {expected} rows in {table_name} " f"(got {count} after {timeout}s)" ) if connector_name is not None: if failed := driver.get_failed_tasks(connector_name): consecutive_failures += 1 if consecutive_failures >= max_consecutive_failures: traces = "\n".join( f" task {t['id']}: {t.get('trace', 'no trace')}" for t in failed ) raise AssertionError( f"Connector {connector_name} has FAILED tasks while " f"waiting for {expected} rows in {table_name} " f"(got {count}):\n{traces}" ) logger.warning( f"Connector {connector_name} has failed tasks " f"({consecutive_failures}/{max_consecutive_failures}), " f"waiting for recovery..." ) else: consecutive_failures = 0 logger.info( f"Waiting for {'at least ' if at_least else ''}{expected} rows " f"in {table_name} (currently {count}), retrying in {interval}s..." 
) time.sleep(interval) return _wait # --------------------------------------------------------------------------- # GitHub Actions step summary (failures only) # --------------------------------------------------------------------------- _github_summary_failures: List[TestReport] = [] @pytest.hookimpl(hookwrapper=True) def pytest_runtest_makereport(item): """Collect failed test reports for GITHUB_STEP_SUMMARY.""" outcome = yield report = outcome.get_result() if report.when == "call" and report.failed and report.longrepr: _github_summary_failures.append(report) def _python_error_annotation(report: TestReport) -> None: """Emit a ::error workflow command to stderr for GitHub annotations.""" filename, line, domain = report.location parts = [f"file=test/{filename}", f"title={domain}"] if line is not None: parts.append(f"line={line + 1}") opts = ",".join(parts) # longrepr can be a ReprExceptionInfo (has .reprcrash.message) or a plain # string (e.g. for xpass-strict failures). longrepr = report.longrepr if hasattr(longrepr, "reprcrash") and longrepr.reprcrash is not None: message = longrepr.reprcrash.message else: message = str(longrepr).split("\n", 1)[0] print(f"::error {opts}::{message}", file=sys.stderr) def pytest_sessionfinish(session, exitstatus): """Append failure summary to GITHUB_STEP_SUMMARY when set (e.g. 
in GitHub Actions).""" summary_path = os.environ.get("GITHUB_STEP_SUMMARY") if not summary_path or not _github_summary_failures or exitstatus == 0: return for report in _github_summary_failures: _python_error_annotation(report) try: with open(summary_path, "a", encoding="utf-8") as summary_file: summary_file.write("\n## Python test failures\n\n") for report in _github_summary_failures: summary_file.write(f"### {report.nodeid}\n\n") summary_file.write("```\n") summary_file.write(report.longreprtext) summary_file.write("\n```\n\n") except OSError: logger.debug("Could not write to GITHUB_STEP_SUMMARY", exc_info=True) ================================================ FILE: test/connect-log4j.properties ================================================ # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Sample Log4j properties file for connect, used only for testing # This can work as an example for SF KC users. log4j.rootLogger=INFO, stdout, connectAppender # Send the logs to the console. log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout # Send the logs to a file, rolling the file at the top of every hour (local time). For example, the `File` option specifies the # location of the log files (e.g.
${kafka.logs.dir}/connect.log), and at the top of every hour the file is closed # and copied in the same directory but with a filename that ends in the `DatePattern` option. log4j.appender.connectAppender=org.apache.log4j.DailyRollingFileAppender log4j.appender.connectAppender.DatePattern='.'yyyy-MM-dd-HH log4j.appender.connectAppender.File=${kafka.logs.dir}/connect.log log4j.appender.connectAppender.layout=org.apache.log4j.PatternLayout # The `%X{connector.context}` parameter in the layout includes connector-specific and task-specific information # in the log message, where appropriate. This makes it easier to identify those log messages that apply to a # specific connector. Simply add this parameter to the log layout configuration below to include the contextual information. connect.log.pattern=[%d] %p %X{connector.context}%m (%c:%L)%n log4j.appender.stdout.layout.ConversionPattern=${connect.log.pattern} log4j.appender.connectAppender.layout.ConversionPattern=${connect.log.pattern} log4j.logger.org.apache.zookeeper=ERROR log4j.logger.org.reflections=ERROR log4j.logger.com.snowflake.kafka.connector=DEBUG # DEBUG produces tons of logs - use carefully log4j.logger.net.snowflake=INFO ================================================ FILE: test/docker/.gitignore ================================================ # Environment files with credentials .env # Docker build artifacts *.log ================================================ FILE: test/docker/Dockerfile.apache-kafka ================================================ # Dockerfile that mirrors run_test_apache.sh setup # Downloads official Apache Kafka tarball and runs it the same way # Supports both ZooKeeper mode (<=3.x) and KRaft mode (4.x+) ARG JAVA_VERSION=11 FROM eclipse-temurin:${JAVA_VERSION}-jdk ARG KAFKA_VERSION=2.8.2 ARG SCALA_VERSION=2.12 # Install dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ netcat-openbsd \ wget \ && rm -rf /var/lib/apt/lists/* WORKDIR /opt #
# Builder image for compiling protobuf dependencies
# Build artifacts are created during image build (cached) and can be copied out
FROM maven:3.9-eclipse-temurin-11

# Install protoc 3.21.x (compatible with protobuf-java 3.21.12)
ARG PROTOC_VERSION=21.12

RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    unzip \
    git \
    python3 \
    && rm -rf /var/lib/apt/lists/*

# Download and install protoc.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as the zip, so a bad PROTOC_VERSION fails here rather than later in unzip.
# -S keeps the error message visible despite -s.
RUN ARCH=$(dpkg --print-architecture) && \
    if [ "$ARCH" = "arm64" ]; then PROTOC_ARCH="aarch_64"; else PROTOC_ARCH="x86_64"; fi && \
    curl -fsSLO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" && \
    unzip -q "protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" -d /usr/local && \
    rm "protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" && \
    chmod +x /usr/local/bin/protoc

WORKDIR /build

# Clone and build BlueApron protobuf converter
ARG CONVERTER_VERSION=3.1.0
RUN mkdir -p /output && \
    git clone -q https://github.com/blueapron/kafka-connect-protobuf-converter /build/converter && \
    cd /build/converter && \
    git checkout -q tags/v${CONVERTER_VERSION} && \
    mvn clean package -q -DskipTests && \
    cp target/kafka-connect-protobuf-converter-*-jar-with-dependencies.jar /output/

# Copy protobuf source and compile to Java
# Build context is test/ directory
COPY test_data/sensor.proto /build/test_data/
COPY test_data/protobuf/pom.xml /build/test_data/protobuf/
RUN mkdir -p /build/test_data/protobuf/src/main/java && \
    protoc --proto_path=/build/test_data --java_out=/build/test_data/protobuf/src/main/java sensor.proto

# Build protobuf test data JAR
RUN cd /build/test_data/protobuf && \
    mvn clean package -q -DskipTests && \
    mkdir -p /output && \
    cp target/kafka-test-protobuf-*-jar-with-dependencies.jar /output/

# Output directory contains the built JARs
# Copy them out with: docker cp $(docker create protobuf-builder):/output/. ./target/
WORKDIR /output
services: zookeeper: platform: linux/amd64 kafka: platform: linux/amd64 schema-registry: platform: linux/amd64 kafka-connect: platform: linux/amd64 ================================================ FILE: test/docker/docker-compose.apache.yml ================================================ # Apache Kafka services # Supports both ZooKeeper mode (<=3.x) and KRaft mode (4.x+) # Usage: docker compose -f docker-compose.base.yml -f docker-compose.apache.yml up services: # Single container running Kafka + Kafka Connect # ZooKeeper mode: also runs ZK inside the container # KRaft mode (KRAFT_MODE=true): combined broker+controller, no ZK kafka: image: ghcr.io/snowflakedb/snowflake-kafka-connector/apache-kafka:${KAFKA_VERSION:-2.8.2}-java${JAVA_VERSION:-11} build: context: .. dockerfile: docker/Dockerfile.apache-kafka args: KAFKA_VERSION: ${KAFKA_VERSION:-2.8.2} JAVA_VERSION: ${JAVA_VERSION:-11} SCALA_VERSION: ${SCALA_VERSION:-2.12} hostname: kafka extra_hosts: - "host.docker.internal:host-gateway" environment: KAFKA_HEAP_OPTS: "-Xms512m -Xmx2g" KRAFT_MODE: ${KRAFT_MODE:-false} CONNECT_OFFSET_FLUSH_INTERVAL_MS: ${CONNECT_OFFSET_FLUSH_INTERVAL_MS:-1000} volumes: - ${CONNECTOR_PLUGIN_PATH:-/tmp/sf-kafka-connect-plugin}:/usr/local/share/kafka/plugins/snowflake-connector - ${V3_PLUGIN_PATH:-/tmp/sf-kafka-connect-v3}:/usr/local/share/kafka/plugins/snowflake-connector-v3 - ${EXTRA_JARS_PATH:-/tmp/kafka-connect-extra-jars}:/usr/local/share/kafka/plugins/protobuf-converter - ./scripts/start-apache-kafka.sh:/opt/start.sh:ro command: ["/bin/bash", "/opt/start.sh"] healthcheck: test: ["CMD-SHELL", "curl -sf http://localhost:8083/connectors || exit 1"] interval: 10s timeout: 10s retries: 30 start_period: 60s # Override base test-runner dependencies and connection settings test-runner: depends_on: kafka: condition: service_healthy environment: KAFKA_BOOTSTRAP_SERVERS: kafka:9092 KAFKA_CONNECT_ADDRESS: kafka:8083 KAFKA_CONNECT_HOST: kafka SCHEMA_REGISTRY_URL: "" 
================================================ FILE: test/docker/docker-compose.base.yml ================================================ # Base services shared by all platforms # Usage: docker compose -f docker-compose.base.yml -f docker-compose.<platform>.yml up services: # Test runner - Python tests test-runner: build: context: . dockerfile: Dockerfile.test-runner environment: SNOWFLAKE_CREDENTIAL_FILE: /credentials/profile.json KAFKA_PLATFORM: KAFKA_PLATFORM_VERSION: TEST_NAME_SALT: SF_CLOUD_PLATFORM: ENABLE_SSL: ${ENABLE_SSL:-false} SNOWPIPE_STREAMING_URL: ${SNOWPIPE_STREAMING_URL:-} volumes: - ${SNOWFLAKE_CREDENTIAL_FILE:?SNOWFLAKE_CREDENTIAL_FILE is required}:/credentials/profile.json:ro - ../test_suit:/app/test_suit:ro - ../rest_request_template:/app/rest_request_template:ro - ../test_data:/app/test_data - ../__init__.py:/app/__init__.py:ro - ../lib:/app/lib:ro - ../conftest.py:/app/conftest.py:ro - ../pyproject.toml:/app/pyproject.toml:ro - ../tests:/app/tests:ro working_dir: /app networks: default: ================================================ FILE: test/docker/docker-compose.confluent-kraft.yml ================================================ # KRaft mode override for Confluent 8.x+ # Usage: docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml -f docker-compose.confluent-kraft.yml up # # Layers KRaft-specific Kafka broker config on top of the base Confluent compose # file. Only the differences are specified here; shared services (kafka-connect, # schema-registry, test-runner) are inherited to avoid config drift. services: # Confluent 8.x dropped cp-zookeeper. Replace the inherited zookeeper service # with a tiny stub that satisfies kafka's depends_on from the base file.
zookeeper: image: busybox:1.37 command: ["sleep", "infinity"] healthcheck: test: ["CMD-SHELL", "true"] interval: 1s retries: 1 kafka: environment: KAFKA_NODE_ID: 1 KAFKA_PROCESS_ROLES: broker,controller KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:29093 KAFKA_LISTENERS: PLAINTEXT://kafka:29092,CONTROLLER://kafka:29093,PLAINTEXT_HOST://0.0.0.0:9092 KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT KAFKA_LOG_DIRS: /tmp/kraft-combined-logs CLUSTER_ID: MkU3OEVBNTcwNTJENDM2Qk ================================================ FILE: test/docker/docker-compose.confluent.yml ================================================ # Confluent Platform services # Usage: docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml up services: zookeeper: image: confluentinc/cp-zookeeper:${CONFLUENT_VERSION:-7.8.0} hostname: zookeeper environment: ZOOKEEPER_CLIENT_PORT: 2181 ZOOKEEPER_TICK_TIME: 2000 healthcheck: test: ["CMD", "nc", "-z", "localhost", "2181"] interval: 5s timeout: 10s retries: 10 kafka: image: confluentinc/cp-kafka:${CONFLUENT_VERSION:-7.8.0} hostname: kafka depends_on: zookeeper: condition: service_healthy environment: KAFKA_BROKER_ID: 1 KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true" healthcheck: test: ["CMD", "kafka-broker-api-versions", "--bootstrap-server", "localhost:9092"] interval: 5s timeout: 10s retries: 10 kafka-connect: image: confluentinc/cp-kafka-connect:${CONFLUENT_VERSION:-7.8.0} hostname: kafka-connect extra_hosts: - 
"host.docker.internal:host-gateway" depends_on: kafka: condition: service_healthy schema-registry: condition: service_healthy environment: CONNECT_BOOTSTRAP_SERVERS: kafka:29092 CONNECT_REST_ADVERTISED_HOST_NAME: kafka-connect CONNECT_REST_PORT: 8083 CONNECT_GROUP_ID: test-connect-group CONNECT_CONFIG_STORAGE_TOPIC: connect-configs CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1 CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1 CONNECT_STATUS_STORAGE_TOPIC: connect-status CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1 CONNECT_OFFSET_FLUSH_INTERVAL_MS: ${CONNECT_OFFSET_FLUSH_INTERVAL_MS:-1000} CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false" CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components,/opt/kafka-connect/plugins CONNECT_LOG4J_ROOT_LOGLEVEL: INFO CONNECT_LOG4J_LOGGERS: com.snowflake=INFO CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n" KAFKA_HEAP_OPTS: "-Xms512m -Xmx6g" KAFKA_OPTS: "${KAFKA_OPTS:-}" SS_LOG_LEVEL: warn volumes: - ${CONNECTOR_PLUGIN_PATH:-/tmp/sf-kafka-connect-plugin}:/opt/kafka-connect/plugins/snowflake-connector - ${V3_PLUGIN_PATH:-/tmp/sf-kafka-connect-v3}:/opt/kafka-connect/plugins/snowflake-connector-v3 - ${EXTRA_JARS_PATH:-/tmp/kafka-connect-extra-jars}:/opt/kafka-connect/plugins/protobuf-converter - ${JOLOKIA_JAR_PATH:-/dev/null}:/opt/jolokia/jolokia-agent.jar:ro healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8083/connectors"] interval: 5s timeout: 10s retries: 20 # Schema Registry for Confluent schema-registry: image: confluentinc/cp-schema-registry:${CONFLUENT_VERSION:-7.8.0} hostname: schema-registry depends_on: kafka: condition: service_healthy environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka:29092 
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8081/subjects"] interval: 5s timeout: 10s retries: 10 # Override base test-runner dependencies test-runner: depends_on: kafka-connect: condition: service_healthy schema-registry: condition: service_healthy environment: KAFKA_BOOTSTRAP_SERVERS: kafka:29092 KAFKA_CONNECT_ADDRESS: kafka-connect:8083 KAFKA_CONNECT_HOST: kafka-connect SCHEMA_REGISTRY_URL: http://schema-registry:8081 ================================================ FILE: test/docker/docker-compose.profile-apache.yml ================================================ # Profiling overlay for Apache Kafka (single-container mode). # Layers JFR, GC logging, JMX, and async-profiler support onto the kafka service. # # Usage: # docker compose -f docker-compose.base.yml -f docker-compose.apache.yml \ # -f docker-compose.profile-apache.yml up # # Or via run_tests.sh: # ./run_tests.sh --platform=apache --platform-version=3.7.0 --profile [--keep] -- ... # # Note: KAFKA_HEAP_OPTS applies to all JVMs in the container (ZK, broker, Connect). # The 2g heap is shared across ZK + broker + Connect, which may be tight for # large-scale profiling workloads. Use the Confluent platform for heavy tests. # JFR and GC logs capture the Connect worker process specifically because # connect-distributed.sh is the last process started and inherits these flags. 
# Profiling overlay for Confluent Platform.
# Layers JFR, GC logging, JMX, and async-profiler support onto kafka-connect.
#
# Usage:
#   docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml \
#     -f docker-compose.profile-confluent.yml up
#
# Or via run_tests.sh:
#   ./run_tests.sh --platform=confluent --platform-version=7.8.0 --profile [--keep] -- ...
#
# Collecting results:
#   - JFR recording: test/scripts/profile_connect.sh jfr-dump
#   - GC logs:       test/scripts/profile_connect.sh collect ./results
#   - Heap dumps:    test/scripts/profile_connect.sh heap-dump
#   - Flame graphs:  test/scripts/profile_connect.sh async-cpu 60
#
# Analysis:
#   - JFR:  jfr summary kc-profile.jfr  OR  open in JDK Mission Control
#   - GC:   https://gceasy.io
#   - Heap: Eclipse MAT

services:
  kafka-connect:
    cap_add:
      - SYS_PTRACE
    ports:
      # JMX below runs with authenticate=false and ssl=false, so publish the
      # port on the loopback interface only (same as the Apache profiling
      # overlay). JMX clients already connect via localhost because of
      # -Djava.rmi.server.hostname=localhost.
      - "127.0.0.1:9999:9999"
    environment:
      KAFKA_HEAP_OPTS: >-
        -Xms512m -Xmx6g
        -XX:StartFlightRecording=name=profile,filename=/tmp/profile/kc-profile.jfr,settings=profile,maxsize=500m,dumponexit=true
        -XX:FlightRecorderOptions=stackdepth=256
        -Xlog:gc*:file=/tmp/profile/gc.log:time,uptime,level,tags:filecount=5,filesize=50m
        -XX:+HeapDumpOnOutOfMemoryError
        -XX:HeapDumpPath=/tmp/profile/heapdump.hprof
        -Dcom.sun.management.jmxremote
        -Dcom.sun.management.jmxremote.port=9999
        -Dcom.sun.management.jmxremote.rmi.port=9999
        -Dcom.sun.management.jmxremote.authenticate=false
        -Dcom.sun.management.jmxremote.ssl=false
        -Djava.rmi.server.hostname=localhost
    tmpfs:
      - /tmp/profile:uid=1000,gid=1000
    volumes:
      - ${ASYNC_PROFILER_PATH:-/dev/null}:/opt/async-profiler:ro
CONNECT_DISTRIBUTED_CONFIG="$KAFKA_HOME/config/connect-distributed.properties" if [ -n "${CONNECT_OFFSET_FLUSH_INTERVAL_MS:-}" ]; then echo "Setting offset.flush.interval.ms=${CONNECT_OFFSET_FLUSH_INTERVAL_MS} in connect-distributed.properties" sed -i "s/^offset\\.flush\\.interval\\.ms=.*/offset.flush.interval.ms=${CONNECT_OFFSET_FLUSH_INTERVAL_MS}/" \ "$CONNECT_DISTRIBUTED_CONFIG" fi if [ "${KRAFT_MODE:-false}" = "true" ]; then ####################################################################### # KRaft mode (Kafka 4.x+) ####################################################################### echo "=== KRaft mode ===" rm -rf /tmp/kraft-combined-logs 2>/dev/null || true CLUSTER_ID=$($KAFKA_HOME/bin/kafka-storage.sh random-uuid) echo "Generated cluster ID: $CLUSTER_ID" echo "=== Formatting storage ===" $KAFKA_HOME/bin/kafka-storage.sh format \ -t "$CLUSTER_ID" \ -c $KAFKA_HOME/config/kraft-server.properties echo "=== Starting Kafka (KRaft combined broker+controller) ===" $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/kraft-server.properties > $LOG_DIR/kafka.log 2>&1 & KAFKA_PID=$! echo "Waiting for Kafka broker..." for i in {1..30}; do if nc -z localhost 9092 2>/dev/null; then echo "Kafka broker is ready" break fi sleep 1 done sleep 5 echo "=== Starting Kafka Connect ===" $KAFKA_HOME/bin/connect-distributed.sh $KAFKA_HOME/config/connect-distributed.properties > $LOG_DIR/kc.log 2>&1 & KC_PID=$! echo "Waiting for Kafka Connect..." 
#!/bin/bash
#
# Downloads the KC v3 connector JAR from Maven Central.
# Skips download if the JAR already exists at the target path.
#
# Usage:
#   ./download_v3_jar.sh [target_dir]
#
# Default target: /tmp/sf-kafka-connect-v3
#
# Output: the target directory is the only thing written to stdout; all
# progress messages go to stderr.
#

set -e

V3_VERSION="3.5.3"
JAR_NAME="snowflake-kafka-connector-${V3_VERSION}.jar"
MAVEN_URL="https://repo1.maven.org/maven2/com/snowflake/snowflake-kafka-connector/${V3_VERSION}/${JAR_NAME}"

TARGET_DIR="${1:-/tmp/sf-kafka-connect-v3}"
TARGET_JAR="${TARGET_DIR}/${JAR_NAME}"

if [ -f "$TARGET_JAR" ]; then
    echo "KC v3 JAR already exists: $TARGET_JAR (skipping download)" >&2
    echo "$TARGET_DIR"
    exit 0
fi

mkdir -p "$TARGET_DIR"

# Download into a temporary file and rename only after curl succeeds.
# Writing straight to $TARGET_JAR would leave a truncated JAR behind when the
# transfer is interrupted, and the existence check above would then accept it
# on every later run.
TMP_JAR="${TARGET_JAR}.part.$$"
trap 'rm -f "$TMP_JAR"' EXIT

echo "Downloading KC v3 JAR (${V3_VERSION}) from Maven Central..." >&2
curl -fSL -o "$TMP_JAR" "$MAVEN_URL"
mv -f "$TMP_JAR" "$TARGET_JAR"
echo "Downloaded: $TARGET_JAR" >&2
echo "$TARGET_DIR"
def get_or_infer_account(self) -> str:
    """Return the account name, inferring it from ``host`` when not set.

    If ``self.account`` is set it is returned unchanged. Otherwise the
    account is taken to be the part of ``self.host`` that precedes
    ``.snowflakecomputing`` (any ``scheme://`` prefix is ignored because the
    match cannot cross a ``/``).

    Returns:
        The explicit or inferred account name.

    Raises:
        ValueError: if ``account`` is unset and ``host`` is missing or does
            not contain ``snowflakecomputing``. Previously this surfaced as an
            opaque ``IndexError``/``TypeError`` right after the warning.
    """
    if self.account is not None:
        return self.account

    if not self.host:
        raise ValueError(
            "Cannot infer account: neither 'account' nor 'host' is set in profile.json"
        )

    # Everything up to and including "snowflakecomputing", without crossing a "/".
    matches = re.findall("[^/]*snowflakecomputing", self.host)
    if not matches:
        raise ValueError(
            f"Cannot infer account from 'host' field {self.host!r} in profile.json, "
            "expecting account.snowflakecomputing.com:443"
        )

    suffix_len = len(".snowflakecomputing")  # 19 characters stripped from the match
    if len(matches) != 1 or len(matches[0]) <= suffix_len:
        # Suspicious but still usable input: keep the original best-effort
        # behaviour of warning and returning whatever the first match yields.
        logging.warning(
            "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
        )
    return matches[0][:-suffix_len]
"""Config migration between KC v3 and v4 connector configurations."""

import copy
from typing import Dict

V4_CONNECTOR_CLASS = "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector"
V3_CONNECTOR_CLASS = "com.snowflake.kafka.connector.SnowflakeSinkConnector"

V3_CONFIG_TEMPLATE = {
    "connector.class": V3_CONNECTOR_CLASS,
    "snowflake.ingestion.method": "SNOWPIPE_STREAMING",
    # placeholders for templating
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    # make tests fast
    "buffer.flush.time": "1",
    "snowflake.streaming.max.client.lag": "1",
}

V4_CONFIG_TEMPLATE = {
    "connector.class": V4_CONNECTOR_CLASS,
    # placeholders for templating
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    # no need to validate compatibility when creating a v4 connector directly
    "snowflake.streaming.validate.compatibility.with.classic": "false",
}


def v4_config_to_v3(config: Dict[str, str]) -> Dict[str, str]:
    """Build the v3 equivalent of a v4 connector config.

    The input mapping is not modified; a deep copy is converted and returned.
    """
    converted = copy.deepcopy(config)
    converted.update(
        {
            "connector.class": V3_CONNECTOR_CLASS,
            "snowflake.ingestion.method": "SNOWPIPE_STREAMING",
        }
    )

    # Values applied only when the caller did not specify them:
    #  - schematization: v4 defaults to on and v3 to off, so pin "true" to
    #    keep v4's default behaviour under v3;
    #  - flush time / client lag: same values as V3_CONFIG_TEMPLATE.
    for option, fallback in (
        ("snowflake.enable.schematization", "true"),
        ("buffer.flush.time", "1"),
        ("snowflake.streaming.max.client.lag", "1"),
    ):
        converted.setdefault(option, fallback)

    # The v4 include-connector-name option maps back to the v3 channel naming
    # flag; an explicit v3 flag already present in the config wins.
    naming_flag = converted.pop(
        "snowflake.streaming.classic.offset.migration.include.connector.name", None
    )
    if naming_flag is not None:
        converted.setdefault(
            "snowflake.streaming.channel.name.include.connector.name", naming_flag
        )

    # v4-only setting, meaningless to v3.
    converted.pop("snowflake.streaming.classic.offset.migration", None)
    return converted


def v3_config_to_v4(config: Dict[str, str]) -> Dict[str, str]:
    """Build the v4 equivalent of a v3 connector config.

    The input mapping is not modified; a deep copy is converted and returned.
    """
    converted = copy.deepcopy(config)
    converted["connector.class"] = V4_CONNECTOR_CLASS

    # Deprecated in v4.
    converted.pop("snowflake.ingestion.method", None)

    # v4 defaults to schematization on, v3 to off: pin "false" when the v3
    # config was silent so the v3 default carries over.
    converted.setdefault("snowflake.enable.schematization", "false")

    # If v3 used connector-name-prefixed channel naming, carry it over so the
    # SSv1 offset migration lookup uses the matching channel name format.
    #
    # IMPORTANT NOTE: older connector versions had no such setting and behaved
    # as if it had the following fixed value:
    # - v2.0.0–v2.0.1: always false; no streaming channel name prefix.
    # - v2.1.0–v2.1.1: always true (hardcoded connector name prefix, no config knob).
    # - v2.1.2–v2.5.0: always false; v2.1.2 reverted to V1 naming with automatic migration.
    # - v3.0.0–v3.3.1: always false; the setting did not exist.
    # - v3.4.0–v3.5.3: configurable (default false); the setting was introduced here.
    naming_flag = converted.pop(
        "snowflake.streaming.channel.name.include.connector.name", None
    )

    converted.update(
        {
            # Compatibility settings.
            "snowflake.validation": "client_side",
            "snowflake.compatibility.enable.column.identifier.normalization": "true",
            "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
            # v4 migrates committed offsets from SSv1 channels; default to best_effort.
            "snowflake.streaming.classic.offset.migration": "best_effort",
            "snowflake.streaming.classic.offset.migration.include.connector.name": (
                "false" if naming_flag is None else naming_flag
            ),
        }
    )
    return converted
def quote_name(name: str) -> str:
    """Wrap ``name`` in double quotes, doubling every embedded double quote."""
    escaped = name.replace('"', '""')
    return f'"{escaped}"'
credentials self.TEST_DATA_FOLDER = Path("test_data") self.httpHeader = { "Content-type": "application/json", "Accept": "application/json", } self.SEND_INTERVAL = 0.01 # send a record every 10 ms self.VERIFY_INTERVAL = 10 # verify every 10 secs self.MAX_RETRY = 60 # max wait time 1 min self.MAX_FLUSH_BUFFER_SIZE = ( 5000 # flush buffer when 5000 data was in the queue ) self.kafkaConnectAddress = kafkaConnectAddress self.schemaRegistryAddress = schemaRegistryAddress self.kafkaAddress = kafkaAddress if enableSSL: logger.info("=== Enable SSL ===") self.client_config = { "bootstrap.servers": kafkaAddress, "security.protocol": "SASL_SSL", "ssl.ca.location": "./crts/ca-cert", "sasl.mechanism": "PLAIN", "sasl.username": "client", "sasl.password": "client-secret", } else: self.client_config = { "bootstrap.servers": kafkaAddress, "broker.address.family": "v4", } self.adminClient = AdminClient(self.client_config) producer_config = self.client_config.copy() # Setting max request size to 30 MiB to support large blob tests. producer_config["message.max.bytes"] = 31457280 # 30 MiB self.producer = Producer(producer_config) consumer_config = self.client_config.copy() consumer_config["group.id"] = f"my-group-{uuid.uuid4()}" consumer_config["auto.offset.reset"] = "earliest" self.consumer = Consumer(consumer_config) self._avro_producer_config = producer_config.copy() self._avro_producer_config["schema.registry.url"] = schemaRegistryAddress # Lazy-init: Apache platform has no schema registry, so we can't # create the AvroProducer eagerly. 
def verifyWithRetry(self, func, retry_round, configFileName):
    """Call ``func(retry_round)`` until it succeeds or the retry budget is spent.

    Outcome per attempt:
      * no exception         -> done;
      * ``ResetAndRetry``    -> retry counter goes back to zero, retry at once;
      * ``RetryableError``   -> counts as one retry, then ``verifyWaitTime()``;
      * ``NonRetryableError``-> logged and re-raised;
      * Snowflake ``ProgrammingError`` with ``errno == 2003`` -> handled like a
        retryable failure (logged as "table not created"); any other errno is
        re-raised.

    :param func: callable invoked with ``retry_round`` as its only argument
    :param retry_round: value passed through to ``func``
    :param configFileName: name used in log messages only
    :raises NonRetryableError: from ``func``, or when ``self.MAX_RETRY``
        retries were used up without success
    """
    retryNum = 0
    while retryNum < self.MAX_RETRY:
        try:
            func(retry_round)
            break
        except ResetAndRetry:
            retryNum = 0
            logger.info(f"=== Reset retry count and retry {configFileName} ===")
        except RetryableError as e:
            retryNum += 1
            logger.warning(f"=== Failed {configFileName}, retryable. {e.msg} ===")
            self.verifyWaitTime()
        except NonRetryableError as e:
            logger.error(
                f"=== Non retryable error for {configFileName} raised ===\n{e.msg}"
            )
            # Bare raise keeps the original traceback.
            raise
        except snowflake.connector.errors.ProgrammingError as e:
            logger.error(f"Error in VerifyWithRetry for {configFileName}: {e}")
            if e.errno == 2003:
                retryNum += 1
                logger.warning(
                    f"=== Failed, table not created for {configFileName} ==="
                )
                self.verifyWaitTime()
            else:
                raise
    if retryNum == self.MAX_RETRY:
        logger.error(f"=== Max retry exceeded for {configFileName} ===")
        # Give the exception a message: handlers (including the one above)
        # log ``e.msg``, which used to be empty for this failure.
        raise NonRetryableError(f"Max retry exceeded for {configFileName}")
partition=partition, headers=headers or [] ) if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0: self.producer.flush() else: for i, (k, v) in enumerate(zip(key, value, strict=True)): self.producer.produce( topic, value=v, key=k, partition=partition, headers=headers or [] ) if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0: self.producer.flush() self.producer.flush() def sendAvroSRData( self, topic, value, value_schema, key=None, key_schema="", partition=0, headers=None, ): if not key: for i, v in enumerate(value): self.avroProducer.produce( topic=topic, value=v, value_schema=value_schema, partition=partition, headers=headers or [], ) if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0: self.producer.flush() else: for i, (k, v) in enumerate(zip(key, value, strict=True)): self.avroProducer.produce( topic=topic, value=v, value_schema=value_schema, key=k, key_schema=key_schema, partition=partition, headers=headers or [], ) if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0: self.producer.flush() self.avroProducer.flush() def consume_messages_dlq(self, config, partition_no, target_dlq_offset_number): """ :param config: Connector config :param partition_no: partition no to search for target offset :param target_dlq_offset_number: Target offset number to find which stops finding any more offsets in DLQ :return: count of offsets """ dlq_topic_name = config["config"]["errors.deadletterqueue.topic.name"] return self.consume_messages( dlq_topic_name, partition_no, target_dlq_offset_number ) def _wait_for_topic(self, topic_name: str, timeout: float = 120) -> None: """Poll broker metadata until topic_name appears. DLQ topics are auto-created by Kafka Connect on the first error record, so they may not exist when consume_messages is called. """ deadline = time.monotonic() + timeout while time.monotonic() < deadline: metadata = self.adminClient.list_topics(timeout=5) if topic_name in metadata.topics: return logger.debug( f"Topic {topic_name!r} not yet visible in broker metadata, waiting..." 
def consume_messages(self, topic_name, partition_no, target_offset, timeout=120):
    """
    Consumes messages from a topic and returns how many were consumed.
    The function stops when target_offset is reached on partition_no, or
    when `timeout` seconds have elapsed.

    Uses assign() instead of subscribe() to bypass the async consumer-group
    rebalance. With subscribe(), if the topic doesn't exist at the time of
    the call (e.g. a DLQ topic auto-created by Kafka Connect), the broker
    returns an empty partition assignment and the timeout expires before any
    messages are consumed. assign() with OFFSET_BEGINNING is synchronous and
    works even for newly-created topics.

    :param topic_name: name of topic
    :param partition_no: partition to read from and to match target_offset against
    :param target_offset: stop once a message on partition_no has an offset
        greater than or equal to this value
    :param timeout: overall time budget in seconds (checked before each poll,
        so the call may overrun by up to one 10-second poll)
    :return: Count of messages consumed
    """
    self._wait_for_topic(topic_name)

    tp = TopicPartition(topic_name, partition_no, OFFSET_BEGINNING)
    self.consumer.assign([tp])

    messages_consumed_count = 0
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    try:
        while True:
            if time.monotonic() >= deadline:
                logger.warning(
                    f"Couldn't find target_offset:{target_offset} in topic:{topic_name} in {timeout} Seconds"
                )
                break
            msg = self.consumer.poll(10.0)  # Time out in seconds
            if msg is None:
                continue
            if msg.error():
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    # Keep polling: more records may still be appended.
                    logger.info("Reached end of partition")
                else:
                    logger.error(f"Error while consuming message: {msg.error()}")
            else:
                messages_consumed_count += 1
                logger.debug(
                    f"Received message: key={msg.key()}, value={msg.value()}, partition={msg.partition()}, offset={msg.offset()}"
                )
                if (
                    msg.partition() == partition_no
                    and msg.offset() >= target_offset
                ):
                    logger.info(
                        f"Reached target offset of {target_offset} for Topic:{topic_name}"
                    )
                    break
    except KafkaError as e:
        logger.error(f"Kafka error: {e}")
    return messages_consumed_count
def select_number_of_records(self, table_name: str) -> int | None:
    """Return the row count of ``table_name``.

    Runs ``SELECT count(*)`` against the quoted table name and returns the
    first column of the first row.

    :param table_name: unquoted table name; quoted with ``quote_name``
    :return: the count, or ``None`` when the query fails with a
        ``ProgrammingError`` whose message contains
        "does not exist or not authorized"
    :raises snowflake.connector.errors.ProgrammingError: for any other
        programming error
    """
    try:
        return (
            self.snowflake_conn.cursor()
            .execute(f"SELECT count(*) FROM {quote_name(table_name)}")
            .fetchone()[0]
        )
    except snowflake.connector.errors.ProgrammingError as e:
        # Guard against an unset message so the original error is re-raised
        # instead of being masked by a TypeError from the `in` test.
        if "does not exist or not authorized" in (e.msg or ""):
            return None
        raise
def get_consumer_group_offset(
    self, connector_name: str, topic: str, partition: int = 0
) -> int | None:
    """Query the committed consumer group offset for a connector's sink task.

    The consumer group id is ``connect-<connector_name>``.

    :param connector_name: connector whose consumer group is queried
    :param topic: topic to look up
    :param partition: partition to look up
    :return: the committed offset, or ``None`` if the broker reported an
        error for the partition or no offset has been committed yet
    """
    group_id = f"connect-{connector_name}"
    request = ConsumerGroupTopicPartitions(
        group_id, [TopicPartition(topic, partition)]
    )
    futures = self.adminClient.list_consumer_group_offsets([request])
    response = futures[group_id].result()

    # Exactly one partition was requested, so only the first entry matters.
    for topic_partition in response.topic_partitions:
        if topic_partition.error:
            logger.error(
                f"Error querying offset for {group_id}/{topic}[{partition}]: "
                f"{topic_partition.error}"
            )
            return None
        offset = topic_partition.offset
        # The client reports "nothing committed" as a negative sentinel
        # rather than None; map it to the documented None.
        if offset is None or offset < 0:
            return None
        return offset
    return None
""" base_url = f"http://{self.kafkaConnectAddress}/connectors/{connector_name}" logger.info(f"=== Delete connector {connector_name} ===") response = requests.delete(base_url, timeout=10) match response.ok: case True: logger.info(f"Delete response code: {response.status_code}") case False: logger.error( f"Failed to delete connector {connector_name}: {response.text}" ) if wait_timeout is None: return response.ok deadline = time.monotonic() + wait_timeout while time.monotonic() < deadline: try: status_code = requests.get(base_url, timeout=5).status_code except requests.exceptions.RequestException as exc: logger.debug( f"Transient error polling connector {connector_name}: {exc}" ) time.sleep(1) continue if status_code == 404: logger.info(f"Connector {connector_name} fully removed") return True logger.debug( f"Connector {connector_name} still present (status {status_code}), " f"waiting..." ) time.sleep(1) logging.error( f"Connector {connector_name} did not disappear within {wait_timeout}s" ) return False Config = Dict[str, str] def createConnector( self, name_salt: str, *, # Either pass those: unsalted_name: str = None, config_template: Config = None, # Or those (deprecated): rest_request_template_filename: str = None, config_transform: Callable[[Config], Config] = None, ): """Creates the connector either with: - an unsalted name and a config template - a REST request template filename and an optional transform Returns the generated config.""" match rest_request_template_filename: case None: assert unsalted_name is not None assert config_template is not None assert config_transform is None rest_request_template = { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": config_template, } case _: assert unsalted_name is None assert config_template is None rest_request_template_path = ( Path("rest_request_template") / rest_request_template_filename ) logger.info( f"=== Generating connector REST request from {rest_request_template_path} ===" ) unsalted_name = 
rest_request_template_filename.split(".")[0] with rest_request_template_path.open() as f: rest_request_template = json.load(f) snowflake_connector_name = unsalted_name + name_salt logger.info(f"=== Creating connector: {snowflake_connector_name} ===") logger.info( f"Config template: {json.dumps(rest_request_template['config'], indent=4)}" ) snowflake_topic_name = snowflake_connector_name def replace_values(obj, replacements): """Recursively traverse a parsed JSON object, applying substring replacements to string values.""" if isinstance(obj, dict): return {k: replace_values(v, replacements) for k, v in obj.items()} elif isinstance(obj, list): return [replace_values(item, replacements) for item in obj] elif isinstance(obj, str): for old, new in replacements.items(): obj = obj.replace(old, new) return obj else: return obj rest_request = replace_values( rest_request_template, { "SNOWFLAKE_HOST": self.credentials.make_url(), "SNOWFLAKE_DATABASE": self.credentials.database, "SNOWFLAKE_SCHEMA": self.credentials.schema, "SNOWFLAKE_USER": self.credentials.user, "SNOWFLAKE_ROLE": self.credentials.role, "SNOWFLAKE_PRIVATE_KEY": self.credentials.private_key, "CONFLUENT_SCHEMA_REGISTRY": self.schemaRegistryAddress, "SNOWFLAKE_TEST_TOPIC": snowflake_topic_name, "SNOWFLAKE_CONNECTOR_NAME": snowflake_connector_name, "_NAME_SALT": name_salt, }, ) if config_transform is not None: rest_request["config"] = config_transform(rest_request["config"]) # Allow the Snowpipe Streaming SDK's URL to be overridden for testing # against a local Snowflake deployment. 
if snowpipe_streaming_url := os.environ.get("SNOWPIPE_STREAMING_URL"): logger.info( f"Overriding Snowpipe Streaming SDK URL to {snowpipe_streaming_url}" ) parsed = urlparse(snowpipe_streaming_url) extra_overrides = [ f"scheme:{parsed.scheme}", f"host:{parsed.hostname}", f"port:{parsed.port}", ] override_key = "snowflake.streaming.client.provider.override.map" match rest_request["config"].get(override_key): case None | "": overrides = extra_overrides case _ as existing_overrides: overrides = [existing_overrides] + extra_overrides rest_request["config"][override_key] = ",".join(overrides) MAX_RETRY = 9 retry = 0 delete_url = ( f"http://{self.kafkaConnectAddress}/connectors/{snowflake_connector_name}" ) post_url = f"http://{self.kafkaConnectAddress}/connectors" while retry < MAX_RETRY: try: logger.info(f"Delete request: {delete_url}") code = requests.delete(delete_url, timeout=10).status_code logger.info(f"Delete request returned: {code}") if code in (200, 201, 404): break except Exception as e: logger.error(f"An exception occurred: {e}") logger.info( "=== sleep for 3 secs to wait for kafka connect to accept connection ===" ) time.sleep(3) retry += 1 if retry == MAX_RETRY: logger.error(f"Kafka Delete request not successful: {delete_url}") logger.info(f"Post HTTP request to Create Connector: {post_url}") r = requests.post(post_url, json=rest_request, headers=self.httpHeader) logger.info( f"Connector Name:{snowflake_connector_name} POST Response:{r.status_code}" ) if not r.ok: logger.error( f"Failed creating connector {snowflake_connector_name}: " f"{r.status_code} {r.reason}, {r.text}" ) time.sleep(10) logger.info( f"Retrying POST request for connector:{snowflake_connector_name}" ) r = requests.post(post_url, json=rest_request, headers=self.httpHeader) logger.info( f"Connector Name:{snowflake_connector_name} POST Response:{r.status_code}" ) if not r.ok: raise RuntimeError( f"Failed to create connector:{snowflake_connector_name}" ) getConnectorResponse = 
@pytest.fixture
def create_topics(driver: KafkaDriver, name_salt):
    """Use for creating multiple topics and tables in parallel.

    Yields a callable ``_create(topics, *, num_partitions=1,
    replication_factor=1, with_tables=True)`` that creates one salted topic
    (and, optionally, one table of the same name) per entry and returns the
    salted names in input order. Any exception raised while creating a topic
    or table is re-raised to the caller.

    On teardown every topic and table that was successfully created is
    deleted / dropped.
    """
    created_topics: List[str] = []
    created_tables: List[str] = []

    def _create_one(topic, num_partitions, replication_factor, with_table):
        salted = f"{topic}{name_salt}"
        logger.info(f"Creating topic {salted}")
        driver.createTopics(salted, num_partitions, replication_factor)
        created_topics.append(salted)
        if with_table:
            driver.create_table(salted)
            created_tables.append(salted)
        return salted

    def _create(
        topics: List[str], *, num_partitions=1, replication_factor=1, with_tables=True
    ):
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(
                    _create_one, t, num_partitions, replication_factor, with_tables
                )
                for t in topics
            ]
        # Leaving the executor block waited for every task. result() re-raises
        # a worker's exception, so a failed creation is no longer silent.
        return [f.result() for f in futures]

    try:
        yield _create
    finally:
        with ThreadPoolExecutor(max_workers=10) as executor:
            for _ in executor.map(driver.deleteTopic, created_topics):
                pass
            for _ in executor.map(driver.drop_table, created_tables):
                pass
@pytest.fixture
def create_connector(create_custom_connector, connector_version: str, request):
    """Yield a factory that creates a connector named after the current test.

    The factory takes exactly one of ``v3_config`` / ``v4_config`` (keyword
    only) and converts it, when necessary, to the format of the connector
    version under test before delegating to ``create_custom_connector``.
    """
    test_name = request.node.originalname

    def _create(
        *,
        v3_config: dict[str, str] = None,
        v4_config: dict[str, str] = None,
    ):
        # Exactly one of the two configs must be supplied (and non-empty).
        assert v3_config or v4_config
        assert not (v3_config and v4_config)

        only_v3_given = v4_config is None
        only_v4_given = v3_config is None

        if connector_version == "v3" and only_v3_given:
            config = v3_config
        elif connector_version == "v3" and only_v4_given:
            config = v4_config_to_v3(v4_config)
        elif connector_version == "v4" and only_v3_given:
            config = v3_config_to_v4(v3_config)
        elif connector_version == "v4" and only_v4_given:
            config = v4_config
        else:
            raise ValueError(f"Invalid connector version: {connector_version}")

        return create_custom_connector(test_name, config)

    return _create
Kafka topic deletion is asynchronous, so the second variant may observe stale data from the first. To prevent this, embed a short discriminator derived from the extra parametrize values into the salt so every variant gets a unique namespace. """ base = f"{session_name_salt}_V3" if connector_version == "v3" else session_name_salt callspec = getattr(request.node, "callspec", None) if callspec is not None: extra_params = { k: v for k, v in callspec.params.items() if k != "connector_version" } if extra_params: # Build a short suffix from the first character of each extra param # value's string representation to keep names within reasonable length. discriminator = "_".join(str(v)[:1].upper() for v in extra_params.values()) base = f"{base}_{discriminator}" return base ================================================ FILE: test/lib/fixtures/session.py ================================================ import logging import os from pathlib import Path import random import string import subprocess import pytest from lib.config import Profile, SnowflakeConnectorConfig import snowflake from lib.driver import KafkaDriver logger = logging.getLogger(__name__) _PROTO_DIR = Path(__file__).parents[2] / "test_data" @pytest.fixture(scope="session") def sensor_pb2(): """Compile sensor.proto and return the generated module.""" subprocess.run( ["protoc", "--python_out=.", "sensor.proto"], cwd=_PROTO_DIR, check=True, ) import test_data.sensor_pb2 return test_data.sensor_pb2 @pytest.fixture(scope="session") def credentials_unsalted(): """Load the credentials from the environment variable SNOWFLAKE_CREDENTIAL_FILE.""" credential_path = Path(os.environ["SNOWFLAKE_CREDENTIAL_FILE"]) assert credential_path.is_file(), ( f"SNOWFLAKE_CREDENTIAL_FILE={credential_path} does not exist" ) return Profile.load(credential_path) @pytest.fixture(scope="session") def session_name_salt(request): """Common name salt for all tests in this session.""" salt = request.config.getoption("--name-salt") if salt is 
@pytest.fixture(scope="session")
def test_schema(credentials_unsalted, session_name_salt):
    """Create an isolated schema for this test session and drop it on teardown.

    The schema name is ``<original schema><session name salt>``; it is created
    inside ``credentials_unsalted.database``. The fixture yields the salted
    schema name (without the database prefix).
    """
    original_schema = credentials_unsalted.schema
    salted_schema = f"{original_schema}{session_name_salt}"
    fqn = f"{credentials_unsalted.database}.{salted_schema}"

    conn_config = SnowflakeConnectorConfig.from_profile(credentials_unsalted)

    try:
        logger.info(f"Creating test schema: {fqn}")
        conn = snowflake.connector.connect(**conn_config.to_dict())
        try:
            conn.cursor().execute(f"CREATE SCHEMA IF NOT EXISTS {fqn}")
        finally:
            # Teardown opens its own connection, so release this one now
            # instead of leaking it for the whole session.
            conn.close()
        yield salted_schema
    finally:
        logger.info(f"Dropping test schema: {fqn}")
        conn = snowflake.connector.connect(**conn_config.to_dict())
        try:
            conn.cursor().execute(f"DROP SCHEMA IF EXISTS {fqn} CASCADE")
        finally:
            # Close even when the DROP itself fails.
            conn.close()
""" credentials_unsalted.schema = test_schema return credentials_unsalted @pytest.fixture(scope="session") def driver(request, credentials): return KafkaDriver( kafkaAddress=request.config.getoption("--kafka-address"), schemaRegistryAddress=request.config.getoption("--schema-registry-address"), kafkaConnectAddress=request.config.getoption("--kafka-connect-address"), credentials=credentials, testVersion=request.config.getoption("--platform-version"), enableSSL=request.config.getoption("--enable-ssl"), ) ================================================ FILE: test/lib/fixtures/table.py ================================================ import logging import os from typing import List import pytest from lib.driver import KafkaDriver, quote_name from snowflake.connector import DictCursor logger = logging.getLogger(__name__) ICEBERG_EXTERNAL_VOLUME = os.environ.get( "ICEBERG_EXTERNAL_VOLUME", "kafka_push_e2e_volume_aws" ) @pytest.fixture() def snowflake_table( driver: KafkaDriver, name_salt: str, request: pytest.FixtureRequest ): """Tears down the Snowflake table named after the current test at teardown. Table name: ``{test_function_name_without_test_prefix}{name_salt}`` Tests that manually create a table (or rely on auto-table-creation) declare this fixture to ensure the table is dropped after the test completes. """ table_name = (request.node.originalname.removeprefix("test_") + name_salt).upper() yield table_name driver.drop_table(table_name) class Table: """Class with helper functions for working with a Snowflake table. 
class IcebergTable(Table):
    """Iceberg flavour of :class:`Table`, issuing ``CREATE/DROP ICEBERG TABLE``.

    ``columns`` has the same meaning as for :class:`Table`: a column list that
    may be followed by table-level options, e.g.
    ``"(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE"``.
    ``EXTERNAL_VOLUME``, ``CATALOG``, ``BASE_LOCATION`` and
    ``ICEBERG_VERSION`` are appended by :meth:`create`.
    """

    def create(self, columns: str):
        """(Re)create the iceberg table with the given column definition."""
        clauses = [
            f"CREATE OR REPLACE ICEBERG TABLE {quote_name(self.name)}",
            f"{columns}",
            f"EXTERNAL_VOLUME = '{ICEBERG_EXTERNAL_VOLUME}'",
            "CATALOG = 'SNOWFLAKE'",
            f"BASE_LOCATION = '{self.name}'",
            "ICEBERG_VERSION = 3",
        ]
        statement = " ".join(clauses)
        cursor = self.driver.snowflake_conn.cursor()
        cursor.execute(statement)

    def drop(self):
        """Drop the iceberg table if it exists."""
        statement = f"DROP ICEBERG TABLE IF EXISTS {quote_name(self.name)}"
        cursor = self.driver.snowflake_conn.cursor()
        cursor.execute(statement)
@pytest.fixture()
def create_iceberg_table(
    driver: KafkaDriver,
    name_salt: str,
    request: pytest.FixtureRequest,
    iceberg_external_volume: str,
):
    """Creates an iceberg table in the test schema.

    Yields ``_create(unsalted_name=None, *, columns, cleanup_topic=True)``,
    which returns an :class:`IcebergTable`. The table name is
    ``unsalted_name`` (defaulting to the test's original name) followed by
    ``name_salt``.

    ``columns`` can include table-level options after the column list, e.g.
    ``"(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE"``.

    Teardown: drops every created iceberg table and, for tables created with
    ``cleanup_topic=True`` (the default), also deletes the Kafka topic of the
    same name. Topic deletion runs even if dropping a table raises.
    """
    created_tables: List[IcebergTable] = []
    topics_to_cleanup: List[str] = []

    def _create(
        unsalted_name: str = None, *, columns: str, cleanup_topic: bool = True
    ) -> IcebergTable:
        unsalted_name = unsalted_name or request.node.originalname
        table_name = unsalted_name + name_salt

        table = IcebergTable(driver, table_name)
        table.create(columns)
        created_tables.append(table)

        if cleanup_topic:
            topics_to_cleanup.append(table.name)
        return table

    try:
        yield _create
    finally:
        try:
            for table in created_tables:
                table.drop()
        finally:
            # A failed DROP must not leave the Kafka topics behind.
            for topic in topics_to_cleanup:
                driver.deleteTopic(topic)
class AnyInstance:
    """Matches any value that is an instance of the given type(s).

    Equality is delegated to ``isinstance``, so subclasses match too
    (e.g. ``AnyInstance(int) == True`` holds because ``bool`` subclasses
    ``int``).
    """

    def __init__(self, *expected_types):
        self._types = expected_types

    def __eq__(self, other):
        return isinstance(other, self._types)

    def __repr__(self):
        # A non-empty repr is what makes a failed assertion diff readable.
        names = ", ".join(t.__name__ for t in self._types)
        return f"<AnyInstance({names})>"
10, interval: float = 0.1): """Start a background thread that sends records continuously.""" self._stop_event.clear() def _produce(): while not self._stop_event.is_set(): self.send(batch_size) self._stop_event.wait(interval) self._thread = threading.Thread(target=_produce, daemon=True) self._thread.start() logging.info( f"Started continuous producer (batch_size={batch_size}, interval={interval}s)" ) def stop_continuous(self, timeout: float = 5): if self._thread is not None: self._stop_event.set() self._thread.join(timeout=timeout) self._thread = None logging.info( f"Stopped continuous producer (total: {self.records_produced})" ) ================================================ FILE: test/pyproject.toml ================================================ [tool.pytest.ini_options] testpaths = ["tests"] pythonpath = ["."] log_cli = true log_cli_level = "INFO" log_cli_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s" log_cli_date_format = "%Y-%m-%dT%H:%M:%S%z" markers = [ "compatibility: data-type and version compatibility tests across ingestion modes", "confluent_only: test requires Confluent platform (schema registry)", "iceberg: iceberg table tests (requires ICEBERG_EXTERNAL_VOLUME)", "pressure: long-running stress/pressure test", "compatibility: v3/v4 dual-version compatibility tests", "schema_evolution: schema evolution e2e tests", "correctness: connector correctness tests (schema mapping, DLQ, multi-topic)", ] ================================================ FILE: test/rest_request_template/datagen_connector.json ================================================ { "name": "datagen-stocktrades_kc", "config": { "connector.class": "io.confluent.kafka.connect.datagen.DatagenConnector", "kafka.topic": "SNOWFLAKESINK_JP_KC", "quickstart": "Stock_Trades", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "max.interval": 10, "iterations": 
100000, "tasks.max": "1" } } ================================================ FILE: test/rest_request_template/datatype_ingestion.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "none", "errors.log.enable": true } } ================================================ FILE: test/rest_request_template/iceberg_avro_aws.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.streaming.iceberg.enabled": true, 
"snowflake.streaming.enable.single.buffer": true, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/iceberg_json_aws.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.streaming.iceberg.enabled": true, "snowflake.streaming.enable.single.buffer": true, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/iceberg_schema_evolution_avro_aws.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", 
"snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.streaming.iceberg.enabled": true, "snowflake.streaming.enable.single.buffer": true, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/iceberg_schema_evolution_json_aws.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", 
"errors.deadletterqueue.topic.replication.factor": 1, "snowflake.streaming.iceberg.enabled": true, "snowflake.streaming.enable.single.buffer": true, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/nullable_values_after_smt.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "buffer.count.records": "100", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "errors.tolerance": "all", "errors.log.enable": true, "behavior.on.null.values": "IGNORE", "transforms": "extractField", "transforms.extractField.type": "org.apache.kafka.connect.transforms.ExtractField$Value", "transforms.extractField.field": "optionalField", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_legacy_avro_sr.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": 
"com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "snowflake.enable.schematization": "false", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_legacy_byte_array_converter.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "snowflake.enable.schematization": "false", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.converters.ByteArrayConverter", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", 
"errors.deadletterqueue.topic.replication.factor": 1, "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_legacy_string_converter.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "snowflake.enable.schematization": "false", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.storage.StringConverter", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_legacy_string_json.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", 
"snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "snowflake.enable.schematization": "false", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_schema_evolution.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_SCHEMA_EVOLUTION_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", 
"snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_schema_mapping_dlq.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_SCHEMA_MAPPING_DLQ_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/snowpipe_streaming_string_json_dlq.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": 
"org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_SNOWPIPE_STREAMING_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_delete_create.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_delete_create_chaos.json 
================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_delete_resume.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, 
"errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_delete_resume_chaos.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_pause_create.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "topics": 
"SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_pause_create_chaos.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", 
"snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_pause_resume.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_pause_resume_chaos.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", 
"key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_recreate.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_recreate_chaos.json 
================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_resilience.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, 
"errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_kc_restart.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "3", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/test_snowpipe_streaming_string_json_ignore_tombstone.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", 
"snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "none", "errors.log.enable": true, "behavior.on.null.values": "IGNORE", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_auto_table_creation.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", 
"snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_auto_table_creation_topic2table.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC0,SNOWFLAKE_TEST_TOPIC1", "snowflake.topic2table.map": "SNOWFLAKE_TEST_TOPIC0:SNOWFLAKE_CONNECTOR_NAME,SNOWFLAKE_TEST_TOPIC1:SNOWFLAKE_CONNECTOR_NAME", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_avro_avro.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", 
"snowflake.user.name": "SNOWFLAKE_USER", "snowflake.role.name": "SNOWFLAKE_ROLE", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "key.converter": "org.apache.kafka.connect.serialization.AvroConverter", "value.converter": "org.apache.kafka.connect.serialization.AvroConverter", "value.converter.schemas.enable": "true", "key.converter.schemas.enable": "true", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_avrosr_avrosr.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "io.confluent.connect.avro.AvroConverter", "key.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_confluent_protobuf_protobuf.json 
================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "io.confluent.connect.protobuf.ProtobufConverter", "key.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter": "io.confluent.connect.protobuf.ProtobufConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "transforms": "add_record_content", "transforms.add_record_content.type": "org.apache.kafka.connect.transforms.HoistField$Value", "transforms.add_record_content.field": "RECORD_CONTENT", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_json_json.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", 
"key.converter.schemas.enable": "false", "snowflake.metadata.createtime": "false", "snowflake.metadata.topic": false, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_multiple_topic_to_one_table_snowpipe_streaming.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC0,SNOWFLAKE_TEST_TOPIC1,SNOWFLAKE_TEST_TOPIC2", "snowflake.topic2table.map": "SNOWFLAKE_TEST_TOPIC0:SNOWFLAKE_CONNECTOR_NAME,SNOWFLAKE_TEST_TOPIC1:SNOWFLAKE_CONNECTOR_NAME,SNOWFLAKE_TEST_TOPIC2:SNOWFLAKE_CONNECTOR_NAME", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_native_complex_smt.json 
================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "transforms": "createKey,extractInt,dropFieldC2", "transforms.createKey.type": "org.apache.kafka.connect.transforms.ValueToKey", "transforms.createKey.fields": "c1", "transforms.extractInt.type": "org.apache.kafka.connect.transforms.ExtractField$Key", "transforms.extractInt.field": "c1", "transforms.dropFieldC2.type": "org.apache.kafka.connect.transforms.ReplaceField$Value", "transforms.dropFieldC2.exclude": "c2", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_native_string_json_without_schema.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": 
"org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "transforms": "dropFieldC2", "transforms.dropFieldC2.type": "org.apache.kafka.connect.transforms.ReplaceField$Value", "transforms.dropFieldC2.exclude": "c2", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_native_string_protobuf.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "com.blueapron.connect.protobuf.ProtobufConverter", "value.converter.protoClassName": "com.snowflake.kafka.test.protobuf.SensorReadingImpl$SensorReading", "transforms": "add_record_content", "transforms.add_record_content.type": "org.apache.kafka.connect.transforms.HoistField$Value", "transforms.add_record_content.field": "RECORD_CONTENT", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: 
test/rest_request_template/travis_correct_schema_mapping.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_schema_not_supported_converter.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter.schemas.enable": 
"false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_snowpipe_streaming_string_avro_sr.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_snowpipe_streaming_string_json.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": 
"com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jmx": "true", "errors.tolerance": "all", "errors.log.enable": true, "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT", "errors.deadletterqueue.topic.replication.factor": 1, "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_string_avro.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.serialization.AvroConverter", "value.converter.schemas.enable": "false", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", 
"snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_string_avrosr.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_string_json.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.jdbc.map": "isInsecureMode : true, notYetExistingProp : true", 
"snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/rest_request_template/travis_correct_string_proxy.json ================================================ { "name": "SNOWFLAKE_CONNECTOR_NAME", "config": { "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector", "topics": "SNOWFLAKE_TEST_TOPIC", "snowflake.url.name": "SNOWFLAKE_HOST", "snowflake.user.name": "SNOWFLAKE_USER", "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY", "snowflake.database.name": "SNOWFLAKE_DATABASE", "snowflake.schema.name": "SNOWFLAKE_SCHEMA", "snowflake.role.name": "SNOWFLAKE_ROLE", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "jvm.proxy.host": "localhost", "jvm.proxy.port": "3128", "jvm.proxy.username": "admin", "jvm.proxy.password": "test", "snowflake.jdbc.map": "isInsecureMode : true, notYetExistingProp : true", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", "snowflake.streaming.validate.compatibility.with.classic": "false" } } ================================================ FILE: test/run_tests.sh ================================================ #!/bin/bash # # Snowflake Kafka Connector - Docker-based E2E Tests # # Usage: # ./run_tests.sh --platform= --platform-version= [options] # # Examples: # ./run_tests.sh --platform=apache --platform-version=2.8.2 # ./run_tests.sh --platform=apache --platform-version=3.7.0 # ./run_tests.sh --platform=apache --platform-version=4.0.0 # ./run_tests.sh --platform=confluent 
#       --platform-version=7.8.0
#   ./run_tests.sh --platform=confluent --platform-version=8.0.0
#   ./run_tests.sh --platform=confluent --platform-version=7.8.0 -- tests/test_string_json.py
#
# Prerequisites:
#   - Docker and Docker Compose
#   - SNOWFLAKE_CREDENTIAL_FILE environment variable set
#   - Connector plugin built (run build_runtime_jar.sh first)
#

# Abort on the first failing command.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOCKER_DIR="$SCRIPT_DIR/docker"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Unique Docker Compose project name per worktree, derived from the repo
# directory basename. Prevents collisions when multiple worktrees run tests
# concurrently (Docker Compose defaults to the parent directory name, which
# is always "docker" here).
export COMPOSE_PROJECT_NAME="$(basename "$PROJECT_ROOT")"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Print an error message ($1) in red on stderr and terminate with status 1.
error_exit() {
    echo -e "${RED}ERROR: $1${NC}" >&2
    exit 1
}

# Print an informational message ($1) in green on stdout.
info() {
    echo -e "${GREEN}INFO: $1${NC}"
}

# Print a warning message ($1) in yellow on stdout.
warn() {
    echo -e "${YELLOW}WARN: $1${NC}"
}

# Print the command-line help text and exit with status 1.
usage() {
    echo "Usage: $0 --platform= --platform-version= [options]"
    echo ""
    echo "Platform:"
    echo " --platform=PLATFORM Platform: 'confluent' or 'apache' (default: confluent)"
    echo " --platform-version=VERSION Kafka/Confluent platform version (default: 7.8.0)"
    echo " Confluent: 6.2.x, 7.x, 8.x (KRaft)"
    echo " Apache: 2.x, 3.x, 4.x (KRaft)"
    echo ""
    echo "Options:"
    echo " --cloud=CLOUD Snowflake cloud platform: AWS, GCP, or AZURE"
    echo " --java-version=VER Java version for Apache Kafka (default: 11)"
    echo " --jmx Enable JMX metrics scraping via Jolokia"
    echo " --profile Enable JVM profiling (JFR, GC logs, JMX, async-profiler)"
    echo " --keep Keep containers running after tests"
    echo " -i, --interactive Start infra, then drop into a bash shell in the test-runner"
    echo " --rebuild Force rebuild of images"
    echo " --logs-dir=DIR Save service logs to a file in DIR on failure"
    echo " -h, --help Show this help message"
    echo " -- ARGS Pass remaining args directly to pytest"
    echo ""
    echo "Environment:"
    echo " SNOWFLAKE_CREDENTIAL_FILE Path to Snowflake credentials JSON (required unless LOCAL_PROXY_PORT is set)"
    echo " LOCAL_PROXY_PORT Port of the proxy for the local Snowflake deployment"
    echo " CONNECT_OFFSET_FLUSH_INTERVAL_MS Passed through to the Connect worker (see docker-compose; optional)"
    echo ""
    echo "Examples:"
    echo " $0 --platform=confluent --platform-version=7.8.0"
    echo " $0 --platform=confluent --platform-version=8.0.0 # KRaft mode"
    echo " $0 --platform=apache --platform-version=2.8.2"
    echo " $0 --platform=apache --platform-version=4.0.0 # KRaft mode"
    echo " $0 --platform=confluent --platform-version=7.8.0 -- -k test_string_json"
    echo " $0 --platform=apache --platform-version=3.7.0 --keep -- -m pressure"
    echo " $0 --platform=confluent --platform-version=7.8.0 -i # interactive shell"
    echo " $0 --platform=confluent --platform-version=7.8.0 --profile --keep -- -m pressure"
    echo " $0 --platform=confluent --platform-version=7.8.0 --logs-dir=/tmp/test-logs"
    exit 1
}

# Parse arguments
PLATFORM="confluent"
PLATFORM_VERSION="7.8.0"
JAVA_VERSION="11"
JMX_ENABLED="false"
PROFILE_ENABLED="false"
KEEP_RUNNING="false"
INTERACTIVE="false"
FORCE_REBUILD="false"
LOGS_DIR=""
PASSTHROUGH_ARGS=()

while [[ $# -gt 0 ]]; do
    case $1 in
        --platform=*)
            PLATFORM="${1#*=}"
            shift
            ;;
        --platform-version=*)
            PLATFORM_VERSION="${1#*=}"
            shift
            ;;
        --cloud=*)
            SF_CLOUD_PLATFORM="${1#*=}"
            shift
            ;;
        --java-version=*)
            JAVA_VERSION="${1#*=}"
            shift
            ;;
        --jmx)
            JMX_ENABLED="true"
            shift
            ;;
        --profile)
            PROFILE_ENABLED="true"
            shift
            ;;
        --keep)
            KEEP_RUNNING="true"
            shift
            ;;
        -i|--interactive)
            INTERACTIVE="true"
            shift
            ;;
        --rebuild)
            FORCE_REBUILD="true"
            shift
            ;;
        --logs-dir=*)
            LOGS_DIR="${1#*=}"
            shift
            ;;
        -h|--help)
            usage
            ;;
        --)
            # Everything after "--" is collected verbatim into PASSTHROUGH_ARGS.
            shift
            PASSTHROUGH_ARGS=("$@")
            break
            ;;
        *)
            error_exit "Unknown option: $1"
            ;;
    esac
done

# Validate required arguments
# (both have defaults, so these only trigger on an explicitly empty value
# such as "--platform=").
if [ -z "$PLATFORM" ]; then
    error_exit "Missing required argument: --platform="
fi
if [ -z "$PLATFORM_VERSION" ]; then
    error_exit "Missing required argument: --platform-version="
fi

# Base compose file + platform-specific compose file
BASE_COMPOSE="-f docker-compose.base.yml"
SCALA_VERSION="2.12"
KRAFT_MODE="false"

case $PLATFORM in
    confluent)
        case $PLATFORM_VERSION in
            6.2.*)
                info "Platform: Confluent $PLATFORM_VERSION"
                # 6.2.x containers are only available for linux/amd64
                COMPOSE_FILES="$BASE_COMPOSE -f docker-compose.confluent.yml -f docker-compose.amd64.yml"
                info "Note: Confluent 6.2.x requires linux/amd64 (using emulation on ARM)"
                START_SERVICES="zookeeper kafka schema-registry kafka-connect"
                ;;
            7.*)
                info "Platform: Confluent $PLATFORM_VERSION"
                COMPOSE_FILES="$BASE_COMPOSE -f docker-compose.confluent.yml"
                START_SERVICES="zookeeper kafka schema-registry kafka-connect"
                ;;
            8.*)
                info "Platform: Confluent $PLATFORM_VERSION (KRaft mode)"
                COMPOSE_FILES="$BASE_COMPOSE -f docker-compose.confluent.yml -f docker-compose.confluent-kraft.yml"
                START_SERVICES="kafka schema-registry kafka-connect"
                ;;
            *)
                error_exit "Unsupported Confluent version: $PLATFORM_VERSION (supported: 6.2.x, 7.x, 8.x)"
                ;;
        esac
        CONFLUENT_VERSION="$PLATFORM_VERSION"
        KAFKA_VERSION=""
        KAFKA_CONNECT_ADDRESS="kafka-connect:8083"
        HEALTH_CHECK_SERVICE="kafka-connect"
        ;;
    apache)
        COMPOSE_FILES="$BASE_COMPOSE -f docker-compose.apache.yml"
        CONFLUENT_VERSION=""
        KAFKA_VERSION="$PLATFORM_VERSION"
        KAFKA_CONNECT_ADDRESS="kafka:8083"
        HEALTH_CHECK_SERVICE="kafka"
        START_SERVICES="kafka"
        case $PLATFORM_VERSION in
            4.*)
                info "Platform: Apache Kafka $PLATFORM_VERSION (KRaft mode)"
                SCALA_VERSION="2.13"
                KRAFT_MODE="true"
                # NOTE: this overrides any --java-version given on the command line.
                JAVA_VERSION="17"
                ;;
            *)
                info "Platform: Apache Kafka $PLATFORM_VERSION (official tarball)"
                ;;
        esac
        ;;
    *)
        error_exit "Unknown platform: $PLATFORM (supported: confluent, apache)"
        ;;
esac

# Layer profiling overlay (platform-specific to avoid undefined service errors)
if [ "$PROFILE_ENABLED" = "true" ]; then
    COMPOSE_FILES="$COMPOSE_FILES -f docker-compose.profile-${PLATFORM}.yml"
    info "Profiling enabled: JFR, GC logs, JMX (port 9999), heap dump on OOM"
    info "Use test/scripts/profile_connect.sh to interact with the profiler"
fi

# Check prerequisites
command -v docker >/dev/null 2>&1 || error_exit "Docker is not installed"
# Probe the Compose v2 plugin by actually invoking it. "command -v docker compose"
# looks up "docker" and "compose" as two separate names and succeeds as soon as
# either exists, so it can never fail once docker itself is present. The
# standalone "docker-compose" binary is not accepted because this script only
# ever calls "docker compose".
docker compose version >/dev/null 2>&1 || error_exit "Docker Compose is not installed ('docker compose' plugin required)"

# Check credentials file
if [ -n "${LOCAL_PROXY_PORT:-}" ]; then
    # Fetch credentials from local proxy
    PROXY_CREDENTIAL_URL="http://localhost:${LOCAL_PROXY_PORT}/proxy/kafka-connector-profile"
    info "Fetching credentials from proxy: $PROXY_CREDENTIAL_URL"
    SNOWFLAKE_CREDENTIAL_FILE="$(mktemp /tmp/kafka-connector-test-snowflake-credentials-XXXXXX.json)"
    if ! curl -sf "$PROXY_CREDENTIAL_URL" -o "$SNOWFLAKE_CREDENTIAL_FILE"; then
        # Do not leave an empty/partial credentials file behind.
        rm -f "$SNOWFLAKE_CREDENTIAL_FILE"
        error_exit "Failed to fetch credentials from $PROXY_CREDENTIAL_URL"
    fi
    info "Credentials fetched to: $SNOWFLAKE_CREDENTIAL_FILE"
else
    if [ -z "$SNOWFLAKE_CREDENTIAL_FILE" ]; then
        error_exit "SNOWFLAKE_CREDENTIAL_FILE environment variable is not set"
    fi
    if [ ! -f "$SNOWFLAKE_CREDENTIAL_FILE" ]; then
        error_exit "Credential file not found: $SNOWFLAKE_CREDENTIAL_FILE"
    fi
    # Convert to absolute path
    SNOWFLAKE_CREDENTIAL_FILE="$(cd "$(dirname "$SNOWFLAKE_CREDENTIAL_FILE")" && pwd)/$(basename "$SNOWFLAKE_CREDENTIAL_FILE")"
fi
info "Credentials: $SNOWFLAKE_CREDENTIAL_FILE"

# Check for connector plugin based on platform
# The staging directory is recreated from scratch on every run.
PLUGIN_DIR="/tmp/sf-kafka-connect-plugin"
rm -rf "$PLUGIN_DIR"
mkdir -p "$PLUGIN_DIR"

if [ "$PLATFORM" = "apache" ]; then
    # Apache: Look for JAR in plugin path
    PLUGIN_JAR_PATH="/usr/local/share/kafka/plugins"
    # Take the first matching JAR; an empty result means none was built.
    PLUGIN_JAR=$(ls "$PLUGIN_JAR_PATH"/snowflake-kafka-connector-*.jar 2>/dev/null | head -n 1)
    if [ -z "$PLUGIN_JAR" ]; then
        error_exit "Connector plugin JAR not found at $PLUGIN_JAR_PATH/. Run './build_runtime_jar.sh . package apache' first."
    fi
    info "Using Apache connector JAR: $PLUGIN_JAR"
    cp "$PLUGIN_JAR" "$PLUGIN_DIR/"
elif [ "$PLATFORM" = "confluent" ]; then
    # Confluent: Look for zip file
    PLUGIN_ZIP="/tmp/sf-kafka-connect-plugin.zip"
    if [ ! -f "$PLUGIN_ZIP" ]; then
        error_exit "Connector plugin zip not found at $PLUGIN_ZIP. Run './build_runtime_jar.sh . package confluent' first."
    fi
    info "Extracting Confluent connector zip: $PLUGIN_ZIP"
    unzip -q "$PLUGIN_ZIP" -d "$PLUGIN_DIR"
fi
info "Plugin prepared in $PLUGIN_DIR"

# Build protobuf dependencies
EXTRA_JARS_DIR="/tmp/kafka-connect-extra-jars"
mkdir -p "$EXTRA_JARS_DIR"

# Build the "protobuf-builder" image from Dockerfile.builder (build context is
# the parent of $DOCKER_DIR) and copy the image's /output directory into
# $EXTRA_JARS_DIR via a temporary, never-started container.
# Side effect: leaves the shell's working directory at $DOCKER_DIR.
compile_protobuf_dependencies() {
    info "Building protobuf dependencies..."
    cd "$DOCKER_DIR"
    docker build -t protobuf-builder -f Dockerfile.builder ..
    info "Extracting JARs from image..."
    CONTAINER_ID=$(docker create protobuf-builder)
    docker cp "$CONTAINER_ID:/output/." "$EXTRA_JARS_DIR/"
    docker rm "$CONTAINER_ID" > /dev/null
    info "Extra JARs prepared in $EXTRA_JARS_DIR:"
    ls -la "$EXTRA_JARS_DIR"
}
compile_protobuf_dependencies

# Download KC v3 JAR for dual-version testing (skips if already cached)
info "Preparing KC v3 connector JAR..."
# The helper script's stdout is used as the plugin directory path.
V3_PLUGIN_DIR=$("$SCRIPT_DIR/download_v3_jar.sh")
export V3_PLUGIN_PATH="$V3_PLUGIN_DIR"
info "v3 plugin path: $V3_PLUGIN_PATH"

if [ "$JMX_ENABLED" = "true" ]; then
    # Download Jolokia JMX agent for metrics scraping
    JOLOKIA_DIR="/tmp/jolokia"
    JOLOKIA_VERSION="2.5.1"
    JOLOKIA_JAR="$JOLOKIA_DIR/jolokia-agent.jar"
    mkdir -p "$JOLOKIA_DIR"
    # The agent JAR is cached between runs; only download when missing.
    if [ ! -f "$JOLOKIA_JAR" ]; then
        info "Downloading Jolokia JMX agent v${JOLOKIA_VERSION}..."
        curl -fsSL -o "$JOLOKIA_JAR" \
            "https://repo1.maven.org/maven2/org/jolokia/jolokia-agent-jvm/${JOLOKIA_VERSION}/jolokia-agent-jvm-${JOLOKIA_VERSION}-javaagent.jar"
    fi
    export JOLOKIA_JAR_PATH="$JOLOKIA_JAR"
    export KAFKA_OPTS="-javaagent:/opt/jolokia/jolokia-agent.jar=port=8778,host=0.0.0.0"
fi

# Generate test name salt
# ("_" followed by 7 random uppercase letters/digits).
TEST_NAME_SALT="$(python3 -c '
import random, string
chars = string.ascii_uppercase + string.digits
print("_" + "".join(random.choices(chars, k=7)))
')"
info "Test name salt: $TEST_NAME_SALT"

# Export environment for docker-compose
export CONFLUENT_VERSION
export KAFKA_VERSION
export JAVA_VERSION
export SCALA_VERSION
export KRAFT_MODE
export SNOWFLAKE_CREDENTIAL_FILE
export CONNECTOR_PLUGIN_PATH="$PLUGIN_DIR"
export EXTRA_JARS_PATH="$EXTRA_JARS_DIR"

# Env vars consumed by pytest via conftest.py (inside the test-runner container)
export KAFKA_PLATFORM="$PLATFORM"
export KAFKA_PLATFORM_VERSION="$PLATFORM_VERSION"
export TEST_NAME_SALT

if [ -n "${LOCAL_PROXY_PORT:-}" ]; then
    export SNOWPIPE_STREAMING_URL="http://host.docker.internal:${LOCAL_PROXY_PORT}"
    info "Snowpipe Streaming URL: $SNOWPIPE_STREAMING_URL"
fi

cd "$DOCKER_DIR"

# Build images
# $COMPOSE_FILES and $BUILD_ARGS are intentionally unquoted so they word-split
# into separate arguments.
BUILD_ARGS=""
if [ "$FORCE_REBUILD" = "true" ]; then
    BUILD_ARGS="--no-cache"
fi
info "Building test runner image..."
docker compose $COMPOSE_FILES build $BUILD_ARGS test-runner

if [ "$PLATFORM" = "apache" ]; then
    APACHE_IMAGE="ghcr.io/snowflakedb/snowflake-kafka-connector/apache-kafka:${KAFKA_VERSION}-java${JAVA_VERSION}"
    # Prefer a prebuilt image; fall back to a local build when the pull fails
    # or --rebuild was requested.
    if [ "$FORCE_REBUILD" != "true" ] && docker pull "$APACHE_IMAGE" < /dev/null 2>/dev/null; then
        info "Using prebuilt Apache Kafka image: $APACHE_IMAGE"
    else
        info "Building Apache Kafka image..."
        docker compose $COMPOSE_FILES build $BUILD_ARGS kafka
    fi
fi

# When profiling, force-remove stale containers from prior --keep runs.
# Bind mounts (plugin JARs) become stale if the host directory was recreated
# while a kept container still held the old mount inode.
if [ "$PROFILE_ENABLED" = "true" ]; then
    info "Cleaning stale containers for fresh profiling..."
    docker compose $COMPOSE_FILES down -v --remove-orphans 2>/dev/null || true
fi

# Start services
info "Starting services: $START_SERVICES"
docker compose $COMPOSE_FILES up -d $START_SERVICES

# Wait for services
info "Waiting for services to be healthy..."
TIMEOUT=300
ELAPSED=0
while [ $ELAPSED -lt $TIMEOUT ]; do
    # Match "(healthy)" including the parenthesis: a bare "healthy" pattern
    # would also match the "(unhealthy)" status and report a false success.
    if docker compose $COMPOSE_FILES ps $HEALTH_CHECK_SERVICE 2>/dev/null | grep -q "(healthy)"; then
        info "All services are healthy!"
        break
    fi
    sleep 5
    ELAPSED=$((ELAPSED + 5))
    echo -n "."
done
echo ""

if [ $ELAPSED -ge $TIMEOUT ]; then
    error_exit "Services failed to become healthy within ${TIMEOUT}s"
fi

# Reset profiling to a clean slate (discard startup/warmup data from prior runs)
if [ "$PROFILE_ENABLED" = "true" ]; then
    PROFILE_CONTAINER=$(docker compose $COMPOSE_FILES ps -q $HEALTH_CHECK_SERVICE)
    if [ -n "$PROFILE_CONTAINER" ]; then
        info "Resetting JFR recording to clean slate..."
        # Report success only when the reset command actually succeeded.
        if docker exec "$PROFILE_CONTAINER" sh -c '
            rm -f /tmp/profile/kc-profile-*.jfr /tmp/profile/flamegraph-*.html 2>/dev/null
            PID=$(jcmd 2>/dev/null | grep -v jcmd | head -1 | awk "{print \$1}")
            if [ -n "$PID" ]; then
                jcmd "$PID" JFR.stop name=profile 2>/dev/null || true
                jcmd "$PID" JFR.start name=profile filename=/tmp/profile/kc-profile.jfr \
                    settings=profile maxsize=500m dumponexit=true 2>/dev/null || true
            fi
        ' 2>/dev/null; then
            info "JFR recording restarted — clean slate for this test run"
        else
            warn "JFR reset failed — profiling data may include startup noise"
        fi
    fi
fi

# Start JMX metrics scraper in the background
METRICS_FILE="/tmp/sf-metrics-${PLATFORM}-${PLATFORM_VERSION}-$(date +%Y%m%d-%H%M%S).jsonl"
METRICS_PID=""

# Launch scrape_metrics.sh in poll mode as a background job writing to
# $METRICS_FILE, and remember its PID in $METRICS_PID.
start_metrics_scraper() {
    local scraper="$PROJECT_ROOT/test/scripts/scrape_metrics.sh"
    if [ ! -x "$scraper" ]; then
        error_exit "Metrics scraper not found or not executable: $scraper"
    fi
    "$scraper" \
        --poll --interval=10 --output="$METRICS_FILE" --host=localhost --port=8778 &
    METRICS_PID=$!
    disown "$METRICS_PID" 2>/dev/null || true
}

# Kill the background scraper if it is still alive. Safe to call repeatedly:
# $METRICS_PID is cleared after the first successful stop.
stop_metrics_scraper() {
    if [ -n "$METRICS_PID" ] && kill -0 "$METRICS_PID" 2>/dev/null; then
        kill "$METRICS_PID" 2>/dev/null || true
        wait "$METRICS_PID" 2>/dev/null || true
        METRICS_PID=""
    fi
}

# EXIT handler: stop the scraper, then tear the compose stack down unless
# --keep was given (KEEP_RUNNING != "false").
cleanup() {
    stop_metrics_scraper
    if [ "$KEEP_RUNNING" = "false" ]; then
        info "Cleaning up containers..."
        docker compose $COMPOSE_FILES down -v --remove-orphans 2>/dev/null || true
    else
        warn "Keeping containers running (--keep specified)"
        echo "To stop: cd $DOCKER_DIR && docker compose $COMPOSE_FILES down -v"
    fi
}
trap cleanup EXIT

if [ "$JMX_ENABLED" = "true" ]; then
    # Give Jolokia a moment to initialize, then start scraping
    sleep 3
    start_metrics_scraper
    echo ""
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN} JMX Metrics: ${METRICS_FILE}${NC}"
    echo -e "${GREEN}========================================${NC}"
    echo ""
fi

# All connection and platform info is passed via env vars (set in
# docker-compose + the exports above), so pytest only needs -v here.
PYTEST_ARGS=(-v)

# Don't remove the test-runner container when --keep is set so the user
# can exec into it for debugging.
RUN_FLAGS=(-i)
if [ "$INTERACTIVE" = "true" ]; then
    RUN_FLAGS+=("-t")
fi
if [ "$KEEP_RUNNING" = "false" ]; then
    RUN_FLAGS+=("--rm")
fi

# When running in GitHub Actions, mount GITHUB_STEP_SUMMARY so pytest can append failures.
if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
    SUMMARY_DIR="$(dirname "$GITHUB_STEP_SUMMARY")"
    SUMMARY_FILE="$(basename "$GITHUB_STEP_SUMMARY")"
    RUN_FLAGS+=(-v "${SUMMARY_DIR}:/github_step_summary" -e "GITHUB_STEP_SUMMARY=/github_step_summary/${SUMMARY_FILE}")
fi

# Run tests (or drop into a shell with --interactive)
# errexit is suspended so a failing test run still reaches the log/summary code below.
set +e
if [ "$INTERACTIVE" = "true" ]; then
    info "Starting interactive shell in test-runner (run pytest manually)..."
    docker compose $COMPOSE_FILES run "${RUN_FLAGS[@]}" test-runner bash
    TEST_EXIT_CODE=$?
else
    info "Running tests..."
    docker compose $COMPOSE_FILES run "${RUN_FLAGS[@]}" test-runner \
        pytest "${PYTEST_ARGS[@]}" "${PASSTHROUGH_ARGS[@]}"
    TEST_EXIT_CODE=$?
fi
set -e

# Stop the scraper before containers go away
stop_metrics_scraper

# Save logs on failure
if [ $TEST_EXIT_CODE -ne 0 ] && [ -n "$LOGS_DIR" ]; then
    mkdir -p "$LOGS_DIR"
    LOG_FILE="$LOGS_DIR/${PLATFORM}-${PLATFORM_VERSION}-${HEALTH_CHECK_SERVICE}.log"
    warn "Tests failed. Saving service logs to $LOG_FILE..."
    docker compose $COMPOSE_FILES logs $HEALTH_CHECK_SERVICE > "$LOG_FILE" 2>&1
fi

if [ "$JMX_ENABLED" = "true" ]; then
    # Print metrics summary
    METRICS_LINES=0
    if [ -f "$METRICS_FILE" ]; then
        METRICS_LINES=$(wc -l < "$METRICS_FILE")
    fi
    echo ""
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN} JMX Metrics: ${METRICS_FILE}${NC}"
    echo -e "${GREEN} Snapshots collected: ${METRICS_LINES}${NC}"
    if [ "$METRICS_LINES" -gt 0 ] 2>/dev/null; then
        echo -e "${GREEN} Analyze: ${PROJECT_ROOT}/test/scripts/analyze_metrics.sh ${METRICS_FILE}${NC}"
    fi
    echo -e "${GREEN}========================================${NC}"
fi

if [ "$PROFILE_ENABLED" = "true" ]; then
    echo ""
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN} Profiling artifacts in container${NC}"
    echo -e "${GREEN} Collect: $PROJECT_ROOT/test/scripts/profile_connect.sh collect [DIR]${NC}"
    echo -e "${GREEN} Status: $PROJECT_ROOT/test/scripts/profile_connect.sh status${NC}"
    echo -e "${GREEN}========================================${NC}"
fi

if [ $TEST_EXIT_CODE -ne 0 ]; then
    echo -e "\n${RED}========================================${NC}"
    echo -e "${RED} TESTS FAILED (exit code: $TEST_EXIT_CODE)${NC}"
    echo -e "${RED}========================================${NC}"
    exit $TEST_EXIT_CODE
fi

echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN} ALL TESTS PASSED${NC}"
echo -e "${GREEN}========================================${NC}"
exit 0

================================================
FILE: test/scripts/analyze_metrics.sh
================================================
#!/bin/bash
#
# Analyze scraped JMX metrics from a JSONL file produced by scrape_metrics.sh.
#
# Usage:
#   ./analyze_metrics.sh FILE          Aggregate summary (default)
#   ./analyze_metrics.sh FILE detail   Per-task breakdown
#   ./analyze_metrics.sh FILE lag      Per-channel offset lag
#
# If FILE is omitted (or empty), uses the most recent /tmp/sf-metrics-*.jsonl.

set -e

FILE="${1}"
MODE="${2:-summary}"

if [ -z "$FILE" ]; then
    # No explicit file: pick the newest scrape output by modification time.
    FILE=$(ls -t /tmp/sf-metrics-*.jsonl 2>/dev/null | head -1)
    if [ -z "$FILE" ]; then
        echo "No metrics file found. Provide a path or run scrape_metrics.sh first." >&2
        exit 1
    fi
fi

if [ ! -f "$FILE" ]; then
    echo "File not found: $FILE" >&2
    exit 1
fi

exec python3 - "$FILE" "$MODE" <<'PYEOF'
import json, sys, re
from collections import defaultdict

file_path = sys.argv[1]
mode = sys.argv[2]

# Load every snapshot that carries a non-empty "metrics" map. Lines that are
# not valid JSON (e.g. a final line cut short when the scraper was killed
# mid-write) are skipped instead of aborting the whole analysis.
snapshots = []
skipped_lines = 0
with open(file_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            skipped_lines += 1
            continue
        if isinstance(data, dict) and data.get("metrics"):
            snapshots.append(data)

if skipped_lines:
    print(f"WARNING: skipped {skipped_lines} malformed line(s) in {file_path}", file=sys.stderr)

if not snapshots:
    print("No metric snapshots with data found.")
    sys.exit(0)

last = snapshots[-1]
first = snapshots[0]
metrics = last["metrics"]

# ── helpers ──────────────────────────────────────────────────────────────────

def parse_mbean(key):
    """Extract connector, task/channel, category, name from an MBean key.

    Everything after the first ":" is split on "," into ``k=v`` pairs and
    returned as a dict.
    """
    parts = {}
    _, _, attrs = key.partition(":")
    for token in attrs.split(","):
        k, _, v = token.partition("=")
        parts[k] = v
    return parts

def fmt_dur(seconds):
    """Format a duration given in seconds as us / ms / s; None or 0 prints as a dash."""
    if seconds is None or seconds == 0:
        return " -"
    if seconds < 0.001:
        return f"{seconds*1e6:6.1f}us"
    if seconds < 1:
        return f"{seconds*1e3:6.1f}ms"
    return f"{seconds:6.2f}s "

def fmt_count(n):
    """Format a number with thousands separators."""
    return f"{n:,}"

def fmt_rate(r):
    """Format a rate with precision that shrinks as the magnitude grows."""
    if r < 1:
        return f"{r:.3f}"
    if r < 1000:
        return f"{r:.1f}"
    return f"{r:,.0f}"

def aggregate_timer(timer_name, timers_by_task):
    """Aggregate a Timer across all tasks: weighted mean/p50/p95 and worst-case max.

    Returns ``(total_count, mean, p50, p95, max)``; mean/p50/p95 are weighted
    by each task's Count and stay 0.0 when no task recorded any call.
    """
    total_count = 0
    weighted_mean = 0.0
    weighted_p50 = 0.0
    weighted_p95 = 0.0
    worst_max = 0.0
    for task, timers in sorted(timers_by_task.items()):
        t = timers.get(timer_name)
        if not t:
            continue
        c = t.get("Count", 0)
        total_count += c
        if c > 0:
            weighted_mean += t.get("Mean", 0) * c
            weighted_p50 += t.get("50thPercentile", 0) * c
            weighted_p95 += t.get("95thPercentile", 0) * c
            worst_max = max(worst_max, t.get("Max", 0))
    if total_count > 0:
        weighted_mean /= total_count
        weighted_p50 /= total_count
        weighted_p95 /= total_count
    return total_count, weighted_mean, weighted_p50, weighted_p95, worst_max

def print_timer_row(label, timer_name, timers_by_task):
    """Print one table row with the aggregated statistics of ``timer_name``."""
    count, mean, p50, p95, mx = aggregate_timer(timer_name, timers_by_task)
    print(f" {label:<24} {fmt_count(count):>8} "
          f"{fmt_dur(mean)} {fmt_dur(p50)} "
          f"{fmt_dur(p95)} {fmt_dur(mx)}")

# ── classify metrics ─────────────────────────────────────────────────────────

task_timers = defaultdict(dict)
task_counters = defaultdict(dict)
task_meters = defaultdict(dict)
task_gauges = defaultdict(dict)
channel_gauges = defaultdict(dict)
channel_counters = defaultdict(dict)

# Accept both old (latency) and new (duration) names for backward compat
TIMER_NAMES = {
    "put-duration", "precommit-duration",
    "put-latency", "precommit-latency",
    "open-duration", "close-duration", "start-duration",
    "channel-open-duration", "sdk-client-create-duration",
    "precommit-offset-fetch-duration",
}
METER_NAMES = {"put-records"}
COUNTER_NAMES = {"open-count", "close-count", "precommit-partitions-skipped", "channel-open-count"}
GAUGE_NAMES = {"assigned-partitions", "sdk-client-count"}
OFFSET_NAMES = {"latest-consumer-offset", "persisted-in-snowflake-offset", "processed-offset"}
CHANNEL_COUNTER_NAMES = {"channel-recovery-count"}

for key, val in metrics.items():
    p = parse_mbean(key)
    task = p.get("task", "")
    channel = p.get("channel", "")
    name = p.get("name", "")
    cat = p.get("category", "")
    if task.startswith("task-"):
        if name in TIMER_NAMES:
            task_timers[task][name] = val
        elif name in METER_NAMES:
            task_meters[task][name] = val
        elif name in COUNTER_NAMES:
            task_counters[task][name] = val
        elif name in GAUGE_NAMES:
            task_gauges[task][name] = val
    elif cat == "offsets" and name in OFFSET_NAMES:
        channel_gauges[channel][name] = val
    elif name in CHANNEL_COUNTER_NAMES:
        # Channel-level counters are matched by name, whatever their category.
        channel_counters[channel][name] = val

num_tasks = len(set(list(task_timers.keys()) + list(task_counters.keys())
                    + list(task_meters.keys()) + list(task_gauges.keys())))
num_channels = len(channel_gauges)
num_snapshots = len(snapshots)

# Detect whether we have the new "duration" names or old "latency" names
has_new_names = any("put-duration" in t for t in task_timers.values())
PUT_TIMER = "put-duration" if has_new_names else "put-latency"
PRECOMMIT_TIMER = "precommit-duration" if has_new_names else "precommit-latency"

# ── header ───────────────────────────────────────────────────────────────────

print("=" * 72)
print(" Snowflake Kafka Connector - Metrics Analysis")
print("=" * 72)
print(f" File: {file_path}")
print(f" Snapshots: {num_snapshots} ({first['timestamp']} .. {last['timestamp']})")
print(f" Tasks: {num_tasks}")
print(f" Channels: {num_channels}")
print()

# ── summary mode (default) ───────────────────────────────────────────────────

if mode == "summary":
    # --- method durations ---
    print("-" * 72)
    print(" Method Durations (aggregated across all tasks, last snapshot)")
    print("-" * 72)
    hdr = f" {'method':<24} {'calls':>8} {'mean':>8} {'p50':>8} {'p95':>8} {'max':>8}"
    print(hdr)
    print_timer_row("put()", PUT_TIMER, task_timers)
    print_timer_row("preCommit()", PRECOMMIT_TIMER, task_timers)
    print_timer_row(" offset fetch (SDK)", "precommit-offset-fetch-duration", task_timers)
    print()

    # --- lifecycle durations ---
    print("-" * 72)
    print(" Lifecycle Durations (aggregated across all tasks)")
    print("-" * 72)
    print(hdr)
    print_timer_row("start()", "start-duration", task_timers)
    print_timer_row("open()", "open-duration", task_timers)
    print_timer_row("close()", "close-duration", task_timers)
    print_timer_row("channel open (SDK)", "channel-open-duration", task_timers)
    print_timer_row("SDK client create", "sdk-client-create-duration", task_timers)
    print()

    # --- throughput ---
    print("-" * 72)
    print(" Throughput")
    print("-" * 72)
    total_records = 0
    total_mean_rate = 0.0
    total_1m_rate = 0.0
    for task, meters in sorted(task_meters.items()):
        m = meters.get("put-records")
        if m:
            total_records += m.get("Count", 0)
            total_mean_rate += m.get("MeanRate", 0)
            total_1m_rate += m.get("OneMinuteRate", 0)
    print(f" Total records ingested: {fmt_count(total_records)}")
    print(f" Mean rate: {fmt_rate(total_mean_rate)} records/sec")
    print(f" 1-minute rate: {fmt_rate(total_1m_rate)} records/sec")
    total_skipped = sum(
        c.get("precommit-partitions-skipped", {}).get("Count", 0)
        for c in task_counters.values()
    )
    print(f" preCommit partitions skipped: {fmt_count(total_skipped)}")
    print()

    # --- lifecycle counts ---
    print("-" * 72)
    print(" Lifecycle Counts (across all tasks)")
    print("-" * 72)
    total_open = sum(c.get("open-count", {}).get("Count", 0) for c in task_counters.values())
    total_close = sum(c.get("close-count", {}).get("Count", 0) for c in task_counters.values())
    total_assigned = sum(g.get("assigned-partitions", {}).get("Value", 0) for g in task_gauges.values())
    total_channel_open = sum(c.get("channel-open-count", {}).get("Count", 0) for c in task_counters.values())
    total_sdk_clients = sum(g.get("sdk-client-count", {}).get("Value", 0) for g in task_gauges.values())
    print(f" open() calls: {fmt_count(total_open)}")
    print(f" close() calls: {fmt_count(total_close)}")
    print(f" assigned partitions: {fmt_count(total_assigned)} (current)")
    print(f" channel opens (total): {fmt_count(total_channel_open)}")
    print(f" SDK clients (current): {fmt_count(total_sdk_clients)}")
    total_recovery = sum(
        c.get("channel-recovery-count", {}).get("Count", 0)
        for c in channel_counters.values()
    )
    print(f" channel recoveries: {fmt_count(total_recovery)}")
    print()

    # --- offset lag (computed from raw offsets) ---
    print("-" * 72)
    print(" Offset Lag Summary (last snapshot)")
    print("-" * 72)
    lags = []
    for ch, gauges in channel_gauges.items():
        consumer = gauges.get("latest-consumer-offset", {}).get("Value")
        persisted = gauges.get("persisted-in-snowflake-offset", {}).get("Value")
        # Channels with a missing or negative offset are left out of the lag stats.
        if consumer is not None and persisted is not None and consumer >= 0 and persisted >= 0:
            lags.append(max(0, consumer - persisted))
    if lags:
        with_lag = sum(1 for l in lags if l > 0)
        print(f" Channels: {len(lags)}")
        print(f" With lag > 0: {with_lag}")
        print(f" Max lag: {max(lags)}")
        print(f" Mean lag: {sum(lags)/len(lags):.1f}")
    else:
        print(" (no offset metrics found)")
    print()

# ── detail mode ──────────────────────────────────────────────────────────────

elif mode == "detail":
    # Order tasks by their trailing numeric id (tasks without one sort as 0).
    all_tasks = sorted(
        set(list(task_timers.keys()) + list(task_counters.keys())
            + list(task_meters.keys()) + list(task_gauges.keys())),
        key=lambda t: int(re.search(r"(\d+)$", t).group(1)) if re.search(r"(\d+)$", t) else 0,
    )
    DETAIL_TIMERS = [
        ("put()", PUT_TIMER),
        ("preCommit()", PRECOMMIT_TIMER),
        (" offset fetch", "precommit-offset-fetch-duration"),
        ("open()", "open-duration"),
        ("close()", "close-duration"),
        ("start()", "start-duration"),
        ("channel open", "channel-open-duration"),
        ("SDK client create", "sdk-client-create-duration"),
    ]
    print("-" * 72)
    print(" Per-Task Breakdown (last snapshot)")
    print("-" * 72)
    for task in all_tasks:
        assigned = task_gauges.get(task, {}).get("assigned-partitions", {}).get("Value", 0)
        opens = task_counters.get(task, {}).get("open-count", {}).get("Count", 0)
        closes = task_counters.get(task, {}).get("close-count", {}).get("Count", 0)
        ch_opens = task_counters.get(task, {}).get("channel-open-count", {}).get("Count", 0)
        sdk_clients = task_gauges.get(task, {}).get("sdk-client-count", {}).get("Value", 0)
        print(f"\n {task} (partitions={assigned}, opens={opens}, closes={closes},"
              f" ch_opens={ch_opens}, sdk_clients={sdk_clients})")
        for label, timer_name in DETAIL_TIMERS:
            t = task_timers.get(task, {}).get(timer_name)
            # Timers that never fired are omitted from the per-task listing.
            if not t or t.get("Count", 0) == 0:
                continue
            c = t.get("Count", 0)
            print(f" {label:<20} calls={fmt_count(c):>6}"
                  f" mean={fmt_dur(t.get('Mean',0))}"
                  f" p50={fmt_dur(t.get('50thPercentile',0))}"
                  f" p95={fmt_dur(t.get('95thPercentile',0))}"
                  f" max={fmt_dur(t.get('Max',0))}")
        m = task_meters.get(task, {}).get("put-records")
        if m:
            print(f" {'records':<20} total={fmt_count(m.get('Count',0)):>6}"
                  f" mean_rate={fmt_rate(m.get('MeanRate',0))} rec/s"
                  f" 1m_rate={fmt_rate(m.get('OneMinuteRate',0))} rec/s")
        skipped = task_counters.get(task, {}).get("precommit-partitions-skipped", {}).get("Count", 0)
        if skipped > 0:
            print(f" precommit-skipped: {skipped}")
    print()

# ── lag mode ─────────────────────────────────────────────────────────────────

elif mode == "lag":
    def _offset(gauges, name):
        """Return the gauge's Value, or -1 when the gauge is absent or null."""
        value = gauges.get(name, {}).get("Value")
        return -1 if value is None else value

    print("-" * 72)
    print(" Offset Lag by Channel (last snapshot)")
    print("-" * 72)
    if not channel_gauges:
        print(" (no channel metrics found)")
    else:
        rows = []
        for ch, gauges in sorted(channel_gauges.items()):
            processed = _offset(gauges, "processed-offset")
            consumer = _offset(gauges, "latest-consumer-offset")
            persisted = _offset(gauges, "persisted-in-snowflake-offset")
            lag = max(0, consumer - persisted) if consumer >= 0 and persisted >= 0 else 0
            recovery = channel_counters.get(ch, {}).get("channel-recovery-count", {}).get("Count", 0)
            # The trailing "_<digits>" of the channel name is used as the sort key.
            m = re.search(r"_(\d+)$", ch)
            part_id = m.group(1) if m else "?"
            short = ch if len(ch) <= 40 else f"...{ch[-37:]}"
            rows.append((part_id, short, lag, processed, consumer, persisted, recovery))
        hdr = f" {'channel':<42} {'lag':>5} {'processed':>10} {'consumer':>10} {'persisted':>10} {'recover':>7}"
        print(hdr)
        for part_id, short, lag, processed, consumer, persisted, recovery in sorted(rows, key=lambda r: int(r[0]) if r[0].isdigit() else 0):
            flag = " *" if lag > 0 else " "
            rec_flag = f" {recovery:>7}" if recovery > 0 else f" {recovery:>7}"
            print(f" {short:<42} {lag:>5}{flag}"
                  f" {processed:>10} {consumer:>10} {persisted:>10}{rec_flag}")
    print()

else:
    print(f"Unknown mode: {mode}", file=sys.stderr)
    print("Usage: analyze_metrics.sh [summary|detail|lag]", file=sys.stderr)
    sys.exit(1)
PYEOF

================================================
FILE: test/scripts/profile_connect.sh
================================================
#!/bin/bash
#
# Profile the Kafka Connect worker running in Docker.
#
# Wraps JFR and async-profiler commands against the Kafka Connect container.
# Requires the profiling overlay (docker-compose.profile-confluent.yml or
# docker-compose.profile-apache.yml) to be active.
#
# Usage:
#   ./profile_connect.sh COMMAND [options]
#
# Commands:
#   jfr-dump                 Dump the continuous JFR recording to a file
#   jfr-stop                 Stop JFR and dump final recording
#   heap-dump                Take a heap dump (hprof)
#   thread-dump              Print thread dump to stdout
#   async-cpu [DURATION]     CPU flame graph via async-profiler (default: 60s)
#   async-alloc [DURATION]   Allocation flame graph via async-profiler (default: 60s)
#   async-wall [DURATION]    Wall-clock flame graph via async-profiler (default: 60s)
#   collect [OUTPUT_DIR]     Collect all profiling artifacts from the container
#   status                   Show JFR recording status and JVM info
#
# Examples:
#   ./profile_connect.sh status
#   ./profile_connect.sh async-cpu 30
#   ./profile_connect.sh jfr-dump
#   ./profile_connect.sh heap-dump
#   ./profile_connect.sh collect ./profiling-results
#
# Prerequisites:
#   - Containers running with docker-compose.profile.yml overlay
#   - For async-* commands: async-profiler mounted via ASYNC_PROFILER_PATH env var

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOCKER_DIR="$SCRIPT_DIR/../docker"

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

error_exit() { echo -e "${RED}ERROR: $1${NC}" >&2; exit 1; }
info() { echo -e "${GREEN}INFO: $1${NC}"; }
warn() { echo -e "${YELLOW}WARN: $1${NC}"; }

# Detect which container is running Kafka Connect
# Prints the container name on stdout; exits with an error when no running
# container matches. The name filter is built from the repository directory name.
detect_container() {
    local project_root
    project_root="$(cd "$SCRIPT_DIR/../.." && pwd)"
    local project_name
    project_name="$(basename "$project_root")"

    # Try confluent kafka-connect first, then apache kafka
    local container
    container=$(docker ps --filter "name=${project_name}.*kafka-connect" --format '{{.Names}}' | head -1)
    if [ -z "$container" ]; then
        container=$(docker ps --filter "name=${project_name}.*kafka" --format '{{.Names}}' | head -1)
    fi
    if [ -z "$container" ]; then
        error_exit "No Kafka Connect container found. Is the test environment running?"
    fi
    echo "$container"
}

# Find the Kafka Connect JVM PID inside the container
# $1: container name. Prints the PID on stdout; exits with an error when
# jcmd lists no usable JVM.
find_kc_pid() {
    local container="$1"
    # Kafka Connect main class
    local pid
    pid=$(docker exec "$container" jcmd 2>/dev/null \
        | grep -i "ConnectDistributed\|connect-distributed" \
        | awk '{print $1}' | head -1)
    if [ -z "$pid" ]; then
        # Fallback: find any java process
        pid=$(docker exec "$container" jcmd 2>/dev/null \
            | grep -v "^$\|jcmd" | head -1 | awk '{print $1}')
    fi
    if [ -z "$pid" ]; then
        error_exit "Cannot find Kafka Connect JVM PID in container $container"
    fi
    echo "$pid"
}

COMMAND="${1:-help}"
# `shift` fails when no argument was given; that must not trip `set -e`.
shift || true

case "$COMMAND" in
    status)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        info "Container: $CONTAINER"
        info "Kafka Connect PID: $PID"
        echo ""
        echo "=== JFR Recordings ==="
        docker exec "$CONTAINER" jcmd "$PID" JFR.check 2>/dev/null || echo "(no JFR recordings)"
        echo ""
        echo "=== VM Info ==="
        docker exec "$CONTAINER" jcmd "$PID" VM.info 2>/dev/null | head -20
        echo ""
        echo "=== Heap Usage ==="
        docker exec "$CONTAINER" jcmd "$PID" GC.heap_info 2>/dev/null || true
        ;;

    jfr-dump)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        OUTFILE="/tmp/profile/kc-profile-$(date +%Y%m%d-%H%M%S).jfr"
        info "Dumping JFR recording to $OUTFILE..."
        docker exec "$CONTAINER" jcmd "$PID" JFR.dump name=profile filename="$OUTFILE"
        info "Done. Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    jfr-stop)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        OUTFILE="/tmp/profile/kc-profile-final.jfr"
        info "Stopping JFR recording..."
        docker exec "$CONTAINER" jcmd "$PID" JFR.stop name=profile filename="$OUTFILE"
        info "Final recording at $OUTFILE"
        info "Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    heap-dump)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        OUTFILE="/tmp/profile/heap-$(date +%Y%m%d-%H%M%S).hprof"
        info "Taking heap dump (this may pause the JVM briefly)..."
        docker exec "$CONTAINER" jcmd "$PID" GC.heap_dump "$OUTFILE"
        info "Heap dump at $OUTFILE"
        info "Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    thread-dump)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        docker exec "$CONTAINER" jcmd "$PID" Thread.print
        ;;

    async-cpu|async-alloc|async-wall)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        DURATION="${1:-60}"
        # The profiler event is the command name without its "async-" prefix.
        EVENT="${COMMAND#async-}"

        # Locate async-profiler (prefer /opt mount, fall back to /tmp copy)
        ASPROF=""
        for candidate in /opt/async-profiler/bin/asprof /tmp/async-profiler/bin/asprof; do
            if docker exec "$CONTAINER" test -f "$candidate" 2>/dev/null; then
                ASPROF="$candidate"
                break
            fi
        done
        if [ -z "$ASPROF" ]; then
            error_exit "async-profiler not found in container. Set ASYNC_PROFILER_PATH or docker cp it to /tmp/async-profiler/."
        fi

        # Use itimer for cpu profiling — perf_event_open is typically restricted
        # in containers even with SYS_PTRACE. itimer uses SIGPROF instead.
        if [ "$EVENT" = "cpu" ]; then
            EVENT="itimer"
        fi

        OUTFILE="/tmp/profile/flamegraph-${COMMAND#async-}-$(date +%Y%m%d-%H%M%S).html"
        info "Profiling $EVENT for ${DURATION}s (PID $PID)..."
        docker exec "$CONTAINER" "$ASPROF" \
            -d "$DURATION" -f "$OUTFILE" -e "$EVENT" "$PID"
        info "Flame graph at $OUTFILE"
        info "Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    collect)
        CONTAINER=$(detect_container)
        OUTPUT_DIR="${1:-./profiling-results-$(date +%Y%m%d-%H%M%S)}"
        mkdir -p "$OUTPUT_DIR"
        info "Collecting profiling artifacts from $CONTAINER into $OUTPUT_DIR/"

        # Dump JFR before collecting
        PID=$(find_kc_pid "$CONTAINER")
        docker exec "$CONTAINER" jcmd "$PID" JFR.dump name=profile \
            filename="/tmp/profile/kc-profile-collected.jfr" 2>/dev/null || true

        # Copy via tar pipe — docker cp cannot read from tmpfs mounts
        if ! docker exec "$CONTAINER" test -d /tmp/profile 2>/dev/null; then
            warn "/tmp/profile does not exist in container. Was --profile enabled?"
        elif [ "$(docker exec "$CONTAINER" find /tmp/profile -maxdepth 1 -type f | wc -l)" -eq 0 ]; then
            warn "/tmp/profile is empty — no profiling artifacts to collect."
        else
            docker exec "$CONTAINER" tar cf - -C /tmp/profile . \
                | tar xf - -C "$OUTPUT_DIR/"
        fi

        info "Collected artifacts:"
        ls -lh "$OUTPUT_DIR/"

        # Print analysis hints
        echo ""
        echo "=== Analysis ==="
        echo " JFR: jfr summary $OUTPUT_DIR/kc-profile-collected.jfr"
        echo " jfr print --events jdk.ExecutionSample $OUTPUT_DIR/*.jfr | head -100"
        echo " jfr print --events jdk.ObjectAllocationSample $OUTPUT_DIR/*.jfr | head -100"
        echo " GC: Upload $OUTPUT_DIR/gc.log to https://gceasy.io"
        echo " Heap: Open $OUTPUT_DIR/*.hprof in Eclipse MAT"
        echo " Flames: Open $OUTPUT_DIR/flamegraph-*.html in a browser"
        ;;

    help|--help|-h)
        # Print the whole leading comment block (minus the shebang), however
        # long it is. A fixed `head -30` cut off the end of the header, and
        # the `\?` sed quantifier is a GNU extension.
        awk 'NR == 1 && /^#!/ { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
        ;;

    *)
        error_exit "Unknown command: $COMMAND. Run '$0 help' for usage."
        ;;
esac

================================================
FILE: test/scripts/scrape_metrics.sh
================================================
#!/bin/bash
#
# Scrape Snowflake Kafka Connector JMX metrics via Jolokia.
# # Usage: # ./scrape_metrics.sh [options] # # Modes: # --once Single snapshot to stdout (default) # --poll Continuous scraping to a JSONL file # --interval=SECONDS Poll interval (default: 10) # --output=FILE Output file for --poll mode (default: /tmp/sf-metrics.jsonl) # --host=HOST Jolokia host (default: kafka-connect) # --port=PORT Jolokia port (default: 8778) # --pretty Pretty-print JSON output (--once mode only) # # Examples: # ./scrape_metrics.sh --once --pretty # ./scrape_metrics.sh --poll --interval=5 --output=/tmp/metrics.jsonl # ./scrape_metrics.sh --once --host=localhost set -e HOST="${KAFKA_CONNECT_HOST:-kafka-connect}" PORT="8778" MODE="once" INTERVAL=10 OUTPUT="/tmp/sf-metrics.jsonl" PRETTY="false" while [[ $# -gt 0 ]]; do case $1 in --once) MODE="once"; shift ;; --poll) MODE="poll"; shift ;; --interval=*) INTERVAL="${1#*=}"; shift ;; --output=*) OUTPUT="${1#*=}"; shift ;; --host=*) HOST="${1#*=}"; shift ;; --port=*) PORT="${1#*=}"; shift ;; --pretty) PRETTY="true"; shift ;; -h|--help) head -20 "$0" | grep "^#" | sed 's/^# \?//' exit 0 ;; *) echo "Unknown option: $1" >&2; exit 1 ;; esac done JOLOKIA_URL="http://${HOST}:${PORT}/jolokia" SF_DOMAIN="snowflake.kafka.connector" fetch_snapshot() { local timestamp timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ) # Fetch all Snowflake connector MBeans in one request local raw raw=$(curl -sf "${JOLOKIA_URL}/read/${SF_DOMAIN}:*" 2>/dev/null) || { echo "{\"timestamp\":\"${timestamp}\",\"error\":\"Cannot reach Jolokia at ${JOLOKIA_URL}\"}" return 1 } # Reshape: add timestamp, extract just the value map echo "$raw" | jq -c --arg ts "$timestamp" '{timestamp: $ts, metrics: .value}' } case $MODE in once) result=$(fetch_snapshot) if [ "$PRETTY" = "true" ]; then echo "$result" | jq . 
else echo "$result" fi ;; poll) echo "Scraping ${SF_DOMAIN} from ${JOLOKIA_URL} every ${INTERVAL}s → ${OUTPUT}" >&2 mkdir -p "$(dirname "$OUTPUT")" while true; do fetch_snapshot >> "$OUTPUT" sleep "$INTERVAL" done ;; esac ================================================ FILE: test/test_data/.gitignore ================================================ *_pb2.py protobuf/src protobuf/target ================================================ FILE: test/test_data/protobuf/pom.xml ================================================ 4.0.0 com.snowflake kafka-test-protobuf 1.0.0 1.8 UTF-8 com.google.protobuf protobuf-java 3.25.5 org.apache.maven.plugins maven-compiler-plugin 3.3 ${java.version} ${java.version} maven-assembly-plugin 3.1.0 jar-with-dependencies make-assembly package single ================================================ FILE: test/test_data/sensor.proto ================================================ syntax = "proto3"; package com.snowflake.kafka.test.protobuf; option java_outer_classname = "SensorReadingImpl"; message SensorReading { message Device { string deviceID = 1; bool enabled = 2; } Device device = 1; int64 dateTime = 2; double reading = 3; float float_val = 4; int32 int32_val = 5; sint32 sint32_val = 6; sint64 sint64_val = 7; uint32 uint32_val = 8; bytes bytes_val = 9; repeated double double_array_val = 10; uint64 uint64_val = 11; } ================================================ FILE: test/tests/__init__.py ================================================ ================================================ FILE: test/tests/compatibility/__init__.py ================================================ ================================================ FILE: test/tests/compatibility/conftest.py ================================================ """Shared fixtures for data-type ingestion tests. Provides two infrastructure patterns: 1. 
Single-table batch connector (``results`` fixture) — module-scoped,
   creates one table + one topic + one connector per ingestion mode.
   All test cases are defined as data in test_type_compatibility.py,
   sent in one batch, queried once, then asserted.
   Used by test_type_compatibility.py.

2. Per-test connector (``typed_table`` + ``standalone_ingest``) — function-
   scoped, one connector per test. Used by test_unsupported_types.py for
   types that crash streaming channels.
"""

import datetime
import json
import logging
import math
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal
from uuid import uuid4

import pytest
from confluent_kafka import Consumer as KafkaConsumer, KafkaError

from lib.config_migration import v4_config_to_v3
from lib.driver import quote_name

logger = logging.getLogger(__name__)

TEMPLATE_DIR = Path("rest_request_template")
BASE_TEMPLATE = "datatype_ingestion.json"


# ---------------------------------------------------------------------------
# Sentinel for unset expected_value
# ---------------------------------------------------------------------------

class _Unset:
    """Sentinel type: its instance ``UNSET`` marks "no expected_value given".

    A dedicated type is used so that ``None`` remains a legal expected value.
    """

    def __repr__(self):
        return "UNSET"


UNSET = _Unset()


# ---------------------------------------------------------------------------
# Test case definition
# ---------------------------------------------------------------------------

@dataclass(frozen=True)
class Case:
    """A single test vector: send ``value`` to column ``col``, expect outcome.

    The optional ``group`` tag controls which test function owns this case.
    Tests filter on group to avoid name-based set lookups.
    """

    name: str
    col: str
    value: Any
    expect: Literal["ingested", "error"]
    # When left as UNSET, assertions compare the stored value against ``value``.
    expected_value: Any = UNSET
    # Absolute tolerance; when set, values are compared as floats with pytest.approx.
    approx: float | None = None
    group: str | None = None


def cases_where(*, col=None, expect=None, group=None, exclude_groups=None):
    """Filter CASES (from test_type_compatibility) by column, outcome, and/or group.

    Each keyword that is not None narrows the result further; with no
    keywords the full CASES collection is returned.
    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid a circular import — confirm.
    from .test_type_compatibility import CASES

    result = CASES
    if col is not None:
        result = [c for c in result if c.col == col]
    if expect is not None:
        result = [c for c in result if c.expect == expect]
    if group is not None:
        result = [c for c in result if c.group == group]
    if exclude_groups is not None:
        result = [c for c in result if c.group not in exclude_groups]
    return result


# ---------------------------------------------------------------------------
# Results dataclass
# ---------------------------------------------------------------------------

# Map DDL base types to comparison categories
_COMPARE_CATEGORIES = {
    "FLOAT": "float",
    "VARIANT": "json",
    "OBJECT": "json",
    "ARRAY": "json",
    "TIMESTAMP_LTZ": "timestamp_ltz",
    "TIMESTAMP_TZ": "timestamp_tz",
}


def _ddl_category(col: str, columns: dict) -> str:
    """Derive comparison category from a column's DDL type."""
    ddl = columns.get(col, "")
    # Drop any "(precision, scale)" suffix and normalise case before the lookup.
    base = ddl.split("(")[0].strip().upper()
    return _COMPARE_CATEGORIES.get(base, "exact")


@dataclass(frozen=True)
class Results:
    """Outcome of sending all CASES through one connector instance."""

    rows: dict
    dlq_ids: frozenset
    mode: str
    total_sent: int
    columns: dict  # {col_name: ddl_type} — used for comparison dispatch
    error_table_rows: tuple = ()  # populated for v4-ht only

    @property
    def total_ingested(self):
        """Number of entries in ``rows``."""
        return len(self.rows)

    @property
    def total_dlq(self):
        """Number of entries in ``dlq_ids``."""
        return len(self.dlq_ids)

    @property
    def total_missing(self):
        """Records sent that are accounted for neither in ``rows`` nor in ``dlq_ids``."""
        return self.total_sent - self.total_ingested - self.total_dlq

    def assert_ingested(self, case):
        """Assert that ``case`` landed in the table with the correct value."""
        assert case.name in self.rows, (
            f"[{case.name}] expected in table but not found "
            f"(mode={self.mode}, in_dlq={case.name in self.dlq_ids})"
        )
        actual = self.rows[case.name].get(case.col)

        # An explicit expected_value wins; otherwise the sent value must round-trip.
        if not isinstance(case.expected_value, _Unset):
            expected = case.expected_value
        else:
            expected = case.value

        if expected is None:
            assert actual is None, f"[{case.name}] expected NULL, got {actual!r}"
            return

        # An explicit tolerance takes precedence over the type-based comparison.
        if case.approx is not None:
            assert float(actual) == pytest.approx(float(expected), abs=case.approx), (
                f"[{case.name}] approx mismatch: {actual!r} != {expected!r} ± {case.approx}"
            )
            return

        category = _ddl_category(case.col, self.columns)
        match category:
            case "float":
                self._compare_float(case.name, actual, expected)
            case "json":
                self._compare_json(case.name, actual, expected)
            case "timestamp_ltz":
                assert isinstance(actual, datetime.datetime), (
                    f"[{case.name}] expected datetime, got {type(actual).__name__}: {actual!r}"
                )
                # The comparison ignores the returned value's tzinfo.
                assert actual.replace(tzinfo=None) == expected, (
                    f"[{case.name}] LTZ mismatch (tz-stripped): {actual!r} != {expected!r}"
                )
            case "timestamp_tz":
                # Only type and tz-awareness are checked here; the value
                # itself is not compared against ``expected``.
                assert isinstance(actual, datetime.datetime), (
                    f"[{case.name}] expected datetime, got {type(actual).__name__}: {actual!r}"
                )
                assert actual.tzinfo is not None, (
                    f"[{case.name}] expected tz-aware datetime, got naive: {actual!r}"
                )
            case _:
                assert actual == expected, (
                    f"[{case.name}] value mismatch: {actual!r} != {expected!r}"
                )

    def assert_error(self, case):
        """Assert that ``case`` did NOT land in the table (and hit DLQ if applicable)."""
        assert case.name not in self.rows, (
            f"[{case.name}] expected NOT in table but found: "
            f"{self.rows[case.name].get(case.col)!r} (mode={self.mode})"
        )
        # v4-ht has no DLQ — errors silently drop records server-side
        if self.mode != "v4-ht":
            assert case.name in self.dlq_ids, (
                f"[{case.name}] expected in DLQ but not found (mode={self.mode})"
            )

    @staticmethod
    def _compare_float(name, actual, expected):
        """Compare a FLOAT column value.

        A string ``expected`` is parsed with ``float()`` and compared exactly,
        with NaN handled through ``math.isnan``; any other ``expected`` is
        compared with a relative tolerance of 1e-6.
        """
        if isinstance(expected, str):
            exp_f = float(expected)
            if math.isnan(exp_f):
                # NaN never compares equal to itself, so test it explicitly.
                assert math.isnan(float(actual)), (
                    f"[{name}] expected NaN, got {actual!r}"
                )
            else:
                assert float(actual) == exp_f, (
                    f"[{name}] expected {exp_f}, got {actual!r}"
                )
        else:
            assert actual == pytest.approx(expected, rel=1e-6), (
                f"[{name}] float mismatch: {actual!r} != {expected!r}"
            )

    @staticmethod
    def _compare_json(name, actual, expected):
        """Compare a VARIANT/OBJECT/ARRAY column value after JSON-decoding strings."""

        def _try_parse(val):
            # Strings are decoded as JSON when possible; everything else
            # (and undecodable strings) is returned unchanged.
            if isinstance(val, str):
                try:
                    return json.loads(val)
                except (json.JSONDecodeError, TypeError):
                    return val
            return val

        parsed = _try_parse(actual)
        exp = _try_parse(expected) if isinstance(expected, str) else expected
        # SSv1 sometimes double-encodes JSON strings — try one more parse
        if isinstance(parsed, str) and not isinstance(exp, str):
            parsed = _try_parse(parsed)
        assert parsed == exp, (
            f"[{name}] JSON mismatch: {parsed!r} != {exp!r} (raw: {actual!r})"
        )


# ---------------------------------------------------------------------------
# Fixtures — shared
# ---------------------------------------------------------------------------

@pytest.fixture(scope="module", params=["v3", "v4-compat", "v4-ht"])
def ingestion_mode(request):
    """Module-scoped fixture parametrized over the three ingestion modes."""
    return request.param


@pytest.fixture(scope="module")
def mode_salt(session_name_salt, ingestion_mode):
    """Session salt with a per-mode suffix ("_v3", "" or "_ht") appended."""
    suffix = {"v3": "_v3", "v4-compat": "", "v4-ht": "_ht"}[ingestion_mode]
    return f"{session_name_salt}{suffix}"


# ---------------------------------------------------------------------------
# Connector config builder
# ---------------------------------------------------------------------------

def _build_mode_config(ingestion_mode, *, dlq_topic=None):
    """Load the base template config and apply mode-specific overrides.

    When ``dlq_topic`` is provided, errors.tolerance is set to "all" and DLQ
    routing is configured. When omitted, errors.tolerance stays "none" so the
    connector task aborts immediately on validation errors — this gives fast
    failure for tests that expect errors (e.g. unsupported types).
""" base = json.loads((TEMPLATE_DIR / BASE_TEMPLATE).read_text()) config = dict(base["config"]) # Schematization for all modes — required for JSON key → column mapping config["snowflake.enable.schematization"] = "true" if dlq_topic: config["errors.tolerance"] = "all" config["errors.deadletterqueue.topic.name"] = dlq_topic config["errors.deadletterqueue.topic.replication.factor"] = "1" config["errors.deadletterqueue.context.headers.enable"] = "true" match ingestion_mode: case "v3": config = v4_config_to_v3(config) case "v4-compat": config["snowflake.validation"] = "client_side" config["snowflake.compatibility.enable.column.identifier.normalization"] = ( "true" ) config[ "snowflake.compatibility.enable.autogenerated.table.name.sanitization" ] = "true" config["snowflake.streaming.classic.offset.migration"] = "best_effort" config[ "snowflake.streaming.classic.offset.migration.include.connector.name" ] = "false" case "v4-ht": config["snowflake.validation"] = "server_side" config["snowflake.streaming.validate.compatibility.with.classic"] = "false" return config # --------------------------------------------------------------------------- # Fixture — single-table batch connector (test_type_compatibility.py) # --------------------------------------------------------------------------- @pytest.fixture(scope="module") def results(driver, mode_salt, ingestion_mode): """Single-table batch connector for type compatibility tests. Creates one table with all typed columns, sends every CASES entry in a single batch, waits for ingested rows, queries them, reads the DLQ, and yields a frozen Results object for assertion. Why this doesn't reuse wait_for_rows / create_table / create_custom_connector: - wait_for_rows waits for an exact row count, but v4 modes reject varying subsets of test cases so the final count is unknown. We use a stabilization loop (count stops changing for N seconds) instead. 
- create_table is function-scoped; this fixture is module-scoped (one connector per mode, shared across all test functions). The mode-aware table name casing (v3 uppercases, v4 preserves case) also isn't handled by the existing fixture. - create_custom_connector has the same scope mismatch and provides no benefit over calling driver.createConnector directly (we already handle cleanup in the finally block). """ from .test_type_compatibility import COLUMNS, CASES bootstrap = driver.kafkaAddress table_name = f"dt_compat{mode_salt}" dlq_topic = f"dlq_dt_compat{mode_salt}" # v3 (SnowflakeSinkConnector) uppercases topic→table internally. # v4-compat with autogenerated_table_name_sanitization=true also uppercases. # v4-ht (SnowflakeStreamingSinkConnector) preserves topic case for the table. # We must create and query the Snowflake table with the case the connector # will actually use, otherwise we get a case-sensitive table mismatch. sf_table = table_name if ingestion_mode == "v4-ht" else table_name.upper() quoted_table = quote_name(sf_table) # Consistent timezone for timestamp tests driver.snowflake_conn.cursor().execute("ALTER SESSION SET TIMEZONE = 'UTC'") # Create single table from COLUMNS spec. col_defs = ", ".join(f"{name} {ddl}" for name, ddl in COLUMNS.items()) error_logging = " ERROR_LOGGING = TRUE" if ingestion_mode == "v4-ht" else "" driver.snowflake_conn.cursor().execute( f"CREATE OR REPLACE TABLE {quoted_table} ({col_defs}){error_logging}" ) # v4 connector requires the table property for schema evolution (not the # connector config). Without this, any structural mismatch routes to DLQ. 
driver.snowflake_conn.cursor().execute( f"ALTER TABLE {quoted_table} SET ENABLE_SCHEMA_EVOLUTION = TRUE" ) # Create topics driver.createTopics(table_name, partitionNum=1, replicationNum=1) driver.createTopics(dlq_topic, partitionNum=1, replicationNum=1) # Register connector via driver.createConnector (handles retries and cleanup) config = _build_mode_config(ingestion_mode, dlq_topic=dlq_topic) rest_request = driver.createConnector( name_salt=mode_salt, unsalted_name="dt_compat", config_template=config, ) connector_name = rest_request["name"] driver.startConnectorWaitTime() # Build and send all records in one batch records = [] keys = [] for i, case in enumerate(CASES): record = {"ID": case.name, "TEST_CASE": case.name} record[case.col] = case.value records.append(json.dumps(record).encode()) keys.append(json.dumps({"number": str(i)}).encode()) driver.sendBytesData(table_name, records, keys) # Wait until row count stabilizes. We cannot wait for an exact count # because v4-compat and v4-ht reject some "ingested" cases due to known # divergences (binary, boolean coercion, int-epoch, etc.). Instead, # poll until the count stops changing for STABLE_SECS. 
STABLE_SECS = 15 deadline = time.monotonic() + 120 last_count = 0 stable_since = None while time.monotonic() < deadline: count = driver.select_number_of_records(sf_table) or 0 if count != last_count: last_count = count stable_since = time.monotonic() elif stable_since and count > 0: if (time.monotonic() - stable_since) >= STABLE_SECS: logger.info( "Row count stabilized at %d for %ds, proceeding", count, STABLE_SECS, ) break if failed := driver.get_failed_tasks(connector_name): logger.warning( "Connector task failed: %s", failed[0].get("trace", "")[:200] ) break time.sleep(5) else: if last_count == 0: logger.warning( "Stabilization timed out with 0 rows — connector may not be ingesting" ) # Query all rows cursor = driver.snowflake_conn.cursor() cursor.execute( f'SELECT * FROM {quoted_table} ORDER BY RECORD_METADATA:"offset"::int' ) col_names = [desc[0] for desc in cursor.description] raw_rows = cursor.fetchall() row_lookup = {} for row in raw_rows: row_dict = dict(zip(col_names, row)) row_id = row_dict.get("ID") if row_id: row_lookup[row_id] = row_dict # Read DLQ — parse message body JSON to extract case ID dlq_ids = set() consumer = KafkaConsumer( { "bootstrap.servers": bootstrap, "group.id": f"dlq-reader-{uuid4().hex[:8]}", "auto.offset.reset": "earliest", "enable.auto.commit": "false", } ) consumer.subscribe([dlq_topic]) deadline = time.monotonic() + 20 empty_polls = 0 while time.monotonic() < deadline: remaining = max(0.5, deadline - time.monotonic()) msg = consumer.poll(remaining) if msg is None: empty_polls += 1 # After partition assignment, 3 consecutive empty polls → done if empty_polls >= 3 and dlq_ids: break continue empty_polls = 0 if msg.error(): if msg.error().code() != KafkaError._PARTITION_EOF: logger.warning("DLQ consumer error: %s", msg.error()) continue try: body = json.loads(msg.value()) if "ID" in body: dlq_ids.add(body["ID"]) else: logger.warning("DLQ message missing ID field: %s", msg.value()[:200]) except (json.JSONDecodeError, TypeError): 
logger.warning("Could not parse DLQ message body: %s", msg.value()[:200]) consumer.close() # Query error table for v4-ht mode error_table_rows = [] if ingestion_mode == "v4-ht": try: et_cursor = driver.snowflake_conn.cursor() et_cursor.execute(f"SELECT * FROM ERROR_TABLE({quoted_table})") et_col_names = [desc[0] for desc in et_cursor.description] for row in et_cursor.fetchall(): error_table_rows.append(dict(zip(et_col_names, row))) et_cursor.close() except Exception as e: logger.warning("Could not query error table: %s", e) logger.info( "Results for mode=%s: %d rows, %d DLQ, %d error_table, %d sent", ingestion_mode, len(row_lookup), len(dlq_ids), len(error_table_rows), len(CASES), ) result = Results( rows=row_lookup, dlq_ids=frozenset(dlq_ids), mode=ingestion_mode, total_sent=len(CASES), columns=COLUMNS, error_table_rows=tuple(error_table_rows), ) try: yield result finally: driver.closeConnector(connector_name) try: driver.deleteTopic(table_name) except Exception: pass try: driver.deleteTopic(dlq_topic) except Exception: pass # --------------------------------------------------------------------------- # Fixtures — per-test connector (test_unsupported_types.py) # # These stay separate because unsupported types (GEOGRAPHY, GEOMETRY, VECTOR, # structured OBJECT/ARRAY) crash streaming channels and cannot share a batch # with well-behaved types. 
# --------------------------------------------------------------------------- @pytest.fixture def typed_table(driver, mode_salt, ingestion_mode): """Factory: create a Snowflake table + Kafka topic for a single test.""" created = [] def _create(test_id, col_ddl): topic = f"{test_id}{mode_salt}" sf_table = topic if ingestion_mode == "v4-ht" else topic.upper() quoted = quote_name(sf_table) driver.snowflake_conn.cursor().execute( f"CREATE OR REPLACE TABLE {quoted} " f"(VALUE_COL {col_ddl}, RECORD_METADATA VARIANT)" ) driver.snowflake_conn.cursor().execute( f"ALTER TABLE {quoted} SET ENABLE_SCHEMA_EVOLUTION = TRUE" ) driver.createTopics(topic, partitionNum=1, replicationNum=1) created.append(topic) return topic try: yield _create finally: for t in created: try: driver.deleteTopic(t) except Exception: pass @pytest.fixture def ingest_one_type_abort(driver, mode_salt, ingestion_mode, typed_table): """Per-test connector (abort mode) for a single column type. Creates a table with one typed column, registers a connector with errors.tolerance=none, sends values, and returns an IngestResult. The connector task fails immediately on validation errors — no DLQ. """ created_connectors = [] def _run(test_id, col_ddl, values, *, timeout=60): topic = typed_table(test_id, col_ddl) sf_table = topic if ingestion_mode == "v4-ht" else topic.upper() # Abort mode (errors.tolerance=none) — connector task fails immediately # on validation errors, giving fast feedback for unsupported types. 
config = _build_mode_config(ingestion_mode) rest_request = driver.createConnector( name_salt=mode_salt, unsalted_name=test_id, config_template=config, ) connector_name = rest_request["name"] created_connectors.append(connector_name) driver.startConnectorWaitTime() records = [json.dumps({"VALUE_COL": v}).encode() for v in values] keys = [json.dumps({"number": str(i)}).encode() for i in range(len(values))] driver.sendBytesData(topic, records, keys) deadline = time.monotonic() + timeout error = None while time.monotonic() < deadline: if failed := driver.get_failed_tasks(connector_name): error = failed[0].get("trace", "no trace") logger.info("Connector error for %s: %.500s", test_id, error) break tbl = driver.select_number_of_records(sf_table) or 0 if tbl >= len(values): break time.sleep(2) rows = ( driver.snowflake_conn.cursor() .execute( f'SELECT VALUE_COL FROM {quote_name(sf_table)} ORDER BY RECORD_METADATA:"offset"::int' ) .fetchall() ) return IngestResult( values=[r[0] for r in rows], connector_error=error, ) try: yield _run finally: for name in reversed(created_connectors): driver.closeConnector(name) @dataclass class IngestResult: """Legacy result type for standalone_ingest (test_unsupported_types.py).""" values: list dlq_count: int = 0 dlq_errors: list = field(default_factory=list) connector_error: str | None = None ================================================ FILE: test/tests/compatibility/test_compatibility_case_sensitivity.py ================================================ from dataclasses import dataclass import json from typing import Any, Optional import pytest from snowflake.connector import DictCursor from lib.config_migration import V3_CONFIG_TEMPLATE from lib.driver import KafkaDriver from lib.fixtures.table import Table pytestmark = pytest.mark.compatibility @pytest.fixture def case(connector_version: str): """Switches values depending on the connector version.""" def _case(*, v3, v4): match connector_version: case "v3": return v3 case "v4": 
return v4 case _: raise ValueError(f"Unsupported connector version: {connector_version}") return _case def test_compatibility_case_sensitivity_table_name( driver: KafkaDriver, case, connector_version: str, create_connector, create_topics, name_salt: str, wait_for_rows, ): """Assert table name derived by the connector matches our expectations. Validates compatibility with KC v3, i.e. client-side validation is enabled. """ @dataclass(frozen=True) class TableNameCase: case_name: str # description unsalted_topic_name: str topic2table_value: Optional[str] expected_table_name: str test_cases = [ # If no topic2table.map is provided, the table name is the same as the topic name. # NB the topic name is salted by the driver. TableNameCase("lower_a", "a", None, f"A{name_salt}"), TableNameCase("upper_b", "B", None, f"B{name_salt}"), TableNameCase("lower_c_mapped", "c_topic", f"c{name_salt}", f"C{name_salt}"), TableNameCase("upper_d_mapped", "D_topic", f"D{name_salt}", f"D{name_salt}"), *case( # KC v3 does not support: # - quoted table names in topic2table.map # - arbitrary unicode characters in topic2table.map v3=[], v4=[ TableNameCase( "lower_e_mapped_quoted", "e_topic", f'"e{name_salt}"', f"e{name_salt}", ), TableNameCase( "upper_f_mapped_quoted", "f_topic", f'"F{name_salt}"', f"F{name_salt}", ), TableNameCase( "unicode_mapped_quoted", "g_topic", f'"❄️{name_salt}"', f"❄️{name_salt}", ), ], ), ] topics = create_topics( [test_case.unsalted_topic_name for test_case in test_cases], with_tables=False ) topic2table_map = ",".join( f"{test_case.unsalted_topic_name}{name_salt}:{test_case.topic2table_value}" for test_case in test_cases if test_case.topic2table_value is not None ) if connector_version == "v3" and topic2table_map == "": # In KC v3, topic2table.map cannot be empty. 
topic2table_map = None connector = create_connector( v3_config={ key: value for key, value in { **V3_CONFIG_TEMPLATE, "topics": ",".join(topics), "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": "true", "snowflake.topic2table.map": topic2table_map, }.items() if value is not None } ) driver.startConnectorWaitTime() for test_case in test_cases: driver.sendBytesData( f"{test_case.unsalted_topic_name}{name_salt}", [json.dumps({"case_name": test_case.case_name}).encode("utf-8")], ) for test_case in test_cases: expected_table = Table(driver, test_case.expected_table_name) wait_for_rows(expected_table.name, 1, connector_name=connector.name) tables = ( driver.snowflake_conn.cursor(DictCursor).execute("show tables").fetchall() ) assert test_case.expected_table_name in [table["name"] for table in tables] # Make sure it's the correct one, i.e. has the data we sent it. assert expected_table.select_scalar("CASE_NAME") == test_case.case_name def test_compatibility_case_sensitivity_ingestion_columns( driver: KafkaDriver, create_connector, create_topics, create_table, wait_for_rows, ): @dataclass(frozen=True) class ColumnIngestionCase: case_name: str column_names: list[str] column_types: list[str] payload: dict[str, str] expected_values: list[Any] test_cases = [ ColumnIngestionCase( case_name="upper_A", column_names=["A"], column_types=["VARCHAR"], payload={"A": "upper A"}, expected_values=["upper A"], ), ColumnIngestionCase( case_name="lower_b_into_upper_B", column_names=["B"], column_types=["VARCHAR"], payload={"b": "lower b into upper B"}, expected_values=["lower b into upper B"], ), ColumnIngestionCase( case_name="lower_c_into_lower_c", column_names=["c"], column_types=["VARCHAR"], # KC v3 requires quotes to not uppercase the key. 
payload={'"c"': "lower c into lower c"}, expected_values=["lower c into lower c"], ), ColumnIngestionCase( case_name="pair_D_d", column_names=["D", "d"], column_types=["VARCHAR", "VARCHAR"], payload={"D": "upper D", '"d"': "lower d"}, expected_values=["upper D", "lower d"], ), ColumnIngestionCase( case_name="pair_E_f", column_names=["E", "f"], column_types=["VARCHAR", "VARCHAR"], payload={"E": "upper E", '"f"': "lower f"}, expected_values=["upper E", "lower f"], ), ColumnIngestionCase( case_name="unicode", column_names=["❄️"], column_types=["VARCHAR"], payload={'"❄️"': "unicode ❄️"}, expected_values=["unicode ❄️"], ), # We don't process keys beyond the first level. ColumnIngestionCase( case_name="variant", column_names=["V"], column_types=["VARIANT"], payload={"V": {"a": "b", "C": "D", '"e"': "❄️"}}, expected_values=[{"a": "b", "C": "D", '"e"': "❄️"}], ), ] topics = create_topics( [test_case.case_name for test_case in test_cases], with_tables=False, ) tables = [ create_table( test_case.case_name.upper(), columns=( "(" + ", ".join( f'"{column_name}" {column_type}' for column_name, column_type in zip( test_case.column_names, test_case.column_types, strict=True ) ) + ', "RECORD_METADATA" VARIANT)' ), cleanup_topic=False, ) for test_case in test_cases ] connector = create_connector( v3_config={ **V3_CONFIG_TEMPLATE, "topics": ",".join(topics), "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": "true", } ) driver.startConnectorWaitTime() for topic, test_case in zip(topics, test_cases, strict=True): driver.sendBytesData(topic, [json.dumps(test_case.payload).encode("utf-8")]) for test_case, table in zip(test_cases, tables, strict=True): wait_for_rows(table.name, 1, connector_name=connector.name) actual_row = table.select("*")[0] for column_name, expected_value, column_type in zip( 
test_case.column_names, test_case.expected_values, test_case.column_types, strict=True, ): if column_type == "VARIANT": actual_value = json.loads(actual_row[column_name]) else: actual_value = actual_row[column_name] assert actual_value == expected_value, ( f"{test_case.case_name}.{column_name}: " f"expected {expected_value}, got {actual_value}" ) def test_case_sensitivity_schema_evolution( driver: KafkaDriver, create_connector, create_topics, create_table, wait_for_rows, ): @dataclass(frozen=True) class SchemaEvolutionCase: case_name: str payload: dict[str, str] expected_values: dict[str, str] test_cases = [ SchemaEvolutionCase( case_name="upper_A", payload={"A": "upper A"}, expected_values={"A": "upper A"}, ), SchemaEvolutionCase( case_name="lower_b_into_upper_B", payload={"b": "lower b into upper B"}, expected_values={"B": "lower b into upper B"}, ), SchemaEvolutionCase( case_name="quoted_c", payload={'"c"': "quoted c"}, expected_values={"c": "quoted c"}, ), SchemaEvolutionCase( case_name="pair_D_d", payload={"D": "upper D", '"d"': "lower d"}, expected_values={"D": "upper D", "d": "lower d"}, ), SchemaEvolutionCase( case_name="pair_E_f", payload={"E": "upper E", '"f"': "lower f"}, expected_values={"E": "upper E", "f": "lower f"}, ), # Funny enough, KC v3 is able to ingest an unquoted unicode column # if it immediately follows a schema evolution, # whereas a regular ingestion would fail. 
SchemaEvolutionCase( case_name="unicode", payload={"❄️": "unicode"}, expected_values={"❄️": "unicode"}, ), ] topics = create_topics( [test_case.case_name for test_case in test_cases], with_tables=False, ) tables = [ create_table( test_case.case_name.upper(), columns='("RECORD_METADATA" VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE', cleanup_topic=False, ) for test_case in test_cases ] connector = create_connector( v3_config={ **V3_CONFIG_TEMPLATE, "topics": ",".join(topics), "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", # KC v3 needs this connector setting to attempt schema evolution. # KC v4 ignores it and instead relies on the table property below. "snowflake.enable.schematization": "true", } ) driver.startConnectorWaitTime() for topic, test_case in zip(topics, test_cases, strict=True): driver.sendBytesData(topic, [json.dumps(test_case.payload).encode("utf-8")]) for test_case, table in zip(test_cases, tables, strict=True): wait_for_rows(table.name, 1, connector_name=connector.name) actual_column_names = {column[0] for column in table.schema()} expected_column_names = set(test_case.expected_values.keys()) | { "RECORD_METADATA" } assert actual_column_names == expected_column_names, ( f"{test_case.case_name}: " f"expected {expected_column_names}, got {actual_column_names}" ) actual_row = table.select("*")[0] for column_name, expected_value in test_case.expected_values.items(): actual_value = actual_row[column_name] assert actual_value == expected_value, ( f"{test_case.case_name}.{column_name}: " f"expected {expected_value}, got {actual_value}" ) ================================================ FILE: test/tests/compatibility/test_migration.py ================================================ """ ### Migration with duplicates but no gaps During migration, KC v4 will inherit consumer group offsets from KC v3 if the following 
conditions are met: 1. The new connector is given the same name as the old one. (They will belong to the same consumer group.) 2. At most `offsets.retention.minutes` has passed (defaults to 7 days). Inheriting the consumer group offsets means that the new connector will start ingesting from the last offset committed to *Kafka*. It's possible, especially under continuous load, that Kafka will not be fully caught up to the last offset committed to Snowflake. This will result in duplicate data being ingested, but no gaps. It should be possible to deduplicate the data after ingestion using the RECORD_METADATA column. ### Migration with possible gaps If the new connector has a different name, or too much time has passed, then depending on the value of `auto.offset.reset`, the KC v4 will start ingesting: - for `earliest`: from the beginning of the partition - for `latest`: only data ingested after the new connector was created """ import logging import time import pytest from lib.config_migration import V3_CONFIG_TEMPLATE, v3_config_to_v4 from lib.driver import KafkaDriver from lib.utils import RecordProducer, wait_for pytestmark = pytest.mark.compatibility # Don't parameterize on v3, we create both connector versions explicitly here. 
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_migration_without_ingestion(
    driver: KafkaDriver,
    name_salt,
    create_custom_connector,
    create_table,
    wait_for_rows,
):
    """Switch from v3 to v4 while no connector is running and check the row count.

    Three batches of 10 records are produced: one ingested by v3, one while no
    connector exists, and one after v4 (same connector name) is created. The
    table must end up with exactly the number of records produced.
    """
    test_name = "test_migration_without_duplicates"
    topic = f"{test_name}{name_salt}"
    table = create_table(
        test_name.upper(), columns='(record_metadata variant, "NUMBER" varchar)'
    )
    producer = RecordProducer(driver, topic)

    legacy_config = dict(V3_CONFIG_TEMPLATE)
    legacy_config.update(
        {
            "topics": topic,
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "snowflake.enable.schematization": "true",
        }
    )

    # --- Batch 1: ingested by the v3 connector ---
    logging.info("Creating v3 connector and sending initial batch")
    v3_connector = create_custom_connector(test_name, legacy_config)
    producer.send(10)
    logging.info(f"Produced 10 records (total: {producer.records_produced})")
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v3_connector.name,
    )

    # The Kafka-side committed offset must have caught up before v3 is closed.
    logging.info(
        f"Waiting for Kafka consumer group offset to catch up to {producer.records_produced}"
    )

    def _group_offset_caught_up():
        committed = driver.get_consumer_group_offset(v3_connector.name, topic) or 0
        return committed >= producer.records_produced

    assert wait_for(_group_offset_caught_up), (
        f"Consumer group offset never reached {producer.records_produced}"
    )
    logging.info(
        f"Consumer group offset: {driver.get_consumer_group_offset(v3_connector.name, topic)}"
    )

    logging.info("Closing v3 connector")
    assert v3_connector.close(wait_timeout=60)

    # --- Batch 2: produced while no connector is running ---
    logging.info("Sending second batch while connector is down")
    producer.send(10)
    logging.info(f"Produced 10 records (total: {producer.records_produced})")

    # --- Batch 3: produced after the v4 connector takes over ---
    logging.info("Creating v4 connector and sending third batch")
    v4_connector = create_custom_connector(test_name, v3_config_to_v4(legacy_config))
    producer.send(10)
    logging.info(f"Produced 10 records (total: {producer.records_produced})")

    logging.info(
        f"Waiting for all {producer.records_produced} records to land in Snowflake"
    )
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v4_connector.name,
    )
    logging.info(
        f"All {producer.records_produced} records ingested — no gaps, no duplicates"
    )


# Don't parameterize on v3, we create both connector versions explicitly here.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("ssv1_offset_migration", ["skip", "strict"])
def test_migration_with_ingestion(
    driver: KafkaDriver,
    name_salt,
    create_custom_connector,
    create_table,
    wait_for_rows,
    ssv1_offset_migration,
):
    """Test migration when there are in-flight data during switchover.

    With ssv1_offset_migration=skip (default), KC v4 starts from the consumer
    group offset, which may lag behind the SSv1 committed offset, causing
    duplicates.

    With ssv1_offset_migration=strict, KC v4 reads the SSv1 committed offset
    and uses it as the starting point, so no duplicates should occur.
    """
    # Mixed case on purpose to ensure case sensitivity is handled correctly.
    test_name = f"test_Migration_with_possible_duplicates_{ssv1_offset_migration}"
    warmup_records = 10
    topic = f"{test_name}{name_salt}"
    table = create_table(
        test_name.upper(),
        columns='(record_metadata variant, "NUMBER" varchar)',
    )
    producer = RecordProducer(driver, topic)

    legacy_config = dict(V3_CONFIG_TEMPLATE)
    legacy_config.update(
        {
            "topics": topic,
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "snowflake.enable.schematization": "true",
        }
    )

    logging.info(f"Creating v3 connector and sending {warmup_records} warmup records")
    v3_connector = create_custom_connector(test_name, legacy_config)
    producer.send(warmup_records)
    logging.info(
        f"Produced {warmup_records} records (total: {producer.records_produced})"
    )
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v3_connector.name,
    )

    logging.info("Starting continuous producer")
    producer.start_continuous()
    try:
        logging.info("Waiting for v3 to ingest beyond the warmup batch")

        def _beyond_warmup():
            return table.select_scalar("count(*)") > warmup_records

        assert wait_for(_beyond_warmup), (
            f"v3 never ingested beyond {warmup_records} warmup records"
        )
        logging.info(f"v3 ingested {table.select_scalar('count(*)')} rows so far")

        logging.info("Closing v3 connector while data is still flowing")
        assert v3_connector.close(wait_timeout=60)

        logging.info(
            "Creating v4 connector (same name → inherits consumer group offsets)"
        )
        upgraded_config = dict(v3_config_to_v4(legacy_config))
        upgraded_config["snowflake.streaming.classic.offset.migration"] = (
            ssv1_offset_migration
        )
        # Disable compatibility validation to allow not migrating offsets from SSv1.
        upgraded_config["snowflake.streaming.validate.compatibility.with.classic"] = (
            "false"
        )
        v4_connector = create_custom_connector(test_name, upgraded_config)

        logging.info("Letting v4 catch up for 5s before snapshot")
        time.sleep(5)

        records_produced_so_far = producer.records_produced
        logging.info(
            f"Snapshot: {records_produced_so_far} records produced, "
            f"{table.select_scalar('count(*)')} rows in Snowflake"
        )
        # One row more than the snapshot shows v4 ingests new data itself.
        wait_for_rows(
            table_name=table.name,
            at_least=True,
            expected=records_produced_so_far + 1,
            connector_name=v4_connector.name,
        )
        logging.info(
            f"v4 is actively ingesting ({table.select_scalar('count(*)')} rows)"
        )
    finally:
        # Always stop the background producer, even when an assertion failed.
        producer.stop_continuous()

    expected = producer.records_produced
    logging.info(
        f"Waiting for all {expected} distinct records to land in Snowflake "
        f"(currently {table.select_scalar('count(distinct number)')} distinct, "
        f"{table.select_scalar('count(*)')} total)"
    )

    def _all_distinct_records_landed():
        return table.select_scalar("count(distinct number)") == expected

    assert wait_for(_all_distinct_records_landed, timeout=120), (
        f"Expected {expected} distinct records, "
        f"got {table.select_scalar('count(distinct number)')} distinct / {table.select_scalar('count(*)')} total"
    )

    distinct_offsets = table.select_scalar("count(distinct record_metadata:offset)")
    total_rows = table.select_scalar("count(*)")
    logging.info(
        f"Final: {expected} distinct records, {distinct_offsets} distinct offsets, "
        f"{total_rows} total rows (duplicates: {total_rows - expected})"
    )

    assert distinct_offsets == expected, (
        f"Expected {expected} distinct offsets, got {distinct_offsets}"
    )

    if ssv1_offset_migration != "strict":
        assert total_rows > expected, (
            f"Expected duplicates (total > {expected}), but got {total_rows}"
        )
    else:
        assert total_rows == expected, (
            f"With strict mode, expected exactly {expected} rows (no duplicates), "
            f"but got {total_rows}"
        )


# Don't parameterize on v3, we create both connector versions explicitly here.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_migration_different_connector_name( driver: KafkaDriver, name_salt, create_custom_connector, create_table, wait_for_rows, ): """Prove that SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET migrates offsets server-side. Uses a *different* connector name for v4 so there is no consumer group inheritance. With auto.offset.reset=earliest, Kafka re-delivers all records from offset 0. With ssv1_offset_migration=skip, v4 would re-ingest everything → duplicates. With ssv1_offset_migration=strict, the system function writes the SSv1 offset to the SSv2 channel, so v4 skips already-committed records → no duplicates. IMPORTANT NOTE: This test only works because KC v3 did *not* append the connector name to the channel name. If it did, we'd be looking up the wrong channel name. """ test_name = "test_Migration_different_connector_name" table = create_table( test_name.upper(), columns='(record_metadata variant, "NUMBER" varchar)' ) topic = f"{test_name}{name_salt}" producer = RecordProducer(driver, topic) v3_config_template = { **V3_CONFIG_TEMPLATE, "topics": topic, "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": "true", } # Phase 1: v3 ingests records via SSv1 logging.info("Creating v3 connector and sending initial batch") v3_connector = create_custom_connector(test_name, v3_config_template) producer.send(20) logging.info(f"Produced 20 records (total: {producer.records_produced})") wait_for_rows( table_name=table.name, expected=producer.records_produced, connector_name=v3_connector.name, ) logging.info("Closing v3 connector") assert v3_connector.close(wait_timeout=60) v3_rows = table.select_scalar("count(*)") logging.info(f"v3 ingested {v3_rows} rows, now closed") # Phase 2: v4 with a DIFFERENT connector name → no consumer group inheritance. 
# auto.offset.reset=earliest forces Kafka to re-deliver from offset 0. # The system function is the only mechanism that prevents re-ingestion. v4_name = f"{test_name}_v4" v4_config_template = { **v3_config_to_v4(v3_config_template), "snowflake.streaming.classic.offset.migration": "strict", "consumer.override.auto.offset.reset": "earliest", } logging.info( f"Creating v4 connector with different name ({v4_name}) and strict mode" ) v4_connector = create_custom_connector(v4_name, v4_config_template) # Phase 3: Send more records and verify no duplicates producer.send(10) expected = producer.records_produced logging.info(f"Produced 10 more records (total: {expected})") wait_for_rows( table_name=table.name, expected=expected, connector_name=v4_connector.name, ) total_rows = table.select_scalar("count(*)") distinct_offsets = table.select_scalar("count(distinct record_metadata:offset)") logging.info( f"Final: {expected} expected, {distinct_offsets} distinct offsets, " f"{total_rows} total rows" ) assert distinct_offsets == expected, ( f"Expected {expected} distinct offsets, got {distinct_offsets}" ) assert total_rows == expected, ( f"System function migration should prevent duplicates: " f"expected {expected} rows, got {total_rows}" ) # Don't parameterize on v3, we create both connector versions explicitly here. @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_migration_from_snowpipe( driver: KafkaDriver, name_salt, create_custom_connector, create_table, wait_for_rows, ): """Test migration from KC v3 file-based Snowpipe to KC v4 (SSv2). SNOW-3293138: Verifies that a clean switchover from file-based Snowpipe to Snowpipe Streaming produces no gaps and no duplicates when the consumer group offsets are inherited (same connector name). 
""" test_name = "test_migration_from_snowpipe" warmup_records = 10 table = create_table( test_name.upper(), columns="(record_metadata variant, record_content variant)", ) topic = f"{test_name}{name_salt}" producer = RecordProducer(driver, topic) # File-based Snowpipe: schematization unsupported, buffer.flush.time >= 10. v3_config_template = { **V3_CONFIG_TEMPLATE, "topics": topic, "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "buffer.flush.time": "10", } v3_config_template["snowflake.ingestion.method"] = "SNOWPIPE" v3_config_template.pop("snowflake.streaming.max.client.lag") logging.info( f"Creating v3 Snowpipe connector and sending {warmup_records} warmup records" ) v3_connector = create_custom_connector(test_name, v3_config_template) producer.send(warmup_records) logging.info( f"Produced {warmup_records} records (total: {producer.records_produced})" ) wait_for_rows( table_name=table.name, expected=producer.records_produced, connector_name=v3_connector.name, ) logging.info("Starting continuous producer") producer.start_continuous() try: logging.info("Waiting for v3 to ingest beyond the warmup batch") assert wait_for(lambda: table.select_scalar("count(*)") > warmup_records), ( f"v3 never ingested beyond {warmup_records} warmup records" ) logging.info(f"v3 ingested {table.select_scalar('count(*)')} rows so far") logging.info("Closing v3 connector while data is still flowing") assert v3_connector.close(wait_timeout=60) v3_kafka_offset = driver.get_consumer_group_offset(v3_connector.name, topic) logging.info(f"v3 consumer group offset after shutdown: {v3_kafka_offset}") # File-based Snowpipe ingests staged files asynchronously. If we start # v4 (SSv2) before Snowpipe finishes draining, SSv2 rows from newer # offsets can land before Snowpipe finishes loading older ones, # breaking end-to-end ordering. 
logging.info("Waiting for Snowpipe to finish ingesting staged files") wait_for_rows(table_name=table.name, expected=v3_kafka_offset, at_least=True) logging.info( "Creating v4 connector (same name → inherits consumer group offsets)" ) v4_config_template = v3_config_to_v4(v3_config_template) v4_connector = create_custom_connector(test_name, v4_config_template) logging.info("Letting v4 catch up for 5s before snapshot") time.sleep(5) records_produced_so_far = producer.records_produced logging.info( f"Snapshot: {records_produced_so_far} records produced, " f"{table.select_scalar('count(*)')} rows in Snowflake" ) wait_for_rows( table_name=table.name, at_least=True, expected=records_produced_so_far + 1, connector_name=v4_connector.name, ) logging.info( f"v4 is actively ingesting ({table.select_scalar('count(*)')} rows)" ) finally: producer.stop_continuous() expected = producer.records_produced logging.info( f"Waiting for all {expected} distinct records to land in Snowflake " f"(currently {table.select_scalar('count(distinct record_content:number)')} " f"distinct, {table.select_scalar('count(*)')} total)" ) wait_for_rows( table_name=table.name, expected=expected, connector_name=v4_connector.name, ) total_rows = table.select_scalar("count(*)") distinct_numbers = table.select_scalar("count(distinct record_content:number)") logging.info( f"Final: {expected} expected, {distinct_numbers} distinct, {total_rows} total" ) assert distinct_numbers == expected, ( f"Expected {expected} distinct records, got {distinct_numbers}" ) assert total_rows == expected, ( f"Expected exactly {expected} rows (no duplicates), got {total_rows}" ) ================================================ FILE: test/tests/compatibility/test_schematization_disabled.py ================================================ import json from lib.config_migration import V3_CONFIG_TEMPLATE from lib.fixtures.table import Table from lib.driver import KafkaDriver def test_compatibility_schematization_disabled_complex( 
driver: KafkaDriver, create_connector, create_topics, wait_for_rows ): """Nested JSON data lands as queryable VARIANT in RECORD_CONTENT. Table is NOT pre-created — the connector auto-creates it. KCv3 auto-creates with (RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT). KCv4 auto-creates with both columns as VARIANT when schematization=off. Runs for both v3 and v4 to verify compatibility. """ topic = create_topics(["schematization_disabled_complex"], with_tables=False)[0] connector = create_connector( v3_config={ **V3_CONFIG_TEMPLATE, "topics": topic, "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": "false", } ) driver.startConnectorWaitTime() values = [ # 0: nested object with arrays json.dumps( { "user": {"name": "Alice", "scores": [1, 2, 3]}, "tags": ["a", "b"], "count": 42, } ).encode("utf-8"), # 1: deeply nested json.dumps({"a": {"b": {"c": {"d": "deep"}}}}).encode("utf-8"), # 2: flat object json.dumps({"city": "Hsinchu", "age": 30}).encode("utf-8"), ] record_count = len(values) driver.sendBytesData(topic, values, [], partition=0) table = Table(driver, topic.upper()) wait_for_rows(table.name, record_count, connector_name=connector.name) # Verify RECORD_CONTENT column exists and is VARIANT schema = table.schema(as_dict=True) col_schema = next(c for c in schema if c["name"] == "RECORD_CONTENT") assert col_schema["type"] == "VARIANT" # Verify nested object with arrays (offset 0) assert table.select( """ RECORD_CONTENT:user.name::string AS user_name, RECORD_CONTENT:user.scores[0]::number AS first_score, RECORD_CONTENT:tags[0]::string AS first_tag, RECORD_CONTENT:count::number AS cnt """, 'WHERE RECORD_METADATA:"offset"::number = 0', ) == [ { "USER_NAME": "Alice", "FIRST_SCORE": 1, "FIRST_TAG": "a", "CNT": 42, } ] # Verify deeply nested (offset 1) assert table.select( """ RECORD_CONTENT:a.b.c.d::string AS 
val """, 'WHERE RECORD_METADATA:"offset"::number = 1', ) == [ { "VAL": "deep", } ] # Verify flat object (offset 2) assert table.select( """ RECORD_CONTENT:city::string AS city, RECORD_CONTENT:age::number AS age """, 'WHERE RECORD_METADATA:"offset"::number = 2', ) == [ { "CITY": "Hsinchu", "AGE": 30, } ] assert table.select_scalar("count(*)") == record_count def test_compatibility_schematization_disabled_primitive( driver: KafkaDriver, create_connector, create_topics, wait_for_rows ): """Bare strings via StringConverter land as VARIANT in RECORD_CONTENT. Table is NOT pre-created — the connector auto-creates it. Verifies that primitive (non-JSON) payloads are stored as VARIANT, not inferred as VARCHAR by schema evolution. Runs for both v3 and v4 to verify compatibility. """ topic = create_topics(["schematization_disabled_primitive"], with_tables=False)[0] connector = create_connector( v3_config={ **V3_CONFIG_TEMPLATE, "topics": topic, "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.storage.StringConverter", "snowflake.enable.schematization": "false", } ) driver.startConnectorWaitTime() values = [ b"hello world", b"42", b"true", ] driver.sendBytesData(topic, values, [], partition=0) table = Table(driver, topic.upper()) wait_for_rows(table.name, len(values), connector_name=connector.name) schema = table.schema(as_dict=True) col_schema = next(c for c in schema if c["name"] == "RECORD_CONTENT") assert col_schema["type"] == "VARIANT" rows = table.select( "RECORD_CONTENT::string AS content", 'ORDER BY RECORD_METADATA:"offset"::number', ) assert rows[0]["CONTENT"] == "hello world" assert rows[1]["CONTENT"] == "42" assert rows[2]["CONTENT"] == "true" assert table.select_scalar("count(*)") == len(values) ================================================ FILE: test/tests/compatibility/test_type_compatibility.py ================================================ """Data-type ingestion compatibility tests — 
single-table architecture.

All test cases are defined as data in ``CASES`` (below). One table, one topic,
and one connector per ingestion mode — all records are sent in a single batch,
queried once, and then asserted via the shared ``results`` fixture.

Assertions encode **v3 reference behavior**. Tests that encounter known v4
divergences handle them inline and log a ``DIVERGENCE`` warning — grep for
that prefix to find all behavioral differences across modes.

Parameterized across three ingestion modes via the ``ingestion_mode`` fixture
(module-scoped):
  - v3: SnowflakeSinkConnector with SNOWPIPE_STREAMING
  - v4-compat: SnowflakeStreamingSinkConnector with snowflake.validation=client_side
  - v4-ht: SnowflakeStreamingSinkConnector with snowflake.validation=server_side
    (server-side validation only, no DLQ — errors silently drop records)

Type aliases (INT, STRING, DOUBLE, DECIMAL, CHAR, etc.) are not tested
separately — they resolve to the same storage type and code path in Snowflake.

Reference: https://docs.snowflake.com/en/sql-reference/intro-summary-data-types
"""

import datetime
import json
import logging
from enum import Enum

import pytest

from .conftest import UNSET, Case, cases_where

logger = logging.getLogger(__name__)

# Applies the ``compatibility`` marker to every test in this module.
pytestmark = pytest.mark.compatibility

# ---------------------------------------------------------------------------
# Expect enum — makes the CASES table scannable at a glance
# ---------------------------------------------------------------------------


class Expect(str, Enum):
    """Expected outcome of a case under the v3 reference behavior.

    ``OK`` cases are checked with ``results.assert_ingested`` and ``ERR`` cases
    with ``results.assert_error`` (see ``_assert_all``). Because of the ``str``
    mixin, members compare equal to their plain string values.
    """

    OK = "ingested"
    ERR = "error"


# Short module-level aliases so each row of CASES stays on one line.
OK = Expect.OK
ERR = Expect.ERR

# ---------------------------------------------------------------------------
# Table column definitions (col_name → DDL type)
# ---------------------------------------------------------------------------

COLUMNS = {
    "ID": "VARCHAR NOT NULL",
    "TEST_CASE": "VARCHAR",
    "COL_NUMBER": "NUMBER",
    "COL_NUMSCALE": "NUMBER(10,2)",
    "COL_FLOAT": "FLOAT",
    "COL_VARCHAR": "VARCHAR",
    "COL_VARCHAR10": "VARCHAR(10)",
    "COL_BINARY": "BINARY",
    "COL_BOOLEAN": "BOOLEAN",
    "COL_DATE": "DATE",
    "COL_TIME": "TIME",
    "COL_TS_NTZ": "TIMESTAMP_NTZ",
    "COL_TS_LTZ": "TIMESTAMP_LTZ",
    "COL_TS_TZ": "TIMESTAMP_TZ",
    "COL_VARIANT": "VARIANT",
    "COL_OBJECT": "OBJECT",
    "COL_ARRAY": "ARRAY",
    "RECORD_METADATA": "VARIANT",
}

# ---------------------------------------------------------------------------
# Full test case specification
# ---------------------------------------------------------------------------
# Each Case populates only ID, TEST_CASE, and ONE typed column; the rest are
# NULL. Schematization handles sparse records.
#
# The ``group`` tag controls which test function owns each case. Every tag
# other than None must also be listed in ``_SPECIAL_GROUPS`` so the per-column
# tests skip it:
#   None               → owned by the per-column test (test_number, test_float, ...)
#   "float_special"    → test_float_special (NaN/Inf need string-representation docs)
#   "bool_coercion"    → test_boolean_coercion (known v4-ht divergence for numeric 0/1)
#   "ts_epoch"         → test_timestamp_ntz_epoch (known v4 divergence)
#   "variant_bare_str" → test_variant_bare_string (v4-ht ingests, v3/v4-compat DLQ)
#   "variant_json_str" → test_variant_json_string (v4-ht stores a quoted literal)
#   "array_json_str"   → test_array_json_string (string values sent to ARRAY)
#                        NOTE(review): ownership inferred from the test name — confirm
#   "xtype"            → test_cross_type_mismatch (values sent to wrong column)
#   "null"             → test_null (parametrized across all columns)

CASES = [
    # ---- NUMBER(38,0) ----
    Case("num_int", "COL_NUMBER", 42, OK),
    Case("num_zero", "COL_NUMBER", 0, OK),
    Case("num_neg", "COL_NUMBER", -100, OK),
    Case("num_maxint", "COL_NUMBER", 2147483647, OK),
    Case("num_minint", "COL_NUMBER", -2147483648, OK),
    Case("num_bad_str", "COL_NUMBER", "not_a_number", ERR),
    Case("num_bad_abc", "COL_NUMBER", "abc", ERR),
    Case("num_bad_obj", "COL_NUMBER", {"obj": 1}, ERR),
    # ---- NUMBER(10,2) ----
    Case("nsc_decimal", "COL_NUMSCALE", 123.45, OK, approx=0.01),
    Case("nsc_neg", "COL_NUMSCALE", -0.01, OK, approx=0.01),
    Case("nsc_zero", "COL_NUMSCALE", 0.0, OK, approx=0.01),
    Case("nsc_max", "COL_NUMSCALE", 99999.99, OK, approx=0.01),
    Case("nsc_bad_text", "COL_NUMSCALE", "text", ERR),
    # ---- FLOAT ----
    Case("flt_pi", "COL_FLOAT", 3.14, OK),
    Case("flt_neg", "COL_FLOAT", -1.5, OK),
    Case("flt_zero", "COL_FLOAT", 0.0, OK),
    Case("flt_sci", "COL_FLOAT", 1.0e10, OK),
    Case("flt_bad_text", "COL_FLOAT", "text", ERR),
Case("flt_bad_arr", "COL_FLOAT", [1, 2], ERR), # FLOAT special: NaN, Infinity, -Infinity (string representations) Case("flt_nan", "COL_FLOAT", "NaN", OK, group="float_special"), Case("flt_inf", "COL_FLOAT", "Infinity", OK, group="float_special"), Case("flt_ninf", "COL_FLOAT", "-Infinity", OK, group="float_special"), # ---- VARCHAR ---- Case("vc_hello", "COL_VARCHAR", "hello world", OK), Case("vc_special", "COL_VARCHAR", "special chars: !@#$%^&*()", OK), Case("vc_long", "COL_VARCHAR", "a" * 1000, OK), # ---- VARCHAR(10) ---- Case("vc10_short", "COL_VARCHAR10", "hello", OK), Case("vc10_exact", "COL_VARCHAR10", "0123456789", OK), Case("vc10_over", "COL_VARCHAR10", "a" * 20, ERR), # ---- BINARY ---- Case( "bin_hello", "COL_BINARY", "48656C6C6F", OK, expected_value=bytes.fromhex("48656C6C6F"), ), Case( "bin_dead", "COL_BINARY", "DEADBEEF", OK, expected_value=bytes.fromhex("DEADBEEF"), ), Case("bin_zero", "COL_BINARY", "00", OK, expected_value=bytes.fromhex("00")), Case( "bin_long", "COL_BINARY", "FF" * 100, OK, expected_value=bytes.fromhex("FF" * 100), ), # ---- BOOLEAN ---- Case("bool_true", "COL_BOOLEAN", True, OK), Case("bool_false", "COL_BOOLEAN", False, OK), Case("bool_bad_obj", "COL_BOOLEAN", {"key": "value"}, ERR), Case("bool_bad_arr", "COL_BOOLEAN", [1, 2, 3], ERR), Case("bool_bad_str", "COL_BOOLEAN", "random_string", ERR), # Boolean coercion: numeric 0/1 and string tokens. # v4-compat fix: RowValidator normalizes any valid input to Boolean. # v4-ht: RowValidator bypassed; Integer 0/1 reach SSv2 SDK directly and are dropped. 
Case( "bool_zero", "COL_BOOLEAN", 0, OK, expected_value=False, group="bool_coercion" ), Case("bool_one", "COL_BOOLEAN", 1, OK, expected_value=True, group="bool_coercion"), Case( "bool_str_true", "COL_BOOLEAN", "true", OK, expected_value=True, group="bool_coercion", ), Case( "bool_str_false", "COL_BOOLEAN", "false", OK, expected_value=False, group="bool_coercion", ), Case( "bool_str_yes", "COL_BOOLEAN", "yes", OK, expected_value=True, group="bool_coercion", ), Case( "bool_str_no", "COL_BOOLEAN", "no", OK, expected_value=False, group="bool_coercion", ), Case( "bool_str_on", "COL_BOOLEAN", "on", OK, expected_value=True, group="bool_coercion", ), Case( "bool_str_off", "COL_BOOLEAN", "off", OK, expected_value=False, group="bool_coercion", ), # ---- DATE ---- Case( "date_normal", "COL_DATE", "2024-01-15", OK, expected_value=datetime.date(2024, 1, 15), ), Case( "date_epoch", "COL_DATE", "1970-01-01", OK, expected_value=datetime.date(1970, 1, 1), ), Case( "date_future", "COL_DATE", "2099-12-31", OK, expected_value=datetime.date(2099, 12, 31), ), Case("date_bad", "COL_DATE", "not_a_date", ERR), # ---- TIME ---- Case( "time_normal", "COL_TIME", "13:45:30", OK, expected_value=datetime.time(13, 45, 30), ), Case( "time_midnight", "COL_TIME", "00:00:00", OK, expected_value=datetime.time(0, 0, 0), ), Case( "time_end", "COL_TIME", "23:59:59", OK, expected_value=datetime.time(23, 59, 59) ), Case("time_bad", "COL_TIME", "not_a_time", ERR), # ---- TIMESTAMP_NTZ ---- Case( "tsntz_normal", "COL_TS_NTZ", "2024-01-15T13:45:30", OK, expected_value=datetime.datetime(2024, 1, 15, 13, 45, 30), ), Case( "tsntz_epoch", "COL_TS_NTZ", "1970-01-01T00:00:00", OK, expected_value=datetime.datetime(1970, 1, 1, 0, 0, 0), ), Case( "tsntz_future", "COL_TS_NTZ", "2099-12-31T23:59:59", OK, expected_value=datetime.datetime(2099, 12, 31, 23, 59, 59), ), Case("tsntz_bad", "COL_TS_NTZ", "not_a_timestamp", ERR), # Integer epoch → TIMESTAMP_NTZ # KNOWN DIVERGENCE: v4 RowValidator rejects java.lang.Long for 
TIMESTAMP_NTZ. Case( "tsntz_int_epoch", "COL_TS_NTZ", 1705312800, OK, expected_value=datetime.datetime(2024, 1, 15, 10, 0, 0), group="ts_epoch", ), # ---- TIMESTAMP_LTZ ---- Case( "tsltz_normal", "COL_TS_LTZ", "2024-01-15T13:45:30+00:00", OK, expected_value=datetime.datetime(2024, 1, 15, 13, 45, 30), ), Case( "tsltz_epoch", "COL_TS_LTZ", "1970-01-01T00:00:00+00:00", OK, expected_value=datetime.datetime(1970, 1, 1, 0, 0, 0), ), Case("tsltz_bad", "COL_TS_LTZ", "not_a_timestamp", ERR), # ---- TIMESTAMP_TZ ---- Case("tstz_offset", "COL_TS_TZ", "2024-01-15T13:45:30+05:00", OK), Case("tstz_utc", "COL_TS_TZ", "1970-01-01T00:00:00+00:00", OK), Case("tstz_bad", "COL_TS_TZ", "not_a_timestamp", ERR), # ---- VARIANT (accepts any JSON type including primitives) ---- Case("var_obj", "COL_VARIANT", {"key": "value", "number": 42}, OK), Case("var_arr", "COL_VARIANT", [1, 2, 3], OK), Case("var_nested", "COL_VARIANT", {"nested": [True, False, None]}, OK), Case("var_int", "COL_VARIANT", 42, OK), Case("var_float", "COL_VARIANT", 3.14, OK), Case("var_bool", "COL_VARIANT", True, OK), # Bare string (not valid JSON) → DLQ on v3/v4-compat; v4-ht ingests it # as a string VARIANT value (server-side accepts non-JSON scalars). Case("var_str", "COL_VARIANT", "hello", ERR, group="variant_bare_str"), # String containing valid JSON — probes SSv1/SSv2 parse divergence. # v3/v4-compat: SSv1/RowValidator parses string into native object {"a":1}. # v4-ht: SSv2 SDK stores the string as a JSON-quoted literal '"{\\"a\\":1}"'. Case( "var_json_str", "COL_VARIANT", '{"a":1}', OK, expected_value={"a": 1}, group="variant_json_str", ), # JSON scalar strings to VARIANT — exercises the String→native re-parse path # for primitives (number, boolean, null). All are valid JSON. 
Case( "var_json_num", "COL_VARIANT", "42", OK, expected_value=42, group="variant_json_str", ), Case( "var_json_bool", "COL_VARIANT", "true", OK, expected_value=True, group="variant_json_str", ), Case( "var_json_arr", "COL_VARIANT", "[1,2]", OK, expected_value=[1, 2], group="variant_json_str", ), # ---- OBJECT ---- Case("obj_simple", "COL_OBJECT", {"key": "value"}, OK), Case("obj_nested", "COL_OBJECT", {"nested": {"a": 1, "b": 2}}, OK), Case("obj_with_arr", "COL_OBJECT", {"array_val": [1, 2, 3]}, OK), # JSON string that parses to an object Case("obj_str_json", "COL_OBJECT", '{"key":"value"}', OK), # Invalid JSON string → OBJECT: rejected in all modes Case("obj_bad_str", "COL_OBJECT", "not_json", ERR), # Valid JSON but not an object (array) → OBJECT: rejected in all modes Case("obj_str_arr", "COL_OBJECT", "[1,2,3]", ERR), # ---- ARRAY ---- Case("arr_strings", "COL_ARRAY", ["a", "b", "c"], OK), Case("arr_numbers", "COL_ARRAY", [1, 2, 3], OK), Case("arr_objects", "COL_ARRAY", [{"key": "value"}, {"key": "value2"}], OK), # Invalid JSON string → ARRAY: v3/v4-compat reject (DLQ); v4-ht wraps as ["not_json"]. Case("arr_bad_str", "COL_ARRAY", "not_json", ERR, group="array_json_str"), # JSON string sent to ARRAY: v3 (SSv1) parses it into [1,2,3], # v4 (SSv2) stores it as literal string element ["[1,2,3]"]. Case("arr_str_json", "COL_ARRAY", "[1,2,3]", OK, group="array_json_str"), # Non-array JSON string: validateAndParseArray wraps into single-element array. Case( "arr_str_scalar", "COL_ARRAY", "42", OK, expected_value=[42], group="array_json_str", ), # ---- NULL handling (one per supported type) ---- # KNOWN DIVERGENCE for VARIANT: v4-compat stores JSON null as string 'null' # while v3 stores SQL NULL. 
Case("null_number", "COL_NUMBER", None, OK, group="null"), Case("null_float", "COL_FLOAT", None, OK, group="null"), Case("null_varchar", "COL_VARCHAR", None, OK, group="null"), Case("null_boolean", "COL_BOOLEAN", None, OK, group="null"), Case("null_date", "COL_DATE", None, OK, group="null"), Case("null_time", "COL_TIME", None, OK, group="null"), Case("null_ts_ntz", "COL_TS_NTZ", None, OK, group="null"), Case("null_ts_ltz", "COL_TS_LTZ", None, OK, group="null"), Case("null_ts_tz", "COL_TS_TZ", None, OK, group="null"), Case("null_variant", "COL_VARIANT", None, OK, group="null"), Case("null_object", "COL_OBJECT", None, OK, group="null"), Case("null_array", "COL_ARRAY", None, OK, group="null"), # ---- Cross-type mismatch ---- Case("xtype_str_num_1", "COL_NUMBER", "hello", ERR, group="xtype"), Case("xtype_str_num_2", "COL_NUMBER", "world", ERR, group="xtype"), Case("xtype_num_bool_1", "COL_BOOLEAN", 42, ERR, group="xtype"), Case("xtype_num_bool_2", "COL_BOOLEAN", -1, ERR, group="xtype"), Case("xtype_num_bool_3", "COL_BOOLEAN", 999, ERR, group="xtype"), # Object coerced to JSON string in VARCHAR — accepted by all modes Case( "xtype_obj_str", "COL_VARCHAR", {"key": "value"}, OK, expected_value='{"key":"value"}', group="xtype", ), # List coerced to JSON string in VARCHAR — same as Map (xtype_obj_str) Case( "xtype_list_str", "COL_VARCHAR", [1, 2, 3], OK, expected_value="[1,2,3]", group="xtype", ), # Map serialized to JSON exceeds VARCHAR(10) limit → rejected Case("xtype_map_vc10", "COL_VARCHAR10", {"key": "value"}, ERR, group="xtype"), Case("xtype_arr_num", "COL_NUMBER", [1, 2, 3], ERR, group="xtype"), ] # Groups that have their own dedicated test functions. # Per-column tests (test_number, test_float, ...) exclude these. 
_SPECIAL_GROUPS = { "float_special", "bool_coercion", "ts_epoch", "xtype", "null", "variant_bare_str", "variant_json_str", "array_json_str", } # --------------------------------------------------------------------------- # Divergence logging — grep for "DIVERGENCE" to find all behavioral diffs # --------------------------------------------------------------------------- _DIVERGENCE_PREFIX = "DIVERGENCE" def _log_divergence(mode, case_name, description): """Log a known behavioral divergence from v3 reference. All divergences use the same prefix so they can be found with: grep DIVERGENCE """ logger.warning("%s [%s] %s: %s", _DIVERGENCE_PREFIX, mode, case_name, description) # --------------------------------------------------------------------------- # Helper: assert all cases for a column, dispatching on expect # --------------------------------------------------------------------------- def _assert_all(results, cases): """Assert every case in the list using v3 reference expectations.""" for c in cases: if c.expect == "ingested": results.assert_ingested(c) else: results.assert_error(c) # --------------------------------------------------------------------------- # Numeric data types # --------------------------------------------------------------------------- def test_number(results): """NUMBER(38,0): integers land, non-numeric values → DLQ/dropped.""" _assert_all(results, cases_where(col="COL_NUMBER", exclude_groups=_SPECIAL_GROUPS)) def test_number_with_scale(results): """NUMBER(10,2): decimal values + non-numeric string to DLQ.""" _assert_all( results, cases_where(col="COL_NUMSCALE", exclude_groups=_SPECIAL_GROUPS) ) def test_float(results): """FLOAT: standard floating-point values + non-numeric → DLQ/dropped.""" _assert_all(results, cases_where(col="COL_FLOAT", exclude_groups=_SPECIAL_GROUPS)) def test_float_special(results): """FLOAT special values: NaN, +Infinity, -Infinity. JSON RFC 8259 does not define NaN/Infinity literals. 
We send them as string representations which is how DataValidationUtil.validateAndParseReal handles them (via Double.parseDouble). """ _assert_all(results, cases_where(col="COL_FLOAT", group="float_special")) # --------------------------------------------------------------------------- # String & binary data types # --------------------------------------------------------------------------- def test_varchar(results): """VARCHAR: variable-length character strings.""" _assert_all(results, cases_where(col="COL_VARCHAR", exclude_groups=_SPECIAL_GROUPS)) def test_varchar_length_limit(results): """VARCHAR(10): strings at and exceeding declared length limit. Snowflake silently truncates or the connector rejects overlength strings. This probes whether v3 and v4 handle the constraint identically. """ _assert_all( results, cases_where(col="COL_VARCHAR10", exclude_groups=_SPECIAL_GROUPS) ) def test_binary(results): """BINARY: hex-encoded binary data. v3 and v4-compat both correctly decode hex strings to bytes. v4-compat is fixed by SNOW-3256183: client-side RowValidator converts hex → byte[] before handing the row to the Ingest SDK, matching SSv1 behavior. KNOWN DIVERGENCE for v4-ht : server-side validation passes hex strings directly to the SSv2 SDK, which interprets them as base64 when ENABLE_SSV2_DEFAULT_BINARY_FORMAT_BASE64 is set, producing garbled bytes. """ cases = cases_where(col="COL_BINARY", exclude_groups=_SPECIAL_GROUPS) if results.mode in ("v3", "v4-compat"): _assert_all(results, cases) return # v4-ht: log divergence details for diagnostics, then xfail. 
for c in cases: if c.name in results.rows: actual = results.rows[c.name].get(c.col) if c.expected_value is not UNSET and actual != c.expected_value: _log_divergence( results.mode, c.name, f"ingested with wrong value: {actual!r} (expected {c.expected_value!r})", ) elif c.expected_value is UNSET: _log_divergence( results.mode, c.name, f"ingested (v3 also ingests, value={actual!r})", ) else: in_dlq = c.name in results.dlq_ids _log_divergence( results.mode, c.name, f"rejected (v3 ingests); in_dlq={in_dlq}", ) try: _assert_all(results, cases) except AssertionError as e: pytest.xfail(f"v4-ht SSv2 binary handling diverges from v3: {e}") # --------------------------------------------------------------------------- # Logical data type # --------------------------------------------------------------------------- def test_boolean(results): """BOOLEAN: true/false values + non-coercible objects/arrays → DLQ/dropped.""" _assert_all(results, cases_where(col="COL_BOOLEAN", exclude_groups=_SPECIAL_GROUPS)) def test_boolean_coercion(results): """BOOLEAN coercion: numeric 0/1 and string tokens. v3 and v4-compat both coerce Integer 0->False, 1->True. v4-compat fix: RowValidator now normalizes any valid input to Boolean before passing to the SSv2 SDK (which only accepts Boolean, not Integer/String). KNOWN DIVERGENCE for v4-ht: server-side validation bypasses RowValidator, so Integer 0/1 reach the SSv2 SDK directly and are silently dropped. String tokens ("true"/"false"/"yes"/"no") work on all modes. """ cases = cases_where(group="bool_coercion") numeric_cases = {c.name for c in cases if isinstance(c.value, int)} # String boolean tokens work identically on all modes — always hard assert. for c in cases: if c.name not in numeric_cases: if c.expect == "ingested": results.assert_ingested(c) else: results.assert_error(c) if results.mode in ("v3", "v4-compat"): # Both v3 (SSv1 coercion) and v4-compat (RowValidator normalization) ingest 0/1 correctly. 
for c in cases: if c.name in numeric_cases: results.assert_ingested(c) return # v4-ht: RowValidator is bypassed; SSv2 SDK silently drops Integer inputs for BOOLEAN. for c in cases: if c.name in numeric_cases: in_dlq = c.name in results.dlq_ids _log_divergence( results.mode, c.name, f"v4-ht drops numeric {c.value} for BOOLEAN (SSv2 SDK rejects Integer); in_dlq={in_dlq}", ) try: for c in cases: if c.name in numeric_cases: results.assert_ingested(c) except AssertionError as e: pytest.xfail(f"v4-ht drops numeric booleans (SSv2 SDK rejects Integer): {e}") # --------------------------------------------------------------------------- # Date & time data types # --------------------------------------------------------------------------- def test_date(results): """DATE: ISO date strings + invalid string to DLQ.""" _assert_all(results, cases_where(col="COL_DATE", exclude_groups=_SPECIAL_GROUPS)) def test_time(results): """TIME: time-of-day strings + invalid string to DLQ.""" _assert_all(results, cases_where(col="COL_TIME", exclude_groups=_SPECIAL_GROUPS)) def test_timestamp_ntz(results): """TIMESTAMP_NTZ: ISO 8601 timestamps + invalid string to DLQ.""" _assert_all(results, cases_where(col="COL_TS_NTZ", exclude_groups=_SPECIAL_GROUPS)) def test_timestamp_ntz_epoch(results): """TIMESTAMP_NTZ with integer epoch. v3: SSv1 SDK converts epoch to UTC client-side via parseInstantGuessScale. v4-compat: RowValidator normalizes Integer epoch to ISO string (same as v3). KNOWN DIVERGENCE: v4-ht bypasses RowValidator; SSv2 SDK passes raw Integer to the Snowflake backend which interprets it using the channel's default timezone (America/Los_Angeles) instead of UTC, producing a -8h shifted timestamp. """ [case] = cases_where(group="ts_epoch") if results.mode in ("v3", "v4-compat"): results.assert_ingested(case) return # v4-ht: log and xfail on expected timezone shift. 
if case.name in results.rows: actual = results.rows[case.name].get(case.col) expected = ( case.expected_value if not isinstance(case.expected_value, type(UNSET)) else case.value ) if actual != expected: _log_divergence( results.mode, case.name, f"epoch timestamp shifted: got {actual!r}, v3 expects {expected!r}", ) else: in_dlq = case.name in results.dlq_ids _log_divergence( results.mode, case.name, f"v4-ht rejects Long for TIMESTAMP_NTZ; in_dlq={in_dlq}", ) try: results.assert_ingested(case) except AssertionError as e: pytest.xfail(f"v4-ht: SSv2 backend uses channel TZ for integer epoch: {e}") def test_timestamp_ltz(results): """TIMESTAMP_LTZ: timestamps with explicit UTC offset + invalid to DLQ.""" _assert_all(results, cases_where(col="COL_TS_LTZ", exclude_groups=_SPECIAL_GROUPS)) def test_timestamp_tz(results): """TIMESTAMP_TZ: timestamps with explicit timezone + invalid to DLQ.""" _assert_all(results, cases_where(col="COL_TS_TZ", exclude_groups=_SPECIAL_GROUPS)) # --------------------------------------------------------------------------- # Semi-structured data types # --------------------------------------------------------------------------- def test_variant(results): """VARIANT: any JSON type including primitives, objects, arrays. Includes a string containing valid JSON ('{\"a\":1}') to probe the known SSv1/SSv2 divergence: SSv1 parses JSON-like strings into native JSON objects in VARIANT columns, while SSv2 may store them as string literals. 
""" _assert_all(results, cases_where(col="COL_VARIANT", exclude_groups=_SPECIAL_GROUPS)) def test_object(results): """OBJECT: JSON object values, including from-string JSON.""" _assert_all(results, cases_where(col="COL_OBJECT", exclude_groups=_SPECIAL_GROUPS)) def test_array(results): """ARRAY: JSON array values, including from-string JSON.""" _assert_all(results, cases_where(col="COL_ARRAY", exclude_groups=_SPECIAL_GROUPS)) def test_variant_bare_string(results): """Bare string to VARIANT: DLQ on v3/v4-compat, ingested on v4-ht. KNOWN DIVERGENCE: v3/v4-compat reject bare strings (not valid JSON) and route them to DLQ. v4-ht (server-side only) accepts them as string VARIANT values. """ [case] = cases_where(group="variant_bare_str") if results.mode == "v4-ht": # Server-side accepts bare strings in VARIANT — row is ingested. # Snowflake stores VARIANT strings as JSON-quoted: "hello" → '"hello"' assert case.name in results.rows, ( f"[{case.name}] expected v4-ht to ingest bare string to VARIANT" ) actual = results.rows[case.name].get(case.col) expected_json = json.dumps(case.value) # "hello" → '"hello"' assert actual == expected_json, ( f"[{case.name}] value mismatch: {actual!r} != {expected_json!r}" ) _log_divergence( results.mode, case.name, "bare string ingested as VARIANT (v3 DLQ's it)" ) else: results.assert_error(case) def test_variant_json_string(results): """JSON string sent to VARIANT: v3/v4-compat parse to native object, v4-ht stores as string. Covers JSON object strings ('{"a":1}'), scalar strings ('42', 'true'), and JSON array strings ('[1,2]') sent as String values to a VARIANT column. v3 and v4-compat: RowValidator normalizes the String to a native Java object (Map, List, Integer, Boolean) so the SSv2 SDK stores it correctly. KNOWN DIVERGENCE for v4-ht: server-side validation bypasses RowValidator; the SSv2 SDK receives the raw String and stores it as a JSON-quoted string literal. 
""" cases = cases_where(group="variant_json_str") if results.mode in ("v3", "v4-compat"): for c in cases: results.assert_ingested(c) return # v4-ht: row is ingested but stored as a JSON-quoted string, not as a native object. divergences = [] for c in cases: assert c.name in results.rows, ( f"[{c.name}] expected row in table on {results.mode}" ) try: results.assert_ingested(c) except AssertionError: actual = results.rows[c.name].get(c.col) _log_divergence( results.mode, c.name, f"JSON string stored as quoted literal {actual!r} (v3 stores as {c.expected_value!r})", ) divergences.append(c.name) if divergences: pytest.xfail( f"v4-ht stores JSON strings as quoted literals in VARIANT: {divergences}" ) def test_array_json_string(results): """String values sent to ARRAY: v3/v4-compat parse or reject, v4-ht wraps as literal element. Covers: - JSON array strings ('[1,2,3]') — v3/v4-compat parse to proper array - Non-array JSON scalars ('42') — v3/v4-compat wrap as single-element array - Invalid JSON strings ('not_json') — v3/v4-compat reject (DLQ) v3 and v4-compat: RowValidator normalizes String to a List so the SSv2 SDK stores it as a proper array. Non-array scalars are wrapped into a single-element array (e.g. '42' → [42]). Invalid JSON is rejected. KNOWN DIVERGENCE for v4-ht: server-side validation bypasses RowValidator; the SSv2 SDK wraps ANY String as a single-element array, including invalid JSON and valid JSON alike. 
""" cases = cases_where(group="array_json_str") if results.mode in ("v3", "v4-compat"): for c in cases: if c.expect == "ingested": results.assert_ingested(c) else: results.assert_error(c) return # v4-ht: SSv2 wraps all strings as single-element arrays (no rejection) divergences = [] for c in cases: if c.expect == "error": # v3/v4-compat reject this, but v4-ht ingests it as [""] if c.name in results.rows: actual = results.rows[c.name].get(c.col) parsed = json.loads(actual) if isinstance(actual, str) else actual _log_divergence( results.mode, c.name, f"v4-ht ingested (v3 rejects): stored as {parsed!r}", ) divergences.append(c.name) else: # Also rejected on v4-ht — no divergence pass else: assert c.name in results.rows, ( f"[{c.name}] expected row in table on {results.mode}" ) try: results.assert_ingested(c) except AssertionError: actual = results.rows[c.name].get(c.col) parsed = json.loads(actual) if isinstance(actual, str) else actual _log_divergence( results.mode, c.name, f"JSON string stored as literal array element {parsed!r} (v3 stores {c.expected_value or c.value!r})", ) divergences.append(c.name) if divergences: pytest.xfail(f"v4-ht array string handling diverges from v3: {divergences}") # --------------------------------------------------------------------------- # NULL handling # --------------------------------------------------------------------------- @pytest.mark.parametrize( "col", [ "COL_NUMBER", "COL_FLOAT", "COL_VARCHAR", "COL_BOOLEAN", "COL_DATE", "COL_TIME", "COL_TS_NTZ", "COL_TS_LTZ", "COL_TS_TZ", "COL_VARIANT", "COL_OBJECT", "COL_ARRAY", ], ) def test_null(results, col): """NULL in every supported column type — must be stored as SQL NULL. KNOWN DIVERGENCE: v4 stores JSON null in VARIANT as string 'null' instead of SQL NULL. 
""" c = next(c for c in CASES if c.col == col and c.group == "null") assert c.name in results.rows, ( f"[{c.name}] expected in table but not found (mode={results.mode})" ) actual = results.rows[c.name].get(c.col) # KNOWN DIVERGENCE: v4 stores VARIANT null as string 'null' if actual is not None and results.mode != "v3" and col == "COL_VARIANT": _log_divergence(results.mode, c.name, f"expected SQL NULL, got {actual!r}") pytest.xfail(f"v4 stores VARIANT null as {actual!r} instead of SQL NULL") assert actual is None, ( f"[{c.name}] expected NULL, got {actual!r} (mode={results.mode})" ) # --------------------------------------------------------------------------- # Cross-type mismatch — DLQ behavior # --------------------------------------------------------------------------- def test_cross_type_mismatch(results): """Values sent to incompatible column types — expected DLQ/drop.""" if results.mode == "v3": _assert_all(results, cases_where(group="xtype")) return # v4: track divergences from v3 reference behavior. 
divergences = [] for c in cases_where(group="xtype"): in_table = c.name in results.rows in_dlq = c.name in results.dlq_ids if c.expect == "ingested": if in_table: results.assert_ingested(c) else: _log_divergence( results.mode, c.name, f"rejected (v3 ingests via coercion); in_dlq={in_dlq}", ) divergences.append(c.name) else: if in_table: actual = results.rows[c.name].get(c.col) _log_divergence( results.mode, c.name, f"ingested (v3 rejects): value={actual!r}", ) divergences.append(c.name) elif results.mode != "v4-ht" and not in_dlq: _log_divergence( results.mode, c.name, "rejected without DLQ (silently dropped)", ) divergences.append(c.name) else: results.assert_error(c) if divergences: pytest.xfail(f"v4 cross-type handling diverges from v3 on: {divergences}") # --------------------------------------------------------------------------- # Error table accounting (v4-ht only) # --------------------------------------------------------------------------- def test_error_table_accounting(results): """v4-ht: verify error table captured rejected records.""" if results.mode != "v4-ht": pytest.skip("Error table only applicable to v4-ht mode") expected_errors = sum( 1 for c in CASES if c.expect == "error" and c.name not in results.rows ) assert len(results.error_table_rows) >= expected_errors, ( f"Expected at least {expected_errors} error table rows for v4-ht but found " f"{len(results.error_table_rows)} — errors may be silently dropped" ) for row in results.error_table_rows: assert row.get("ERROR_CODE") is not None, ( f"Error table row missing ERROR_CODE: {row}" ) ================================================ FILE: test/tests/compatibility/test_type_compatibility_avro.py ================================================ """Avro type compatibility tests across v4-compat and v4-ht ingestion modes. 
Verifies that Avro-typed values (int, long, float, double, string, boolean, bytes, date logical, timestamp-millis logical, array, map) are correctly ingested into pre-created Snowflake typed columns via the AvroConverter pipeline. Also tests Avro-specific cross-type mismatches (bytes->VARCHAR, float NaN->NUMBER, etc.) that cannot be exercised through JSON. v3 is excluded: Schema Registry classloader conflict prevents v3 from running Avro tests (see E2E_TEST_PLAN.md Section 3.1.2). """ import datetime import json import logging import math import time import pytest from confluent_kafka import avro from lib.config_migration import V4_CONFIG_TEMPLATE from lib.driver import quote_name from .conftest import Case, Results logger = logging.getLogger(__name__) pytestmark = [pytest.mark.confluent_only, pytest.mark.compatibility] # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- OK = "ingested" ERR = "error" # Avro schema: single record with nullable unions for each typed column, # plus XTYPE_* columns whose Avro types intentionally mismatch the Snowflake # column types (for cross-type error testing). 
def _build_value_schema():
    """Assemble and parse the Avro value schema used for every test record.

    ``ID`` and ``TEST_CASE`` are required strings; every other field is a
    ``["null", T]`` union defaulting to null.
    """

    def optional(name, avro_type):
        # Field spec for a nullable column; key order matches the literal form.
        return {"name": name, "type": ["null", avro_type], "default": None}

    def string_map():
        return {"type": "map", "values": "string"}

    def string_array():
        return {"type": "array", "items": "string"}

    fields = [
        {"name": "ID", "type": "string"},
        {"name": "TEST_CASE", "type": "string"},
        # Positive: Avro type matches Snowflake column type
        optional("COL_INT", "int"),
        optional("COL_BIGINT", "long"),
        optional("COL_FLOAT", "float"),
        optional("COL_DOUBLE", "double"),
        optional("COL_VARCHAR", "string"),
        optional("COL_BOOLEAN", "boolean"),
        optional("COL_BINARY", "bytes"),
        optional("COL_DATE", {"type": "int", "logicalType": "date"}),
        optional("COL_TS_NTZ", {"type": "long", "logicalType": "timestamp-millis"}),
        optional("COL_ARRAY", string_array()),
        optional("COL_VARIANT", string_map()),
        # Cross-type: Avro type intentionally mismatches Snowflake column
        optional("XTYPE_BYTES_TO_VARCHAR", "bytes"),
        optional("XTYPE_BYTES_TO_NUM", "bytes"),
        optional("XTYPE_FLOAT_NAN_TO_NUM", "float"),
        optional("XTYPE_FLOAT_INF_TO_NUM", "float"),
        optional("XTYPE_MAP_TO_BOOL", string_map()),
        optional("XTYPE_ARR_TO_BOOL", string_array()),
    ]
    record = {
        "type": "record",
        "name": "TypeTestRecord",
        "namespace": "com.snowflake.kafka.test",
        "fields": fields,
    }
    return avro.loads(json.dumps(record))


VALUE_SCHEMA = _build_value_schema()

# Snowflake table DDL. Positive columns match Avro types; XTYPE_* columns
# have intentionally mismatched types.
COLUMNS = { "ID": "VARCHAR NOT NULL", "TEST_CASE": "VARCHAR", # Positive "COL_INT": "NUMBER", "COL_BIGINT": "NUMBER", "COL_FLOAT": "FLOAT", "COL_DOUBLE": "FLOAT", "COL_VARCHAR": "VARCHAR", "COL_BOOLEAN": "BOOLEAN", "COL_BINARY": "BINARY", "COL_DATE": "DATE", "COL_TS_NTZ": "TIMESTAMP_NTZ", "COL_ARRAY": "ARRAY", "COL_VARIANT": "VARIANT", # Cross-type mismatch targets "XTYPE_BYTES_TO_VARCHAR": "VARCHAR", "XTYPE_BYTES_TO_NUM": "NUMBER", "XTYPE_FLOAT_NAN_TO_NUM": "NUMBER", "XTYPE_FLOAT_INF_TO_NUM": "NUMBER", "XTYPE_MAP_TO_BOOL": "BOOLEAN", "XTYPE_ARR_TO_BOOL": "BOOLEAN", "RECORD_METADATA": "VARIANT", } # Days from epoch for known dates _DATE_2024_01_15 = (datetime.date(2024, 1, 15) - datetime.date(1970, 1, 1)).days _DATE_EPOCH = 0 # Millis from epoch for known timestamps (UTC) _TS_2024_01_15_10_00 = int( datetime.datetime(2024, 1, 15, 10, 0, 0, tzinfo=datetime.timezone.utc).timestamp() * 1000 ) _TS_EPOCH = 0 # --------------------------------------------------------------------------- # Test cases # --------------------------------------------------------------------------- CASES = [ # ---- NUMBER (Avro int, 32-bit) ---- Case("int_pos", "COL_INT", 42, OK, expected_value=42), Case("int_neg", "COL_INT", -100, OK, expected_value=-100), Case("int_zero", "COL_INT", 0, OK, expected_value=0), Case("int_max", "COL_INT", 2147483647, OK, expected_value=2147483647), # ---- NUMBER (Avro long, 64-bit) ---- Case("long_pos", "COL_BIGINT", 9999999999, OK, expected_value=9999999999), Case("long_neg", "COL_BIGINT", -9999999999, OK, expected_value=-9999999999), Case("long_zero", "COL_BIGINT", 0, OK, expected_value=0), # ---- FLOAT (Avro float, 32-bit) ---- Case("float_pos", "COL_FLOAT", 3.14, OK, approx=0.01), Case("float_neg", "COL_FLOAT", -2.72, OK, approx=0.01), Case("float_nan", "COL_FLOAT", float("nan"), OK, group="float_special"), Case("float_inf", "COL_FLOAT", float("inf"), OK, group="float_special"), Case("float_neginf", "COL_FLOAT", float("-inf"), OK, group="float_special"), # 
---- FLOAT (Avro double, 64-bit) ---- Case("dbl_pos", "COL_DOUBLE", 3.14159265358979, OK, approx=1e-6), Case("dbl_neg", "COL_DOUBLE", -2.71828182845905, OK, approx=1e-6), Case("dbl_nan", "COL_DOUBLE", float("nan"), OK, group="float_special"), Case("dbl_inf", "COL_DOUBLE", float("inf"), OK, group="float_special"), Case("dbl_neginf", "COL_DOUBLE", float("-inf"), OK, group="float_special"), # ---- VARCHAR (Avro string) ---- Case("str_normal", "COL_VARCHAR", "hello world", OK), Case("str_empty", "COL_VARCHAR", "", OK), Case("str_unicode", "COL_VARCHAR", "\u3053\u3093\u306b\u3061\u306f", OK), # ---- BOOLEAN (Avro boolean) ---- Case("bool_true", "COL_BOOLEAN", True, OK), Case("bool_false", "COL_BOOLEAN", False, OK), # ---- BINARY (Avro bytes) ---- Case("bin_normal", "COL_BINARY", b"\x01\x02\x03\x04", OK), Case("bin_empty", "COL_BINARY", b"", OK), # ---- DATE (Avro date logical type: days from epoch) ---- Case( "date_normal", "COL_DATE", _DATE_2024_01_15, OK, expected_value=datetime.date(2024, 1, 15), ), Case( "date_epoch", "COL_DATE", _DATE_EPOCH, OK, expected_value=datetime.date(1970, 1, 1), ), # ---- TIMESTAMP_NTZ (Avro timestamp-millis: millis from epoch UTC) ---- Case( "ts_normal", "COL_TS_NTZ", _TS_2024_01_15_10_00, OK, expected_value=datetime.datetime(2024, 1, 15, 10, 0, 0), ), Case( "ts_epoch", "COL_TS_NTZ", _TS_EPOCH, OK, expected_value=datetime.datetime(1970, 1, 1, 0, 0, 0), ), # ---- ARRAY (Avro array of strings) ---- Case("arr_normal", "COL_ARRAY", ["hello", "world"], OK), Case("arr_empty", "COL_ARRAY", [], OK), # ---- VARIANT (Avro map string->string) ---- Case("map_normal", "COL_VARIANT", {"key1": "value1", "key2": "value2"}, OK), Case("map_empty", "COL_VARIANT", {}, OK), # ---- NULL values ---- Case("null_int", "COL_INT", None, OK, group="null"), Case("null_varchar", "COL_VARCHAR", None, OK, group="null"), Case("null_boolean", "COL_BOOLEAN", None, OK, group="null"), Case("null_binary", "COL_BINARY", None, OK, group="null"), Case("null_date", "COL_DATE", 
None, OK, group="null"), # ---- Cross-type mismatch (Avro-specific, not covered by JSON tests) ---- # bytes→VARCHAR: v4-compat rejects (RowValidator TEXT validation rejects byte[]), # v4-ht coerces to base64 (SDK accepts byte[] for VARCHAR). Case( "xtype_bytes_varchar", "XTYPE_BYTES_TO_VARCHAR", b"\x01\x02", ERR, group="xtype_bytes_varchar", ), Case("xtype_bytes_num", "XTYPE_BYTES_TO_NUM", b"\x01\x02", ERR, group="xtype"), Case("xtype_nan_num", "XTYPE_FLOAT_NAN_TO_NUM", float("nan"), ERR, group="xtype"), Case("xtype_inf_num", "XTYPE_FLOAT_INF_TO_NUM", float("inf"), ERR, group="xtype"), Case("xtype_map_bool", "XTYPE_MAP_TO_BOOL", {"k": "v"}, ERR, group="xtype"), Case("xtype_arr_bool", "XTYPE_ARR_TO_BOOL", ["a"], ERR, group="xtype"), ] # Groups with dedicated test functions (excluded from per-column tests). _SPECIAL_GROUPS = { "float_special", "null", "xtype", "xtype_bytes_varchar", } def _cases_where(*, col=None, expect=None, group=None, exclude_groups=None): """Filter CASES by column, outcome, and/or group.""" result = CASES if col is not None: result = [c for c in result if c.col == col] if expect is not None: result = [c for c in result if c.expect == expect] if group is not None: result = [c for c in result if c.group == group] if exclude_groups is not None: result = [c for c in result if c.group not in exclude_groups] return result # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture(scope="module", params=["v4-compat", "v4-ht"]) def avro_mode(request): return request.param @pytest.fixture(scope="module") def avro_mode_salt(session_name_salt, avro_mode): suffix = {"v4-compat": "_avro", "v4-ht": "_avro_ht"}[avro_mode] return f"{session_name_salt}{suffix}" @pytest.fixture(scope="module") def avro_results(driver, avro_mode_salt, avro_mode): """Single-table batch connector for Avro type compatibility tests. 
Creates one table with all typed columns, sends every CASES entry in a single Avro batch, waits for ingested rows, queries them, and yields a frozen Results object for assertion. """ table_name = f"dt_avro{avro_mode_salt}" sf_table = table_name quoted_table = quote_name(sf_table) # Consistent timezone for timestamp tests driver.snowflake_conn.cursor().execute("ALTER SESSION SET TIMEZONE = 'UTC'") # Create table from COLUMNS spec col_defs = ", ".join(f"{name} {ddl}" for name, ddl in COLUMNS.items()) error_logging = " ERROR_LOGGING = TRUE" if avro_mode == "v4-ht" else "" driver.snowflake_conn.cursor().execute( f"CREATE OR REPLACE TABLE {quoted_table} ({col_defs}){error_logging}" ) driver.snowflake_conn.cursor().execute( f"ALTER TABLE {quoted_table} SET ENABLE_SCHEMA_EVOLUTION = TRUE" ) # Create topic driver.createTopics(table_name, partitionNum=1, replicationNum=1) # Build connector config inline config = { **V4_CONFIG_TEMPLATE, "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "snowflake.enable.schematization": "true", "errors.tolerance": "all", "errors.log.enable": "true", } match avro_mode: case "v4-compat": config["snowflake.validation"] = "client_side" case "v4-ht": config["snowflake.validation"] = "server_side" rest_request = driver.createConnector( name_salt=avro_mode_salt, unsalted_name="dt_avro", config_template=config, ) connector_name = rest_request["name"] driver.startConnectorWaitTime() # Build and send all records as Avro records = [] for case in CASES: record = { "ID": case.name, "TEST_CASE": case.name, # All nullable fields default to None "COL_INT": None, "COL_BIGINT": None, "COL_FLOAT": None, "COL_DOUBLE": None, "COL_VARCHAR": None, "COL_BOOLEAN": None, "COL_BINARY": None, "COL_DATE": None, "COL_TS_NTZ": None, "COL_ARRAY": None, "COL_VARIANT": None, 
"XTYPE_BYTES_TO_VARCHAR": None, "XTYPE_BYTES_TO_NUM": None, "XTYPE_FLOAT_NAN_TO_NUM": None, "XTYPE_FLOAT_INF_TO_NUM": None, "XTYPE_MAP_TO_BOOL": None, "XTYPE_ARR_TO_BOOL": None, } record[case.col] = case.value records.append(record) driver.sendAvroSRData(table_name, records, VALUE_SCHEMA) # Wait until row count stabilizes (same approach as JSON test). # Cannot predict exact count: error cases won't land in table. STABLE_SECS = 15 deadline = time.monotonic() + 120 last_count = 0 stable_since = None while time.monotonic() < deadline: count = driver.select_number_of_records(sf_table) or 0 if count != last_count: last_count = count stable_since = time.monotonic() elif stable_since and count > 0: if (time.monotonic() - stable_since) >= STABLE_SECS: logger.info( "Row count stabilized at %d for %ds, proceeding", count, STABLE_SECS, ) break if failed := driver.get_failed_tasks(connector_name): logger.warning( "Connector task failed: %s", failed[0].get("trace", "")[:200] ) break time.sleep(5) else: if last_count == 0: logger.warning( "Stabilization timed out with 0 rows -- connector may not be ingesting" ) # Query all rows cursor = driver.snowflake_conn.cursor() cursor.execute( f'SELECT * FROM {quoted_table} ORDER BY RECORD_METADATA:"offset"::int' ) col_names = [desc[0] for desc in cursor.description] raw_rows = cursor.fetchall() row_lookup = {} for row in raw_rows: row_dict = dict(zip(col_names, row)) row_id = row_dict.get("ID") if row_id: row_lookup[row_id] = row_dict # Query error table for v4-ht mode error_table_rows = [] if avro_mode == "v4-ht": try: et_cursor = driver.snowflake_conn.cursor() et_cursor.execute(f"SELECT * FROM ERROR_TABLE({quoted_table})") et_col_names = [desc[0] for desc in et_cursor.description] for row in et_cursor.fetchall(): error_table_rows.append(dict(zip(et_col_names, row))) et_cursor.close() except Exception as e: logger.warning("Could not query error table: %s", e) logger.info( "Avro results for mode=%s: %d rows, %d error_table, %d sent", 
avro_mode, len(row_lookup), len(error_table_rows), len(CASES), ) result = Results( rows=row_lookup, dlq_ids=frozenset(), # DLQ messages are Avro-encoded, can't parse case IDs mode=avro_mode, total_sent=len(CASES), columns=COLUMNS, error_table_rows=tuple(error_table_rows), ) try: yield result finally: driver.closeConnector(connector_name) try: driver.deleteTopic(table_name) except Exception: pass # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _assert_all(results, cases): """Assert all cases in a list, dispatching to assert_ingested or assert_error.""" for case in cases: if case.expect == OK: results.assert_ingested(case) else: _assert_error_no_dlq(results, case) def _assert_error_no_dlq(results, case): """Assert case did NOT land in table. No DLQ check (Avro-encoded DLQ).""" assert case.name not in results.rows, ( f"[{case.name}] expected NOT in table but found: " f"{results.rows[case.name].get(case.col)!r} (mode={results.mode})" ) # --------------------------------------------------------------------------- # Tests — positive # --------------------------------------------------------------------------- def test_int(avro_results): """NUMBER from Avro int: 32-bit signed integers.""" _assert_all( avro_results, _cases_where(col="COL_INT", exclude_groups=_SPECIAL_GROUPS) ) def test_long(avro_results): """NUMBER from Avro long: 64-bit signed integers.""" _assert_all( avro_results, _cases_where(col="COL_BIGINT", exclude_groups=_SPECIAL_GROUPS) ) def test_float(avro_results): """FLOAT from Avro float: 32-bit values (excluding NaN/Inf specials).""" _assert_all( avro_results, _cases_where(col="COL_FLOAT", exclude_groups=_SPECIAL_GROUPS) ) def test_double(avro_results): """FLOAT from Avro double: 64-bit values (excluding NaN/Inf specials).""" _assert_all( avro_results, _cases_where(col="COL_DOUBLE", exclude_groups=_SPECIAL_GROUPS) ) def 
test_float_special(avro_results): """FLOAT NaN/Inf from native Avro float and double values. Unlike JSON where NaN/Inf are string representations, Avro sends native IEEE 754 NaN/Inf float values through the pipeline. Custom comparison: Results._compare_float handles string NaN ("NaN") but not float NaN (which is what Avro produces). We check presence + value directly instead of using assert_ingested. """ for case in _cases_where(group="float_special"): assert case.name in avro_results.rows, ( f"[{case.name}] expected in table but not found (mode={avro_results.mode})" ) actual = avro_results.rows[case.name].get(case.col) sent = case.value if math.isnan(sent): assert actual is not None and math.isnan(float(actual)), ( f"[{case.name}] expected NaN, got {actual!r}" ) elif math.isinf(sent): actual_f = float(actual) assert math.isinf(actual_f) and (actual_f > 0) == (sent > 0), ( f"[{case.name}] expected {'Inf' if sent > 0 else '-Inf'}, got {actual!r}" ) def test_string(avro_results): """VARCHAR from Avro string.""" _assert_all( avro_results, _cases_where(col="COL_VARCHAR", exclude_groups=_SPECIAL_GROUPS) ) def test_boolean(avro_results): """BOOLEAN from Avro boolean: native true/false.""" _assert_all( avro_results, _cases_where(col="COL_BOOLEAN", exclude_groups=_SPECIAL_GROUPS) ) def test_binary(avro_results): """BINARY from Avro bytes: raw byte arrays (not hex strings like JSON).""" _assert_all( avro_results, _cases_where(col="COL_BINARY", exclude_groups=_SPECIAL_GROUPS) ) def test_date(avro_results): """DATE from Avro date logical type (days from epoch).""" _assert_all( avro_results, _cases_where(col="COL_DATE", exclude_groups=_SPECIAL_GROUPS) ) def test_timestamp_ntz(avro_results): """TIMESTAMP_NTZ from Avro timestamp-millis logical type.""" _assert_all( avro_results, _cases_where(col="COL_TS_NTZ", exclude_groups=_SPECIAL_GROUPS) ) def test_array(avro_results): """ARRAY from Avro array of strings.""" _assert_all( avro_results, _cases_where(col="COL_ARRAY", 
exclude_groups=_SPECIAL_GROUPS) ) def test_variant(avro_results): """VARIANT from Avro map (string->string).""" _assert_all( avro_results, _cases_where(col="COL_VARIANT", exclude_groups=_SPECIAL_GROUPS) ) # --------------------------------------------------------------------------- # Tests — null # --------------------------------------------------------------------------- @pytest.mark.parametrize( "col", ["COL_INT", "COL_VARCHAR", "COL_BOOLEAN", "COL_BINARY", "COL_DATE"], ) def test_null(avro_results, col): """NULL values via Avro nullable unions.""" case = next(c for c in CASES if c.col == col and c.group == "null") avro_results.assert_ingested(case) actual = avro_results.rows[case.name].get(col) assert actual is None, f"[{case.name}] expected NULL, got {actual!r}" # --------------------------------------------------------------------------- # Tests — cross-type mismatch (Avro-specific) # --------------------------------------------------------------------------- def test_cross_type_bytes_to_varchar(avro_results): """Avro bytes → VARCHAR: v4-compat rejects, v4-ht coerces to base64. RowValidator's TEXT validation rejects byte[], so v4-compat errors. The SSv2 SDK (v4-ht) accepts byte[] for VARCHAR and coerces to base64. """ case = next(c for c in CASES if c.name == "xtype_bytes_varchar") if avro_results.mode == "v4-compat": _assert_error_no_dlq(avro_results, case) else: assert case.name in avro_results.rows, ( f"[{case.name}] expected in table (v4-ht coercion) but not found" ) actual = avro_results.rows[case.name].get(case.col) assert actual == "AQI=", f"[{case.name}] expected base64 'AQI=', got {actual!r}" def test_cross_type_mismatch(avro_results): """Avro-specific cross-type errors not covered by JSON tests. These cases send Avro-typed values (bytes, native float NaN/Inf, map, array) to incompatible Snowflake column types. JSON tests can't produce these Java types (byte[], native float NaN, typed Avro map/array). 
""" for case in _cases_where(group="xtype"): _assert_error_no_dlq(avro_results, case) ================================================ FILE: test/tests/compatibility/test_unsupported_types.py ================================================ """Tests for data types unsupported (or partially supported) by the Kafka connector. These types crash streaming channels or fail in ways that can't share a batch connector with well-behaved types. Each test gets its own connector via the ``ingest_one_type_abort`` fixture (abort mode — errors.tolerance=none). The connector task fails immediately on unsupported types for v3. For v4 modes (v4-compat and v4-ht), the async SDK flush failure does not propagate back to the KC task — the task stays RUNNING and 0 rows land. Types tested: - GEOGRAPHY: GeoJSON data — not supported by Snowpipe Streaming - GEOMETRY: WKT data — not supported by Snowpipe Streaming - VECTOR: embedding arrays — v4 only, not supported by v3 classic SDK - Structured OBJECT/ARRAY: typed columns with parameters — v3 rejects, v4 accepts (known divergence) """ import pytest pytestmark = pytest.mark.compatibility def _assert_connector_error(result, ingestion_mode, type_name, expected_fragments): """Assert the connector failed with an error matching at least one expected fragment. For v4 modes (v4-compat and v4-ht), the SDK async flush failure does not propagate back to the KC task — the task stays RUNNING and 0 rows land. For v3, the channel open is rejected synchronously and the task fails. 
""" if ingestion_mode in ("v4-ht", "v4-compat"): assert len(result.values) == 0, ( f"Expected no rows for {type_name} on {ingestion_mode}, got {len(result.values)}" ) return assert result.connector_error is not None, ( f"Expected connector task failure for {type_name} on {ingestion_mode}, " f"but connector succeeded with {len(result.values)} rows" ) matched = any(f in result.connector_error for f in expected_fragments) assert matched, ( f"Connector error for {type_name} on {ingestion_mode} did not match " f"any expected pattern {expected_fragments}.\n" f"Actual error (first 500 chars): {result.connector_error[:500]}" ) # Error patterns observed in connector traces for v3: # SFException "does not support columns of type" (channel open rejected by server) # v4 modes never reach this assertion — they exit early with 0 rows. _GEO_ERROR_FRAGMENTS = [ "does not support columns of type", "TopicPartitionChannelInsertionException", "Failed to insert rows", ] # v3 structured types / unsupported column types: channel open or schema setup failure _CHANNEL_OPEN_ERROR_FRAGMENTS = [ "does not support columns of type", "Open channel request failed", "Unknown data type for column", ] # --------------------------------------------------------------------------- # Geospatial types (unsupported by Snowpipe Streaming) # --------------------------------------------------------------------------- def test_dt_geography(ingest_one_type_abort, ingestion_mode): """GEOGRAPHY: GeoJSON point data — unsupported by Snowpipe Streaming.""" result = ingest_one_type_abort( "dt_geography", "GEOGRAPHY", [ '{"type":"Point","coordinates":[-122.35,37.55]}', '{"type":"Point","coordinates":[0,0]}', ], ) _assert_connector_error(result, ingestion_mode, "GEOGRAPHY", _GEO_ERROR_FRAGMENTS) def test_dt_geometry(ingest_one_type_abort, ingestion_mode): """GEOMETRY: WKT geometry data — unsupported by Snowpipe Streaming.""" result = ingest_one_type_abort( "dt_geometry", "GEOMETRY", ["POINT(-122.35 37.55)", "POINT(0 
0)"], ) _assert_connector_error(result, ingestion_mode, "GEOMETRY", _GEO_ERROR_FRAGMENTS) # --------------------------------------------------------------------------- # VECTOR type (v4 only) # --------------------------------------------------------------------------- def test_dt_vector(ingest_one_type_abort, ingestion_mode): """VECTOR(FLOAT, 3): vector embeddings — not supported by v3 classic SDK.""" result = ingest_one_type_abort( "dt_vector", "VECTOR(FLOAT, 3)", [[1.0, 2.0, 3.0], [0.0, 0.0, 0.0], [-1.5, 2.5, -3.5]], ) if ingestion_mode == "v3": _assert_connector_error( result, ingestion_mode, "VECTOR", _CHANNEL_OPEN_ERROR_FRAGMENTS ) else: assert len(result.values) == 3, ( f"Expected 3 VECTOR rows, got {len(result.values)}; " f"error={result.connector_error}" ) # --------------------------------------------------------------------------- # Structured OBJECT / ARRAY # # KNOWN DIVERGENCE: v3 ColumnSchema rejects OBJECT/ARRAY with typed parameters, # but v4 accepts them. v4's SSv2 handles structured types natively. 
# --------------------------------------------------------------------------- def test_dt_structured_object(ingest_one_type_abort, ingestion_mode): """Structured OBJECT(name VARCHAR, age NUMBER) — rejected by v3, accepted by v4.""" result = ingest_one_type_abort( "dt_struct_obj", "OBJECT(name VARCHAR, age NUMBER)", [{"name": "Alice", "age": 30}], ) if ingestion_mode == "v3": _assert_connector_error( result, ingestion_mode, "structured OBJECT", _CHANNEL_OPEN_ERROR_FRAGMENTS ) else: # v4-compat and v4-ht accept structured OBJECT assert len(result.values) == 1, ( f"Expected 1 row for structured OBJECT on {ingestion_mode}, " f"got {len(result.values)}; error={result.connector_error}" ) def test_dt_structured_array(ingest_one_type_abort, ingestion_mode): """Structured ARRAY(NUMBER) — rejected by v3, accepted by v4.""" result = ingest_one_type_abort( "dt_struct_arr", "ARRAY(NUMBER)", [[1, 2, 3]], ) if ingestion_mode == "v3": _assert_connector_error( result, ingestion_mode, "structured ARRAY", _CHANNEL_OPEN_ERROR_FRAGMENTS ) else: # v4-compat and v4-ht accept structured ARRAY assert len(result.values) == 1, ( f"Expected 1 row for structured ARRAY on {ingestion_mode}, " f"got {len(result.values)}; error={result.connector_error}" ) ================================================ FILE: test/tests/high_performance/test_case_sensitivity.py ================================================ from dataclasses import dataclass import json from typing import Optional import pytest from snowflake.connector import DictCursor from lib.config_migration import V4_CONFIG_TEMPLATE from lib.driver import KafkaDriver from lib.fixtures.table import Table @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize( "sanitize_autogenerated_table_names", [True, False], ids=["sanitized", "unsanitized"], ) def test_high_performance_case_sensitivity_table_name( driver: KafkaDriver, create_connector, create_topics, name_salt, wait_for_rows, 
sanitize_autogenerated_table_names, ): """Assert table name derived by the connector matches our expectations.""" @dataclass(frozen=True) class TableNameCase: case_name: str # description unsalted_topic_name: str topic2table_value: Optional[str] expected_table_name: str test_cases = [ # Without topic2table.map, the connector passes the topic name as the table name. # With sanitization disabled, that name is passed as-is, i.e. not uppercased. TableNameCase( case_name="lower_a", unsalted_topic_name="a", topic2table_value=None, expected_table_name=f"A{name_salt}" if sanitize_autogenerated_table_names else f"a{name_salt}", ), TableNameCase( case_name="upper_b", unsalted_topic_name="B", topic2table_value=None, expected_table_name=f"B{name_salt}", ), TableNameCase( case_name="lower_c_mapped", unsalted_topic_name="c_topic", topic2table_value=f"c{name_salt}", expected_table_name=f"C{name_salt}", ), TableNameCase( case_name="upper_d_mapped", unsalted_topic_name="D_topic", topic2table_value=f"D{name_salt}", expected_table_name=f"D{name_salt}", ), TableNameCase( case_name="lower_e_mapped_quoted", unsalted_topic_name="e_topic", topic2table_value=f'"e{name_salt}"', expected_table_name=f"e{name_salt}", ), TableNameCase( case_name="upper_f_mapped_quoted", unsalted_topic_name="f_topic", topic2table_value=f'"F{name_salt}"', expected_table_name=f"F{name_salt}", ), TableNameCase( case_name="unicode_mapped_quoted", unsalted_topic_name="g_topic", topic2table_value=f'"❄️{name_salt}"', expected_table_name=f"❄️{name_salt}", ), ] topics = create_topics( [test_case.unsalted_topic_name for test_case in test_cases], with_tables=False ) topic2table_map = ",".join( f"{test_case.unsalted_topic_name}{name_salt}:{test_case.topic2table_value}" for test_case in test_cases if test_case.topic2table_value is not None ) connector = create_connector( v4_config={ key: value for key, value in { **V4_CONFIG_TEMPLATE, "topics": ",".join(topics), "snowflake.topic2table.map": topic2table_map, "key.converter": 
"org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", # high-performance defaults, but we also test with sanitized table names "snowflake.validation": "server_side", "snowflake.compatibility.enable.column.identifier.normalization": "false", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true" if sanitize_autogenerated_table_names else "false", }.items() if value is not None } ) driver.startConnectorWaitTime() for test_case in test_cases: driver.sendBytesData( f"{test_case.unsalted_topic_name}{name_salt}", [json.dumps({"case_name": test_case.case_name}).encode("utf-8")], ) for test_case in test_cases: expected_table = Table(driver, test_case.expected_table_name) wait_for_rows(expected_table.name, 1, connector_name=connector.name) tables = ( driver.snowflake_conn.cursor(DictCursor).execute("show tables").fetchall() ) assert test_case.expected_table_name in [table["name"] for table in tables] # Make sure it's the correct one, i.e. has the data we sent it. assert expected_table.select_scalar("CASE_NAME") == test_case.case_name # Cleanup - first remove the connector, then the tables. 
connector.close() for test_case in test_cases: Table(driver, test_case.expected_table_name).drop() ================================================ FILE: test/tests/iceberg/__init__.py ================================================ from lib.config_migration import V4_CONFIG_TEMPLATE def json_connector_config(topic: str, schematization: bool, validation: bool) -> dict: """Build a v4 connector config for JSON ingestion into an iceberg table.""" config = { **V4_CONFIG_TEMPLATE, "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": str(schematization).lower(), "snowflake.validation": "client_side" if validation else "server_side", "topics": topic, "jmx": "true", } if schematization: # JSON field names are lowercase; Snowflake column names are uppercase. # Normalization uppercases the field names so the row validator and SSv2 # can match them to the pre-declared columns (ID, BODY_TEMPERATURE, etc.). config["snowflake.compatibility.enable.column.identifier.normalization"] = ( "true" ) return config ================================================ FILE: test/tests/iceberg/test_iceberg_avro.py ================================================ """E2E tests for Kafka Connector v4 iceberg Avro ingestion (via Schema Registry). v4-only, confluent-only (requires Schema Registry for AvroConverter). Tests the same schematization x validation matrix as the JSON iceberg tests, but uses Avro-encoded records with Schema Registry. 
""" import json import logging import pytest from confluent_kafka import avro from lib.config_migration import V4_CONFIG_TEMPLATE from lib.driver import KafkaDriver from lib.matchers import ANY_INT logger = logging.getLogger(__name__) VALUE_SCHEMA = avro.loads( """ { "type": "record", "name": "iceberg_avro_value", "fields": [ {"name": "id", "type": "int"}, {"name": "body_temperature", "type": "double"}, {"name": "name", "type": "string"} ] } """ ) KEY_SCHEMA = avro.loads( """ { "type": "record", "name": "iceberg_avro_key", "fields": [ {"name": "id", "type": "int"} ] } """ ) RECORD_COUNT = 100 def _avro_connector_config(topic: str, schematization: bool, validation: bool) -> dict: config = { **V4_CONFIG_TEMPLATE, "tasks.max": "1", "key.converter": "io.confluent.connect.avro.AvroConverter", "key.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "snowflake.enable.schematization": str(schematization).lower(), "snowflake.validation": "client_side" if validation else "server_side", "topics": topic, "jmx": "true", } if schematization: config["snowflake.compatibility.enable.column.identifier.normalization"] = ( "true" ) return config @pytest.mark.iceberg @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize( "schematization", [True, False], ids=["schema=on", "schema=off"] ) @pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"]) def test_iceberg_avro_ingestion( driver: KafkaDriver, create_iceberg_table, create_topics, create_connector, wait_for_rows, validation: bool, schematization: bool, ): """Avro SR ingestion into an iceberg table — 2x2 matrix (validation x schematization). ``schema=off`` (bag-of-bits): table has RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT. Avro fields land in RECORD_CONTENT. 
``schema=on`` (typed columns): table pre-declares ID, BODY_TEMPERATURE, NAME columns. AvroConverter provides a Kafka Connect schema so the connector maps fields to the pre-declared columns directly. """ val_tag = "compat" if validation else "ht" sch_tag = "s1" if schematization else "s0" base_name = f"iceberg_av_{val_tag}_{sch_tag}" if schematization: columns = ( "(RECORD_METADATA VARIANT, ID BIGINT, BODY_TEMPERATURE DOUBLE, NAME TEXT)" ) else: columns = "(RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT)" table = create_iceberg_table(base_name, columns=columns, cleanup_topic=False) topic = create_topics([base_name], with_tables=False)[0] create_connector( v4_config=_avro_connector_config( topic, schematization=schematization, validation=validation ) ) driver.startConnectorWaitTime() keys = [{"id": i} for i in range(RECORD_COUNT)] values = [ {"id": i, "body_temperature": 36.6, "name": "Steve"} for i in range(RECORD_COUNT) ] driver.sendAvroSRData(topic, values, VALUE_SCHEMA, keys, KEY_SCHEMA, partition=0) wait_for_rows(table.name, RECORD_COUNT) if not schematization: rows = table.select( "PARSE_JSON(RECORD_CONTENT):id::NUMBER AS ID, " "PARSE_JSON(RECORD_CONTENT):body_temperature::FLOAT AS BODY_TEMPERATURE, " "PARSE_JSON(RECORD_CONTENT):name::STRING AS NAME, " "PARSE_JSON(RECORD_METADATA):offset::NUMBER AS OFFSET, " "PARSE_JSON(RECORD_METADATA):partition::NUMBER AS PARTITION, " "PARSE_JSON(RECORD_METADATA):topic::STRING AS TOPIC", "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1", ) assert rows, "Expected at least one row" row = rows[0] assert row["ID"] == 0, f"Expected id=0, got {row['ID']}" assert abs(float(row["BODY_TEMPERATURE"]) - 36.6) < 0.01, ( f"Expected body_temperature≈36.6, got {row['BODY_TEMPERATURE']}" ) assert row["NAME"] == "Steve", f"Expected name='Steve', got {row['NAME']}" assert row["OFFSET"] == 0, f"Expected offset=0, got {row['OFFSET']}" assert row["PARTITION"] == 0, f"Expected partition=0, got {row['PARTITION']}" assert row["TOPIC"] 
== topic, f"Expected topic={topic!r}, got {row['TOPIC']!r}" else: rows = table.select( '"ID", "BODY_TEMPERATURE", "NAME", ' "PARSE_JSON(RECORD_METADATA):offset::NUMBER AS OFFSET, " "PARSE_JSON(RECORD_METADATA):partition::NUMBER AS PARTITION, " "PARSE_JSON(RECORD_METADATA):topic::STRING AS TOPIC", "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1", ) assert rows, "Expected at least one row" row = rows[0] assert row["ID"] == 0, f"Expected id=0, got {row['ID']}" assert abs(float(row["BODY_TEMPERATURE"]) - 36.6) < 0.01, ( f"Expected body_temperature≈36.6, got {row['BODY_TEMPERATURE']}" ) assert row["NAME"] == "Steve", f"Expected name='Steve', got {row['NAME']}" assert row["OFFSET"] == 0, f"Expected offset=0, got {row['OFFSET']}" assert row["PARTITION"] == 0, f"Expected partition=0, got {row['PARTITION']}" assert row["TOPIC"] == topic, f"Expected topic={topic!r}, got {row['TOPIC']!r}" # Verify RECORD_METADATA contains key (Avro key schema → key field in metadata) meta_rows = table.select( "PARSE_JSON(RECORD_METADATA) AS META", "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1", ) metadata = json.loads(meta_rows[0]["META"]) assert metadata["offset"] == 0 assert metadata["partition"] == 0 assert metadata["topic"] == topic assert metadata["SnowflakeConnectorPushTime"] == ANY_INT ================================================ FILE: test/tests/iceberg/test_iceberg_json.py ================================================ """E2E tests for Kafka Connector v4 iceberg JSON ingestion. These tests are v4-only. 
V3 is excluded because: - v3 requires ``snowflake.streaming.iceberg.enabled=true`` in the connector config which the config migration does not add (v3 iceberg was experimental) - v3 had custom iceberg code (IcebergInitService, IcebergTableStreamingRecordMapper) that has been removed in v4 - v4 uses SSv2 which handles iceberg tables transparently Prerequisites: - An AWS external volume named ``ICEBERG_EXTERNAL_VOLUME`` must exist in the test Snowflake account. The default is ``kafka_push_e2e_volume_aws``. Override with the environment variable ``ICEBERG_EXTERNAL_VOLUME``. """ import json import logging import pytest from lib.driver import KafkaDriver from tests.iceberg import json_connector_config logger = logging.getLogger(__name__) _SAMPLE_MESSAGE = { "id": 1, "body_temperature": 36.6, "name": "Steve", "approved_coffee_types": ["Espresso", "Doppio", "Ristretto", "Lungo"], "animals_possessed": {"dogs": True, "cats": False}, } RECORD_COUNT = 100 @pytest.mark.iceberg @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize( "schematization", [True, False], ids=["schema=on", "schema=off"] ) @pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"]) def test_iceberg_json_ingestion( driver: KafkaDriver, create_iceberg_table, create_topics, create_connector, wait_for_rows, validation: bool, schematization: bool, ): """JSON ingestion into an iceberg table — full 2x2 matrix (validation x schematization). Matrix axes: - validation (compat=true / ht=false): controls whether the client-side RowValidator runs. - schematization (on/off): controls how the connector maps records to columns. ``schema=off`` (bag-of-bits): table has ``RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT``. Full JSON payload goes into RECORD_CONTENT. Assertions use ``PARSE_JSON(RECORD_CONTENT):field::TYPE`` because iceberg stores VARIANT as a string-encoded JSON literal. 
``schema=on`` (typed columns): table pre-declares all columns from the sample message — scalar fields as typed (ID NUMBER, BODY_TEMPERATURE FLOAT, NAME STRING) and complex fields as VARIANT (APPROVED_COFFEE_TYPES, ANIMALS_POSSESSED). RECORD_METADATA remains VARIANT. No schema evolution is needed. Typed columns are accessed directly; VARIANT columns still need PARSE_JSON(). """ val_tag = "compat" if validation else "ht" sch_tag = "s1" if schematization else "s0" base_name = f"iceberg_jv_{val_tag}_{sch_tag}" if schematization: columns = ( "(RECORD_METADATA VARIANT, " "ID BIGINT, " "BODY_TEMPERATURE DOUBLE, " "NAME TEXT, " "APPROVED_COFFEE_TYPES VARIANT, " "ANIMALS_POSSESSED VARIANT)" ) else: columns = "(RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT)" table = create_iceberg_table(base_name, columns=columns, cleanup_topic=False) topic = create_topics([base_name], with_tables=False)[0] create_connector( v4_config=json_connector_config( topic, schematization=schematization, validation=validation ) ) driver.startConnectorWaitTime() records = [json.dumps(_SAMPLE_MESSAGE).encode("utf-8") for _ in range(RECORD_COUNT)] driver.sendBytesData(topic, records, partition=0) wait_for_rows(table.name, RECORD_COUNT) if not schematization: rows = table.select( "PARSE_JSON(RECORD_CONTENT):id::NUMBER AS ID, " "PARSE_JSON(RECORD_CONTENT):body_temperature::FLOAT AS BODY_TEMPERATURE, " "PARSE_JSON(RECORD_CONTENT):name::STRING AS NAME, " "PARSE_JSON(RECORD_METADATA):offset::NUMBER AS OFFSET, " "PARSE_JSON(RECORD_METADATA):partition::NUMBER AS PARTITION, " "PARSE_JSON(RECORD_METADATA):topic::STRING AS TOPIC, " "PARSE_JSON(RECORD_METADATA):SnowflakeConnectorPushTime::STRING AS PUSH_TIME", "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1", ) assert rows, "Expected at least one row in the iceberg table" row = rows[0] assert row["ID"] == 1, f"Expected id=1, got {row['ID']}" assert abs(float(row["BODY_TEMPERATURE"]) - 36.6) < 0.01, ( f"Expected body_temperature≈36.6, got 
{row['BODY_TEMPERATURE']}" ) assert row["NAME"] == "Steve", f"Expected name='Steve', got {row['NAME']}" assert row["OFFSET"] == 0, f"Expected offset=0, got {row['OFFSET']}" assert row["PARTITION"] == 0, f"Expected partition=0, got {row['PARTITION']}" assert row["TOPIC"] == topic, f"Expected topic={topic!r}, got {row['TOPIC']!r}" assert row["PUSH_TIME"] is not None, ( "Expected SnowflakeConnectorPushTime to be set" ) else: rows = table.select( '"ID", "BODY_TEMPERATURE", "NAME", ' "PARSE_JSON(RECORD_METADATA):offset::NUMBER AS OFFSET, " "PARSE_JSON(RECORD_METADATA):partition::NUMBER AS PARTITION, " "PARSE_JSON(RECORD_METADATA):topic::STRING AS TOPIC, " "PARSE_JSON(RECORD_METADATA):SnowflakeConnectorPushTime::STRING AS PUSH_TIME", "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1", ) assert rows, "Expected at least one row" row = rows[0] assert row["ID"] == 1, f"Expected id=1, got {row['ID']}" assert abs(float(row["BODY_TEMPERATURE"]) - 36.6) < 0.01, ( f"Expected body_temperature≈36.6, got {row['BODY_TEMPERATURE']}" ) assert row["NAME"] == "Steve", f"Expected name='Steve', got {row['NAME']}" assert row["OFFSET"] == 0, f"Expected offset=0, got {row['OFFSET']}" assert row["PARTITION"] == 0, f"Expected partition=0, got {row['PARTITION']}" assert row["TOPIC"] == topic, f"Expected topic={topic!r}, got {row['TOPIC']!r}" assert row["PUSH_TIME"] is not None, ( "Expected SnowflakeConnectorPushTime to be set" ) ================================================ FILE: test/tests/iceberg/test_iceberg_se_avro.py ================================================ """Iceberg schema evolution E2E tests — Avro format (via Schema Registry). Tests client-side SE with Avro-encoded records: the connector detects new columns from the Avro schema and issues ``ALTER ICEBERG TABLE ADD COLUMN``. v4-only, confluent-only. 
""" import logging import pytest from confluent_kafka import avro from lib.config_migration import V4_CONFIG_TEMPLATE from lib.driver import KafkaDriver logger = logging.getLogger(__name__) WAVE1_SCHEMA = avro.loads( """ { "type": "record", "name": "iceberg_se_avro_record", "fields": [ {"name": "CITY", "type": "string"}, {"name": "AGE", "type": "int"} ] } """ ) WAVE2_SCHEMA = avro.loads( """ { "type": "record", "name": "iceberg_se_avro_record", "fields": [ {"name": "CITY", "type": "string"}, {"name": "AGE", "type": "int"}, {"name": "COUNTRY", "type": ["null", "string"], "default": null} ] } """ ) def _avro_se_connector_config(topic: str) -> dict: return { **V4_CONFIG_TEMPLATE, "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "snowflake.enable.schematization": "true", "snowflake.validation": "client_side", "topics": topic, "jmx": "true", } @pytest.mark.iceberg @pytest.mark.schema_evolution @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_iceberg_se_avro_add_column( driver: KafkaDriver, create_iceberg_table, create_topics, create_connector, wait_for_rows, ): """Iceberg SE with Avro — connector adds columns from evolving Avro schemas. Table starts with RECORD_METADATA VARIANT + CITY TEXT. Wave 1 sends Avro records with ``{CITY, AGE}`` — connector SE adds AGE. Wave 2 uses an evolved Avro schema with ``{CITY, AGE, COUNTRY}`` — connector SE adds COUNTRY. Avro has an explicit schema so the connector knows the exact type for each new column (unlike JSON where types are inferred from values). 
""" base_name = "iceberg_se_avro" table = create_iceberg_table( base_name, columns="(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE", cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector(v4_config=_avro_se_connector_config(topic)) driver.startConnectorWaitTime() wave1_count = 100 wave1_values = [{"CITY": "Hsinchu", "AGE": i} for i in range(wave1_count)] driver.sendAvroSRData(topic, wave1_values, WAVE1_SCHEMA, partition=0) wait_for_rows(table.name, wave1_count) cols = {row[0] for row in table.schema()} assert "AGE" in cols, ( f"Expected connector SE to add AGE column after wave 1, got: {cols}" ) wave2_count = 50 wave2_values = [ {"CITY": "Taipei", "AGE": 100 + i, "COUNTRY": "TW"} for i in range(wave2_count) ] driver.sendAvroSRData(topic, wave2_values, WAVE2_SCHEMA, partition=0) wait_for_rows(table.name, wave1_count + wave2_count) cols = {row[0] for row in table.schema()} assert "COUNTRY" in cols, ( f"Expected connector SE to add COUNTRY column after wave 2, got: {cols}" ) rows = table.select('"CITY", "COUNTRY"', "WHERE \"CITY\" = 'Taipei' LIMIT 1") assert rows, "Expected at least one wave-2 row with CITY = 'Taipei'" assert rows[0]["CITY"] == "Taipei" assert rows[0]["COUNTRY"] == "TW", ( f"Expected COUNTRY='TW', got {rows[0]['COUNTRY']!r}" ) null_country_count = table.select("COUNT(*) AS CNT", 'WHERE "COUNTRY" IS NULL')[0][ "CNT" ] assert null_country_count == wave1_count, ( f"Expected {wave1_count} rows with NULL COUNTRY, got {null_country_count}" ) ================================================ FILE: test/tests/iceberg/test_iceberg_se_json.py ================================================ """Iceberg schema evolution E2E tests — JSON format. Tests client-side SE (RowValidator-driven ``ALTER ICEBERG TABLE ADD COLUMN``) and documents the server-side SE limitation (xfail). v4-only: v3 iceberg support was removed in v4. 
""" import json import logging import pytest from lib.driver import KafkaDriver from tests.iceberg import json_connector_config logger = logging.getLogger(__name__) @pytest.mark.iceberg @pytest.mark.schema_evolution @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_iceberg_se_add_column( driver: KafkaDriver, create_iceberg_table, create_topics, create_connector, wait_for_rows, ): """Iceberg schema evolution — connector adds a new column mid-stream (client-side SE). Table starts with RECORD_METADATA VARIANT + CITY TEXT. Wave 1 records carry ``{city, age}``: the connector's RowValidator detects AGE as new and issues ``ALTER ICEBERG TABLE ADD COLUMN``. Wave 2 adds ``country``: SE adds COUNTRY. Uses ``validation=true`` (compat/client-side SE) so the RowValidator drives column additions. Server-side SE (validation=false) does not support typed column additions on iceberg tables. """ base_name = "iceberg_se_addcol" table = create_iceberg_table( base_name, columns="(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE", cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector( v4_config=json_connector_config(topic, schematization=True, validation=True) ) driver.startConnectorWaitTime() wave1_count = 100 driver.sendBytesData( topic, [ json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8") for i in range(wave1_count) ], partition=0, ) wait_for_rows(table.name, wave1_count) cols = {row[0] for row in table.schema()} assert "AGE" in cols, ( f"Expected connector SE to add AGE column after wave 1, got: {cols}" ) wave2_count = 50 driver.sendBytesData( topic, [ json.dumps({"city": "Taipei", "age": 100 + i, "country": "TW"}).encode( "utf-8" ) for i in range(wave2_count) ], partition=0, ) wait_for_rows(table.name, wave1_count + wave2_count) rows = table.select('"CITY", "COUNTRY"', "WHERE \"CITY\" = 'Taipei' LIMIT 1") assert rows, "Expected at least one wave-2 row with CITY = 'Taipei'" assert 
rows[0]["CITY"] == "Taipei" assert rows[0]["COUNTRY"] == "TW", ( f"Expected COUNTRY='TW', got {rows[0]['COUNTRY']!r}" ) null_country_count = table.select("COUNT(*) AS CNT", 'WHERE "COUNTRY" IS NULL')[0][ "CNT" ] assert null_country_count == wave1_count, ( f"Expected {wave1_count} rows with NULL COUNTRY, got {null_country_count}" ) @pytest.mark.iceberg @pytest.mark.schema_evolution @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_iceberg_se_multi_wave( driver: KafkaDriver, create_iceberg_table, create_topics, create_connector, wait_for_rows, ): """Iceberg SE — connector adds two successive new columns across three waves. Waves: 1. Wave 1 (50 records): ``{city}`` — no SE needed. 2. Wave 2 (50 records): ``{city, age}`` — connector SE adds AGE. 3. Wave 3 (50 records): ``{city, age, country}`` — connector SE adds COUNTRY. After all waves: - Wave-1 rows: AGE IS NULL, COUNTRY IS NULL - Wave-2 rows: AGE set, COUNTRY IS NULL - Wave-3 rows: AGE set, COUNTRY set """ base_name = "iceberg_se_multi" table = create_iceberg_table( base_name, columns="(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE", cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector( v4_config=json_connector_config(topic, schematization=True, validation=True) ) driver.startConnectorWaitTime() wave1_count = 50 driver.sendBytesData( topic, [json.dumps({"city": "Taipei"}).encode("utf-8") for _ in range(wave1_count)], partition=0, ) wait_for_rows(table.name, wave1_count) wave2_count = 50 driver.sendBytesData( topic, [ json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8") for i in range(wave2_count) ], partition=0, ) wait_for_rows(table.name, wave1_count + wave2_count) wave3_count = 50 driver.sendBytesData( topic, [ json.dumps({"city": "Kaohsiung", "age": 200 + i, "country": "TW"}).encode( "utf-8" ) for i in range(wave3_count) ], partition=0, ) wait_for_rows(table.name, wave1_count + wave2_count + wave3_count) w1_null = 
table.select( "COUNT(*) AS CNT", 'WHERE "CITY" = \'Taipei\' AND "AGE" IS NULL AND "COUNTRY" IS NULL', )[0]["CNT"] assert w1_null == wave1_count, ( f"Expected {wave1_count} wave-1 rows with NULL AGE+COUNTRY, got {w1_null}" ) w2_rows = table.select('"AGE", "COUNTRY"', "WHERE \"CITY\" = 'Hsinchu' LIMIT 1") assert w2_rows, "Expected at least one wave-2 row" assert w2_rows[0]["AGE"] is not None, "Expected AGE set for wave-2 rows" assert w2_rows[0]["COUNTRY"] is None, ( f"Expected COUNTRY NULL for wave-2 rows, got {w2_rows[0]['COUNTRY']!r}" ) w3_rows = table.select('"AGE", "COUNTRY"', "WHERE \"CITY\" = 'Kaohsiung' LIMIT 1") assert w3_rows, "Expected at least one wave-3 row" assert w3_rows[0]["AGE"] is not None, "Expected AGE set for wave-3 rows" assert w3_rows[0]["COUNTRY"] == "TW", ( f"Expected COUNTRY='TW', got {w3_rows[0]['COUNTRY']!r}" ) @pytest.mark.iceberg @pytest.mark.schema_evolution @pytest.mark.xfail( strict=True, reason=( "Server-side SE (ENABLE_SCHEMA_EVOLUTION on the table, validation=false) " "silently discards typed (non-VARIANT) column additions on iceberg tables. " "Client-side SE (validation=true) does work after fixing the connector to " "issue ALTER ICEBERG TABLE ADD COLUMN, but this test exercises the HT path " "(validation=false) where server-side SE is the only mechanism. Remove " "this xfail once Snowflake server-side SE supports typed columns on iceberg." ), ) @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_iceberg_se_json_server_side( driver: KafkaDriver, name_salt: str, create_iceberg_table, create_topics, create_connector, wait_for_rows, ): """JSON schema evolution into an iceberg table (server-side SE, HT mode). Uses ``validation=false`` (HT mode) so client-side validation is never initialized. Records flow directly to SSv2, which relies on ``ENABLE_SCHEMA_EVOLUTION = TRUE`` for server-side column additions. Sends two waves: 1. Wave 1 (100 records): ``{city, age}`` — server-side SE adds CITY, AGE. 2. 
Wave 2 (50 records): ``{city, age, country}`` — server-side SE adds COUNTRY. """ base_name = "iceberg_se_json" table = create_iceberg_table( base_name, columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE", cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector( v4_config={ **json_connector_config(topic, schematization=True, validation=False), "errors.tolerance": "all", "errors.log.enable": "true", "errors.deadletterqueue.topic.name": f"DLQ_iceberg_se{name_salt}", "errors.deadletterqueue.topic.replication.factor": "1", } ) driver.startConnectorWaitTime() wave1_count = 100 driver.sendBytesData( topic, [ json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8") for i in range(wave1_count) ], partition=0, ) wait_for_rows(table.name, wave1_count) wave2_count = 50 driver.sendBytesData( topic, [ json.dumps({"city": "Taipei", "age": 100 + i, "country": "TW"}).encode( "utf-8" ) for i in range(wave2_count) ], partition=0, ) wait_for_rows(table.name, wave1_count + wave2_count) cols = {row[0]: row[1] for row in table.schema()} assert "CITY" in cols, f"Expected CITY column, got: {list(cols.keys())}" assert "AGE" in cols, f"Expected AGE column, got: {list(cols.keys())}" assert "COUNTRY" in cols, ( f"Expected COUNTRY column after wave 2, got: {list(cols.keys())}" ) rows = table.select('"CITY", "AGE", "COUNTRY"', "WHERE \"CITY\" = 'Taipei' LIMIT 1") assert rows, "Expected at least one wave-2 row with CITY = 'Taipei'" assert rows[0]["CITY"] == "Taipei" assert rows[0]["COUNTRY"] == "TW" null_country_count = table.select("COUNT(*) AS CNT", 'WHERE "COUNTRY" IS NULL')[0][ "CNT" ] assert null_country_count == wave1_count, ( f"Expected {wave1_count} rows with NULL COUNTRY, got {null_country_count}" ) ================================================ FILE: test/tests/pressure/test_perf_backlog_drain.py ================================================ """ P1 Backlog Drain — profiling-friendly performance test. 
Defaults: 4 partitions × 1M records × ~250 bytes = 4M rows (~1 GB). All parameters are tunable via environment variables (see below). Scenario: 1. Create a single topic with DRAIN_PARTITIONS partitions (default 4). 2. Loader phase: pre-populate the topic with DRAIN_RECORDS_PER_PARTITION records per partition. Each message is a ~250-byte JSON row. 3. KC phase: start a Snowflake Streaming connector with DRAIN_TASKS_MAX tasks (default 8) to drain the full topic from offset 0 ("cold start"). Runs for up to DRAIN_KC_TIMEOUT seconds (default 900). 4. Post-run: log final offsets, row counts, and drain time. Usage: ./run_tests.sh --platform=confluent --platform-version=7.8.0 --profile --keep \\ -- tests/pressure/test_perf_backlog_drain.py # Larger run (e.g. ~144M rows / ~38 GB): DRAIN_PARTITIONS=4 DRAIN_RECORDS_PER_PARTITION=36000000 \\ ./run_tests.sh ... -- tests/pressure/test_perf_backlog_drain.py """ import json import logging import os import time from concurrent.futures import ThreadPoolExecutor, as_completed import pytest from lib.config_migration import V4_CONFIG_TEMPLATE logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Tunables — override via environment variables for quick profiling runs # --------------------------------------------------------------------------- PARTITION_COUNT = int(os.environ.get("DRAIN_PARTITIONS", "4")) TASKS_MAX = int(os.environ.get("DRAIN_TASKS_MAX", "8")) RECORDS_PER_PARTITION = int(os.environ.get("DRAIN_RECORDS_PER_PARTITION", "1_000_000")) LOADER_THREADS = int(os.environ.get("DRAIN_LOADER_THREADS", "4")) BATCH_SIZE = int(os.environ.get("DRAIN_BATCH_SIZE", "50_000")) KC_TIMEOUT = int(os.environ.get("DRAIN_KC_TIMEOUT", "900")) ROW_SIZE_APPROX = 250 # bytes per JSON message def _make_row(partition: int, id: int) -> bytes: """Build a single JSON message (~250 bytes).""" return json.dumps( { "MESSAGE": f"p{partition}-{id}", "TIMESTAMP": int(time.time() * 1000), "ID": id, 
"PARTITION": partition, "ROW_SIZE_IN_BYTES": ROW_SIZE_APPROX, } ).encode("utf-8") def _load_partition(driver, topic: str, partition: int, total: int, batch: int): """Send `total` records to a single partition in batches.""" sent = 0 while sent < total: chunk = min(batch, total - sent) values = [_make_row(partition, sent + i) for i in range(chunk)] driver.sendBytesData(topic, values, key=None, partition=partition) sent += chunk if sent % 200_000 == 0 or sent == total: logger.info( "Loader p%d: %d / %d (%.1f%%)", partition, sent, total, 100 * sent / total, ) return sent @pytest.mark.pressure @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_perf_backlog_drain( driver, name_salt, create_topics, create_custom_connector, wait_for_rows, ): total_records = PARTITION_COUNT * RECORDS_PER_PARTITION total_bytes = total_records * ROW_SIZE_APPROX logger.info( "=== P1 Backlog Drain: %d partitions × %d records = %d total (%.1f GB) ===", PARTITION_COUNT, RECORDS_PER_PARTITION, total_records, total_bytes / 1e9, ) # ----------------------------------------------------------------------- # 1. Topic setup # ----------------------------------------------------------------------- topic_unsalted = "perf_backlog_drain" topics = create_topics( [topic_unsalted], num_partitions=PARTITION_COUNT, ) topic = topics[0] logger.info("Topic created: %s (%d partitions)", topic, PARTITION_COUNT) # ----------------------------------------------------------------------- # 2. 
Loader phase — fill the topic before starting KC # ----------------------------------------------------------------------- logger.info( "=== Loader phase: %d threads, %d records/partition, batch=%d ===", LOADER_THREADS, RECORDS_PER_PARTITION, BATCH_SIZE, ) load_start = time.time() with ThreadPoolExecutor(max_workers=LOADER_THREADS) as pool: futures = { pool.submit( _load_partition, driver, topic, p, RECORDS_PER_PARTITION, BATCH_SIZE, ): p for p in range(PARTITION_COUNT) } for fut in as_completed(futures): p = futures[fut] count = fut.result() logger.info("Partition %d loaded: %d records", p, count) load_elapsed = time.time() - load_start load_throughput = total_bytes / load_elapsed / 1e6 logger.info( "=== Loader done: %.1fs, %.1f MB/s ===", load_elapsed, load_throughput, ) # ----------------------------------------------------------------------- # 3. KC phase — connector starts cold against a full topic # ----------------------------------------------------------------------- logger.info( "=== KC phase: %d tasks, timeout=%ds ===", TASKS_MAX, KC_TIMEOUT, ) kc_start = time.time() connector = create_custom_connector( "perf_backlog_drain", { **V4_CONFIG_TEMPLATE, "tasks.max": str(TASKS_MAX), "topics": topic, "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.validation": os.environ.get("DRAIN_VALIDATION", "server_side"), "consumer.override.max.poll.interval.ms": "600000", "consumer.override.auto.offset.reset": "earliest", }, ) driver.wait_for_connector_running(connector.name, timeout=120) logger.info("Connector %s is RUNNING", connector.name) # ----------------------------------------------------------------------- # 4. 
def _send_partition(driver, topic, partition, record_count):
    """Produce ``record_count`` JSON records to one partition of ``topic``.

    Each record is a UTF-8 encoded JSON object with a single long field name
    whose value is the record's index rendered as a string. All records are
    handed to ``driver.sendBytesData`` in one call, with an empty key list.
    """
    field_name = (
        "numbernumbernumbernumbernumbernumbernumbernumbernumbernumbernumbernumber"
    )
    payloads = []
    for index in range(record_count):
        document = {field_name: str(index)}
        payloads.append(json.dumps(document).encode("utf-8"))
    driver.sendBytesData(topic, payloads, [], partition)
"test_pressure_init" topics = create_topics( [f"{test_name}_{i}" for i in range(TOPIC_COUNT)], num_partitions=PARTITION_COUNT, ) connector = create_custom_connector( test_name, { **V4_CONFIG_TEMPLATE, "tasks.max": "10", "topics.regex": f"{test_name}.*", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.validation": "server_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "false", # Increase max poll interval from 5 to 10 minutes to avoid constant rebalancing. # This is required when we have a large number of topics across tasks. # We might be able to remove this once we parallelize table and pipe metadata lookups. "consumer.override.max.poll.interval.ms": "600000", }, ) driver.startConnectorWaitTime() total = TOPIC_COUNT * PARTITION_COUNT with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor: futures = [ executor.submit(_send_partition, driver, topics[t], p, RECORD_COUNT) for t in range(TOPIC_COUNT) for p in range(PARTITION_COUNT) ] for i, future in enumerate(as_completed(futures), 1): future.result() if i % 100 == 0 or i == total: logger.info(f"Sent {i}/{total} partitions") for i, topic in enumerate(topics): table_name = topic.upper() logger.info("Verifying topic %d/%d: %s", i + 1, TOPIC_COUNT, table_name) wait_for_rows( table_name, PARTITION_COUNT * RECORD_COUNT, interval=10, timeout=1800, connector_name=connector.name, ) ================================================ FILE: test/tests/pressure/test_pressure_restart.py ================================================ import json import logging import time from concurrent.futures import ThreadPoolExecutor, as_completed import pytest from lib.config_migration import V4_CONFIG_TEMPLATE from lib.driver import KafkaDriver logger = logging.getLogger(__name__) TOPIC_COUNT 
@pytest.mark.pressure
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_pressure_restart(driver: KafkaDriver, create_topics, create_custom_connector):
    """Verify every topic reaches its expected row count while the connector is disrupted.

    Creates TOPIC_COUNT topics, starts one connector matching them by regex,
    and produces RECORD_COUNT records to every partition from a thread pool.
    It then polls each topic's table until it holds EXPECTED_PER_TOPIC rows;
    on every poll iteration one lifecycle action (restart, pause, resume,
    close, re-create, or nothing) is applied to the connector first.

    Raises:
        AssertionError: If a table has not reached EXPECTED_PER_TOPIC rows
            within 600 seconds of starting to verify that topic.
    """
    test_name = "test_pressure_restart"
    topics = create_topics(
        [f"{test_name}{i}" for i in range(TOPIC_COUNT)], num_partitions=PARTITION_COUNT
    )
    config = {
        **V4_CONFIG_TEMPLATE,
        "tasks.max": "10",
        "topics.regex": f"{test_name}.*",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.validation": "server_side",
        "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
        "snowflake.compatibility.enable.column.identifier.normalization": "false",
    }
    connector = create_custom_connector(test_name, config)
    # One producer job per (topic, partition) pair.
    total = TOPIC_COUNT * PARTITION_COUNT
    with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor:
        futures = [
            executor.submit(_send_partition, driver, topics[t], p, RECORD_COUNT)
            for t in range(TOPIC_COUNT)
            for p in range(PARTITION_COUNT)
        ]
        for i, future in enumerate(as_completed(futures), 1):
            # result() re-raises any exception raised inside the producer job.
            future.result()
            if i % 10 == 0 or i == total:
                logger.info(f"Sent {i}/{total} partitions")
    # `phase` is deliberately initialised outside the topic loop: the action
    # cycle continues across topics instead of restarting for each one.
    phase = 0
    for i, topic in enumerate(topics):
        table_name = topic.upper()
        logger.info(f"Verifying topic {i + 1}/{TOPIC_COUNT}: {table_name}")
        # Each topic gets its own 600-second verification budget.
        deadline = time.monotonic() + 600
        while True:
            # Cycle through 7 phases; phase 1 matches no case and is a no-op.
            phase = (phase + 1) % 7
            match phase:
                case 2 | 3:
                    driver.restartConnector(connector.name)
                case 4:
                    driver.pauseConnector(connector.name)
                case 5:
                    driver.resumeConnector(connector.name)
                case 6:
                    # The following phase (0) creates the connector again with
                    # the same name and config.
                    connector.close()
                case 0:
                    connector = create_custom_connector(test_name, config)
            # The row count is checked right after the action, before any sleep.
            count = driver.select_number_of_records(table_name)
            if count == EXPECTED_PER_TOPIC:
                break
            if time.monotonic() >= deadline:
                raise AssertionError(
                    f"Timed out waiting for {EXPECTED_PER_TOPIC} rows in {table_name} "
                    f"(got {count} after 600s)"
                )
            logger.info(
                f"Topic {table_name}: {count}/{EXPECTED_PER_TOPIC} rows, retrying in {driver.VERIFY_INTERVAL}s..."
            )
            time.sleep(driver.VERIFY_INTERVAL)
@pytest.mark.schema_evolution
@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_se_auto_table_creation_avro_sr(
    driver,
    connector_version,
    name_salt,
    create_connector,
    wait_for_rows,
):
    """Two Avro topics with different schemas evolve a single table.

    Auto table creation is a v4-only feature; v3 requires pre-existing tables.
    """
    prefix = f"se_auto_table_creation_avro_sr{name_salt}"
    table_name = prefix.upper()

    topics = []
    for suffix in range(2):
        topics.append(f"{prefix}{suffix}")
    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Every topic is routed into the same target table.
    topic_to_table = ",".join(f"{topic_name}:{table_name}" for topic_name in topics)
    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": ",".join(topics),
        "snowflake.topic2table.map": topic_to_table,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "io.confluent.connect.avro.AvroConverter",
        "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector_name = create_connector(v4_config=connector_config).name
    driver.startConnectorWaitTime()

    # Each topic gets a small batch followed by a larger one, using the record
    # and schema at the topic's own index.
    for position, topic_name in enumerate(topics):
        for batch_size in (INITIAL_BATCH, FLUSH_BATCH):
            batch = [RECORDS[position]] * batch_size
            driver.sendAvroSRData(
                topic_name,
                batch,
                VALUE_SCHEMAS[position],
                key=[],
                key_schema="",
                partition=0,
            )

    expected_total = RECORD_COUNT * len(topics)
    wait_for_rows(table_name, expected_total, connector_name=connector_name)

    # Map column name -> column type from the first two DESCRIBE TABLE fields.
    described = (
        driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    )
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        actual_type = cols[col_name]
        assert actual_type.startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {actual_type}"
        )
@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_auto_table_creation_json(
    driver,
    connector_version,
    name_salt,
    create_connector,
    wait_for_rows,
):
    """Two JSON topics with different payloads evolve a single table.

    Sends an initial batch and a larger batch to each topic, waits for all
    rows to arrive, then checks the table's column types against GOLD_TYPES.
    """
    prefix = f"se_auto_table_creation_json{name_salt}"
    table_name = prefix.upper()

    topics = []
    for suffix in range(2):
        topics.append(f"{prefix}{suffix}")
    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Every topic is routed into the same target table.
    topic_to_table = ",".join(f"{topic_name}:{table_name}" for topic_name in topics)
    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": ",".join(topics),
        "snowflake.topic2table.map": topic_to_table,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector_name = create_connector(v4_config=connector_config).name
    driver.startConnectorWaitTime()

    for position, topic_name in enumerate(topics):
        for batch_size in (INITIAL_BATCH, FLUSH_BATCH):
            keys = []
            values = []
            for number in range(batch_size):
                keys.append(json.dumps({"number": str(number)}).encode("utf-8"))
                values.append(json.dumps(RECORDS[position]).encode("utf-8"))
            driver.sendBytesData(topic_name, values, keys)

    expected_total = RECORD_COUNT * len(topics)
    wait_for_rows(table_name, expected_total, connector_name=connector_name)

    # Map column name -> column type from the first two DESCRIBE TABLE fields.
    described = (
        driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    )
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        actual_type = cols[col_name]
        assert actual_type.startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {actual_type}"
        )
ALTER TABLE. Restricted to v4 (auto-creation works). """ base = f"se_avro_sr{name_salt}" table_name = base.upper() topics = [f"{base}{i}" for i in range(2)] for t in topics: driver.createTopics(t, partitionNum=1, replicationNum=1) connector = create_connector( v4_config={ **V4_CONFIG_TEMPLATE, "topics": ",".join(topics), "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics), "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "io.confluent.connect.avro.AvroConverter", "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY", "value.converter.schemas.enable": "false", "errors.tolerance": "none", "errors.log.enable": "true", "snowflake.validation": "client_side", } ) connector_name = connector.name driver.startConnectorWaitTime() for i, topic in enumerate(topics): values = [RECORDS[i]] * RECORD_COUNT driver.sendAvroSRData( topic, values, VALUE_SCHEMAS[i], key=[], key_schema="", partition=0 ) wait_for_rows(table_name, RECORD_COUNT * len(topics), connector_name=connector_name) cols = { row[0]: row[1] for row in driver.snowflake_conn.cursor() .execute(f"DESCRIBE TABLE {table_name}") .fetchall() } for col_name, expected_prefix in GOLD_TYPES.items(): assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}" assert cols[col_name].startswith(expected_prefix), ( f"Column {col_name}: expected type starting with {expected_prefix}, " f"got {cols[col_name]}" ) ================================================ FILE: test/tests/schema_evolution/test_se_json_ignore_tombstone.py ================================================ """Schema evolution with tombstone filtering (behavior.on.null.values=IGNORE). Migrated from v3 ``TestSchemaEvolutionJsonIgnoreTombstone``. Two topics feed one table. Each topic sends (RECORD_COUNT - 2) real records plus a null and an empty-string tombstone. 
@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_json_ignore_tombstone(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Tombstones are ignored while schema evolution still adds all columns.

    Each topic receives RECORD_COUNT - 2 real records followed by a ``None``
    value and an empty-bytes value. With ``behavior.on.null.values=IGNORE``
    only the real records are expected to land in the table.
    """
    prefix = f"se_json_ignore_tombstone{name_salt}"
    table_name = prefix.upper()

    topics = []
    for suffix in range(2):
        topics.append(f"{prefix}{suffix}")
    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Every topic is routed into the same target table.
    topic_to_table = ",".join(f"{topic_name}:{table_name}" for topic_name in topics)
    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": ",".join(topics),
        "snowflake.topic2table.map": topic_to_table,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "behavior.on.null.values": "IGNORE",
        "snowflake.validation": "client_side",
    }
    connector_name = create_connector(v4_config=connector_config).name
    driver.startConnectorWaitTime()

    real_count = RECORD_COUNT - 2
    for position, topic_name in enumerate(topics):
        # One key per message: real_count real records plus the two tombstones.
        keys = [
            json.dumps({"number": str(number)}).encode("utf-8")
            for number in range(real_count + 2)
        ]
        values = [
            json.dumps(RECORDS[position]).encode("utf-8") for _ in range(real_count)
        ]
        # Tombstones: a null value and an empty payload.
        values.extend([None, b""])
        driver.sendBytesData(topic_name, values, keys)

    expected_rows = len(topics) * real_count
    wait_for_rows(table_name, expected_rows, connector_name=connector_name)

    # Map column name -> column type from the first two DESCRIBE TABLE fields.
    described = (
        driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    )
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        actual_type = cols[col_name]
        assert actual_type.startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {actual_type}"
        )
@pytest.mark.schema_evolution
@pytest.mark.compatibility
@pytest.mark.parametrize("connector_version", ["v3"], indirect=True)
def test_se_multi_topic_replace_table(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Replace the target table between two ingestion waves from two topics.

    CREATE OR REPLACE TABLE mid-stream invalidates v4 streaming channels, and
    the SSv2 SDK does not surface pipe invalidation through isClosed(), so
    this test is restricted to v3.
    """
    prefix = f"se_multi_topic_replace_table{name_salt}"
    table_name = prefix.upper()

    topics = []
    for suffix in range(2):
        topics.append(f"{prefix}{suffix}")
    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Every topic is routed into the same target table.
    topic_to_table = ",".join(f"{topic_name}:{table_name}" for topic_name in topics)
    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": ",".join(topics),
        "snowflake.topic2table.map": topic_to_table,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "all",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    rows_per_wave = RECORD_COUNT * len(topics)

    # Wave 1: ingest from both topics and check the evolved schema.
    _send_all(driver, topics, RECORD_COUNT)
    wait_for_rows(table_name, rows_per_wave)
    _assert_schema(driver, table_name)

    # Replace the table with one that only has RECORD_METADATA.
    replace_sql = (
        f"CREATE OR REPLACE TABLE {table_name} "
        "(RECORD_METADATA VARIANT) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )
    driver.snowflake_conn.cursor().execute(replace_sql)

    # Wave 2: after the replacement the old channels are invalidated and reopen
    # with no committed offset. Recovery falls back to the consumer group
    # offset tracked in PartitionOffsetTracker, which may lag behind the real
    # committed position (it only advances when preCommit runs). Kafka can
    # therefore replay wave-1 records into the new table, so more than
    # rows_per_wave rows must be tolerated here.
    _send_all(driver, topics, RECORD_COUNT)
    wait_for_rows(table_name, rows_per_wave, at_least=True)
    _assert_schema(driver, table_name)
@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_nonnullable_json(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Every column must be nullable after evolving a table with a NOT NULL column.

    The table is created with ``PERFORMANCE_STRING STRING NOT NULL``; the
    records sent do not contain that field but introduce new ones. After
    ingestion every column reported by DESCRIBE TABLE must be nullable and
    match the type prefixes in GOLD_TYPES.
    """
    topic = f"se_nonnullable_json{name_salt}"
    table_name = topic.upper()

    create_sql = (
        f"CREATE OR REPLACE TABLE {table_name} "
        "(RECORD_METADATA VARIANT, PERFORMANCE_STRING STRING NOT NULL) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )
    driver.snowflake_conn.cursor().execute(create_sql)
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": topic,
        "snowflake.topic2table.map": f"{topic}:{table_name}",
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector_name = create_connector(v4_config=connector_config).name
    driver.startConnectorWaitTime()

    keys = []
    values = []
    for number in range(RECORD_COUNT):
        keys.append(json.dumps({"number": str(number)}).encode("utf-8"))
        values.append(json.dumps(RECORD).encode("utf-8"))
    driver.sendBytesData(topic, values, keys)
    wait_for_rows(table_name, RECORD_COUNT, connector_name=connector_name)

    described = (
        driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    )
    cols = {}
    for row in described:
        # DESCRIBE TABLE fields used here: 0 = name, 1 = type, 3 = "null?" flag.
        col_name = row[0]
        nullable = row[3]
        cols[col_name] = row[1]
        assert nullable == "Y", (
            f"Column {col_name} should be nullable after schema evolution, "
            f"but null?={nullable}"
        )

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        actual_type = cols[col_name]
        assert actual_type.startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {actual_type}"
        )
"none", "errors.log.enable": "true", "behavior.on.null.values": "IGNORE", "transforms": "extractField", "transforms.extractField.type": "org.apache.kafka.connect.transforms.ExtractField$Value", "transforms.extractField.field": "optionalField", "snowflake.validation": "client_side", } ) connector_name = connector.name driver.startConnectorWaitTime() values = [] for idx in range(TOTAL_EVENTS): event = {"index": idx, "someKey": "someValue"} if idx % 2 == 0: event["optionalField"] = {"INDEX": idx, "FROM_OPTIONAL_FIELD": True} values.append(json.dumps(event).encode("utf-8")) driver.sendBytesData(topic, values) wait_for_rows(table_name, EXPECTED_ROWS, connector_name=connector_name) # --- Verify table schema --- desc = ( driver.snowflake_conn.cursor(DictCursor) .execute(f"DESCRIBE TABLE {table_name}") .fetchall() ) gold = { "INDEX": {"type_prefix": "NUMBER", "nullable": "N"}, "FROM_OPTIONAL_FIELD": {"type_prefix": "BOOLEAN", "nullable": "Y"}, "RECORD_METADATA": {"type_prefix": "VARIANT", "nullable": "Y"}, } col_map = {row["name"]: row for row in desc} for col_name, expected in gold.items(): assert col_name in col_map, ( f"Missing column {col_name}, got: {list(col_map.keys())}" ) assert col_map[col_name]["type"].startswith(expected["type_prefix"]), ( f"Column {col_name}: expected type starting with " f"{expected['type_prefix']}, got {col_map[col_name]['type']}" ) assert col_map[col_name]["null?"] == expected["nullable"], ( f"Column {col_name}: expected null?={expected['nullable']}, " f"got {col_map[col_name]['null?']}" ) # --- Verify data --- rows = ( driver.snowflake_conn.cursor(DictCursor) .execute( f"SELECT INDEX, FROM_OPTIONAL_FIELD, " f'RECORD_METADATA:"offset"::number AS OFFSET ' f"FROM {table_name} ORDER BY OFFSET" ) .fetchall() ) assert len(rows) == EXPECTED_ROWS expected_indices = list(range(0, TOTAL_EVENTS, 2)) for row, expected_idx in zip(rows, expected_indices): assert row["INDEX"] == expected_idx, ( f"Expected INDEX={expected_idx}, got {row['INDEX']}" ) assert 
@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_random_row_count(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Schema evolution with a randomised initial batch size.

    Each of the two topics receives a batch of ``initial_batch`` records
    (random, 1-299) followed by FLUSH_BATCH records. Once all rows have
    arrived, the table's column types are checked against GOLD_TYPES.

    The chosen ``initial_batch`` is printed so that a failing run, whose
    captured output pytest shows in the failure report, can be reproduced
    with the same batch size.
    """
    initial_batch = random.randrange(1, 300)
    record_count = initial_batch + FLUSH_BATCH
    # Without this the random input of a failed run would be unknown.
    print(
        f"test_se_random_row_count: initial_batch={initial_batch}, "
        f"record_count={record_count}"
    )

    base = f"se_random_row_count{name_salt}"
    table_name = base.upper()
    topics = [f"{base}{i}" for i in range(2)]
    for t in topics:
        driver.createTopics(t, partitionNum=1, replicationNum=1)

    connector = create_connector(
        v4_config={
            **V4_CONFIG_TEMPLATE,
            "topics": ",".join(topics),
            # Both topics feed the same table.
            "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "errors.tolerance": "none",
            "errors.log.enable": "true",
            "snowflake.validation": "client_side",
        }
    )
    connector_name = connector.name
    driver.startConnectorWaitTime()

    for i, topic in enumerate(topics):
        for batch_size in (initial_batch, FLUSH_BATCH):
            keys = [
                json.dumps({"number": str(e)}).encode("utf-8")
                for e in range(batch_size)
            ]
            values = [json.dumps(RECORDS[i]).encode("utf-8") for _ in range(batch_size)]
            driver.sendBytesData(topic, values, keys)

    wait_for_rows(table_name, record_count * len(topics), connector_name=connector_name)

    # Map column name -> column type from the first two DESCRIBE TABLE fields.
    cols = {
        row[0]: row[1]
        for row in driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    }
    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        assert cols[col_name].startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}"
        )
""" import json import pytest from lib.config_migration import V4_CONFIG_TEMPLATE RECORD_COUNT = 100 RECORD = { "PERFORMANCE_STRING": "Excellent", "PERFORMANCE_CHAR": "A", "RATING_INT": 100, } GOLD_TYPES = { "PERFORMANCE_STRING": "VARCHAR", "PERFORMANCE_CHAR": "VARCHAR", "RATING_INT": "NUMBER", "RECORD_METADATA": "VARIANT", } def _assert_schema(driver, table_name): cols = { row[0]: row[1] for row in driver.snowflake_conn.cursor() .execute(f"DESCRIBE TABLE {table_name}") .fetchall() } for col_name, expected_prefix in GOLD_TYPES.items(): assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}" assert cols[col_name].startswith(expected_prefix), ( f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}" ) def _send_records(driver, topic, count): keys = [json.dumps({"number": str(i)}).encode("utf-8") for i in range(count)] values = [json.dumps(RECORD).encode("utf-8") for _ in range(count)] driver.sendBytesData(topic, values, keys) @pytest.mark.schema_evolution @pytest.mark.compatibility @pytest.mark.parametrize("connector_version", ["v3"], indirect=True) def test_se_replace_table( driver, connector_version, name_salt, create_connector, snowflake_table, wait_for_rows, ): """CREATE OR REPLACE TABLE mid-stream invalidates v4 streaming channels. SSv2 SDK does not surface pipe invalidation through isClosed(). Restricted to v3. 
""" topic = f"se_replace_table{name_salt}" table_name = topic.upper() driver.snowflake_conn.cursor().execute( f"CREATE OR REPLACE TABLE {table_name} " f"(RECORD_METADATA VARIANT) " f"ENABLE_SCHEMA_EVOLUTION = TRUE" ) driver.createTopics(topic, partitionNum=1, replicationNum=1) create_connector( v4_config={ **V4_CONFIG_TEMPLATE, "topics": topic, "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "errors.tolerance": "all", "errors.log.enable": "true", "snowflake.validation": "client_side", } ) driver.startConnectorWaitTime() # Wave 1: ingest and verify schema evolution _send_records(driver, topic, RECORD_COUNT) wait_for_rows(table_name, RECORD_COUNT) _assert_schema(driver, table_name) # Replace the table (simulating an ops incident) driver.snowflake_conn.cursor().execute( f"CREATE OR REPLACE TABLE {table_name} " f"(RECORD_METADATA VARIANT) " f"ENABLE_SCHEMA_EVOLUTION = TRUE" ) # Wave 2: connector should re-evolve the missing columns _send_records(driver, topic, RECORD_COUNT) wait_for_rows(table_name, RECORD_COUNT) _assert_schema(driver, table_name) ================================================ FILE: test/tests/test_auto_table_creation.py ================================================ import pytest from confluent_kafka import avro from confluent_kafka.schema_registry import Schema, SchemaRegistryClient from time import sleep from lib.fixtures.table import Table FILE_NAME = "travis_correct_auto_table_creation" CONFIG_FILE = f"{FILE_NAME}.json" RECORD_COUNT = 100 VALUE_SCHEMA_STR = """ { "type":"record", "name":"value_schema", "fields":[ {"name":"id","type":"int"}, {"name":"first_name","type":"string"}, {"name":"rating","type":"float"}, {"name":"approval","type":"boolean"}, {"name":"info_map","type":{"type":"map","values":"string"}} ] } """ VALUE_SCHEMA = avro.loads(VALUE_SCHEMA_STR) GOLD_SCHEMA = { "ID": "NUMBER", 
"FIRST_NAME": "VARCHAR", "RATING": "FLOAT", "APPROVAL": "BOOLEAN", "INFO_MAP": "VARIANT", "RECORD_METADATA": "VARIANT", } RECORD = { "id": 100, "first_name": "Zekai", "rating": 0.99, "approval": True, "info_map": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"}, } @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_auto_table_creation( driver, name_salt, create_connector_from_file, wait_for_rows ): """Verify auto table creation with Avro Schema Registry. The table is NOT pre-created — the connector should auto-create it based on the registered Avro schema. Verifies column types match the expected schema. """ table = Table(driver, f"{FILE_NAME}{name_salt}".upper()) topic = f"{FILE_NAME}{name_salt}" # Register schema with Schema Registry sr_client = SchemaRegistryClient({"url": driver.schemaRegistryAddress}) sr_client.register_schema(f"{topic}-value", Schema(VALUE_SCHEMA_STR, "AVRO")) # Create Kafka topic (but NOT the Snowflake table) driver.createTopics(topic, partitionNum=1, replicationNum=1) try: create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send -- values = [RECORD for _ in range(RECORD_COUNT)] driver.sendAvroSRData(topic, values, VALUE_SCHEMA) sleep(2) # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify auto-created table schema -- col_info = table.schema() col_names = [] for col in col_info: col_names.append(col[0]) sf_type = col[1] if "(" in sf_type: sf_type = sf_type[: sf_type.find("(")] assert GOLD_SCHEMA[col[0]] == sf_type, ( f"Column {col[0]}: expected type {GOLD_SCHEMA[col[0]]}, got {sf_type}" ) for expected_col in GOLD_SCHEMA: assert expected_col in col_names, f"Missing column {expected_col}" finally: driver.deleteTopic(topic) ================================================ FILE: test/tests/test_auto_table_creation_topic2table.py ================================================ import pytest from confluent_kafka import avro from 
confluent_kafka.schema_registry import Schema, SchemaRegistryClient from time import sleep from lib.fixtures.table import Table FILE_NAME = "travis_correct_auto_table_creation_topic2table" CONFIG_FILE = f"{FILE_NAME}.json" TOPIC_COUNT = 2 RECORD_COUNT = 100 VALUE_SCHEMA_STRS = [ """ { "type":"record", "name":"value_schema_0", "fields":[ {"name":"id","type":"int"}, {"name":"approval","type":"boolean"}, {"name":"info_map","type":{"type":"map","values":"string"}} ] } """, """ { "type":"record", "name":"value_schema_1", "fields":[ {"name":"id","type":"int"}, {"name":"first_name","type":"string"}, {"name":"rating","type":"float"} ] } """, ] VALUE_SCHEMAS = [avro.loads(s) for s in VALUE_SCHEMA_STRS] GOLD_SCHEMA = { "ID": "NUMBER", "FIRST_NAME": "VARCHAR", "RATING": "FLOAT", "APPROVAL": "BOOLEAN", "INFO_MAP": "VARIANT", "RECORD_METADATA": "VARIANT", } RECORDS = [ { "id": 100, "approval": True, "info_map": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"}, }, {"id": 100, "first_name": "Zekai", "rating": 0.99}, ] @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_auto_table_creation_topic2table( driver, name_salt, create_connector_from_file, wait_for_rows ): """Verify auto table creation with two topics mapped to one table. Two Avro schemas are registered for two topics. Both topics map to the same Snowflake table via topic2table.map. The connector should auto-create the table with the union of all fields. 
""" table = Table(driver, f"{FILE_NAME}{name_salt}".upper()) topics = [f"{FILE_NAME}{name_salt}{i}" for i in range(TOPIC_COUNT)] # Register schemas and create Kafka topics sr_client = SchemaRegistryClient({"url": driver.schemaRegistryAddress}) for i, topic in enumerate(topics): sr_client.register_schema( f"{topic}-value", Schema(VALUE_SCHEMA_STRS[i], "AVRO") ) driver.createTopics(topic, partitionNum=1, replicationNum=1) try: create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send -- for i, topic in enumerate(topics): values = [RECORDS[i] for _ in range(RECORD_COUNT)] driver.sendAvroSRData(topic, values, VALUE_SCHEMAS[i]) sleep(2) # -- Verify total row count (both topics → one table) -- wait_for_rows(table.name, RECORD_COUNT * TOPIC_COUNT) # -- Verify auto-created table schema (union of both schemas) -- col_info = table.schema() col_names = [] for col in col_info: col_names.append(col[0]) sf_type = col[1] if "(" in sf_type: sf_type = sf_type[: sf_type.find("(")] assert GOLD_SCHEMA[col[0]] == sf_type, ( f"Column {col[0]}: expected type {GOLD_SCHEMA[col[0]]}, got {sf_type}" ) for expected_col in GOLD_SCHEMA: assert expected_col in col_names, f"Missing column {expected_col}" finally: for topic in topics: driver.deleteTopic(topic) ================================================ FILE: test/tests/test_avrosr_avrosr.py ================================================ import json import pytest from confluent_kafka import avro from lib.matchers import ANY_INT FILE_NAME = "travis_correct_avrosr_avrosr" CONFIG_FILE = f"{FILE_NAME}.json" RECORD_COUNT = 100 KEY_SCHEMA = avro.loads(""" { "type": "record", "name": "key_schema", "fields": [ {"name": "id", "type": "int"} ] } """) VALUE_SCHEMA = avro.loads(""" { "type": "record", "name": "value_schema", "fields": [ {"name": "id", "type": "int"}, {"name": "firstName", "type": "string"}, {"name": "time", "type": "int"}, {"name": "someFloat", "type": "float"}, {"name": "someFloatNaN", "type": "float"}, 
{"name": "someFloatPositiveInfinity", "type": "float"}, {"name": "someFloatNegativeInfinity", "type": "float"}, {"name": "someDouble", "type": "double"}, {"name": "someDoubleNaN", "type": "double"}, {"name": "someDoublePositiveInfinity", "type": "double"}, {"name": "someDoubleNegativeInfinity", "type": "double"} ] } """) @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_avrosr_avrosr( driver, name_salt, connector_version, create_connector_from_file, create_table, wait_for_rows, ): # Assertions below capture v3 reference behavior (test ported from v3). # v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict. table = create_table( FILE_NAME.upper(), columns="(record_metadata variant, id number, firstName varchar, time number, " "someFloat number, someFloatNaN varchar, " "someFloatPositiveInfinity varchar, someFloatNegativeInfinity varchar, " "someDouble number, someDoubleNaN varchar, " "someDoublePositiveInfinity varchar, someDoubleNegativeInfinity varchar)", ) topic = f"{FILE_NAME}{name_salt}" create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send -- keys = [{"id": i} for i in range(RECORD_COUNT)] values = [ { "id": i, "firstName": "abc0", "time": 1835, "someFloat": 21.37, "someFloatNaN": "NaN", "someFloatPositiveInfinity": "inf", "someFloatNegativeInfinity": "-inf", "someDouble": 15.10, "someDoubleNaN": "NaN", "someDoublePositiveInfinity": "inf", "someDoubleNegativeInfinity": "-inf", } for i in range(RECORD_COUNT) ] driver.sendAvroSRData(topic, values, VALUE_SCHEMA, keys, KEY_SCHEMA) # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify first row content -- row = table.select("*")[0] assert row["ID"] == 0 assert row["FIRSTNAME"] == "abc0" assert row["TIME"] == 1835 assert row["SOMEFLOAT"] == 21 assert row["SOMEFLOATNAN"] == "NaN" assert row["SOMEFLOATPOSITIVEINFINITY"] == "Inf" assert row["SOMEFLOATNEGATIVEINFINITY"] == "-Inf" assert 
row["SOMEDOUBLE"] == 15 assert row["SOMEDOUBLENAN"] == "NaN" assert row["SOMEDOUBLEPOSITIVEINFINITY"] == "Inf" assert row["SOMEDOUBLENEGATIVEINFINITY"] == "-Inf" record_metadata = json.loads(row["RECORD_METADATA"]) assert record_metadata == { "CreateTime": ANY_INT, "SnowflakeConnectorPushTime": ANY_INT, "key": {"id": 0}, "offset": 0, "partition": 0, "topic": topic, } ================================================ FILE: test/tests/test_channel_invalidation.py ================================================ """E2E tests for channel invalidation recovery. Uses SYSTEM$STREAMING_CHANNEL_INVALIDATE to set ERR_CHANNEL_MUST_BE_REOPENED on streaming channels and verifies the connector recovers with no data loss. Recovery mechanism: After server-side invalidation, the SDK discovers the error on the next background flush (~25s), marks the channel locally invalid, and the next appendRow() throws synchronously -> Failsafe fallback -> reopenChannel. Requires: SNOW-3291474 (system function) deployed to the test account. JIRA: SNOW-3097571 """ import logging import time import pytest import snowflake.connector from lib.config_migration import V4_CONFIG_TEMPLATE from lib.utils import RecordProducer logger = logging.getLogger(__name__) # Note on table naming: the v4 connector with no sanitization config creates tables # using the exact topic name (case-preserved). Queries use quote_name() which wraps # names in double-quotes (case-sensitive), so table_name must match topic exactly. # Do NOT use topic.upper() — that only works with sanitization enabled. 
CONNECTOR_CONFIG = { **V4_CONFIG_TEMPLATE, "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "errors.tolerance": "all", "errors.log.enable": "true", "snowflake.validation": "client_side", } def invalidate_channel(driver, credentials, table_name, topic, partition=0): """Call SYSTEM$STREAMING_CHANNEL_INVALIDATE to set ERR_CHANNEL_MUST_BE_REOPENED.""" connector_name_upper = table_name.upper() channel_name = f"{connector_name_upper}_{topic}_{partition}" pipe_fqn = f'{credentials.database}.{credentials.schema}."{table_name}-STREAMING"' logger.info(f"Invalidating channel={channel_name} on pipe={pipe_fqn}") cur = driver.snowflake_conn.cursor() try: result = cur.execute( f"SELECT SYSTEM$STREAMING_CHANNEL_INVALIDATE('{pipe_fqn}', '{channel_name}')" ).fetchone()[0] except snowflake.connector.errors.ProgrammingError as e: if e.errno == 2140 or "Unknown function" in str(e): pytest.skip( f"SYSTEM$STREAMING_CHANNEL_INVALIDATE is not available on this " f"Snowflake account — skipping channel invalidation test ({e})" ) raise logger.info(f"Invalidation result: {result}") assert "ERR_CHANNEL_MUST_BE_REOPENED" in result, f"Invalidation failed: {result}" return result def _send_to_partition(driver, topic, n, partition): """Send n JSON records to a specific partition. RecordProducer.send() hardcodes partition=0, so multi-partition tests need this helper to route records to specific partitions. 
""" import json values = [json.dumps({"number": str(i)}).encode() for i in range(n)] driver.sendBytesData(topic, values, [], partition) def _drip_feed_to_partitions(driver, topic, partitions, batch_size=10, interval=1.0): """Start a background thread that drip-feeds records round-robin across partitions.""" import json import threading stop_event = threading.Event() counter = [0] def _produce(): while not stop_event.is_set(): for p in partitions: values = [ json.dumps({"number": str(counter[0] + i)}).encode() for i in range(batch_size) ] driver.sendBytesData(topic, values, [], p) counter[0] += batch_size stop_event.wait(interval) thread = threading.Thread(target=_produce, daemon=True) thread.start() logger.info( f"Started multi-partition drip-feed to partitions {partitions} " f"(batch_size={batch_size}, interval={interval}s)" ) return stop_event, thread def _wait_for_stall(driver, table_name, rows_before, timeout=90): """Wait until ingestion stalls (row count stable for 15s). Returns stalled row count.""" stable_count = 0 last_rows = rows_before deadline = time.monotonic() + timeout while time.monotonic() < deadline: time.sleep(5) current = driver.select_number_of_records(table_name) current = int(current) if current is not None else 0 if current == last_rows: stable_count += 1 else: stable_count = 0 last_rows = current if stable_count >= 3: break logger.info(f"Ingestion stalled at {last_rows} rows (was {rows_before})") return last_rows def _get_partition_row_counts(driver, table_name): """Query per-partition row counts from record_metadata.""" rows = ( driver.snowflake_conn.cursor() .execute( f"SELECT record_metadata:partition::int AS p, count(*) AS c " f'FROM "{table_name}" GROUP BY p ORDER BY p' ) .fetchall() ) return {int(r[0]): int(r[1]) for r in rows} def _assert_task_running(driver, connector_name): """Assert all connector tasks are RUNNING.""" status = driver.get_connector_status(connector_name) assert status is not None, f"Connector {connector_name} not 
found" tasks = status.get("tasks", []) assert tasks, f"Connector {connector_name} has no tasks" for task in tasks: state = task.get("state") assert state == "RUNNING", ( f"Task {task.get('id')} is {state}, not RUNNING. " f"Trace: {task.get('trace', '')[:500]}" ) @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_invalidation_during_active_ingestion( driver, credentials, name_salt, create_connector, wait_for_rows ): """Test 1: Invalidate a channel while records are actively being ingested. Starts continuous production, waits for some rows to land, then invalidates mid-stream while records are still flowing. """ topic = f"test_invalidation_during_active_ingestion{name_salt}" table_name = topic driver.createTopics(topic, partitionNum=1, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) producer = RecordProducer(driver, topic) # Start continuous production and wait for some rows to commit producer.start_continuous(batch_size=10, interval=0.5) wait_for_rows(table_name, 50, at_least=True, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) logger.info(f"Phase 1: {rows_before} rows committed while actively ingesting") # Invalidate mid-stream — records are still flowing invalidate_channel(driver, credentials, table_name, topic, partition=0) # Wait for stall to confirm invalidation took effect stalled_rows = _wait_for_stall(driver, table_name, rows_before) # Continue drip-feeding to trigger synchronous recovery wait_for_rows( table_name, stalled_rows + 50, at_least=True, connector_name=connector.name, timeout=120, ) producer.stop_continuous() _assert_task_running(driver, connector.name) final_rows = int(driver.select_number_of_records(table_name)) logger.info(f"Final row count: {final_rows} (stalled at {stalled_rows})") assert final_rows > stalled_rows @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def 
test_invalidation_between_batches( driver, credentials, name_salt, create_connector, wait_for_rows ): """Test 2: Invalidate a channel while it is idle between batches. Verifies the connector recovers on the next batch with no data loss. """ topic = f"test_invalidation_between_batches{name_salt}" table_name = topic driver.createTopics(topic, partitionNum=1, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) producer = RecordProducer(driver, topic) # Wave 1: ingest and wait for full commit producer.send(100) wait_for_rows(table_name, 100, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) logger.info(f"Wave 1 committed ({rows_before} rows)") # Invalidate while idle invalidate_channel(driver, credentials, table_name, topic, partition=0) # Send batch to trigger flush failure, then wait for stall producer.send(100) stalled_rows = _wait_for_stall(driver, table_name, rows_before) # Drip-feed to trigger synchronous recovery producer.start_continuous(batch_size=10, interval=1.0) wait_for_rows( table_name, stalled_rows + 50, at_least=True, connector_name=connector.name, timeout=120, ) producer.stop_continuous() _assert_task_running(driver, connector.name) final_rows = int(driver.select_number_of_records(table_name)) logger.info(f"Final row count: {final_rows} (was {rows_before})") assert final_rows > rows_before @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_invalidation_all_partitions( driver, credentials, name_salt, create_connector, wait_for_rows ): """Test 3: Invalidate all channels simultaneously on a multi-partition topic. Sends records to each partition explicitly, invalidates all channels, and verifies each partition recovers. 
""" topic = f"test_invalidation_all_partitions{name_salt}" table_name = topic num_partitions = 3 records_per_partition = 100 driver.createTopics(topic, partitionNum=num_partitions, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) # Wave 1: send records to each partition explicitly for p in range(num_partitions): _send_to_partition(driver, topic, records_per_partition, partition=p) total_wave1 = num_partitions * records_per_partition wait_for_rows(table_name, total_wave1, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) # Verify records landed on all partitions partition_counts_before = _get_partition_row_counts(driver, table_name) logger.info(f"Wave 1 per-partition counts: {partition_counts_before}") for p in range(num_partitions): assert partition_counts_before.get(p, 0) > 0, ( f"Partition {p} has no records before invalidation" ) # Invalidate all partitions for p in range(num_partitions): invalidate_channel(driver, credentials, table_name, topic, partition=p) logger.info(f"All {num_partitions} channels invalidated") # Send to each partition to trigger flush failure, then wait for stall for p in range(num_partitions): _send_to_partition(driver, topic, 50, partition=p) stalled_rows = _wait_for_stall(driver, table_name, rows_before) # Drip-feed to ALL partitions to trigger recovery on each channel stop_event, thread = _drip_feed_to_partitions( driver, topic, list(range(num_partitions)) ) wait_for_rows( table_name, stalled_rows + 50, at_least=True, connector_name=connector.name, timeout=120, ) stop_event.set() thread.join(timeout=5) _assert_task_running(driver, connector.name) # Verify each partition has more rows than before partition_counts_after = _get_partition_row_counts(driver, table_name) logger.info(f"Post-recovery per-partition counts: {partition_counts_after}") for p in range(num_partitions): assert partition_counts_after.get(p, 0) > 
partition_counts_before.get(p, 0), ( f"Partition {p} did not recover: " f"before={partition_counts_before.get(p, 0)}, " f"after={partition_counts_after.get(p, 0)}" ) @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_invalidation_with_connector_restart( driver, credentials, name_salt, create_connector, wait_for_rows ): """Test 4: Invalidate a channel and then restart the connector. A restart clears the SDK state and reopens the channel fresh. The server-side error code is tied to the old client sequencer; the new channel gets a fresh sequencer, so the error doesn't apply. Recovery is implicit via the restart. """ topic = f"test_invalidation_with_connector_restart{name_salt}" table_name = topic driver.createTopics(topic, partitionNum=1, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) producer = RecordProducer(driver, topic) # Wave 1 producer.send(100) wait_for_rows(table_name, 100, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) logger.info(f"Wave 1 committed ({rows_before} rows)") # Invalidate then restart invalidate_channel(driver, credentials, table_name, topic, partition=0) driver.restartConnector(connector.name) driver.wait_for_connector_running(connector.name) logger.info("Connector restarted after invalidation") # Drip-feed after restart producer.start_continuous(batch_size=10, interval=1.0) wait_for_rows( table_name, rows_before + 100, at_least=True, connector_name=connector.name, timeout=120, ) producer.stop_continuous() _assert_task_running(driver, connector.name) final_rows = int(driver.select_number_of_records(table_name)) logger.info(f"Final row count: {final_rows} (was {rows_before})") assert final_rows > rows_before @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_invalidation_one_partition_others_healthy( driver, credentials, name_salt, create_connector, wait_for_rows ): 
"""Test 5: Invalidate one partition while others remain healthy. Sends records to each partition explicitly, invalidates only partition 1, and verifies partition 1 recovers while partitions 0 and 2 continue ingesting without interruption. """ topic = f"test_invalidation_one_partition_others_healthy{name_salt}" table_name = topic num_partitions = 3 records_per_partition = 100 driver.createTopics(topic, partitionNum=num_partitions, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) # Wave 1: send to each partition for p in range(num_partitions): _send_to_partition(driver, topic, records_per_partition, partition=p) total_wave1 = num_partitions * records_per_partition wait_for_rows(table_name, total_wave1, connector_name=connector.name) partition_counts_before = _get_partition_row_counts(driver, table_name) logger.info(f"Wave 1 per-partition counts: {partition_counts_before}") for p in range(num_partitions): assert partition_counts_before.get(p, 0) > 0 # Invalidate only partition 1 invalidate_channel(driver, credentials, table_name, topic, partition=1) logger.info("Partition 1 invalidated, partitions 0 and 2 untouched") # Send to all partitions — partitions 0,2 should ingest immediately, # partition 1 will stall then recover for p in range(num_partitions): _send_to_partition(driver, topic, 50, partition=p) rows_after_wave1 = int(driver.select_number_of_records(table_name)) # Wait for stall on partition 1, then drip-feed to ALL partitions stalled_rows = _wait_for_stall(driver, table_name, rows_after_wave1) stop_event, thread = _drip_feed_to_partitions( driver, topic, list(range(num_partitions)) ) wait_for_rows( table_name, stalled_rows + 50, at_least=True, connector_name=connector.name, timeout=120, ) stop_event.set() thread.join(timeout=5) _assert_task_running(driver, connector.name) # Verify partition 1 recovered and all partitions have more data partition_counts_after = 
_get_partition_row_counts(driver, table_name) logger.info(f"Post-recovery per-partition counts: {partition_counts_after}") # Partition 1 (invalidated) must have recovered assert partition_counts_after.get(1, 0) > partition_counts_before.get(1, 0), ( f"Partition 1 did not recover: " f"before={partition_counts_before.get(1, 0)}, " f"after={partition_counts_after.get(1, 0)}" ) # Healthy partitions should also have more rows for p in [0, 2]: assert partition_counts_after.get(p, 0) > partition_counts_before.get(p, 0), ( f"Healthy partition {p} lost data: " f"before={partition_counts_before.get(p, 0)}, " f"after={partition_counts_after.get(p, 0)}" ) @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_invalidation_offset_consistency( driver, credentials, name_salt, create_connector, wait_for_rows ): """Test 6: Verify offset consistency after invalidation recovery. Checks the full offset range (0..max_offset) has no gaps. Duplicates are OK. """ topic = f"test_invalidation_offset_consistency{name_salt}" table_name = topic driver.createTopics(topic, partitionNum=1, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) producer = RecordProducer(driver, topic) # Wave 1 producer.send(100) wait_for_rows(table_name, 100, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) logger.info(f"Wave 1 committed ({rows_before} rows)") # Invalidate, wait for stall, drip-feed recovery invalidate_channel(driver, credentials, table_name, topic, partition=0) producer.send(100) stalled_rows = _wait_for_stall(driver, table_name, rows_before) producer.start_continuous(batch_size=10, interval=1.0) wait_for_rows( table_name, stalled_rows + 50, at_least=True, connector_name=connector.name, timeout=120, ) producer.stop_continuous() _assert_task_running(driver, connector.name) # Verify offset integrity: the full range 0..max_offset must have no gaps. 
# With the recordProcessed fix (SNOW-3344243), the offset rewind replays all # records that were in-flight during the flush-failure window, so no data is lost. cur = driver.snowflake_conn.cursor() offsets = sorted( row[0] for row in cur.execute( f"SELECT DISTINCT record_metadata:offset::int AS off " f'FROM "{table_name}" ORDER BY off' ).fetchall() ) total_rows = int(driver.select_number_of_records(table_name)) distinct_offsets = len(offsets) max_offset = offsets[-1] logger.info( f"Offset check: {total_rows} total rows, {distinct_offsets} distinct offsets, " f"range [0..{max_offset}]" ) # No gaps in the full range 0..max_offset expected_offsets = set(range(max_offset + 1)) actual_offsets = set(offsets) missing = expected_offsets - actual_offsets assert not missing, ( f"Missing offsets (gaps) in range [0..{max_offset}]: " f"{sorted(missing)[:20]}{'...' if len(missing) > 20 else ''}" ) duplicates = total_rows - distinct_offsets if duplicates > 0: logger.info(f"Found {duplicates} duplicate rows (expected after recovery)") @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_invalidation_during_flush( driver, credentials, name_salt, create_connector, wait_for_rows ): """Test 7: Invalidate a channel right after data starts flowing (races with flush). Sends an initial batch, waits for the pipe to be created, then immediately sends more and invalidates — the invalidation races with the flush. 
""" topic = f"test_invalidation_during_flush{name_salt}" table_name = topic driver.createTopics(topic, partitionNum=1, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) producer = RecordProducer(driver, topic) # First batch: ensure the pipe/channel exists producer.send(100) wait_for_rows(table_name, 100, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) logger.info(f"Initial batch committed ({rows_before} rows), pipe exists") # Send second batch and immediately invalidate — races with flush producer.send(100) invalidate_channel(driver, credentials, table_name, topic, partition=0) logger.info( "Invalidated immediately after sending second batch (racing with flush)" ) # Wait for stall, then drip-feed for recovery stalled_rows = _wait_for_stall(driver, table_name, rows_before) producer.start_continuous(batch_size=10, interval=1.0) wait_for_rows( table_name, stalled_rows + 50, at_least=True, connector_name=connector.name, timeout=120, ) producer.stop_continuous() _assert_task_running(driver, connector.name) final_rows = int(driver.select_number_of_records(table_name)) logger.info(f"Final row count: {final_rows} (stalled at {stalled_rows})") assert final_rows > stalled_rows ================================================ FILE: test/tests/test_channel_invalidation_recovery.py ================================================ """E2E test: KC task should recover after channel invalidation, not die. Reproduces the bug where AppendRowWithRetryAndFallbackPolicy successfully recovers a channel after InvalidChannelError but re-throws the exception, causing the KC framework to kill the task as "unrecoverable". 
""" import logging import pytest from lib.config_migration import V4_CONFIG_TEMPLATE from lib.utils import RecordProducer from tests.test_channel_invalidation import ( _assert_task_running, _wait_for_stall, invalidate_channel, ) logger = logging.getLogger(__name__) RECORD_BATCH = 100 CONNECTOR_CONFIG = { **V4_CONFIG_TEMPLATE, "topics": "SNOWFLAKE_TEST_TOPIC", "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "errors.tolerance": "none", "errors.log.enable": "true", "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "snowflake.compatibility.enable.column.identifier.normalization": "true", } @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_channel_invalidation_recovery( driver, credentials, name_salt, create_connector, wait_for_rows, ): """Channel invalidation should not kill the KC task. Steps: 1. Start connector, produce records, verify ingestion works. 2. Invalidate channel via SYSTEM$STREAMING_CHANNEL_INVALIDATE. 3. Send records (buffered by SDK), wait for ingestion to stall — proves the SDK flush failed. 4. Drip-feed new records — appendRow throws SFException synchronously, triggering the Failsafe fallback → reopenChannel. 5. Assert: task is RUNNING, new rows arrive in Snowflake. Without the fix (PR #1401), step 4 re-throws after recovery and kills the task. 
""" topic = f"test_channel_invalidation_recovery{name_salt}" table_name = topic.upper() driver.createTopics(topic, partitionNum=1, replicationNum=1) connector = create_connector(v4_config=CONNECTOR_CONFIG) driver.wait_for_connector_running(connector.name) # -- Phase 1: Baseline ingestion -- producer = RecordProducer(driver, topic) producer.send(RECORD_BATCH) wait_for_rows(table_name, RECORD_BATCH, connector_name=connector.name) rows_before = int(driver.select_number_of_records(table_name)) logger.info(f"Phase 1: {rows_before} rows ingested") # -- Phase 2: Invalidate the channel -- invalidate_channel(driver, credentials, table_name, topic, partition=0) # -- Phase 3: Trigger flush failure and verify stall -- producer.send(RECORD_BATCH) stalled_rows = _wait_for_stall(driver, table_name, rows_before) assert stalled_rows == rows_before, ( f"Expected ingestion to stall at {rows_before} rows after invalidation, " f"but rows advanced to {stalled_rows}. " f"SYSTEM$STREAMING_CHANNEL_INVALIDATE may not have taken effect." 
) # -- Phase 4: Trigger synchronous recovery via drip-feed -- producer.start_continuous(batch_size=10, interval=1.0) wait_for_rows(table_name, rows_before + RECORD_BATCH, at_least=True, timeout=120) producer.stop_continuous() rows_after = int(driver.select_number_of_records(table_name)) _assert_task_running(driver, connector.name) assert rows_after > rows_before, ( f"No new rows after recovery (before={rows_before}, after={rows_after})" ) logger.info( f"Recovery verified: {rows_before} → {stalled_rows} (stalled) → " f"{rows_after} (recovered), task RUNNING" ) ================================================ FILE: test/tests/test_column_identifier_normalization.py ================================================ """E2E tests for column identifier normalization.""" import json import pytest from lib.config_migration import V4_CONFIG_TEMPLATE pytestmark = pytest.mark.correctness TWO_CITY_DDL = '(ID NUMBER, "city" VARCHAR, CITY VARCHAR, RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE' NORM_MATRIX = [True, False] NORM_IDS = ["norm=on", "norm=off"] @pytest.mark.parametrize("normalization", NORM_MATRIX, ids=NORM_IDS) def test_with_validation( driver, name_salt, connector_version, create_connector, create_table, wait_for_rows, normalization, ): """val=ON, schema_evo=ON. KCv3 always normalizes, so skip v3+norm=OFF. Row 3 triggers schema evolution to add age and AGE columns. 
""" if connector_version == "v3" and not normalization: pytest.skip("KCv3 always normalizes; norm=OFF is KCv4-only") tag = f"val_n{'1' if normalization else '0'}" table = create_table( f"column_identifier_normalization_{tag}".upper(), columns=TWO_CITY_DDL, ) topic = f"column_identifier_normalization_{tag}{name_salt}" dlq = f"DLQ_NORM_{name_salt}_{tag}" connector = create_connector( v4_config={ **V4_CONFIG_TEMPLATE, "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": "true", "snowflake.compatibility.enable.column.identifier.normalization": str( normalization ).lower(), "snowflake.validation": "client_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "errors.tolerance": "all", "errors.log.enable": "true", "errors.deadletterqueue.topic.name": dlq, "errors.deadletterqueue.topic.replication.factor": "1", "topics": topic, "jmx": "true", } ) driver.startConnectorWaitTime() city_key, age_key = ('"city"', '"age"') if normalization else ("city", "age") rows = [ {"ID": 0, city_key: "lower_0", "CITY": "upper_0"}, {"ID": 1, city_key: "lower_only"}, {"ID": 2, "CITY": "upper_only"}, {"ID": 3, age_key: 10, "AGE": 20}, ] driver.sendBytesData( topic, [json.dumps(r).encode("utf-8") for r in rows], partition=0 ) wait_for_rows(table.name, 4, connector_name=connector.name) row0 = table.select("*", 'WHERE "ID" = 0')[0] assert row0["city"] == "lower_0" assert row0["CITY"] == "upper_0" row1 = table.select("*", 'WHERE "ID" = 1')[0] assert row1["city"] == "lower_only" row2 = table.select("*", 'WHERE "ID" = 2')[0] assert row2["CITY"] == "upper_only" row3 = table.select("*", 'WHERE "ID" = 3')[0] assert row3["city"] is None assert row3["CITY"] is None assert row3["age"] == 10 assert row3["AGE"] == 20 cols = {row[0]: row[1] for row in table.schema()} assert "age" in cols assert "AGE" 
in cols @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize("normalization", NORM_MATRIX, ids=NORM_IDS) def test_without_validation( driver, name_salt, connector_version, create_connector, create_table, wait_for_rows, normalization, ): """val=OFF, schema_evo=ON, KCv4 only. Server-side MBCN CI fallback writes to ALL case-insensitive-matching columns. Server-side schema evolution uppercases new column names. """ tag = f"noval_n{'1' if normalization else '0'}" table = create_table( f"column_identifier_normalization_{tag}".upper(), columns=TWO_CITY_DDL, ) topic = f"column_identifier_normalization_{tag}{name_salt}" dlq = f"DLQ_NORM_{name_salt}_{tag}" connector = create_connector( v4_config={ **V4_CONFIG_TEMPLATE, "tasks.max": "1", "key.converter": "org.apache.kafka.connect.storage.StringConverter", "value.converter": "org.apache.kafka.connect.json.JsonConverter", "value.converter.schemas.enable": "false", "snowflake.enable.schematization": "true", "snowflake.compatibility.enable.column.identifier.normalization": str( normalization ).lower(), "snowflake.validation": "server_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", "errors.tolerance": "all", "errors.log.enable": "true", "errors.deadletterqueue.topic.name": dlq, "errors.deadletterqueue.topic.replication.factor": "1", "topics": topic, "jmx": "true", } ) driver.startConnectorWaitTime() city_key, age_key = ('"city"', '"age"') if normalization else ("city", "age") rows = [ {"ID": 0, city_key: "lower_0", "CITY": "upper_0"}, {"ID": 1, city_key: "lower_only"}, {"ID": 2, "CITY": "upper_only"}, {"ID": 3, age_key: 10}, ] driver.sendBytesData( topic, [json.dumps(r).encode("utf-8") for r in rows], partition=0 ) wait_for_rows(table.name, 4, connector_name=connector.name) row0 = table.select("*", 'WHERE "ID" = 0')[0] assert row0["city"] == "lower_0" assert row0["CITY"] == "upper_0" # MBCN CI fallback: single key writes to both CI-matching columns 
def _build_sensor(sensor_pb2):
    """Create a fully populated ``SensorReading`` message.

    Args:
        sensor_pb2: Module-like object exposing a ``SensorReading`` factory.

    Returns:
        The new message with every scalar, nested ``device`` and repeated
        field set; the integer fields carry the maximum value of their
        respective bit widths.
    """
    msg = sensor_pb2.SensorReading()
    msg.dateTime, msg.reading = 1234, 321.321

    device = msg.device
    device.deviceID, device.enabled = "555-4321", True

    msg.float_val = 4321.4321
    # Signed 32-bit maximum, shared by the int32 and sint32 fields.
    msg.int32_val = msg.sint32_val = 2**31 - 1
    msg.sint64_val = 2**63 - 1
    msg.uint32_val = 2**32 - 1
    msg.bytes_val = bytes([0xDE, 0xAD])
    msg.double_array_val.extend([1 / 3, 32.21, 434324321])
    msg.uint64_val = 2**64 - 1
    return msg
platform_version = request.config.getoption("--platform-version") or "" if platform_version.startswith("8."): pytest.skip("BlueApron protobuf converter incompatible with Confluent 8.x") table = create_table( FILE_NAME.upper(), columns="(record_metadata variant, record_content variant)", ) topic = f"{FILE_NAME}{name_salt}" create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send via schema-registry-backed protobuf producer -- sr_client = SchemaRegistryClient({"url": driver.schemaRegistryAddress}) key_ser = ProtobufSerializer(sensor_pb2.SensorReading, sr_client) val_ser = ProtobufSerializer(sensor_pb2.SensorReading, sr_client) producer = SerializingProducer( { "bootstrap.servers": driver.kafkaAddress, "key.serializer": key_ser, "value.serializer": val_ser, } ) sensor = _build_sensor(sensor_pb2) for _ in range(RECORD_COUNT): producer.produce(topic, sensor, sensor) producer.poll(0) producer.flush() # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify first row content -- row = table.select("record_metadata, record_content")[0] sensor_dict = { "bytes_val": "3q0=", "dateTime": 1234, "device": {"deviceID": "555-4321", "enabled": True}, "double_array_val": [0.3333333333333333, 32.21, 4.343243210000000e08], "float_val": 4321.432, "int32_val": 2147483647, "reading": 321.321, "sint32_val": 2147483647, "sint64_val": 9223372036854775807, "uint32_val": 4294967295, "uint64_val": -1, } record_metadata = json.loads(row["RECORD_METADATA"]) assert record_metadata == { "CreateTime": ANY_INT, "SnowflakeConnectorPushTime": ANY_INT, "key": sensor_dict, "offset": ANY_INT, "partition": ANY_INT, "topic": topic, } record_content = json.loads(row["RECORD_CONTENT"]) assert record_content == sensor_dict ================================================ FILE: test/tests/test_default_pipe_features.py ================================================ """E2E tests for FR7 default pipe features: identity and default columns. 
def _connector_config(topic: str, *, validation: bool) -> dict:
    """Return the v4 connector settings used by the default pipe feature tests.

    Args:
        topic: Kafka topic the connector subscribes to.
        validation: ``True`` selects ``client_side`` validation, ``False``
            selects ``server_side``.

    Returns:
        A new dict: ``V4_CONFIG_TEMPLATE`` overlaid with the test-specific
        converter, schematization, validation and topic settings.
    """
    validation_mode = "client_side" if validation else "server_side"
    config = dict(V4_CONFIG_TEMPLATE)
    config.update(
        {
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "snowflake.enable.schematization": "true",
            "snowflake.validation": validation_mode,
            "snowflake.compatibility.enable.column.identifier.normalization": "true",
            "topics": topic,
        }
    )
    return config
""" tag = "compat" if validation else "ht" base_name = f"fr7_identity_{tag}" table = create_table( base_name, columns=( "(ID NUMBER AUTOINCREMENT START 1 INCREMENT 1, " "RECORD_METADATA VARIANT, " "DATA VARCHAR)" ), cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector(v4_config=_connector_config(topic, validation=validation)) driver.startConnectorWaitTime() records = [ json.dumps({"data": f"row_{i}"}).encode("utf-8") for i in range(RECORD_COUNT) ] driver.sendBytesData(topic, records, partition=0) wait_for_rows(table.name, RECORD_COUNT) rows = table.select('"ID", "DATA"', 'ORDER BY "ID"') assert len(rows) == RECORD_COUNT ids = [row["ID"] for row in rows] assert ids == list(range(1, RECORD_COUNT + 1)), ( f"Expected sequential IDs, got {ids}" ) assert rows[0]["DATA"] == "row_0" @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"]) def test_default_timestamp_column( driver: KafkaDriver, create_table, create_topics, create_connector, wait_for_rows, validation: bool, ): """Ingest into a table with a DEFAULT CURRENT_TIMESTAMP() NOT NULL column. The record does NOT include a value for the timestamp column. The server should auto-fill the current timestamp. 
""" tag = "compat" if validation else "ht" base_name = f"fr7_defts_{tag}" table = create_table( base_name, columns=( "(RECORD_METADATA VARIANT, " "DATA VARCHAR, " "CREATED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP() NOT NULL)" ), cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector(v4_config=_connector_config(topic, validation=validation)) driver.startConnectorWaitTime() records = [ json.dumps({"data": f"row_{i}"}).encode("utf-8") for i in range(RECORD_COUNT) ] driver.sendBytesData(topic, records, partition=0) wait_for_rows(table.name, RECORD_COUNT) rows = table.select('"DATA", "CREATED_AT"', "LIMIT 1") assert rows, "Expected at least one row" assert rows[0]["CREATED_AT"] is not None, ( "CREATED_AT should be filled by server default" ) assert rows[0]["DATA"] == "row_0" @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"]) def test_default_numeric_column( driver: KafkaDriver, create_table, create_topics, create_connector, wait_for_rows, validation: bool, ): """Ingest into a table with a DEFAULT 0 NOT NULL numeric column. The record does NOT include a value for the status column. The server should auto-fill with the default value 0. 
""" tag = "compat" if validation else "ht" base_name = f"fr7_defnum_{tag}" table = create_table( base_name, columns=( "(RECORD_METADATA VARIANT, DATA VARCHAR, STATUS NUMBER DEFAULT 0 NOT NULL)" ), cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector(v4_config=_connector_config(topic, validation=validation)) driver.startConnectorWaitTime() records = [ json.dumps({"data": f"row_{i}"}).encode("utf-8") for i in range(RECORD_COUNT) ] driver.sendBytesData(topic, records, partition=0) wait_for_rows(table.name, RECORD_COUNT) rows = table.select('"DATA", "STATUS"', "LIMIT 1") assert rows, "Expected at least one row" assert rows[0]["STATUS"] == 0, ( f"STATUS should be 0 (server default), got {rows[0]['STATUS']}" ) assert rows[0]["DATA"] == "row_0" @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) @pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"]) def test_mixed_identity_and_defaults( driver: KafkaDriver, create_table, create_topics, create_connector, wait_for_rows, validation: bool, ): """Ingest into a table with identity + default + regular columns. Only the DATA column is populated by the record. 
The server fills: - ID: auto-increment - CREATED_AT: CURRENT_TIMESTAMP() - STATUS: default 1 """ tag = "compat" if validation else "ht" base_name = f"fr7_mixed_{tag}" table = create_table( base_name, columns=( "(ID NUMBER AUTOINCREMENT, " "RECORD_METADATA VARIANT, " "DATA VARCHAR, " "CREATED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP() NOT NULL, " "STATUS NUMBER DEFAULT 1 NOT NULL)" ), cleanup_topic=False, ) topic = create_topics([base_name], with_tables=False)[0] create_connector(v4_config=_connector_config(topic, validation=validation)) driver.startConnectorWaitTime() records = [ json.dumps({"data": f"row_{i}"}).encode("utf-8") for i in range(RECORD_COUNT) ] driver.sendBytesData(topic, records, partition=0) wait_for_rows(table.name, RECORD_COUNT) rows = table.select('"ID", "DATA", "CREATED_AT", "STATUS"', 'ORDER BY "ID" LIMIT 5') assert len(rows) >= 1 row = rows[0] assert row["ID"] == 1, f"Expected ID=1, got {row['ID']}" assert row["DATA"] == "row_0" assert row["CREATED_AT"] is not None, "CREATED_AT should be filled by default" assert row["STATUS"] == 1, f"STATUS should be 1 (default), got {row['STATUS']}" ================================================ FILE: test/tests/test_error_table.py ================================================ """E2E tests for Snowflake Error Table support in v4 high-throughput mode. Verifies: 1. Table WITHOUT error logging + v4-ht → connector starts, invalid data silently dropped 2. Table WITH error logging + v4-ht → connector starts, invalid data captured in error table 3. Schema mismatch (extra columns, no schema evolution) + v4-ht → rows captured in error table 4. 
def _load_base_config() -> dict:
    """Return a fresh copy of the ``config`` section of the base template.

    The template is read as UTF-8 explicitly so parsing does not depend on
    the platform's locale encoding.
    """
    template = json.loads((TEMPLATE_DIR / BASE_TEMPLATE).read_text(encoding="utf-8"))
    return dict(template["config"])


def _v4_ht_config() -> dict:
    """Build a v4-ht connector config from the base template.

    Always sets errors.tolerance=all so that channel errors (row rejections
    reported by Snowflake) are tolerated and we can observe error table
    behavior rather than task failure.
    """
    config = _load_base_config()
    config["snowflake.enable.schematization"] = "true"
    config["snowflake.validation"] = "server_side"
    config["errors.tolerance"] = "all"
    config["snowflake.streaming.validate.compatibility.with.classic"] = "false"
    return config


def _v4_compat_dlq_config(dlq_topic: str) -> dict:
    """Build a v4-compat connector config with DLQ routing.

    Args:
        dlq_topic: Name of the dead-letter-queue topic to route rejected
            records to.
    """
    config = _load_base_config()
    config["snowflake.enable.schematization"] = "true"
    config["snowflake.validation"] = "client_side"
    config["errors.tolerance"] = "all"
    config["errors.deadletterqueue.topic.name"] = dlq_topic
    config["errors.deadletterqueue.topic.replication.factor"] = "1"
    config["errors.deadletterqueue.context.headers.enable"] = "true"
    config["snowflake.streaming.validate.compatibility.with.classic"] = "false"
    return config
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_error_table_with_error_logging(
    driver: KafkaDriver,
    create_table: Callable,
    create_custom_connector: Callable,
):
    """v4-ht targeting a table WITH ERROR_LOGGING — invalid data captured in error table.

    Sends one valid and two invalid records, then checks that the connector
    task has not failed, that at least one row reached the table, and that
    the error table holds at least two rows, each with an ERROR_CODE.
    """
    table: Table = create_table(
        "et_with_logging",
        columns=(
            "(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT) ERROR_LOGGING = TRUE"
        ),
    )
    driver.createTopics(table.name, partitionNum=1, replicationNum=1)

    connector = create_custom_connector("et_with_logging", _v4_ht_config())
    driver.startConnectorWaitTime()

    records = [
        json.dumps({"ID": "valid_1", "VAL": 42}).encode(),
        json.dumps({"ID": "invalid_1", "VAL": "not_a_number"}).encode(),
        json.dumps({"ID": "invalid_2", "VAL": {"nested": True}}).encode(),
    ]
    keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))]
    driver.sendBytesData(table.name, records, keys)

    time.sleep(STABILIZATION_SLEEP)

    failed = driver.get_failed_tasks(connector.name)
    assert not failed, f"Connector task failed: {failed}"

    count = driver.select_number_of_records(table.name)
    assert count >= 1, f"Expected at least 1 row, got {count}"

    cursor = driver.snowflake_conn.cursor()
    try:
        cursor.execute(f"SELECT * FROM ERROR_TABLE({quote_name(table.name)})")
        col_names = [desc[0] for desc in cursor.description]
        error_rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query itself fails.
        cursor.close()

    logger.info("Error table rows (with logging): %d", len(error_rows))
    assert len(error_rows) >= 2, (
        f"Expected at least 2 error rows (2 invalid records sent), got {len(error_rows)}"
    )

    for row in error_rows:
        row_dict = dict(zip(col_names, row))
        logger.info("Error table entry: %s", row_dict)
        assert row_dict.get("ERROR_CODE") is not None, (
            f"Error table row missing ERROR_CODE: {row_dict}"
        )
""" table: Table = create_table( "et_schema_mismatch", columns=( "(ID VARCHAR(5) NOT NULL, VAL NUMBER NOT NULL, RECORD_METADATA VARIANT)" " ERROR_LOGGING = TRUE" ), ) driver.createTopics(table.name, partitionNum=1, replicationNum=1) connector = create_custom_connector("et_schema_mismatch", _v4_ht_config()) driver.startConnectorWaitTime() records = [ # Valid record. json.dumps({"ID": "ok", "VAL": 42}).encode(), # Value validation: ID exceeds VARCHAR(5). json.dumps({"ID": "toolong", "VAL": 10}).encode(), # Schema mismatch: VAL is NOT NULL but missing from the payload. json.dumps({"ID": "miss"}).encode(), ] keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))] driver.sendBytesData(table.name, records, keys) time.sleep(STABILIZATION_SLEEP) failed = driver.get_failed_tasks(connector.name) assert not failed, f"Connector task failed: {failed}" count = driver.select_number_of_records(table.name) assert count >= 1, f"Expected at least 1 row (valid record), got {count}" cursor = driver.snowflake_conn.cursor() cursor.execute(f"SELECT * FROM ERROR_TABLE({quote_name(table.name)})") col_names = [desc[0] for desc in cursor.description] error_rows = cursor.fetchall() logger.info("Schema mismatch error table rows: %d", len(error_rows)) assert len(error_rows) >= 2, ( f"Expected at least 2 error rows (value overflow + NOT NULL violation)," f" got {len(error_rows)}" ) for row in error_rows: row_dict = dict(zip(col_names, row)) assert row_dict.get("ERROR_CODE") is not None, ( f"Error table row missing ERROR_CODE: {row_dict}" ) @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_error_table_vs_dlq_routing( driver: KafkaDriver, create_table: Callable, create_custom_connector: Callable, ): """Same bad record routes differently depending on validation mode. v4-compat (client_side validation + DLQ): invalid records are caught client-side and routed to the DLQ topic. 
v4-ht (server_side validation + error table): invalid records pass through to Snowflake, which rejects them into the error table. """ # v4-compat table — schema mismatch caught client-side → DLQ. table_compat: Table = create_table( "et_routing_compat", columns="(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT)", ) # v4-ht table — schema mismatch caught server-side → error table. table_ht: Table = create_table( "et_routing_ht", columns="(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT) ERROR_LOGGING = TRUE", ) driver.createTopics(table_compat.name, partitionNum=1, replicationNum=1) driver.createTopics(table_ht.name, partitionNum=1, replicationNum=1) # DLQ topic for the compat connector; must exist before the connector starts. dlq_topic = f"dlq_{table_compat.name.lower()}" driver.createTopics(dlq_topic, partitionNum=1, replicationNum=1) # Each connector uses default topic→table routing (connector name = topic = table). # No topic2table.map needed. connector_compat = create_custom_connector( "et_routing_compat", _v4_compat_dlq_config(dlq_topic) ) connector_ht = create_custom_connector("et_routing_ht", _v4_ht_config()) driver.startConnectorWaitTime() # Send the same records to both topics: one valid, two invalid. records = [ json.dumps({"ID": "valid_1", "VAL": 42}).encode(), json.dumps({"ID": "invalid_1", "VAL": "not_a_number"}).encode(), json.dumps({"ID": "invalid_2", "VAL": {"nested": True}}).encode(), ] keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))] driver.sendBytesData(table_compat.name, records, keys) driver.sendBytesData(table_ht.name, records, keys) # Two connectors running simultaneously — allow extra time. time.sleep(2 * STABILIZATION_SLEEP) assert not driver.get_failed_tasks(connector_compat.name), ( "Compat connector task failed" ) assert not driver.get_failed_tasks(connector_ht.name), "HT connector task failed" # Both tables should have the valid record. 
def test_json_json(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Ingest JSON-keyed, JSON-valued records and check the first row's metadata.

    Sends RECORD_COUNT records whose key and value are both
    ``{"number": "<i>"}``, waits for them to land, then verifies the
    RECORD_METADATA of the row with the lowest offset.
    """
    target = create_table(
        FILE_NAME.upper(),
        columns='(record_metadata variant, "NUMBER" varchar)',
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    # Keys and values carry the same payload; build it once and hand out
    # two independent lists.
    payloads = []
    for index in range(RECORD_COUNT):
        payloads.append(json.dumps({"number": str(index)}).encode("utf-8"))
    keys = list(payloads)
    values = list(payloads)
    driver.sendBytesData(topic, values, keys)

    # -- Verify row count --
    wait_for_rows(target.name, RECORD_COUNT)

    # -- Verify first row content --
    raw_metadata = target.select_scalar(
        "record_metadata", "ORDER BY record_metadata:offset LIMIT 1"
    )
    expected_metadata = {
        "SnowflakeConnectorPushTime": ANY_INT,
        "key": {"number": "0"},
        "offset": 0,
        "partition": 0,
    }
    record_metadata = json.loads(raw_metadata)
    assert record_metadata == expected_metadata
def _send_batch(driver, topic, record_count):
    """Send ``record_count`` JSON records to partition 0 of ``topic``.

    Each record is ``{"column1": "<i>"}`` with no key. Pauses two seconds
    after sending.
    """
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_delete_create(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Delete and re-create a connector between two batches.

    Sequence:
      1. Send batch 1.
      2. Delete the connector and pause.
      3. Re-create the connector from the same template and wait until it
         reports running.
      4. Send batch 2.

    Both batches should appear in the table (2 × RECORD_COUNT rows).
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    connector_name = f"{FILE_NAME}{name_salt}"

    driver.createTopics(topic, partitionNum=1, replicationNum=1)
    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send 1/2, delete, create, send 2/2 --
    _send_batch(driver, topic, RECORD_COUNT)

    driver.deleteConnector(connector_name)
    sleep(SLEEP_TIME)

    driver.createConnector(
        name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
    )
    # Wait on actual connector state rather than a fixed delay, so the test
    # neither races a slow start-up nor idles after a fast one.
    driver.wait_for_connector_running(connector_name)

    _send_batch(driver, topic, RECORD_COUNT)

    # -- Verify --
    wait_for_rows(table.name, RECORD_COUNT * 2, connector_name=connector_name)
FILE_NAME.upper(), columns="(record_metadata variant, column1 varchar)", ) topic = f"{FILE_NAME}{name_salt}" connector_name = f"{FILE_NAME}{name_salt}" driver.createTopics(topic, partitionNum=1, replicationNum=1) create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send 1/3, delete (with pressure), send 2/3, create, send 3/3 -- _send_batch(driver, topic, RECORD_COUNT) driver.deleteConnector(connector_name) _send_batch(driver, topic, RECORD_COUNT) sleep(SLEEP_TIME) driver.createConnector( name_salt=name_salt, rest_request_template_filename=CONFIG_FILE ) driver.wait_for_connector_running(connector_name) _send_batch(driver, topic, RECORD_COUNT) # -- Verify -- wait_for_rows(table.name, RECORD_COUNT * 3, connector_name=connector_name) ================================================ FILE: test/tests/test_kc_delete_resume.py ================================================ import json import pytest from time import sleep FILE_NAME = "test_kc_delete_resume" CONFIG_FILE = f"{FILE_NAME}.json" RECORD_COUNT = 1000 SLEEP_TIME = 10 def _send_batch(driver, topic, record_count): values = [ json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count) ] driver.sendBytesData(topic, values, [], 0) sleep(2) @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_kc_delete_resume( driver, name_salt, create_connector_from_file, create_table, wait_for_rows ): """Verify that resuming a deleted connector is a no-op. Sequence: 1. Send batch 1 → wait for ingestion → ingested 2. Delete connector 3. Resume connector → fails silently (connector was deleted) 4. Send batch 2 → NOT ingested (no running connector) Only batch 1 should appear in the table (RECORD_COUNT rows). 
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_delete_resume_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Exercise a delete under load followed by a resume of the deleted connector.

    Steps:
      1. Send batch 1 and wait until it is fully ingested.
      2. Delete the connector and immediately send batch 2.
      3. Call resume on the (deleted) connector.
      4. Send batch 3.

    The final row count must lie between RECORD_COUNT and 2 * RECORD_COUNT:
    batch 1 is always present, part of batch 2 may slip in before the delete
    takes effect, and batch 3 is expected never to be ingested.
    """
    salted_name = f"{FILE_NAME}{name_salt}"
    topic = salted_name
    connector_name = salted_name

    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )

    driver.createTopics(topic, partitionNum=1, replicationNum=1)
    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Step 1: batch 1 must be completely ingested before anything else happens.
    _send_batch(driver, topic, RECORD_COUNT)
    wait_for_rows(table.name, RECORD_COUNT, connector_name=connector_name)

    # Step 2: delete, then produce batch 2 straight away as pressure.
    driver.deleteConnector(connector_name)
    _send_batch(driver, topic, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # Step 3: resume is expected to fail because the connector no longer exists.
    driver.resumeConnector(connector_name)
    sleep(SLEEP_TIME)

    # Step 4: batch 3 is produced with no connector running.
    _send_batch(driver, topic, RECORD_COUNT)

    # The total is non-deterministic (batch 2 may be partially ingested because
    # deleteConnector does not wait for a full shutdown), so an exact-match
    # wait_for_rows cannot be used. Poll for the lower bound instead.
    deadline = time.monotonic() + 60
    count = table.select_scalar("count(*)")
    while count < RECORD_COUNT:
        if time.monotonic() >= deadline:
            raise AssertionError(
                f"Expected at least {RECORD_COUNT} rows in {table.name}, got {count}"
            )
        sleep(5)
        count = table.select_scalar("count(*)")

    # Upper bound: anything above two batches means duplicates or batch 3 rows.
    upper_bound = RECORD_COUNT * 2
    assert count <= upper_bound, (
        f"Expected at most {upper_bound} rows, got {count} — "
        f"unexpected duplication or batch 3 was ingested"
    )
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_pause_create_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Pause the connector under load, then issue a create for the same connector.

    Batches are produced before the pause, right after the pause request, and
    after the create call once ``wait_for_connector_running`` has returned.
    All three batches are expected in the table.
    """
    salted_name = f"{FILE_NAME}{name_salt}"
    topic = salted_name
    connector_name = salted_name
    batches_sent = 3

    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )

    driver.createTopics(topic, partitionNum=1, replicationNum=1)
    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1: connector running normally.
    _send_batch(driver, topic, RECORD_COUNT)

    # Batch 2: produced immediately after the pause request (pressure).
    driver.pauseConnector(connector_name)
    _send_batch(driver, topic, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # Batch 3: produced after the create call, once the connector is running.
    driver.createConnector(
        name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
    )
    driver.wait_for_connector_running(connector_name)
    _send_batch(driver, topic, RECORD_COUNT)

    wait_for_rows(
        table.name, RECORD_COUNT * batches_sent, connector_name=connector_name
    )
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_pause_resume_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Pause and resume the connector while records keep being produced.

    One batch is sent before the pause, one right after the pause request, and
    one after the resume; the table must end up with all three batches.
    """
    salted_name = f"{FILE_NAME}{name_salt}"
    topic = salted_name
    connector_name = salted_name
    batches_sent = 3

    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )

    driver.createTopics(topic, partitionNum=1, replicationNum=1)
    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1: before the pause.
    _send_batch(driver, topic, RECORD_COUNT)

    # Batch 2: produced right after the pause request (pressure).
    driver.pauseConnector(connector_name)
    _send_batch(driver, topic, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # Batch 3: produced after the resume request has had time to settle.
    driver.resumeConnector(connector_name)
    sleep(SLEEP_TIME)
    _send_batch(driver, topic, RECORD_COUNT)

    wait_for_rows(
        table.name, RECORD_COUNT * batches_sent, connector_name=connector_name
    )
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_recreate_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Issue the create request twice for an already existing connector.

    A batch is produced before the two create calls and another one after
    them; both batches must be present in the table at the end.

    NOTE(review): unlike the other ``*_chaos`` scenarios in this suite, no
    batch is produced between the create calls, so there is no actual
    "pressure" here -- confirm whether that is intentional.
    """
    recreate_attempts = 2
    batches_sent = 2
    topic = f"{FILE_NAME}{name_salt}"

    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )

    driver.createTopics(topic, partitionNum=1, replicationNum=1)
    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # First half of the data, sent while the original connector is running.
    _send_batch(driver, topic, RECORD_COUNT)

    # Re-submit the same connector definition, pausing after each attempt.
    for _ in range(recreate_attempts):
        driver.createConnector(
            name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
        )
        sleep(SLEEP_TIME)

    # Second half of the data.
    _send_batch(driver, topic, RECORD_COUNT)

    wait_for_rows(table.name, RECORD_COUNT * batches_sent)
def test_multiple_topic_to_one_table_snowpipe_streaming(
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Ingest TOPIC_COUNT topics (PARTITION_COUNT partitions each) into one table.

    After all rows have arrived the test checks that:
      * no (offset, partition) pair occurs more than TOPIC_COUNT times,
      * every partition holds RECORDS_PER_PARTITION distinct offsets,
      * every partition received rows from all TOPIC_COUNT topics.

    The extra Kafka topics created here are deleted in a ``finally`` block so
    that a failing wait or assertion does not leave them behind.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, field1 varchar)",
    )

    # Only topics that were actually created are recorded, so the cleanup in
    # ``finally`` never tries to delete a topic that does not exist.
    topics = []
    try:
        for i in range(TOPIC_COUNT):
            t = f"{FILE_NAME}{name_salt}{i}"
            driver.createTopics(t, partitionNum=PARTITION_COUNT, replicationNum=1)
            topics.append(t)

        create_connector_from_file(CONFIG_FILE)
        driver.startConnectorWaitTime()

        # -- Send --
        for topic in topics:
            for p in range(PARTITION_COUNT):
                values = [
                    json.dumps({"field1": str(e)}).encode("utf-8")
                    for e in range(RECORDS_PER_PARTITION)
                ]
                driver.sendBytesData(topic, values, [], partition=p)
                sleep(2)

        total_expected = RECORDS_PER_PARTITION * PARTITION_COUNT * TOPIC_COUNT

        # -- Verify row count --
        wait_for_rows(table.name, total_expected)

        # -- Verify no over-duplication (each offset+partition combo appears
        #    at most TOPIC_COUNT times) --
        result = table.select(
            'record_metadata:"offset"::string AS offset_no, '
            'record_metadata:"partition"::string AS partition_no',
            f"GROUP BY offset_no, partition_no HAVING count(*) > {TOPIC_COUNT}",
        )
        assert not result, f"Over-duplication detected: {result[0]}"

        # -- Verify unique offsets per partition --
        rows = table.select(
            'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
            'record_metadata:"partition"::number AS partition_no',
            "GROUP BY partition_no ORDER BY partition_no",
        )
        assert len(rows) == PARTITION_COUNT
        for p in range(PARTITION_COUNT):
            assert rows[p]["UNIQUE_OFFSETS"] == RECORDS_PER_PARTITION
            assert rows[p]["PARTITION_NO"] == p

        # -- Verify all topics contributed to each partition --
        topic_rows = table.select(
            'count(DISTINCT record_metadata:"topic"::string) AS topic_no, '
            'record_metadata:"partition"::number AS partition_no',
            "GROUP BY partition_no ORDER BY partition_no",
        )
        assert len(topic_rows) == PARTITION_COUNT
        for p in range(PARTITION_COUNT):
            assert topic_rows[p]["TOPIC_NO"] == TOPIC_COUNT
            assert topic_rows[p]["PARTITION_NO"] == p
    finally:
        # -- Cleanup extra Kafka topics (table/main topic handled by fixture) --
        # Runs on failure as well, so a failed run does not leak topics.
        for t in topics:
            driver.deleteTopic(t)
def test_native_string_json_without_schema(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Verify that an SMT (ReplaceField$Value blacklisting 'c2') drops the c2
    field before ingestion, leaving only the 'val' field.

    Connector config uses StringConverter key + JsonConverter value with a
    ReplaceField transform that removes 'c2'.

    The content check targets the row written from offset 0 of partition 0
    explicitly. A bare ``SELECT *`` has no guaranteed row order, so taking
    its first row and asserting ``offset == 0`` could fail spuriously.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, val varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send 100 records with 'val' and 'c2' (c2 will be dropped by SMT) --
    values = [
        json.dumps({"val": str(i), "c2": "Suppose to be dropped."}).encode("utf-8")
        for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, values)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify the offset-0 row: only 'val' survives the SMT --
    rows = table.select(
        "*",
        'WHERE record_metadata:"offset"::number = 0 '
        'AND record_metadata:"partition"::number = 0',
    )
    assert rows, "Expected row with offset 0 in partition 0"
    row = rows[0]
    assert json.loads(row["RECORD_METADATA"]) == {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "offset": 0,
        "partition": 0,
        "topic": topic,
    }
    assert row["VAL"] == "0"
def test_nullable_values_after_smt(
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Check that records whose value becomes null after the SMT are dropped.

    TOTAL_EVENTS events are produced; only the even-indexed ones carry
    ``optionalField``. Per the connector config (an SMT extracting the
    ``optionalField`` sub-object, with behavior.on.null.values = IGNORE), the
    remaining events are dropped, so EXPECTED_ROWS rows are expected.

    The ingested rows are sorted by Kafka offset before being compared with
    the expected list, because the SELECT carries no ORDER BY and its row
    order is therefore not guaranteed.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(index number, from_optional_field boolean, record_metadata variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    # The connector config has an SMT that extracts only the optionalField sub-object.
    # Events without optionalField are dropped (behavior.on.null.values = IGNORE).
    values = []
    for idx in range(TOTAL_EVENTS):
        event = {"index": idx, "someKey": "someValue"}
        if idx % 2 == 0:
            event["optionalField"] = {"index": idx, "from_optional_field": True}
        values.append(json.dumps(event).encode("utf-8"))

    driver.sendBytesData(topic, values)

    # -- Verify row count --
    wait_for_rows(table.name, EXPECTED_ROWS)

    # -- Verify content --
    rows = table.select(
        "index, from_optional_field, record_metadata:offset::number AS offset",
    )
    # Sort client-side so the comparison does not depend on result-set order.
    parsed = sorted(
        (
            {
                "index": r["INDEX"],
                "from_optional_field": r["FROM_OPTIONAL_FIELD"],
                "offset": r["OFFSET"],
            }
            for r in rows
        ),
        key=lambda item: item["offset"],
    )

    # Even-indexed events keep their original offsets (0, 2, 4, ...).
    expected = [
        {"index": idx, "from_optional_field": True, "offset": idx}
        for idx in range(0, TOTAL_EVENTS, 2)
    ]
    assert parsed == expected
{record_count} rows, got {count}" def _assert_dlq(driver, config, table, record_count): """Shared assertions for DLQ tests.""" offsets_in_dlq = driver.consume_messages_dlq(config, 0, record_count - 1) assert offsets_in_dlq == record_count, ( f"Expected {record_count} records in DLQ, got {offsets_in_dlq}" ) count = table.select_scalar("count(*)") assert count == 0, f"Expected 0 rows in table (DLQ), got {count}" def test_schema_evolution_add_columns( driver, create_connector_from_file, create_table, create_topics, wait_for_rows ): """ENABLE_SCHEMA_EVOLUTION=TRUE, schematization=on, send records with extra fields. Runs for both v3 and v4. Flat columns CITY, AGE are added via schema evolution. """ table = create_table( FILE_NAME.upper(), columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE", cleanup_topic=False, ) topic = create_topics([FILE_NAME], with_tables=False)[0] create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() record_count = 100 values = [ json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8") for i in range(record_count) ] driver.sendBytesData(topic, values, [], partition=0) wait_for_rows(table.name, record_count) _assert_success_rows(table, schematization=True, record_count=record_count) def test_schema_evolution_multi_wave( driver, create_connector_from_file, create_table, create_topics, wait_for_rows ): """Send two waves of records with different schemas. Wave 1: {city, age} -> ADD COLUMN for CITY, AGE Wave 2: {city, age, country} -> ADD COLUMN for COUNTRY Verifies that wave-1 rows have NULL for COUNTRY. 
def test_schema_evolution_disabled_mid_stream(
    driver, create_connector_from_file, create_table, create_topics, wait_for_rows
):
    """ENABLE_SCHEMA_EVOLUTION toggled off after initial evolution.

    Wave 1 ({city, age}) is ingested while schema evolution is enabled and
    adds the CITY and AGE columns. The table is then altered to
    ENABLE_SCHEMA_EVOLUTION = FALSE and wave 2 ({city, age, country}) is sent.
    The test expects the COUNTRY column to be added anyway and all rows from
    both waves to be present.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    topic = create_topics([FILE_NAME], with_tables=False)[0]

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Wave 1: evolve schema while ENABLE_SCHEMA_EVOLUTION=TRUE
    wave1_count = 50
    wave1 = [
        json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8")
        for i in range(wave1_count)
    ]
    driver.sendBytesData(topic, wave1, [], partition=0)
    wait_for_rows(table.name, wave1_count)
    _assert_success_rows(table, schematization=True, record_count=wave1_count)

    # Disable schema evolution on the table. The cursor is closed explicitly
    # so it is released even when the ALTER statement fails.
    cursor = driver.snowflake_conn.cursor()
    try:
        cursor.execute(
            "ALTER TABLE identifier(%s) SET ENABLE_SCHEMA_EVOLUTION = FALSE",
            (table.name,),
        )
    finally:
        cursor.close()

    # Wave 2: new column COUNTRY — DDL is still attempted and succeeds
    # because the test role has OWNERSHIP privilege.
    wave2_count = 50
    wave2 = [
        json.dumps({"city": "Taipei", "age": 100 + i, "country": "TW"}).encode("utf-8")
        for i in range(wave2_count)
    ]
    driver.sendBytesData(topic, wave2, [], partition=0)

    total = wave1_count + wave2_count
    wait_for_rows(table.name, total)

    cols = {row[0]: row[1] for row in table.schema()}
    assert "COUNTRY" in cols, (
        f"Expected COUNTRY column (DDL succeeded via OWNERSHIP), got: {list(cols.keys())}"
    )

    count = table.select_scalar("count(*)")
    assert count == total, f"Expected {total} rows, got {count}"
""" table = create_table( FILE_NAME.upper(), columns="(RECORD_METADATA VARIANT, CITY VARCHAR, AGE NUMBER) " "ENABLE_SCHEMA_EVOLUTION = TRUE", cleanup_topic=False, ) topic = create_topics([FILE_NAME], with_tables=False)[0] create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() record_count = 100 values = [ json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8") for i in range(record_count) ] driver.sendBytesData(topic, values, [], partition=0) wait_for_rows(table.name, record_count) rows = table.select( '"CITY", "AGE"', 'WHERE RECORD_METADATA:"offset"::number = 0', ) assert rows, "Expected row with offset 0" assert rows[0]["CITY"] == "Hsinchu" assert rows[0]["AGE"] == 0 def test_schema_evolution_drop_not_null( driver, create_connector_from_file, create_table, create_topics, wait_for_rows ): """Table has a NOT NULL column, but records omit it. Schema evolution should drop the NOT NULL constraint and add the extra column, allowing records to be ingested with NULL for the original column. 
@pytest.mark.parametrize("schema_evo", [True, False], ids=["evo=on", "evo=off"])
@pytest.mark.parametrize(
    "schematization", [True, False], ids=["schema=on", "schema=off"]
)
@pytest.mark.parametrize("validation", [True, False], ids=["valid=on", "valid=off"])
def test_schema_evolution_config_variants(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    create_topics,
    wait_for_rows,
    schema_evo,
    schematization,
    validation,
):
    """Matrix test over ENABLE_SCHEMA_EVOLUTION x schematization x validation.

    Executed for v3 and v4; combinations that do not apply to a connector
    version are skipped, and the skip reasons document the v3/v4 differences.

    v4:
      - Client-side validation works with schematization on (individual
        columns such as CITY, AGE) and off (RECORD_CONTENT / RECORD_METADATA
        VARIANT columns checked against the table schema).
      - Validation is switched through ``snowflake.validation``.

    v3:
      - The V1 Ingest SDK always validates client-side, so every
        validation=False combination is skipped.
      - Schema evolution with schematization off is not supported.

    Expected outcome per combination:
      - schematization off, or schema_evo on: all records are ingested.
      - schematization on, schema_evo off, validation on: records go to DLQ.
      - schematization on, schema_evo off, validation off (v4 only): the
        server-side Error Table handles the errors; nothing is asserted.
    """
    is_v3 = connector_version == "v3"
    if is_v3 and not validation:
        pytest.skip(
            "KC v3 uses V1 Ingest SDK which always performs client-side "
            "validation; validation cannot be disabled"
        )
    if is_v3 and schema_evo and not schematization:
        pytest.skip(
            "KC v3 does not support schema evolution when schematization is off"
        )

    # Build a per-combination name so each variant gets its own topic/table.
    suffix = "_".join(
        (
            "evo" if schema_evo else "noevo",
            "sch" if schematization else "nosch",
            "val" if validation else "noval",
        )
    )
    variant_name = f"{FILE_NAME}_{suffix}"

    topic = create_topics([variant_name], with_tables=False)[0]
    dlq_topic = f"DLQ_MATRIX_{variant_name}_{name_salt}"

    if schematization and not schema_evo:
        # Pre-create with schema evo disabled so extra columns are rejected.
        table = create_table(
            variant_name.upper(),
            columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = FALSE",
            cleanup_topic=False,
        )
    else:
        table = Table(driver, topic.upper())

    validation_mode = "client_side" if validation else "server_side"
    overrides = {
        "topics": topic,
        "snowflake.enable.schematization": str(schematization).lower(),
        "snowflake.validation": validation_mode,
        "errors.deadletterqueue.topic.name": dlq_topic,
    }
    config = create_connector_from_file(CONFIG_FILE, config_overrides=overrides)
    driver.startConnectorWaitTime()

    # Records are ingested when schematization is off (data goes into
    # RECORD_CONTENT as VARIANT regardless of the other settings) or when
    # schema evolution is on (extra columns get added).
    expect_ingestion = schema_evo or not schematization

    if not expect_ingestion and not validation:
        # No client-side validation -> server handles the error via Error Table.
        # DLQ routing only works when client validation is on.
        return

    record_count = 100 if expect_ingestion else 5
    values = [
        json.dumps({"city": "Hsinchu", "age": i}).encode("utf-8")
        for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], partition=0)

    if expect_ingestion:
        wait_for_rows(table.name, record_count)
        _assert_success_rows(table, schematization, record_count)
    else:
        _assert_dlq(driver, config, table, record_count)
"PERFORMANCE_BINARY": "/////w==", "RATING_INT": 100, "RATING_DOUBLE": 0.99, "APPROVAL": True, "APPROVAL_DATE": "2022-06-15", "APPROVAL_TIME": "23:59:59.999999", "INFO_ARRAY": ["HELLO", "WORLD"], "INFO": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"}, "INFO_OBJECT": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"}, } GOLD_VALUES = { "PERFORMANCE_STRING": "Excellent", "case_sensitive_PERFORMANCE_CHAR": "A", "PERFORMANCE_BINARY": b"\xff\xff\xff\xff", "RATING_INT": 100, "RATING_DOUBLE": 0.99, "APPROVAL": True, "APPROVAL_DATE": datetime.date(2022, 6, 15), "APPROVAL_TIME": datetime.time(23, 59, 59, 999999), "INFO_ARRAY": r'["HELLO","WORLD"]', "INFO": r'{"TREE_1":"APPLE","TREE_2":"PINEAPPLE"}', "INFO_OBJECT": r'{"TREE_1":"APPLE","TREE_2":"PINEAPPLE"}', } @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_schema_mapping( driver, name_salt, create_connector_from_file, create_table, wait_for_rows ): """Verify that each data type maps to the correct Snowflake column type and that RECORD_METADATA is automatically added. Tests STRING, CHAR, BINARY, NUMBER, DOUBLE, BOOLEAN, DATE, TIME, ARRAY, VARIANT, and OBJECT columns. """ table = create_table( FILE_NAME.upper(), columns="(" "PERFORMANCE_STRING STRING, " '"case_sensitive_PERFORMANCE_CHAR" CHAR, ' "PERFORMANCE_BINARY BINARY, " "RATING_INT NUMBER, " "RATING_DOUBLE DOUBLE, " "APPROVAL BOOLEAN, " "APPROVAL_DATE DATE, " "APPROVAL_TIME TIME, " "INFO_ARRAY ARRAY, " "INFO VARIANT, " "INFO_OBJECT OBJECT, " "RECORD_METADATA VARIANT" ")", ) topic = f"{FILE_NAME}{name_salt}" # TODO: SNOW-3236195: RowValidator uppercases unquoted column names via # LiteralQuoteUtils.unquoteColumnName(), but DESCRIBE TABLE preserves case for # quoted columns (e.g. "case_sensitive_PERFORMANCE_CHAR"). This causes a false # structural error. Fix by normalizing both sides in RowValidator. 
create_connector_from_file( CONFIG_FILE, config_overrides={ "snowflake.validation": "server_side", "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true", }, ) driver.startConnectorWaitTime() # -- Send -- keys = [json.dumps({"number": str(i)}).encode("utf-8") for i in range(RECORD_COUNT)] values = [json.dumps(RECORD).encode("utf-8") for _ in range(RECORD_COUNT)] driver.sendBytesData(topic, values, keys) # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify content of first row -- row = table.select("*")[0] for field, gold in GOLD_VALUES.items(): actual = row[field] if isinstance(actual, str): # Remove formatting whitespace added by Snowflake assert "".join(actual.split()) == gold, ( f"Column {field}: expected {gold!r}, got {actual!r}" ) else: assert actual == gold, f"Column {field}: expected {gold!r}, got {actual!r}" ================================================ FILE: test/tests/test_schema_not_supported_converter.py ================================================ import json import time import pytest pytestmark = pytest.mark.correctness FILE_NAME = "travis_correct_schema_not_supported_converter" CONFIG_FILE = f"{FILE_NAME}.json" RECORD = { "PERFORMANCE_STRING": "Excellent", '"case_sensitive_PERFORMANCE_CHAR"': "A", "PERFORMANCE_HEX": "FFFFFFFF", "RATING_INT": 100, "RATING_DOUBLE": 0.99, "APPROVAL": "true", "APPROVAL_DATE": "2022-06-15", "APPROVAL_TIME": "23:59:59.999999", "INFO_ARRAY": ["HELLO", "WORLD"], "INFO": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"}, "INFO_OBJECT": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"}, } def test_schema_not_supported_converter( driver, name_salt, create_connector_from_file, create_table ): table = create_table( FILE_NAME.upper(), columns='(PERFORMANCE_STRING STRING, "case_sensitive_PERFORMANCE_CHAR" CHAR, ' "PERFORMANCE_HEX BINARY, RATING_INT NUMBER, RATING_DOUBLE DOUBLE, " "APPROVAL BOOLEAN, APPROVAL_DATE DATE, APPROVAL_TIME TIME, " "INFO_ARRAY ARRAY, INFO VARIANT, INFO_OBJECT 
OBJECT)", ) topic = f"{FILE_NAME}{name_salt}" create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send -- keys = [json.dumps({"number": str(i)}).encode("utf-8") for i in range(100)] values = [json.dumps(RECORD).encode("utf-8") for _ in range(100)] driver.sendBytesData(topic, values, keys) # -- Verify: nothing should be ingested with unsupported converters -- time.sleep(30) count = table.select_scalar("count(*)") assert count == 0, ( f"Expected 0 rows but got {count}; unsupported converter should reject all records" ) ================================================ FILE: test/tests/test_snowpipe_streaming_legacy_avro_sr.py ================================================ """RECORD_CONTENT mode with Avro SR converter. Verifies that Avro-encoded records land correctly in the legacy RECORD_CONTENT / RECORD_METADATA VARIANT columns when snowflake.enable.schematization=false. v3 parity cannot be verified: even with v4 removed, v3's bundled SR classes clash with the Confluent 7.8.0 platform's SR classes (ServiceConfigurationError: CelExecutor not a subtype of RuleExecutor). Assertions reflect expected Avro deserialization behavior (JSON object with correct field values). v4-only. 
""" import json import pytest from confluent_kafka import avro FILE_NAME = "snowpipe_streaming_legacy_avro_sr" CONFIG_FILE = f"{FILE_NAME}.json" RECORD_COUNT = 100 VALUE_SCHEMA = avro.loads(""" { "type": "record", "name": "value_schema", "fields": [ {"name": "id", "type": "int"}, {"name": "firstName", "type": "string"}, {"name": "time", "type": "int"} ] } """) @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_snowpipe_streaming_legacy_avro_sr( connector_version, driver, name_salt, create_connector_from_file, create_table, wait_for_rows, ): """Verify that Avro SR records land in RECORD_CONTENT as JSON objects.""" table = create_table( FILE_NAME.upper(), columns="(RECORD_METADATA variant, RECORD_CONTENT variant)", ) topic = f"{FILE_NAME}{name_salt}" create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send Avro SR records -- values = [{"id": i, "firstName": "abc0", "time": 1835} for i in range(RECORD_COUNT)] driver.sendAvroSRData(topic, values, VALUE_SCHEMA) # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify RECORD_CONTENT for offset 0 -- row = table.select( "RECORD_CONTENT, RECORD_METADATA", 'WHERE RECORD_METADATA:"offset"::number = 0', )[0] content = json.loads(row["RECORD_CONTENT"]) if isinstance(content, str): content = json.loads(content) assert content["id"] == 0, ( f"Expected id=0 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}" ) assert content["firstName"] == "abc0", ( f"Expected firstName=abc0 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}" ) assert content["time"] == 1835, ( f"Expected time=1835 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}" ) metadata = json.loads(row["RECORD_METADATA"]) if isinstance(metadata, str): metadata = json.loads(metadata) for key in ("offset", "partition", "topic"): assert key in metadata, ( f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}" ) ================================================ FILE: 
test/tests/test_snowpipe_streaming_legacy_byte_array_converter.py
================================================
import base64
import json

import pytest

FILE_NAME = "snowpipe_streaming_legacy_byte_array_converter"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


# Assertions capture v3 reference behavior (verified dual on Confluent 7.8.0,
# 2026-03-31). Validation mode is irrelevant for RECORD_CONTENT — the entire
# payload goes into a VARIANT column with no type checking.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_byte_array_converter(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Verify that ByteArrayConverter is accepted when enable.schematization=false
    and that raw byte payloads land (base64-encoded) in the legacy RECORD_CONTENT
    column.

    Sends RECORD_COUNT raw byte payloads to partition 0 of a single-partition
    topic, waits for that many rows, then checks the row at offset 0: its
    RECORD_CONTENT must contain the base64 encoding of the first payload and
    its RECORD_METADATA must contain the offset, partition and topic keys.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    # Single partition: all records are produced to partition 0 below.
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send raw byte records --
    values = [f"binary payload {i}".encode("utf-8") for i in range(RECORD_COUNT)]
    driver.sendBytesData(topic, values, [], partition=0)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify RECORD_CONTENT contains base64-encoded data --
    row = table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )[0]

    # Substring match on the stringified value, so any surrounding quoting of
    # the VARIANT content does not matter.
    content = str(row["RECORD_CONTENT"])
    expected_b64 = base64.b64encode(b"binary payload 0").decode("utf-8")
    assert expected_b64 in content, (
        f"Expected base64 '{expected_b64}' in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    metadata = json.loads(row["RECORD_METADATA"])
    # If the first decode yields a string, the value was JSON-encoded twice;
    # decode once more to reach the object.
    if isinstance(metadata, str):
        metadata = json.loads(metadata)
    for key in ("offset", "partition", "topic"):
        assert key in metadata, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )


================================================
FILE: test/tests/test_snowpipe_streaming_legacy_string_converter.py
================================================
import json

import pytest

FILE_NAME = "snowpipe_streaming_legacy_string_converter"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


# Assertions capture v3 reference behavior (verified dual on Confluent 7.8.0,
# 2026-03-31). Validation mode is irrelevant for RECORD_CONTENT — the entire
# payload goes into a VARIANT column with no type checking.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_string_converter(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Verify that StringConverter is accepted when enable.schematization=false
    and that raw string payloads land in the legacy RECORD_CONTENT column.

    Sends RECORD_COUNT plain-text payloads to partition 0 of a
    single-partition topic, waits for that many rows, then checks the row at
    offset 0: its RECORD_CONTENT must contain the first payload and its
    RECORD_METADATA must contain the offset, partition and topic keys.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    # Single partition: all records are produced to partition 0 below.
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send raw string records --
    values = [f"hello world {i}".encode("utf-8") for i in range(RECORD_COUNT)]
    driver.sendBytesData(topic, values, [], partition=0)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify RECORD_CONTENT contains the string payload --
    row = table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )[0]

    # Substring match on the stringified value, so any surrounding quoting of
    # the VARIANT content does not matter.
    content = str(row["RECORD_CONTENT"])
    assert "hello world 0" in content, (
        f"Expected 'hello world 0' in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    metadata = json.loads(row["RECORD_METADATA"])
    # If the first decode yields a string, the value was JSON-encoded twice;
    # decode once more to reach the object.
    if isinstance(metadata, str):
        metadata = json.loads(metadata)
    for key in ("offset", "partition", "topic"):
        assert key in metadata, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )
================================================
FILE: test/tests/test_snowpipe_streaming_legacy_string_json.py
================================================
import json

import pytest

FILE_NAME = "snowpipe_streaming_legacy_string_json"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


# Assertions capture v3 reference behavior (verified dual on Confluent 7.8.0,
# 2026-03-31). Validation mode is irrelevant for RECORD_CONTENT — the entire
# payload goes into a VARIANT column with no type checking.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_string_json(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Verify that enable.schematization=false wraps JSON records into the
    legacy RECORD_CONTENT / RECORD_METADATA VARIANT columns — the same table
    layout that KC v3 used by default.

    Sends RECORD_COUNT JSON records to partition 0 of a single-partition
    topic, waits for that many rows, then checks that the row at offset 0
    carries the original city/age fields in RECORD_CONTENT and the offset,
    partition and topic keys in RECORD_METADATA.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    # Single partition: all records are produced to partition 0 below.
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send JSON records --
    values = [
        json.dumps({"city": "Portland", "age": i}).encode("utf-8")
        for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, values, [], partition=0)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify RECORD_CONTENT contains original JSON fields --
    row = table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )[0]

    content = json.loads(row["RECORD_CONTENT"])
    # VARIANT may store the payload as a JSON-encoded string (double-encoded)
    if isinstance(content, str):
        content = json.loads(content)
    assert content["city"] == "Portland", (
        f"Expected city=Portland in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )
    assert content["age"] == 0, (
        f"Expected age=0 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    metadata = json.loads(row["RECORD_METADATA"])
    # Same double-decoding guard as for RECORD_CONTENT above.
    if isinstance(metadata, str):
        metadata = json.loads(metadata)
    for key in ("offset", "partition", "topic"):
        assert key in metadata, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )


================================================
FILE: test/tests/test_snowpipe_streaming_schema_mapping_dlq.py
================================================
import json

import pytest

FILE_NAME = "snowpipe_streaming_schema_mapping_dlq"
CONFIG_FILE = f"{FILE_NAME}.json"

RECORDS_PER_TYPE = 10
# Correct records are ingested; incorrect records go to DLQ
EXPECTED_IN_TABLE = RECORDS_PER_TYPE  # only correct records
EXPECTED_IN_DLQ = 2 * RECORDS_PER_TYPE  # two types of incorrect records


@pytest.mark.skip(reason="Requires client-side validation")
def test_snowpipe_streaming_schema_mapping_dlq(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Verify that schema mapping errors route failing records to the DLQ
    while correct records are ingested normally.

    Three types of records are sent:
      1. Incorrect: string value in a NUMBER column (not parseable)
      2. Incorrect: array where an object is expected
      3. Correct: proper types

    Only type (3) should land in the table. Types (1) and (2) go to DLQ.

    The test is currently skipped unconditionally (see the skip marker).
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(PERFORMANCE_STRING STRING, RATING_INT NUMBER, RECORD_METADATA VARIANT)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    config = create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send incorrect data (string in NUMBER column) --
    incorrect_record = {"PERFORMANCE_STRING": "Excellent", "RATING_INT": "NO-a-NO"}
    _send_records(driver, topic, incorrect_record, RECORDS_PER_TYPE)

    # -- Send incorrect data (array instead of object) --
    another_incorrect = [{"PERFORMANCE_STRING": "Excellent", "RATING_INT": 100}]
    _send_records(driver, topic, another_incorrect, RECORDS_PER_TYPE)

    # -- Send correct data --
    correct_record = {"PERFORMANCE_STRING": "Excellent", "RATING_INT": 100}
    _send_records(driver, topic, correct_record, RECORDS_PER_TYPE)

    # -- Verify correct records landed in table --
    wait_for_rows(table.name, EXPECTED_IN_TABLE)

    # -- Verify DLQ received failing records --
    # NOTE(review): the arguments look like (config, partition, last expected
    # offset) -- confirm against driver.consume_messages_dlq.
    offsets_in_dlq = driver.consume_messages_dlq(config, 0, EXPECTED_IN_DLQ - 1)
    assert offsets_in_dlq == EXPECTED_IN_DLQ, (
        f"Expected {EXPECTED_IN_DLQ} records in DLQ, got {offsets_in_dlq}"
    )

    # -- Verify content of ingested rows --
    row = table.select("*")[0]
    for field, gold in {"PERFORMANCE_STRING": "Excellent", "RATING_INT": 100}.items():
        actual = row[field]
        if isinstance(actual, str):
            # Compare strings with all whitespace removed.
            assert "".join(actual.split()) == gold, (
                f"Column {field}: expected {gold!r}, got {actual!r}"
            )
        else:
            assert actual == gold, f"Column {field}: expected {gold!r}, got {actual!r}"


def _send_records(driver, topic, record, count):
    """Produce ``count`` copies of ``record`` (JSON-encoded) to ``topic``.

    Each message gets a JSON key of the form {"number": "<index>"}.
    """
    keys = [json.dumps({"number": str(i)}).encode("utf-8") for i in range(count)]
    values = [json.dumps(record).encode("utf-8") for _ in range(count)]
    driver.sendBytesData(topic, values, keys)


================================================
FILE: test/tests/test_snowpipe_streaming_string_avro_sr.py
================================================
from time import sleep

import pytest
from confluent_kafka import avro

FILE_NAME = "travis_correct_snowpipe_streaming_string_avro_sr"
CONFIG_FILE = f"{FILE_NAME}.json"

PARTITION_COUNT = 3
RECORDS_PER_PARTITION = 1000

VALUE_SCHEMA = avro.loads("""
{
    "type": "record",
    "name": "value_schema",
    "fields": [
        {"name": "id", "type": "int"},
        {"name": "firstName", "type": "string"},
        {"name": "time", "type": "int"},
        {"name": "someFloat", "type": "float"},
        {"name": "someFloatNaN", "type": "float"}
    ]
}
""")


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_string_avro_sr(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Send Avro SR records to several partitions and verify exactly-once ingestion.

    RECORDS_PER_PARTITION records are produced to each of PARTITION_COUNT
    partitions. The test waits for the total row count, then checks that no
    (offset, partition) pair occurs more than once and that every partition
    holds RECORDS_PER_PARTITION distinct offsets.
    """
    # Assertions below capture v3 reference behavior (test ported from v3).
    # v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict.
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, id number, firstName string, "
        "time number, someFloat number, someFloatNaN string)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    driver.createTopics(topic, partitionNum=PARTITION_COUNT, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    for p in range(PARTITION_COUNT):
        values = [
            {
                "id": i,
                "firstName": "abc0",
                "time": 1835,
                "someFloat": 21.37,
                "someFloatNaN": "NaN",
            }
            for i in range(RECORDS_PER_PARTITION)
        ]
        driver.sendAvroSRData(
            topic, values, VALUE_SCHEMA, key=[], key_schema="", partition=p
        )
        sleep(2)

    total_expected = RECORDS_PER_PARTITION * PARTITION_COUNT

    # -- Verify row count --
    wait_for_rows(table.name, total_expected)

    # -- Verify no duplicates --
    result = table.select(
        'record_metadata:"offset"::string AS offset_no, '
        'record_metadata:"partition"::string AS partition_no',
        "GROUP BY offset_no, partition_no HAVING count(*) > 1",
    )
    assert not result, f"Duplicate detected: {result[0]}"

    # -- Verify unique offsets per partition --
    # Rows come back ordered by partition number, so index p is partition p.
    rows = table.select(
        'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
        'record_metadata:"partition"::number AS partition_no',
        "GROUP BY partition_no ORDER BY partition_no",
    )
    assert len(rows) == PARTITION_COUNT
    for p in range(PARTITION_COUNT):
        assert rows[p]["UNIQUE_OFFSETS"] == RECORDS_PER_PARTITION
        assert rows[p]["PARTITION_NO"] == p


================================================
FILE: test/tests/test_snowpipe_streaming_string_json.py
================================================
import json
from time import sleep

FILE_NAME = "travis_correct_snowpipe_streaming_string_json"
CONFIG_FILE = f"{FILE_NAME}.json"

PARTITION_COUNT = 3
RECORDS_PER_PARTITION = 1000


def test_snowpipe_streaming_string_json(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Send JSON records to several partitions and verify exactly-once ingestion.

    Each partition receives RECORDS_PER_PARTITION messages: JSON objects
    followed by one None value and one empty byte string. The test waits for
    RECORDS_PER_PARTITION * PARTITION_COUNT rows (i.e. the None and empty
    messages are counted too), then checks that no (offset, partition) pair is
    duplicated, that each partition holds RECORDS_PER_PARTITION distinct
    offsets, and that SnowflakeConnectorPushTime is set on every row.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, fieldName varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    driver.createTopics(topic, partitionNum=PARTITION_COUNT, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    for p in range(PARTITION_COUNT):
        values = []
        for i in range(RECORDS_PER_PARTITION - 2):
            values.append(json.dumps({"fieldName": str(i)}).encode("utf-8"))
        # The last two messages of each partition are a None value and an
        # empty payload.
        values.append(None)
        values.append(b"")
        driver.sendBytesData(topic, values, [], partition=p)
        sleep(2)

    total_expected = RECORDS_PER_PARTITION * PARTITION_COUNT

    # -- Verify row count --
    wait_for_rows(table.name, total_expected)

    # -- Verify no duplicates --
    result = table.select(
        'record_metadata:"offset"::string AS offset_no, '
        'record_metadata:"partition"::string AS partition_no',
        "GROUP BY offset_no, partition_no HAVING count(*) > 1",
    )
    assert not result, f"Duplicate detected: {result[0]}"

    # -- Verify unique offsets per partition --
    # Rows come back ordered by partition number, so index p is partition p.
    rows = table.select(
        'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
        'record_metadata:"partition"::number AS partition_no',
        "GROUP BY partition_no ORDER BY partition_no",
    )
    assert len(rows) == PARTITION_COUNT
    for p in range(PARTITION_COUNT):
        assert rows[p]["UNIQUE_OFFSETS"] == RECORDS_PER_PARTITION, (
            f"Partition {p}: expected {RECORDS_PER_PARTITION} unique offsets, "
            f"got {rows[p]['UNIQUE_OFFSETS']}"
        )
        assert rows[p]["PARTITION_NO"] == p

    # -- Verify SnowflakeConnectorPushTime is populated --
    push_time_count = table.select(
        "count(*)",
        "WHERE NOT is_null_value(record_metadata:SnowflakeConnectorPushTime)",
    )[0]["COUNT(*)"]
    assert push_time_count == total_expected, (
        f"Empty ConnectorPushTime detected ({push_time_count}/{total_expected})"
    )


================================================
FILE: test/tests/test_snowpipe_streaming_string_json_dlq.py
================================================
import time

import pytest

pytestmark = pytest.mark.correctness

FILE_NAME = "snowpipe_streaming_string_json_dlq"
CONFIG_FILE = f"{FILE_NAME}.json"

RECORD_COUNT = 5
EXPECTED_IN_TABLE = 0
EXPECTED_IN_DLQ = 5


def test_snowpipe_streaming_string_json_dlq(
    driver, name_salt, create_connector_from_file, create_table
):
    """Verify that undeserializable payloads go to the DLQ and not to the table.

    Sends RECORD_COUNT copies of a malformed JSON payload, waits a fixed 30
    seconds, asserts the table holds EXPECTED_IN_TABLE rows, and then asserts
    that EXPECTED_IN_DLQ records are reported for the DLQ topic.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, record_content variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    config = create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send invalid data that cannot be deserialized --
    invalid = b'{invalid_string"}'
    values = [invalid for _ in range(RECORD_COUNT)]
    driver.sendBytesData(topic, values, [], partition=0)

    # -- Verify: no rows should land in the table --
    # A negative assertion cannot be polled for, so wait a fixed interval
    # before counting rows.
    time.sleep(30)
    count = table.select_scalar("count(*)")
    assert count == EXPECTED_IN_TABLE, (
        f"Expected {EXPECTED_IN_TABLE} rows but got {count}"
    )

    # -- Verify: records should appear in the DLQ topic --
    # NOTE(review): the arguments look like (config, partition, last expected
    # offset) -- confirm against driver.consume_messages_dlq.
    offsets_in_dlq = driver.consume_messages_dlq(config, 0, EXPECTED_IN_DLQ - 1)
    assert offsets_in_dlq == EXPECTED_IN_DLQ, (
        f"Expected {EXPECTED_IN_DLQ} offsets in DLQ, got {offsets_in_dlq}"
    )


================================================
FILE: test/tests/test_snowpipe_streaming_string_json_ignore_tombstone.py
================================================ import json from time import sleep FILE_NAME = "test_snowpipe_streaming_string_json_ignore_tombstone" CONFIG_FILE = f"{FILE_NAME}.json" PARTITION_COUNT = 3 RECORDS_PER_PARTITION = 1000 # Both None and "" are treated as tombstones in streaming mode (community converters). EXPECTED_PER_PARTITION = RECORDS_PER_PARTITION - 2 # TODO: KC v3 uses case-sensitive field names matching. But the column names are upper case by default. LONG_FIELD = "NUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBER" def test_snowpipe_streaming_string_json_ignore_tombstone( driver, name_salt, create_connector_from_file, create_table, wait_for_rows, ): """Verify Snowpipe Streaming with behavior.on.null.values=IGNORE across multiple partitions. Sends RECORDS_PER_PARTITION records per partition (including a None and "" tombstone in each). Both are dropped by the connector, leaving (RECORDS_PER_PARTITION - 2) × PARTITION_COUNT rows. Verifies: no duplicates, unique offsets per partition. 
""" table = create_table( FILE_NAME.upper(), columns=f'(record_metadata variant, "{LONG_FIELD}" varchar)', ) topic = f"{FILE_NAME}{name_salt}" driver.createTopics(topic, partitionNum=PARTITION_COUNT, replicationNum=1) config = create_connector_from_file(CONFIG_FILE) connector_name = config["name"] driver.startConnectorWaitTime() # -- Send -- for p in range(PARTITION_COUNT): values = [] for i in range(RECORDS_PER_PARTITION - 2): values.append(json.dumps({LONG_FIELD: str(i)}).encode("utf-8")) values.append(None) values.append(b"") # community converters treat this as a tombstone driver.sendBytesData(topic, values, [], partition=p) sleep(2) total_expected = EXPECTED_PER_PARTITION * PARTITION_COUNT # -- Verify row count -- wait_for_rows(table.name, total_expected, connector_name=connector_name) # -- Verify no duplicates -- result = table.select( 'record_metadata:"offset"::string AS offset_no, ' 'record_metadata:"partition"::string AS partition_no', "GROUP BY offset_no, partition_no HAVING count(*) > 1", ) assert not result, f"Duplicate detected: {result[0]}" # -- Verify unique offsets per partition -- rows = table.select( 'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, ' 'record_metadata:"partition"::number AS partition_no', "GROUP BY partition_no ORDER BY partition_no", ) assert len(rows) == PARTITION_COUNT for p in range(PARTITION_COUNT): assert rows[p]["UNIQUE_OFFSETS"] == EXPECTED_PER_PARTITION, ( f"Partition {p}: expected {EXPECTED_PER_PARTITION} unique offsets, " f"got {rows[p]['UNIQUE_OFFSETS']}" ) assert rows[p]["PARTITION_NO"] == p ================================================ FILE: test/tests/test_string_avrosr.py ================================================ import json import pytest from confluent_kafka import avro from lib.matchers import ANY_INT FILE_NAME = "travis_correct_string_avrosr" CONFIG_FILE = f"{FILE_NAME}.json" RECORD_COUNT = 100 VALUE_SCHEMA = avro.loads(""" { "type": "record", "name": "value_schema", "fields": [ 
{"name": "id", "type": "int"}, {"name": "firstName", "type": "string"}, {"name": "time", "type": "int"} ] } """) @pytest.mark.confluent_only @pytest.mark.parametrize("connector_version", ["v4"], indirect=True) def test_string_avrosr( driver, name_salt, connector_version, create_connector_from_file, create_table, wait_for_rows, ): # Assertions below capture v3 reference behavior (test ported from v3). # v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict. table = create_table( FILE_NAME.upper(), columns="(record_metadata variant, id number, firstName varchar, time number)", ) topic = f"{FILE_NAME}{name_salt}" create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send -- values = [{"id": i, "firstName": "abc0", "time": 1835} for i in range(RECORD_COUNT)] driver.sendAvroSRData(topic, values, VALUE_SCHEMA) # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify first row content -- row = table.select("*")[0] assert row["ID"] == 0 assert row["FIRSTNAME"] == "abc0" assert row["TIME"] == 1835 record_metadata = json.loads(row["RECORD_METADATA"]) assert record_metadata == { "CreateTime": ANY_INT, "SnowflakeConnectorPushTime": ANY_INT, "offset": 0, "partition": 0, "topic": topic, } ================================================ FILE: test/tests/test_string_json.py ================================================ import json from lib.matchers import ANY_INT FILE_NAME = "travis_correct_string_json" CONFIG_FILE = f"{FILE_NAME}.json" RECORD_COUNT = 100 def _build_records() -> list[bytes | None]: """Build the list of values to produce. 98 normal JSON records, then a tombstone (None), then an empty-string record that Snowflake custom converters treat as a normal record. 
""" records: list[bytes | None] = [ json.dumps({"number": str(i)}).encode("utf-8") for i in range(RECORD_COUNT - 2) ] records.append(None) records.append(b"") return records def test_string_json( driver, name_salt, connector_version, create_connector_from_file, create_table, wait_for_rows, ): table = create_table( FILE_NAME.upper(), columns='(record_metadata variant, "NUMBER" varchar)', ) topic = f"{FILE_NAME}{name_salt}" create_connector_from_file(CONFIG_FILE) driver.startConnectorWaitTime() # -- Send -- headers = [("header1", "value1"), ("header2", "{}")] records = _build_records() driver.sendBytesData(topic, records, [], 0, headers) # -- Verify row count -- wait_for_rows(table.name, RECORD_COUNT) # -- Verify first row content -- # Snowflake does not guarantee row ordering without ORDER BY, so we must # select the specific record at offset 0 rather than relying on insertion order. rows = table.select("record_metadata", "WHERE record_metadata:offset::int = 0") record_metadata = json.loads(rows[0]["RECORD_METADATA"]) match connector_version: case "v3": expected_header2 = [] case "v4": expected_header2 = "[]" assert record_metadata == { "CreateTime": ANY_INT, "SnowflakeConnectorPushTime": ANY_INT, "headers": { "header1": "value1", "header2": expected_header2, }, "offset": 0, "partition": 0, "topic": topic, } ================================================ FILE: upload_jar.sh ================================================ #!/usr/bin/env bash if ! VERSION=$(xmllint --xpath '/*[local-name()="project"]/*[local-name()="version"]/text()' pom.xml) then echo "failed to read version from pom.xml" exit 1 fi echo "version to upload: $VERSION" if ! API_KEY_SECRET_ID=$(op item list --tags "connectors-nexus-api-key" --format json | jq -r '.[].id') then echo "failed to find required api key in 1password" exit 1 fi if ! 
USER_PASS=$(op item get $API_KEY_SECRET_ID --format json | jq -r '.fields[] | select(.type=="CONCEALED") | .value') then echo 'failed to read user:password from 1password' exit 1 fi FILE="https://nexus.int.snowflakecomputing.com/repository/connectors/snowflake-kafka-connector-$USER-$VERSION.jar" echo trying to delete $FILE.... curl -X DELETE \ -u $USER_PASS \ $FILE echo uploading new file to $FILE... curl --fail \ --upload-file ./target/snowflake-kafka-connector-$VERSION.jar \ -u $USER_PASS \ -w "\nHTTP Status: %{http_code}\n" \ $FILE