Repository: snowflakedb/snowflake-kafka-connector
Branch: master
Commit: 1a8c6d75b9a3
Files: 399
Total size: 2.1 MB

Directory structure:
gitextract_xko0tdbw/

├── .githooks/
│   └── pre-commit
├── .github/
│   ├── CODEOWNERS
│   ├── actions/
│   │   ├── build-connector/
│   │   │   └── action.yml
│   │   └── run-e2e-tests/
│   │       └── action.yml
│   ├── dependabot.yaml
│   ├── scripts/
│   │   ├── decrypt_secret.sh
│   │   ├── parse_java_test_reports.py
│   │   ├── profile.json.gpg
│   │   ├── profile_azure.json.gpg
│   │   ├── profile_gcs.json.gpg
│   │   └── squid.conf
│   └── workflows/
│       ├── IntegrationTest.yml
│       ├── build-apache-kafka-images.yml
│       ├── end-to-end-legacy.yml
│       ├── end-to-end-stress.yml
│       ├── end-to-end.yaml
│       ├── formatting.yml
│       └── semgrep.yml
├── .gitignore
├── .java-version
├── LICENSE
├── README.md
├── deploy.sh
├── format.sh
├── pom.xml
├── pom_confluent.xml
├── profile.json.enc
├── profile.json.example
├── scripts/
│   └── process_licenses.py
├── src/
│   ├── main/
│   │   ├── java/
│   │   │   └── com/
│   │   │       └── snowflake/
│   │   │           ├── ingest/
│   │   │           │   └── streaming/
│   │   │           │       └── internal/
│   │   │           │           └── TimestampWrapper.java
│   │   │           └── kafka/
│   │   │               └── connector/
│   │   │                   ├── ConnectorConfigTools.java
│   │   │                   ├── ConnectorConfigValidator.java
│   │   │                   ├── Constants.java
│   │   │                   ├── DefaultConnectorConfigValidator.java
│   │   │                   ├── SemanticVersion.java
│   │   │                   ├── SnowflakeSinkTask.java
│   │   │                   ├── SnowflakeSinkTaskAuthorizationExceptionTracker.java
│   │   │                   ├── SnowflakeStreamingSinkConnector.java
│   │   │                   ├── TopicToTableParser.java
│   │   │                   ├── Utils.java
│   │   │                   ├── config/
│   │   │                   │   ├── AuthenticatorType.java
│   │   │                   │   ├── CommaSeparatedKeyValueValidator.java
│   │   │                   │   ├── ConnectorConfigDefinition.java
│   │   │                   │   ├── SinkTaskConfig.java
│   │   │                   │   ├── SnowflakeValidation.java
│   │   │                   │   └── TopicToTableValidator.java
│   │   │                   ├── dlq/
│   │   │                   │   └── KafkaRecordErrorReporter.java
│   │   │                   ├── internal/
│   │   │                   │   ├── CachingConfig.java
│   │   │                   │   ├── CachingSnowflakeConnectionService.java
│   │   │                   │   ├── DescribeTableRow.java
│   │   │                   │   ├── InternalUtils.java
│   │   │                   │   ├── JdbcProperties.java
│   │   │                   │   ├── JdbcPropertyKeys.java
│   │   │                   │   ├── KCLogger.java
│   │   │                   │   ├── PrivateKeyTool.java
│   │   │                   │   ├── SnowflakeConnectionService.java
│   │   │                   │   ├── SnowflakeConnectionServiceFactory.java
│   │   │                   │   ├── SnowflakeErrors.java
│   │   │                   │   ├── SnowflakeKafkaConnectorException.java
│   │   │                   │   ├── SnowflakeSinkService.java
│   │   │                   │   ├── SnowflakeURL.java
│   │   │                   │   ├── StandardSnowflakeConnectionService.java
│   │   │                   │   ├── URL.java
│   │   │                   │   ├── metrics/
│   │   │                   │   │   ├── MetricsJmxReporter.java
│   │   │                   │   │   ├── MetricsUtil.java
│   │   │                   │   │   ├── NoopTaskMetrics.java
│   │   │                   │   │   ├── SnowflakeSinkTaskMetrics.java
│   │   │                   │   │   └── TaskMetrics.java
│   │   │                   │   ├── schemaevolution/
│   │   │                   │   │   ├── ColumnInfos.java
│   │   │                   │   │   ├── ColumnTypeMapper.java
│   │   │                   │   │   ├── SchemaEvolutionTargetItems.java
│   │   │                   │   │   ├── SnowflakeColumnTypeMapper.java
│   │   │                   │   │   ├── SnowflakeSchemaEvolutionService.java
│   │   │                   │   │   ├── TableSchema.java
│   │   │                   │   │   ├── TableSchemaResolver.java
│   │   │                   │   │   └── ValidationResultMapper.java
│   │   │                   │   ├── streaming/
│   │   │                   │   │   ├── DefaultStreamingConfigValidator.java
│   │   │                   │   │   ├── IngestionMethodConfig.java
│   │   │                   │   │   ├── LatestCommitedOffsetTokenExecutor.java
│   │   │                   │   │   ├── OpenChannelRetryPolicy.java
│   │   │                   │   │   ├── SnowflakeSinkServiceV2.java
│   │   │                   │   │   ├── StreamingClientProperties.java
│   │   │                   │   │   ├── StreamingConfigValidator.java
│   │   │                   │   │   ├── StreamingErrorHandler.java
│   │   │                   │   │   ├── TopicPartitionChannelInsertionException.java
│   │   │                   │   │   ├── channel/
│   │   │                   │   │   │   └── TopicPartitionChannel.java
│   │   │                   │   │   ├── telemetry/
│   │   │                   │   │   │   ├── PeriodicTelemetryReporter.java
│   │   │                   │   │   │   ├── SnowflakeTelemetryChannelCreation.java
│   │   │                   │   │   │   ├── SnowflakeTelemetryChannelStatus.java
│   │   │                   │   │   │   └── SnowflakeTelemetrySsv1Migration.java
│   │   │                   │   │   └── v2/
│   │   │                   │   │       ├── AppendRowWithFallbackPolicy.java
│   │   │                   │   │       ├── BackpressureException.java
│   │   │                   │   │       ├── ClientRecreationException.java
│   │   │                   │   │       ├── ClientRecreator.java
│   │   │                   │   │       ├── PipeNameProvider.java
│   │   │                   │   │       ├── SnowpipeStreamingPartitionChannel.java
│   │   │                   │   │       ├── WaitForLastOffsetCommittedPolicy.java
│   │   │                   │   │       ├── channel/
│   │   │                   │   │       │   └── PartitionOffsetTracker.java
│   │   │                   │   │       ├── client/
│   │   │                   │   │       │   ├── StreamingClientFactory.java
│   │   │                   │   │       │   ├── StreamingClientPool.java
│   │   │                   │   │       │   ├── StreamingClientPools.java
│   │   │                   │   │       │   └── StreamingClientSupplier.java
│   │   │                   │   │       ├── migration/
│   │   │                   │   │       │   ├── Ssv1MigrationMode.java
│   │   │                   │   │       │   └── Ssv1MigrationResponse.java
│   │   │                   │   │       └── service/
│   │   │                   │   │           ├── BatchOffsetFetcher.java
│   │   │                   │   │           ├── PartitionChannelManager.java
│   │   │                   │   │           └── ThreadPools.java
│   │   │                   │   ├── telemetry/
│   │   │                   │   │   ├── SnowflakeTelemetryBasicInfo.java
│   │   │                   │   │   ├── SnowflakeTelemetryService.java
│   │   │                   │   │   ├── SnowflakeTelemetryServiceFactory.java
│   │   │                   │   │   └── TelemetryConstants.java
│   │   │                   │   └── validation/
│   │   │                   │       ├── BinaryStringUtils.java
│   │   │                   │       ├── ByteArraySerializer.java
│   │   │                   │       ├── ColumnLogicalType.java
│   │   │                   │       ├── ColumnPhysicalType.java
│   │   │                   │       ├── ColumnSchema.java
│   │   │                   │       ├── DataValidationUtil.java
│   │   │                   │       ├── DuplicateDetector.java
│   │   │                   │       ├── DuplicateKeyValidatedObject.java
│   │   │                   │       ├── DuplicateKeyValidatingSerializer.java
│   │   │                   │       ├── ErrorCode.java
│   │   │                   │       ├── Power10Util.java
│   │   │                   │       ├── RowValidator.java
│   │   │                   │       ├── SFExceptionValidation.java
│   │   │                   │       ├── SqlIdentifierNormalizer.java
│   │   │                   │       ├── TimestampWrapper.java
│   │   │                   │       ├── Utils.java
│   │   │                   │       ├── ValidationResult.java
│   │   │                   │       └── ZonedDateTimeSerializer.java
│   │   │                   ├── records/
│   │   │                   │   ├── KafkaRecordConverter.java
│   │   │                   │   ├── SnowflakeMetadataConfig.java
│   │   │                   │   └── SnowflakeSinkRecord.java
│   │   │                   └── streaming/
│   │   │                       └── iceberg/
│   │   │                           └── IcebergDDLTypes.java
│   │   └── resources/
│   │       └── com/
│   │           └── snowflake/
│   │               └── kafka/
│   │                   └── connector/
│   │                       ├── ingest_error_messages.properties
│   │                       └── internal/
│   │                           └── validation/
│   │                               └── ingest_error_messages.properties
│   └── test/
│       ├── java/
│       │   └── com/
│       │       └── snowflake/
│       │           └── kafka/
│       │               └── connector/
│       │                   ├── CachingConfigValidatorTest.java
│       │                   ├── ConnectClusterBaseIT.java
│       │                   ├── ConnectorConfigValidatorLogsTest.java
│       │                   ├── ConnectorConfigValidatorTest.java
│       │                   ├── ConnectorIT.java
│       │                   ├── InjectQueryRunner.java
│       │                   ├── InjectQueryRunnerExtension.java
│       │                   ├── InjectSnowflakeDataSource.java
│       │                   ├── InjectSnowflakeDataSourceExtension.java
│       │                   ├── LegacySchemaToggleIT.java
│       │                   ├── SchemaEvolutionAvroSrIT.java
│       │                   ├── SchemaEvolutionBase.java
│       │                   ├── SchemaEvolutionJsonIT.java
│       │                   ├── SinkTaskIT.java
│       │                   ├── SinkTaskProxyIT.java
│       │                   ├── SmtIT.java
│       │                   ├── SnowflakeSinkTaskAuthorizationExceptionTrackerTest.java
│       │                   ├── SnowflakeSinkTaskForStreamingIT.java
│       │                   ├── TopicToTableParserTest.java
│       │                   ├── UtilsTest.java
│       │                   ├── builder/
│       │                   │   └── SinkRecordBuilder.java
│       │                   ├── config/
│       │                   │   ├── ClientValidationConfigTest.java
│       │                   │   ├── SinkTaskConfigTest.java
│       │                   │   ├── SinkTaskConfigTestBuilder.java
│       │                   │   └── SnowflakeSinkConnectorConfigBuilder.java
│       │                   ├── dlq/
│       │                   │   └── InMemoryKafkaRecordErrorReporter.java
│       │                   ├── internal/
│       │                   │   ├── CachingSnowflakeConnectionServiceStatsTest.java
│       │                   │   ├── CachingSnowflakeConnectionServiceTest.java
│       │                   │   ├── ConnectionServiceIT.java
│       │                   │   ├── EmbeddedProxyServer.java
│       │                   │   ├── InternalUtilsTest.java
│       │                   │   ├── JdbcPropertiesTest.java
│       │                   │   ├── KCLoggerTest.java
│       │                   │   ├── NonEncryptedKeyTestSnowflakeConnection.java
│       │                   │   ├── ResetProxyConfigExec.java
│       │                   │   ├── SchematizationTestUtils.java
│       │                   │   ├── SnowflakeConnectionServiceCacheTest.java
│       │                   │   ├── SnowflakeDataSourceFactory.java
│       │                   │   ├── SnowflakeURLTest.java
│       │                   │   ├── StandardSnowflakeConnectionServiceDdlTest.java
│       │                   │   ├── TestUtils.java
│       │                   │   ├── TombstoneRecordIngestionIT.java
│       │                   │   ├── metrics/
│       │                   │   │   ├── MetricsJmxReporterTest.java
│       │                   │   │   └── SnowflakeSinkTaskMetricsTest.java
│       │                   │   ├── schemaevolution/
│       │                   │   │   ├── ColumnInfosTest.java
│       │                   │   │   ├── SchemaEvolutionTargetItemsTest.java
│       │                   │   │   ├── SnowflakeColumnTypeMapperTest.java
│       │                   │   │   ├── SnowflakeSchemaEvolutionServiceTest.java
│       │                   │   │   ├── TableSchemaResolverTest.java
│       │                   │   │   └── ValidationResultMapperTest.java
│       │                   │   ├── streaming/
│       │                   │   │   ├── BatchOffsetFetcherTest.java
│       │                   │   │   ├── ChannelStatusCheckIT.java
│       │                   │   │   ├── CloseTopicPartitionChannelIT.java
│       │                   │   │   ├── DefaultStreamingConfigValidatorTest.java
│       │                   │   │   ├── FakeIngestClientSupplier.java
│       │                   │   │   ├── FakeSnowflakeStreamingIngestChannel.java
│       │                   │   │   ├── FakeSnowflakeStreamingIngestClient.java
│       │                   │   │   ├── InMemorySinkTaskContext.java
│       │                   │   │   ├── OpenChannelRetryPolicyTest.java
│       │                   │   │   ├── SnowflakeSinkServiceV2AvroSchematizationIT.java
│       │                   │   │   ├── SnowflakeSinkServiceV2BaseIT.java
│       │                   │   │   ├── SnowflakeSinkServiceV2IT.java
│       │                   │   │   ├── SnowflakeSinkServiceV2SchematizationIT.java
│       │                   │   │   ├── SnowflakeSinkServiceV2Test.java
│       │                   │   │   ├── SnowflakeSinkServiceV2ValidationLoggingTest.java
│       │                   │   │   ├── StreamingClientPropertiesTest.java
│       │                   │   │   ├── StreamingErrorHandlerIT.java
│       │                   │   │   ├── StreamingManualModeIT.java
│       │                   │   │   ├── StreamingSinkServiceBuilder.java
│       │                   │   │   ├── telemetry/
│       │                   │   │   │   └── PeriodicTelemetryReporterTest.java
│       │                   │   │   └── v2/
│       │                   │   │       ├── AppendRowWithFallbackPolicyTest.java
│       │                   │   │       ├── BackpressureExceptionTest.java
│       │                   │   │       ├── ClientRecreationExceptionTest.java
│       │                   │   │       ├── SnowpipeStreamingPartitionChannelTest.java
│       │                   │   │       ├── StreamingClientManagerIT.java
│       │                   │   │       ├── client/
│       │                   │   │       │   ├── StreamingClientPoolTest.java
│       │                   │   │       │   └── StreamingClientPoolsTest.java
│       │                   │   │       └── service/
│       │                   │   │           └── PartitionChannelManagerTest.java
│       │                   │   ├── telemetry/
│       │                   │   │   ├── SnowflakeTelemetryChannelStatusTest.java
│       │                   │   │   └── SnowflakeTelemetryServiceTest.java
│       │                   │   └── validation/
│       │                   │       ├── DataValidationUtilTest.java
│       │                   │       ├── RowValidatorTest.java
│       │                   │       └── SqlIdentifierNormalizerTest.java
│       │                   ├── mock/
│       │                   │   └── MockResultSetForSizeTest.java
│       │                   ├── records/
│       │                   │   ├── ConverterTest.java
│       │                   │   └── SnowflakeSinkRecordTest.java
│       │                   └── streaming/
│       │                       └── iceberg/
│       │                           ├── BaseIcebergIT.java
│       │                           ├── IcebergIngestionIT.java
│       │                           ├── IcebergIngestionIntoVariantIT.java
│       │                           ├── IcebergIngestionNoSchemaEvolutionIT.java
│       │                           ├── IcebergVersion.java
│       │                           └── sql/
│       │                               ├── ComplexJsonRecord.java
│       │                               ├── MetadataRecord.java
│       │                               ├── PrimitiveJsonRecord.java
│       │                               └── RecordWithMetadata.java
│       └── resources/
│           ├── com/
│           │   └── snowflake/
│           │       └── kafka/
│           │           └── connector/
│           │               ├── complexJsonPayload.json
│           │               ├── complexJsonWithSchema.json
│           │               └── records/
│           │                   ├── test.avro
│           │                   ├── test_key.avro
│           │                   └── test_multi.avro
│           ├── log4j.properties
│           └── squid.conf
├── test/
│   ├── .gitignore
│   ├── E2E_TEST_PLAN.md
│   ├── README.md
│   ├── __init__.py
│   ├── apache_properties/
│   │   ├── connect-distributed.properties
│   │   ├── file-secrets.txt
│   │   ├── kraft-server.properties
│   │   ├── schema-registry.properties
│   │   ├── server.properties
│   │   └── zookeeper.properties
│   ├── build_image.sh
│   ├── build_runtime_jar.sh
│   ├── conftest.py
│   ├── connect-log4j.properties
│   ├── docker/
│   │   ├── .gitignore
│   │   ├── Dockerfile.apache-kafka
│   │   ├── Dockerfile.builder
│   │   ├── Dockerfile.test-runner
│   │   ├── docker-compose.amd64.yml
│   │   ├── docker-compose.apache.yml
│   │   ├── docker-compose.base.yml
│   │   ├── docker-compose.confluent-kraft.yml
│   │   ├── docker-compose.confluent.yml
│   │   ├── docker-compose.profile-apache.yml
│   │   ├── docker-compose.profile-confluent.yml
│   │   └── scripts/
│   │       └── start-apache-kafka.sh
│   ├── download_v3_jar.sh
│   ├── lib/
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── config_migration.py
│   │   ├── crypto.py
│   │   ├── driver.py
│   │   ├── fixtures/
│   │   │   ├── __init__.py
│   │   │   ├── connector.py
│   │   │   ├── function.py
│   │   │   ├── session.py
│   │   │   └── table.py
│   │   ├── matchers.py
│   │   └── utils.py
│   ├── pyproject.toml
│   ├── rest_request_template/
│   │   ├── datagen_connector.json
│   │   ├── datatype_ingestion.json
│   │   ├── iceberg_avro_aws.json
│   │   ├── iceberg_json_aws.json
│   │   ├── iceberg_schema_evolution_avro_aws.json
│   │   ├── iceberg_schema_evolution_json_aws.json
│   │   ├── nullable_values_after_smt.json
│   │   ├── snowpipe_streaming_legacy_avro_sr.json
│   │   ├── snowpipe_streaming_legacy_byte_array_converter.json
│   │   ├── snowpipe_streaming_legacy_string_converter.json
│   │   ├── snowpipe_streaming_legacy_string_json.json
│   │   ├── snowpipe_streaming_schema_evolution.json
│   │   ├── snowpipe_streaming_schema_mapping_dlq.json
│   │   ├── snowpipe_streaming_string_json_dlq.json
│   │   ├── test_kc_delete_create.json
│   │   ├── test_kc_delete_create_chaos.json
│   │   ├── test_kc_delete_resume.json
│   │   ├── test_kc_delete_resume_chaos.json
│   │   ├── test_kc_pause_create.json
│   │   ├── test_kc_pause_create_chaos.json
│   │   ├── test_kc_pause_resume.json
│   │   ├── test_kc_pause_resume_chaos.json
│   │   ├── test_kc_recreate.json
│   │   ├── test_kc_recreate_chaos.json
│   │   ├── test_kc_resilience.json
│   │   ├── test_kc_restart.json
│   │   ├── test_snowpipe_streaming_string_json_ignore_tombstone.json
│   │   ├── travis_correct_auto_table_creation.json
│   │   ├── travis_correct_auto_table_creation_topic2table.json
│   │   ├── travis_correct_avro_avro.json
│   │   ├── travis_correct_avrosr_avrosr.json
│   │   ├── travis_correct_confluent_protobuf_protobuf.json
│   │   ├── travis_correct_json_json.json
│   │   ├── travis_correct_multiple_topic_to_one_table_snowpipe_streaming.json
│   │   ├── travis_correct_native_complex_smt.json
│   │   ├── travis_correct_native_string_json_without_schema.json
│   │   ├── travis_correct_native_string_protobuf.json
│   │   ├── travis_correct_schema_mapping.json
│   │   ├── travis_correct_schema_not_supported_converter.json
│   │   ├── travis_correct_snowpipe_streaming_string_avro_sr.json
│   │   ├── travis_correct_snowpipe_streaming_string_json.json
│   │   ├── travis_correct_string_avro.json
│   │   ├── travis_correct_string_avrosr.json
│   │   ├── travis_correct_string_json.json
│   │   └── travis_correct_string_proxy.json
│   ├── run_tests.sh
│   ├── scripts/
│   │   ├── analyze_metrics.sh
│   │   ├── profile_connect.sh
│   │   └── scrape_metrics.sh
│   ├── test_data/
│   │   ├── .gitignore
│   │   ├── protobuf/
│   │   │   └── pom.xml
│   │   ├── sensor.proto
│   │   └── twitter.avro
│   └── tests/
│       ├── __init__.py
│       ├── compatibility/
│       │   ├── __init__.py
│       │   ├── conftest.py
│       │   ├── test_compatibility_case_sensitivity.py
│       │   ├── test_migration.py
│       │   ├── test_schematization_disabled.py
│       │   ├── test_type_compatibility.py
│       │   ├── test_type_compatibility_avro.py
│       │   └── test_unsupported_types.py
│       ├── high_performance/
│       │   └── test_case_sensitivity.py
│       ├── iceberg/
│       │   ├── __init__.py
│       │   ├── test_iceberg_avro.py
│       │   ├── test_iceberg_json.py
│       │   ├── test_iceberg_se_avro.py
│       │   └── test_iceberg_se_json.py
│       ├── pressure/
│       │   ├── test_perf_backlog_drain.py
│       │   ├── test_pressure_init.py
│       │   └── test_pressure_restart.py
│       ├── schema_evolution/
│       │   ├── __init__.py
│       │   ├── test_se_auto_table_creation_avro_sr.py
│       │   ├── test_se_auto_table_creation_json.py
│       │   ├── test_se_avro_sr.py
│       │   ├── test_se_json_ignore_tombstone.py
│       │   ├── test_se_multi_topic_replace_table.py
│       │   ├── test_se_nonnullable_json.py
│       │   ├── test_se_nullable_values_after_smt.py
│       │   ├── test_se_random_row_count.py
│       │   └── test_se_replace_table.py
│       ├── test_auto_table_creation.py
│       ├── test_auto_table_creation_topic2table.py
│       ├── test_avrosr_avrosr.py
│       ├── test_channel_invalidation.py
│       ├── test_channel_invalidation_recovery.py
│       ├── test_column_identifier_normalization.py
│       ├── test_confluent_protobuf_protobuf.py
│       ├── test_default_pipe_features.py
│       ├── test_error_table.py
│       ├── test_json_json.py
│       ├── test_kc_delete_create.py
│       ├── test_kc_delete_create_chaos.py
│       ├── test_kc_delete_resume.py
│       ├── test_kc_delete_resume_chaos.py
│       ├── test_kc_pause_create.py
│       ├── test_kc_pause_create_chaos.py
│       ├── test_kc_pause_resume.py
│       ├── test_kc_pause_resume_chaos.py
│       ├── test_kc_recreate.py
│       ├── test_kc_recreate_chaos.py
│       ├── test_kc_restart.py
│       ├── test_multiple_topic_to_one_table_snowpipe_streaming.py
│       ├── test_native_complex_smt.py
│       ├── test_native_string_json_without_schema.py
│       ├── test_native_string_protobuf.py
│       ├── test_nullable_values_after_smt.py
│       ├── test_schema_evolution_streaming.py
│       ├── test_schema_mapping.py
│       ├── test_schema_not_supported_converter.py
│       ├── test_snowpipe_streaming_legacy_avro_sr.py
│       ├── test_snowpipe_streaming_legacy_byte_array_converter.py
│       ├── test_snowpipe_streaming_legacy_string_converter.py
│       ├── test_snowpipe_streaming_legacy_string_json.py
│       ├── test_snowpipe_streaming_schema_mapping_dlq.py
│       ├── test_snowpipe_streaming_string_avro_sr.py
│       ├── test_snowpipe_streaming_string_json.py
│       ├── test_snowpipe_streaming_string_json_dlq.py
│       ├── test_snowpipe_streaming_string_json_ignore_tombstone.py
│       ├── test_string_avrosr.py
│       └── test_string_json.py
└── upload_jar.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .githooks/pre-commit
================================================
#!/usr/bin/env bash
#
# Git pre-commit hook that enforces the same formatting checks as CI:
#   1. Java  – google-java-format via ./format.sh      (requires java)
#   2. Python – ruff check --fix + ruff format          (requires ruff)
#      Both ruff commands exit non-zero when they had to change a file, so the
#      commit is aborted and the fixed files can be re-staged.
# ruff is skipped when it is not installed.
# NOTE(review): format.sh is invoked unconditionally here; presumably it deals
# with a missing java itself — confirm against format.sh.
#
# Install with:
#   git config core.hooksPath .githooks
#
# If you need the corporate secret-scanner hook as well, this script
# delegates to .git/hooks/pre-commit after running its own checks.

set -euo pipefail

REPO_ROOT="$(git rev-parse --show-toplevel)"

# Capture the staged file list once and match against the captured text.
# Piping `git diff` directly into `grep -q` is unreliable under `pipefail`:
# grep exits on the first match, git may then be killed by SIGPIPE, and the
# whole pipeline is reported as failed — i.e. a match looks like "no match".
STAGED_FILES="$(git diff --cached --name-only)"

# --- Java formatting (google-java-format) ---
if grep -q '^src/' <<<"$STAGED_FILES"; then
  "$REPO_ROOT/format.sh"
fi

# --- Python linting & formatting (ruff) ---
# An array keeps each path a separate argument without relying on word splitting.
PYTHON_PATHS=(test/tests test/lib test/conftest.py)

if grep -q '^test/' <<<"$STAGED_FILES"; then
  if command -v ruff &>/dev/null; then
    ruff check --fix --exit-non-zero-on-fix "${PYTHON_PATHS[@]}"
    ruff format --exit-non-zero-on-format "${PYTHON_PATHS[@]}"
  fi
fi

# Chain to the default hooks directory so the secret-scanner (or any other
# hook installed into .git/hooks/) still runs.
if [ -x .git/hooks/pre-commit ]; then
  exec .git/hooks/pre-commit "$@"
fi


================================================
FILE: .github/CODEOWNERS
================================================
* @snowflakedb/streaming-ingest


================================================
FILE: .github/actions/build-connector/action.yml
================================================
# Composite action: installs Java 11, restores the local Maven repository
# cache, and runs test/build_runtime_jar.sh for the requested platform.
name: Build Connector
description: Build the Snowflake Kafka Connector JAR/ZIP for a given platform

inputs:
  platform:
    description: "Target platform: 'apache' or 'confluent'"
    required: true

runs:
  using: composite
  steps:
    - name: Install Java 11
      uses: actions/setup-java@v3
      with:
        distribution: 'zulu'
        java-version: 11

    # The cache key hashes pom_confluent.xml when platform is 'confluent' and
    # pom.xml otherwise. restore-keys allows falling back to any cache that
    # shares the "<os>-maven-" prefix when there is no exact key match.
    - name: Cache local Maven repository
      uses: actions/cache@v4
      with:
        path: ~/.m2/repository
        key: >
          ${{ runner.os }}-maven-${{ hashFiles(
          case(inputs.platform == 'confluent', '**/pom_confluent.xml', '**/pom.xml')
          ) }}
        restore-keys: |
          ${{ runner.os }}-maven-

    # Runs from the test/ directory. The platform input is handed over through
    # the PLATFORM environment variable and expanded by the shell, instead of
    # being interpolated into the command line by the expression engine.
    - name: Build connector
      shell: bash
      working-directory: test
      run: ./build_runtime_jar.sh ../../snowflake-kafka-connector package "$PLATFORM"
      env:
        SNOWFLAKE_CREDENTIAL_FILE: "../profile.json"
        PLATFORM: ${{ inputs.platform }}


================================================
FILE: .github/actions/run-e2e-tests/action.yml
================================================
# Composite action: logs in to GHCR when the platform is 'apache', runs
# test/run_tests.sh with the requested platform/cloud settings, and uploads
# the log directory as an artifact when a previous step failed.
name: Run E2E Tests
description: Run Docker-based end-to-end tests for the Snowflake Kafka Connector

inputs:
  platform:
    description: "Target platform: 'apache' or 'confluent'"
    required: true
  platform-version:
    description: "Platform version (e.g. '2.8.2', '7.8.2')"
    required: true
  snowflake-cloud:
    description: "Snowflake cloud provider: 'AWS', 'GCP', or 'AZURE'"
    required: true
  java-version:
    description: "Java version for Apache Kafka (e.g. '11', '17'). Ignored for Confluent."
    required: false
    default: '11'
  marker-filter:
    description: "pytest -m expression controlling which tests run (e.g. 'compatibility' or 'not compatibility and not schema_evolution and not correctness and not pressure')"
    required: false
    default: 'not pressure'
  test-group:
    description: "Short label for this test group used in artifact names (e.g. 'core', 'compatibility', 'schema_and_correctness')"
    required: false
    default: 'default'
  pressure:
    description: "Run pressure/stress tests instead of regular tests"
    required: false
    default: 'false'

runs:
  using: composite
  steps:
    # Executed only for the 'apache' platform; authenticates against ghcr.io
    # with the workflow's own token.
    - name: Log in to GHCR (for prebuilt Apache Kafka image)
      if: inputs.platform == 'apache'
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}

    # All inputs are passed through environment variables and expanded by the
    # shell. Everything after the bare "--" is handed to run_tests.sh as extra
    # arguments; here that is the pytest marker expression.
    - name: Run end-to-end tests
      shell: bash
      working-directory: test
      env:
        SNOWFLAKE_CREDENTIAL_FILE: "${{ github.workspace }}/profile.json"
        PLATFORM: ${{ inputs.platform }}
        PLATFORM_VERSION: ${{ inputs['platform-version'] }}
        SNOWFLAKE_CLOUD: ${{ inputs['snowflake-cloud'] }}
        JAVA_VERSION: ${{ inputs['java-version'] }}
        LOGS_DIR: "${{ github.workspace }}/test-logs"
        # pressure == 'true' overrides marker-filter with the fixed 'pressure' marker.
        MARKER_FILTER: ${{ inputs.pressure == 'true' && 'pressure' || inputs['marker-filter'] }}
      run: |
        ./run_tests.sh \
          --platform="$PLATFORM" \
          --platform-version="$PLATFORM_VERSION" \
          --cloud="$SNOWFLAKE_CLOUD" \
          --java-version="$JAVA_VERSION" \
          --logs-dir="$LOGS_DIR" \
          -- -m "$MARKER_FILTER"

    # Runs only after a failure. The path matches LOGS_DIR above, and a missing
    # or empty directory is not treated as an error.
    - name: Upload service logs on failure
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        name: logs-${{ inputs.platform }}-${{ inputs['platform-version'] }}-${{ inputs['snowflake-cloud'] }}-${{ inputs['test-group'] }}
        path: ${{ github.workspace }}/test-logs/
        retention-days: 14
        if-no-files-found: ignore


================================================
FILE: .github/dependabot.yaml
================================================
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.

version: 2
updates:
  - package-ecosystem: "maven" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"
    # Suppress major-version update proposals for every artifact in the
    # org.apache.kafka group; other update types are not ignored by this rule.
    ignore:
      - dependency-name: "org.apache.kafka:*"
        update-types: ["version-update:semver-major"]


================================================
FILE: .github/scripts/decrypt_secret.sh
================================================
#!/bin/sh

# Decrypt the GPG-encrypted Snowflake test profile for one deployment and
# write it to ./profile.json.
#
# Usage: decrypt_secret.sh <deployment>
#   AWS              -> .github/scripts/profile.json.gpg
#   GCS              -> .github/scripts/profile_gcs.json.gpg
#   any other value  -> .github/scripts/profile_azure.json.gpg
#                       (this includes a missing argument)
#
# The passphrase is taken from $SNOWFLAKE_TEST_PROFILE_SECRET.
# Paths are relative, so the script is expected to run from the repository root.
snowflake_deployment=${1:-}

# `case` compares the value as a single quoted word, so an empty argument or
# one containing whitespace selects the fallback branch cleanly instead of
# producing a `[: unary operator expected` error.
case "$snowflake_deployment" in
  AWS) encrypted_profile=.github/scripts/profile.json.gpg ;;
  GCS) encrypted_profile=.github/scripts/profile_gcs.json.gpg ;;
  *)   encrypted_profile=.github/scripts/profile_azure.json.gpg ;;
esac

# --batch to prevent interactive command --yes to assume "yes" for questions
gpg --quiet --batch --yes --decrypt --passphrase="$SNOWFLAKE_TEST_PROFILE_SECRET" \
  --output profile.json "$encrypted_profile"

================================================
FILE: .github/scripts/parse_java_test_reports.py
================================================
#!/usr/bin/env python3
"""
Parse Maven Surefire and Failsafe XML reports, append a Markdown summary to
GITHUB_STEP_SUMMARY when set, and emit ::error workflow commands to stderr
for annotations. Exits silently when GITHUB_STEP_SUMMARY is unset or no failures.
"""

import os
import re
import sys
import xml.etree.ElementTree as ET
from pathlib import Path


def _classname_to_path(classname: str, connector_root: Path):
    """Return repo-relative path to Java file (src/test/... or src/main/...)."""
    rel = classname.replace(".", "/") + ".java"
    for prefix in ("src/test/java/", "src/main/java/"):
        candidate = connector_root / prefix / rel
        if candidate.exists():
            return prefix + rel
    return None


def _line_in_test_class(stack_trace: str, classname: str):
    """First (File.java:line) in stack trace for the test class (not JUnit/framework)."""
    test_class_file = classname.split(".")[-1] + ".java"
    pattern = re.escape(test_class_file) + r":(\d+)\)"
    match = re.search(pattern, stack_trace)
    return int(match.group(1)) if match else None


def parse_suite(path: Path) -> list[tuple[str, str, str, str, str]]:
    """Collect every failure/error recorded in one ``TEST-*.xml`` report.

    Returns a list of ``(classname, testname, exc_type, message, stack_trace)``
    tuples in document order. A file that cannot be read or parsed yields an
    empty list instead of raising.
    """
    try:
        suite_root = ET.parse(path).getroot()
    except (ET.ParseError, OSError):
        return []

    collected = []
    for case in suite_root.findall(".//testcase"):
        for tag in ("failure", "error"):
            problem = case.find(tag)
            if problem is None:
                continue
            record = (
                case.get("classname", ""),
                case.get("name", ""),
                (problem.get("type") or "").strip(),
                (problem.get("message") or "").strip(),
                (problem.text or "").strip(),
            )
            collected.append(record)
    return collected


def _first_line_for_annotation(exc_type: str, message: str, stack_trace: str) -> str:
    """First line of failure for ::error message (exception type + message or first stack line)."""
    if message:
        first = f"{exc_type}: {message}" if exc_type else message
    elif stack_trace:
        first = stack_trace.split("\n")[0].strip()
    else:
        first = exc_type or "Failure"
    return first[:500]


def _emit_error_annotation(
    classname: str,
    name: str,
    exc_type: str,
    message: str,
    stack_trace: str,
    connector_root: Path,
    stderr: object,
) -> None:
    """Print one ``::error`` workflow command for a failed test.

    The command has the form ``::error title=...,file=...,line=...::<message>``.
    ``file`` is included only when the class resolves to a source file under
    ``connector_root`` and ``line`` only when the stack trace contains a frame
    in that class's file.

    Args:
        classname: Fully qualified test class name.
        name: Test method (display) name.
        exc_type: Exception type reported for the failure, may be empty.
        message: Failure message, may be empty.
        stack_trace: Stack trace text, may be empty.
        connector_root: Directory containing the ``src`` tree.
        stderr: Writable text stream the command is printed to.
    """

    def _escape_data(value: str) -> str:
        # Escaping required for the message part of a workflow command.
        return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")

    def _escape_property(value: str) -> str:
        # Property values additionally must not contain the "," and ":"
        # separators; parameterized test names frequently do.
        return _escape_data(value).replace(":", "%3A").replace(",", "%2C")

    first_line = _first_line_for_annotation(exc_type, message, stack_trace)
    file_path = _classname_to_path(classname, connector_root)
    file_line = _line_in_test_class(stack_trace, classname) if stack_trace else None

    parts = ["title=" + _escape_property(f"{classname}#{name}")]
    if file_path:
        parts.append("file=" + _escape_property(file_path))
    if file_line is not None:
        parts.append(f"line={file_line}")
    opts = ",".join(parts)
    print(f"::error {opts}::{_escape_data(first_line)}", file=stderr)


def main() -> None:
    """Entry point: annotate failed Java tests and append them to the job summary.

    Exits with status 0 without doing anything when ``GITHUB_STEP_SUMMARY`` is
    unset/empty or when no failure is found in the reports. The connector root
    is ``sys.argv[1]`` when given, otherwise ``GITHUB_WORKSPACE`` (default
    ``"."``); reports are read from ``target/surefire-reports`` and
    ``target/failsafe-reports`` beneath it.
    """
    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if not summary_path:
        sys.exit(0)

    if len(sys.argv) >= 2:
        connector_root = Path(sys.argv[1])
    else:
        connector_root = Path(os.environ.get("GITHUB_WORKSPACE", "."))

    # Surefire reports first, then Failsafe; files in name order within each.
    all_failures = []
    for reports_folder in ("surefire-reports", "failsafe-reports"):
        report_dir = connector_root / "target" / reports_folder
        if report_dir.is_dir():
            for report_file in sorted(report_dir.glob("TEST-*.xml")):
                all_failures.extend(parse_suite(report_file))
    if not all_failures:
        sys.exit(0)

    # Emit one annotation per failure and, in the same pass, bucket the
    # failures per class: class -> [(name, exc_type, message, stack_trace), ...]
    by_class = {}
    for classname, name, exc_type, message, stack_trace in all_failures:
        _emit_error_annotation(
            classname, name, exc_type, message, stack_trace, connector_root, sys.stderr
        )
        by_class.setdefault(classname, []).append(
            (name, exc_type, message, stack_trace)
        )

    lines = ["", "## Java test failures", ""]
    for classname in sorted(by_class):
        short_name = classname.rsplit(".", 1)[-1]
        lines += [f"## {short_name}", "", f"**Class:** `{classname}`", ""]
        for name, exc_type, message, stack_trace in by_class[classname]:
            lines += [f"### {name}", ""]
            if exc_type:
                lines += [f"**Exception type:** `{exc_type}`", ""]
            if message:
                lines += ["**Message:**", "", message, ""]
            if stack_trace:
                lines += ["**Stack trace:**", "", "```", stack_trace, "```", ""]
        lines.append("")

    with open(summary_path, "a", encoding="utf-8") as summary_file:
        summary_file.write("\n".join(lines))


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/squid.conf
================================================
# Squid proxy configuration: listens on port 3128 and requires basic
# authentication (localhost is additionally allowed without credentials).
# http_access rules are listed in the order they are meant to be evaluated,
# so do not reorder them.

# Port ACLs: SSL_ports is the only port CONNECT is permitted to; Safe_ports
# is the set of destination ports requests may target at all.
acl SSL_ports port 443
acl Safe_ports port 80          # http
acl Safe_ports port 21          # ftp
acl Safe_ports port 443         # https
acl Safe_ports port 70          # gopher
acl Safe_ports port 210         # wais
acl Safe_ports port 1025-65535  # unregistered ports
acl Safe_ports port 280         # http-mgmt
acl Safe_ports port 488         # gss-http
acl Safe_ports port 591         # filemaker
acl Safe_ports port 777         # multiling http
acl CONNECT method CONNECT

# Reject requests to ports outside Safe_ports, and CONNECT to non-SSL ports.
http_access deny !Safe_ports
http_access deny CONNECT !SSL_ports

http_port 3128
coredump_dir /var/spool/squid

# Cache refresh rules.
refresh_pattern ^ftp:           1440    20%     10080
refresh_pattern ^gopher:        1440    0%      1440
refresh_pattern -i (/cgi-bin/|\?) 0     0%      0
refresh_pattern (Release|Packages(.gz)*)$      0       20%     2880
refresh_pattern .               0       20%     4320

# Basic authentication backed by the NCSA-style password file
# /etc/squid/passwords.
auth_param basic program /usr/lib/squid/basic_ncsa_auth /etc/squid/passwords
auth_param basic realm proxy
acl authenticated proxy_auth REQUIRED
http_access allow authenticated

http_access allow localhost

# Everything that was not explicitly allowed above is denied.
ident_lookup_access deny all
http_access deny all

================================================
FILE: .github/workflows/IntegrationTest.yml
================================================
name: Kafka Connector Java Integration Tests

# Runs the Maven unit and integration tests (`verify` goal through
# test/build_runtime_jar.sh) on every push to master and on every pull request.
# A local Squid proxy with basic authentication is started before the tests.
on:
  push:
    branches: [ master ]
  pull_request:
    branches: '**'

jobs:
  build_and_test:
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstrategyfail-fast
      matrix:
        snowflake_cloud: ['AWS'] # for now only AWS has support for ssv2
#        snowflake_cloud: [ 'AWS', 'AZURE', 'GCP' ]
    steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: "Install Java 11"
      # setup-java v1 is deprecated; v2+ requires an explicit distribution.
      # 'zulu' keeps the JDK vendor that v1 installed.
      uses: actions/setup-java@v4
      with:
        distribution: 'zulu'
        java-version: 11
    - name: "Cache local Maven repository"
      uses: actions/cache@v4
      with:
        path: ~/.m2/repository
        key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: |
          ${{ runner.os }}-maven-
    - name: Install Python
      # .github/scripts/parse_java_test_reports.py uses builtin generic
      # annotations (list[tuple[...]]), which need Python 3.9 or newer.
      uses: actions/setup-python@v4
      with:
        python-version: '3.9'
        architecture: 'x64'
    - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }}
      run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }}
      env:
        SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }}
    - name: Install Dependency
      run: |
        pip3 install --upgrade setuptools
        pip3 install requests certifi "confluent-kafka[avro,json,protobuf]==1.9.2"
        pip3 install avro kafka-python
        pip3 install protobuf
        pip3 install --upgrade snowflake-connector-python==2.7.4
        curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash
        sudo apt update
        sudo apt-get -y install jq vim
        sudo apt-get -y install protobuf-compiler

    - name: Install Squid as Proxy Server and Apache Utils for Password Authentication
      # -y on both installs: without it apt-get aborts in a non-interactive
      # shell as soon as it needs confirmation (e.g. for extra dependencies).
      run: |
        sudo apt-get -y install squid
        sudo apt-get -y install apache2-utils

    - name: Change squid config and run Proxy Server
      # Creates the htpasswd file referenced by squid.conf (user "admin"),
      # installs the repository's squid.conf and starts the service.
      run: |
        sudo touch /etc/squid/passwords
        sudo chmod 777 /etc/squid/passwords
        sudo htpasswd -db -c /etc/squid/passwords admin test
        sudo mv .github/scripts/squid.conf /etc/squid/squid.conf
        sudo service squid start

    - name: Unit and Integration Test - ${{ matrix.snowflake_cloud }}
      id: java_tests
      env:
        SNOWFLAKE_CREDENTIAL_FILE: "${{ github.workspace }}/profile.json"
        SHELL: "/bin/bash"
      # Output is tee'd to test/build.log; on any error the ERR trap prints
      # the last 1000 lines of that log in a collapsible group.
      run: |
        set -Eeuo pipefail
        # line-buffer stdout/stderr for all child procs
        export PYTHONUNBUFFERED=1
        export RUST_BACKTRACE=1
        cd test
        trap 'echo "::group::Last 1000 lines of logs"; tail -n 1000 build.log || true; echo "::endgroup::"' ERR
        stdbuf -oL -eL ./build_runtime_jar.sh ../../snowflake-kafka-connector verify apache ${{ matrix.snowflake_cloud }} 2>&1 | tee build.log

    - name: Report Java test failures to job summary
      # Only runs when the test step itself failed (not an earlier setup step).
      if: failure() && steps.java_tests.outcome == 'failure'
      run: ./.github/scripts/parse_java_test_reports.py "${{ github.workspace }}"


================================================
FILE: .github/workflows/build-apache-kafka-images.yml
================================================
name: Build Apache Kafka Docker images

# Builds test/docker/Dockerfile.apache-kafka once per matrix entry and pushes
# the result to ghcr.io. Triggered manually, or by pushes to master that touch
# one of the paths listed below.
on:
  push:
    branches: [ master ]
    paths:
      - 'test/docker/Dockerfile.apache-kafka'
      - 'test/apache_properties/**'
      - 'test/connect-log4j.properties'
      - '.github/workflows/build-apache-kafka-images.yml'
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-22.04
    # The job's GITHUB_TOKEN is used below to log in and push to ghcr.io.
    permissions:
      packages: write
    strategy:
      fail-fast: false
      matrix:
        # One image per Kafka version; each entry pins the Scala and Java
        # versions passed to the Dockerfile as build args.
        include:
          - kafka_version: '2.8.2'
            scala_version: '2.12'
            java_version: '11'
          - kafka_version: '3.9.2'
            scala_version: '2.12'
            java_version: '11'
          - kafka_version: '4.1.1'
            scala_version: '2.13'
            java_version: '17'
    steps:
      - uses: actions/checkout@v4

      - uses: docker/setup-buildx-action@v3

      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push apache-kafka:${{ matrix.kafka_version }}-java${{ matrix.java_version }}
        uses: docker/build-push-action@v6
        with:
          context: test
          file: test/docker/Dockerfile.apache-kafka
          build-args: |
            KAFKA_VERSION=${{ matrix.kafka_version }}
            SCALA_VERSION=${{ matrix.scala_version }}
            JAVA_VERSION=${{ matrix.java_version }}
          push: true
          # Image tag format: <kafka_version>-java<java_version>
          tags: ghcr.io/snowflakedb/snowflake-kafka-connector/apache-kafka:${{ matrix.kafka_version }}-java${{ matrix.java_version }}
          # NOTE(review): two platforms are built but no QEMU setup step is
          # present in this job - confirm the arm64 build does not need emulation.
          platforms: linux/amd64,linux/arm64
          # GitHub Actions cache, scoped per image tag so matrix entries do not
          # share a cache scope.
          cache-from: type=gha,scope=apache-kafka-${{ matrix.kafka_version }}-java${{ matrix.java_version }}
          cache-to: type=gha,mode=max,scope=apache-kafka-${{ matrix.kafka_version }}-java${{ matrix.java_version }}


================================================
FILE: .github/workflows/end-to-end-legacy.yml
================================================
name: Kafka Connector end-to-end tests (legacy platforms)

# Runs the core e2e suite on older Kafka platform versions.
# Intentionally limited to master push and workflow_dispatch — these jobs are
# skipped on pull_requests to reduce concurrent load on the shared Snowflake
# test account. Full PR coverage is provided by end-to-end.yaml on the new
# platforms (apache 4.1.1 / confluent 8.2.0).
on:
  push:
    branches: [ master ]
  workflow_dispatch:

jobs:
  build_and_test:
    runs-on: ubuntu-22.04
    # Job title: "<platform> <version>, [Java <n>,] <cloud> [<test_group>]".
    # The Java part is emitted only for matrix entries that set java_test_version.
    name: >
      ${{ matrix.platform }} ${{ matrix.platform_version }},
      ${{ matrix.java_test_version && format('Java {0},', matrix.java_test_version) || '' }}
      ${{ matrix.snowflake_cloud }}
      [${{ matrix.test_group }}]
    permissions:
      packages: read
    strategy:
      fail-fast: false
      matrix:
        include:
          # Compatibility, schema_evolution and correctness groups are intentionally
          # skipped here — full coverage runs on the new platforms in end-to-end.yaml.
          - platform: apache
            platform_version: '2.8.2'
            snowflake_cloud: 'AWS'
            java_test_version: '11'
            test_group: core
            marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure'
          - platform: apache
            platform_version: '3.9.2'
            snowflake_cloud: 'GCP'
            java_test_version: '11'
            test_group: core
            marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure'
          - platform: confluent
            platform_version: '6.2.15'
            snowflake_cloud: 'AZURE'
            test_group: core
            marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure'
          - platform: confluent
            platform_version: '7.9.3'
            snowflake_cloud: 'AWS'
            test_group: core
            marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure'

    steps:
    - uses: actions/checkout@v4

    # The matrix's snowflake_cloud value is passed verbatim as the script's
    # only argument; the script writes profile.json.
    - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }}
      run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }}
      env:
        SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }}

    - uses: ./.github/actions/build-connector
      with:
        platform: ${{ matrix.platform }}

    - uses: ./.github/actions/run-e2e-tests
      with:
        platform: ${{ matrix.platform }}
        platform-version: ${{ matrix.platform_version }}
        snowflake-cloud: ${{ matrix.snowflake_cloud }}
        # Entries without java_test_version (the confluent ones) fall back to '11'.
        java-version: ${{ matrix.java_test_version || '11' }}
        marker-filter: ${{ matrix.marker_filter }}
        test-group: ${{ matrix.test_group }}


================================================
FILE: .github/workflows/end-to-end-stress.yml
================================================
name: Kafka Connector stress test

# Runs the e2e action with `pressure: 'true'` on a single platform/cloud
# combination. Triggered by pushes to master or manually.
on:
  push:
    branches: [ master ]
  workflow_dispatch:

jobs:
  build_and_test:
    runs-on: ubuntu-22.04
    name: ${{ matrix.platform }} ${{ matrix.platform_version }}, ${{ matrix.snowflake_cloud }}
    # NOTE(review): unlike end-to-end.yaml and end-to-end-legacy.yml, this job
    # does not declare `permissions: packages: read` - confirm whether the
    # run-e2e-tests action needs it here as well.
    strategy:
      fail-fast: false
      matrix:
        include:
          - platform: confluent
            platform_version: '7.6.0'
            snowflake_cloud: 'AWS'
    steps:
    - uses: actions/checkout@v4

    - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }}
      run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }}
      env:
        SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }}

    - uses: ./.github/actions/build-connector
      with:
        platform: ${{ matrix.platform }}

    - uses: ./.github/actions/run-e2e-tests
      env:
        # Less frequent preCommit than default E2E
        CONNECT_OFFSET_FLUSH_INTERVAL_MS: '10000'
      with:
        platform: ${{ matrix.platform }}
        platform-version: ${{ matrix.platform_version }}
        snowflake-cloud: ${{ matrix.snowflake_cloud }}
        # No marker-filter/test-group/java-version inputs are passed here,
        # only the pressure switch.
        pressure: 'true'


================================================
FILE: .github/workflows/end-to-end.yaml
================================================
name: Kafka Connector end-to-end tests

# Full e2e suite on the newest platforms (apache 4.1.1 and confluent 8.2.0),
# split into three test groups per platform. Runs on pushes to master, on all
# pull requests and on manual dispatch.
on:
  push:
    branches: [ master ]
  pull_request:
    branches: ['**']
  workflow_dispatch:

jobs:
  build_and_test:
    runs-on: ubuntu-22.04
    # Job title: "<platform> <version>, [Java <n>,] <cloud> [<test_group>]".
    # The Java part is emitted only for matrix entries that set java_test_version.
    name: >
      ${{ matrix.platform }} ${{ matrix.platform_version }},
      ${{ matrix.java_test_version && format('Java {0},', matrix.java_test_version) || '' }}
      ${{ matrix.snowflake_cloud }}
      [${{ matrix.test_group }}]
    permissions:
      packages: read
    strategy:
      fail-fast: false
      matrix:
        include:
          # ── New platforms: full suite split into 3 parallel groups ───────
          # Each entry's marker_filter is forwarded to the run-e2e-tests
          # action as its marker-filter input.
          - platform: apache
            platform_version: '4.1.1'
            snowflake_cloud: 'AWS'
            java_test_version: '17'
            test_group: compatibility
            marker_filter: 'compatibility and not schema_evolution'
          - platform: apache
            platform_version: '4.1.1'
            snowflake_cloud: 'AWS'
            java_test_version: '17'
            test_group: schema_and_correctness
            marker_filter: 'schema_evolution or correctness'
          - platform: apache
            platform_version: '4.1.1'
            snowflake_cloud: 'AWS'
            java_test_version: '17'
            test_group: core
            marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure'

          - platform: confluent
            platform_version: '8.2.0'
            snowflake_cloud: 'AWS'
            test_group: compatibility
            marker_filter: 'compatibility and not schema_evolution'
          - platform: confluent
            platform_version: '8.2.0'
            snowflake_cloud: 'AWS'
            test_group: schema_and_correctness
            marker_filter: 'schema_evolution or correctness'
          - platform: confluent
            platform_version: '8.2.0'
            snowflake_cloud: 'AWS'
            test_group: core
            marker_filter: 'not compatibility and not schema_evolution and not correctness and not pressure'

    steps:
    - uses: actions/checkout@v4

    - name: Decrypt profile.json in Snowflake Cloud ${{ matrix.snowflake_cloud }}
      run: ./.github/scripts/decrypt_secret.sh ${{ matrix.snowflake_cloud }}
      env:
        SNOWFLAKE_TEST_PROFILE_SECRET: ${{ secrets.SNOWFLAKE_TEST_PROFILE_SECRET }}

    - uses: ./.github/actions/build-connector
      with:
        platform: ${{ matrix.platform }}

    - uses: ./.github/actions/run-e2e-tests
      with:
        platform: ${{ matrix.platform }}
        platform-version: ${{ matrix.platform_version }}
        snowflake-cloud: ${{ matrix.snowflake_cloud }}
        # Entries without java_test_version (the confluent ones) fall back to '11'.
        java-version: ${{ matrix.java_test_version || '11' }}
        marker-filter: ${{ matrix.marker_filter }}
        test-group: ${{ matrix.test_group }}


================================================
FILE: .github/workflows/formatting.yml
================================================
name: formatting

# Formatting gate for Java (google-java-format) and Python (ruff). Runs on
# every pull request and on pushes to master.
on:
  pull_request:
  push:
    branches:
      - master

jobs:
  java:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # Check-only run: skip-commit keeps the action from committing, and
      # --set-exit-if-changed makes the step fail when a file is not formatted.
      - uses: axel-op/googlejavaformat-action@v3
        with:
          skip-commit: true
          version: v1.24.0
          args: "-n --set-exit-if-changed"

  python:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.x"
      - run: pip install ruff
      # Lint, then verify formatting, for the e2e test code under test/.
      - run: ruff check test/tests test/lib test/conftest.py
      - run: ruff format --check test/tests test/lib test/conftest.py

  # Aggregate job: depends on both checks above and does nothing but echo.
  formatting-done:
    needs: [java, python]
    runs-on: ubuntu-latest
    steps:
      - run: echo "All formatting checks passed"


================================================
FILE: .github/workflows/semgrep.yml
================================================
---
# Runs semgrep on pull requests targeting master by delegating to the shared
# reusable workflow in snowflakedb/reusable-workflows.
name: Run semgrep checks

on:
  pull_request:
    branches: [master]

# Token permissions are limited to read-only access to repository contents.
permissions:
  contents: read

jobs:
  run-semgrep-reusable-workflow:
    uses: snowflakedb/reusable-workflows/.github/workflows/semgrep-v2.yml@main
    # The reusable workflow receives the semgrep app token as its `token` secret.
    secrets:
      token: ${{ secrets.SEMGREP_APP_TOKEN }}


================================================
FILE: .gitignore
================================================
.DS_Store
.envrc

# IDEs
.idea/
.settings/
.claude/
.cursor/
.vscode/
.project/

# Python
__pycache__
venv

# Java
.cache/
.classpath
*.iml
target
docker-setup

# Logs
*.log
*.log.*

# Decrypted Snowflake credential profiles (e.g. the profile.json written by
# .github/scripts/decrypt_secret.sh)
profile*.json

licenses/
ai-docs/
docs/

CLAUDE.md
.mcp.json
profile.txt
# profile_qa3.json is already matched by the profile*.json pattern above
profile_qa3.json
profiling-results*/


================================================
FILE: .java-version
================================================
11


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2019 Snowflake Computing, Inc.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# `snowflake-kafka-connector`

[![License](http://img.shields.io/:license-Apache%202-brightgreen.svg)](http://www.apache.org/licenses/LICENSE-2.0.txt)

The Snowflake Kafka Connector is a plugin for Apache Kafka Connect. It ingests data from a Kafka Topic into a Snowflake Table. 

[Official documentation](https://docs.snowflake.com/en/user-guide/kafka-connector) for the Snowflake Kafka Connector

## Contributing

### Guidelines

The following requirements must be met before you can merge your PR:
- Tests: all test suites must pass, see the [test README](https://github.com/snowflakedb/snowflake-kafka-connector/blob/master/test/README.md)
- Formatting: Java sources must pass [Google Java Format](https://github.com/google/google-java-format) (`./format.sh`) and Python test code must pass `ruff check` + `ruff format --check`. The [pre-commit hook](#pre-commit-hook) runs both automatically.
- CLA: all contributors must sign the Snowflake CLA. This is a one-time signature; please provide your email so we can work with you to get this signed after you open a PR.

Thank you for contributing! We will review and approve PRs as soon as we can.

### Pre-commit hook

A pre-commit hook is provided in `.githooks/` that enforces the same formatting checks as CI.
Python formatting is skipped when ruff is not available. To enable the hook:

```bash
git config core.hooksPath .githooks
```

### Unit tests

```bash
mvn package -Dgpg.skip=true
```

Runs all test files in `src/test` that do not end with `IT`. Requires `SNOWFLAKE_CREDENTIAL_FILE` to be set.

### Integration tests

```bash
mvn verify -Dgpg.skip=true
```

Runs all test files in `src/test`, including unit tests.

### End-to-end tests

Refer to [test/README.md](test/README.md).

## Third party licenses
A custom license handling process runs during the build to meet legal standards.
- License files are copied directly from the JAR if present in one of the following locations: META-INF/LICENSE.txt, META-INF/LICENSE, META-INF/LICENSE.md
- If no license file is found, the license must be added manually to the [`process_licenses.py`](https://github.com/snowflakedb/snowflake-kafka-connector/blob/master/scripts/process_licenses.py) script for the build to pass

## Test and Code Coverage Statuses

[![Kafka Connector integration test](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/IntegrationTest.yml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/IntegrationTest.yml)

[![Kafka Connector end-to-end test](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end.yaml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end.yaml)

[![Kafka Connector end-to-end test (legacy platforms)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-legacy.yml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-legacy.yml)

[![Kafka Connector stress test](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-stress.yml/badge.svg?branch=master)](https://github.com/snowflakedb/snowflake-kafka-connector/actions/workflows/end-to-end-stress.yml)

<!-- [![codecov](https://codecov.io/gh/snowflakedb/snowflake-kafka-connector/branch/master/graph/badge.svg)](https://codecov.io/gh/snowflakedb/snowflake-kafka-connector) -->


================================================
FILE: deploy.sh
================================================
#!/bin/bash
#
# Release script. Steps, in order:
#   1. validate the GPG-related environment variables,
#   2. import the private key unless the key id is already in the secret keyring,
#   3. generate a Maven settings file next to this script,
#   4. run `mvn clean deploy` (tests skipped) with those settings,
#   5. build the Confluent package from pom_confluent.xml,
#   6. upload the resulting zip archive(s) to S3.
#
# Environment:
#   GPG_KEY_ID          required; key id looked up in / imported into the keyring
#   GPG_KEY_PASSPHRASE  required; written to the settings file as gpg.passphrase
#   GPG_PRIVATE_KEY     required; path of the key file passed to `gpg --import`
#   SONATYPE_USER       credentials for the `central` server entry; not validated
#   SONATYPE_PWD        here, an unset value is written as an empty element

# exit on error
set -e

THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

if [ -z "$GPG_KEY_ID" ]; then
  echo "[ERROR] Key Id not specified!"
  exit 1
fi

if [ -z "$GPG_KEY_PASSPHRASE" ]; then
  echo "[ERROR] GPG passphrase is not specified for $GPG_KEY_ID!"
  exit 1
fi

if [ -z "$GPG_PRIVATE_KEY" ]; then
  echo "[ERROR] GPG private key file is not specified!"
  exit 1
fi

echo "[INFO] Import PGP Key"
# Only import when the key id is not already present in the secret keyring.
if ! gpg --list-secret-key | grep "$GPG_KEY_ID"; then
  gpg --allow-secret-key-import --import "$GPG_PRIVATE_KEY"
fi

CENTRAL_DEPLOY_SETTINGS_XML="$THIS_DIR/mvn_settings_central_deploy.xml"

# The settings file holds the Sonatype password and the GPG passphrase, so
# create it empty and restrict it to the current user before writing secrets.
: > "$CENTRAL_DEPLOY_SETTINGS_XML"
chmod 600 "$CENTRAL_DEPLOY_SETTINGS_XML"

# The heredoc delimiter is unquoted on purpose: the $VARIABLES below must be
# expanded by the shell. Values are inserted verbatim (no XML escaping).
cat > "$CENTRAL_DEPLOY_SETTINGS_XML" << SETTINGS.XML
<?xml version="1.0" encoding="UTF-8"?>
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
  <servers>
    <server>
      <id>central</id>
      <username>$SONATYPE_USER</username>
      <password>$SONATYPE_PWD</password>
    </server>
  </servers>
  <profiles>
      <profile>
        <id>central</id>
        <activation>
          <activeByDefault>true</activeByDefault>
        </activation>
        <properties>
          <gpg.executable>gpg2</gpg.executable>
          <gpg.keyname>$GPG_KEY_ID</gpg.keyname>
          <gpg.passphrase>$GPG_KEY_PASSPHRASE</gpg.passphrase>
        </properties>
      </profile>
    </profiles>
</settings>
SETTINGS.XML

# Quote the settings path so a checkout directory containing spaces still works.
mvn --settings "$CENTRAL_DEPLOY_SETTINGS_XML" -DskipTests clean deploy

#confluent release
mvn -f pom_confluent.xml --settings "$CENTRAL_DEPLOY_SETTINGS_XML" -DskipTests clean package
#white source
# whitesource/run_whitesource.sh

# Upload each package archive separately: `aws s3 cp` takes exactly one source,
# so passing the glob directly would fail if it matched several files. If
# nothing matches, the literal pattern is passed through, the copy fails and
# the script aborts (set -e).
for package_zip in target/components/packages/*.zip; do
  aws s3 cp "$package_zip" s3://sfc-eng-jenkins/repository/kafka/
done


================================================
FILE: format.sh
================================================
#!/usr/bin/env bash
#
# Formats every *.java file under ./src in place with google-java-format.
# The formatter jar is downloaded once into ./.cache (relative to this script)
# and reused on later runs. Because of --set-exit-if-changed the formatter
# exits non-zero when it had to rewrite a file, so "OK" is printed only when
# all sources were already formatted.

set -euo pipefail
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$SCRIPT_DIR"

DOWNLOAD_URL="https://github.com/google/google-java-format/releases/download/v1.24.0/google-java-format-1.24.0-all-deps.jar"
JAR_FILE="./.cache/google-java-format-1.24.0-all-deps.jar"

# Check for Java first so the jar is not downloaded when it cannot be run.
if ! command -v java > /dev/null; then
  echo "Java not installed."
  exit 1
fi

if [ ! -f "${JAR_FILE}" ]; then
  mkdir -p "$(dirname "${JAR_FILE}")"
  echo "Downloading Google Java format to ${JAR_FILE}"
  # Download under a temporary name and rename only after curl succeeded.
  # Writing straight to JAR_FILE would leave a truncated jar behind when the
  # transfer is interrupted, and the -f check above would then accept it.
  PARTIAL_JAR="${JAR_FILE}.part"
  if ! curl -# -L --fail "${DOWNLOAD_URL}" --output "${PARTIAL_JAR}"; then
    rm -f "${PARTIAL_JAR}"
    exit 1
  fi
  mv "${PARTIAL_JAR}" "${JAR_FILE}"
fi

echo "Running Google Java Format"
find ./src -type f -name "*.java" -print0 | xargs -0 java -jar "${JAR_FILE}" --replace --set-exit-if-changed && echo "OK"


================================================
FILE: pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ /*
  ~  * Copyright (c) 2019 - 2024 Snowflake Computing Inc. All rights reserved.
  ~  */
  -->

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.snowflake</groupId>
    <artifactId>snowflake-kafka-connector</artifactId>
    <version>4.1.0</version>
    <packaging>jar</packaging>
    <name>Snowflake Kafka Connector</name>
    <description>Snowflake Kafka Connect Sink Connector</description>
    <url>https://www.snowflake.com/</url>

    <developers>
        <developer>
            <name>Snowflake Support Team</name>
            <email>snowflake-java@snowflake.com</email>
            <organization>Snowflake Computing</organization>
            <organizationUrl>https://www.snowflake.com</organizationUrl>
        </developer>
    </developers>

    <licenses>
        <license>
            <name>Apache License, Version 2.0</name>
            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
            <distribution>repo</distribution>
        </license>
    </licenses>

    <scm>
        <connection>
            scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git
        </connection>
        <developerConnection>
            scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git
        </developerConnection>
        <url>https://github.com/snowflakedb/snowflake-kafka-connector</url>
    </scm>

    <!-- Build-wide properties: Java language level (11), test skip switches and shared dependency versions -->
    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <!-- skipTests is the master switch. skipUnitTests (read by maven-surefire-plugin) and
             skipIntegrationTests (read by maven-failsafe-plugin in the aws / non-aws profiles)
             both default to it and can be set individually. -->
        <skipTests>false</skipTests>
        <skipUnitTests>${skipTests}</skipUnitTests>
        <skipIntegrationTests>${skipTests}</skipIntegrationTests>

        <!-- kafka.version is shared by the org.apache.kafka artifacts below, except connect-json which pins its own version -->
        <kafka.version>3.9.2</kafka.version>
        <awaitility.version>4.3.0</awaitility.version>
        <assertj-core.version>3.27.7</assertj-core.version>
        <!-- confluent.version is shared by all io.confluent dependencies below -->
        <confluent.version>7.9.2</confluent.version>
        <!--Compatible protobuf version https://github.com/confluentinc/common/blob/v7.7.0/pom.xml#L91 -->
        <protobuf.version>3.25.5</protobuf.version>
        <guava.version>33.6.0-jre</guava.version>

        <!-- jackson.version applies to both jackson-core and jackson-databind -->
        <jackson.version>2.21.2</jackson.version>
        <commons-compress.version>1.28.0</commons-compress.version>
        <maven-surefire-plugin.version>3.5.5</maven-surefire-plugin.version>
        <snowflake-jdbc.version>4.2.0</snowflake-jdbc.version>
        <!-- slf4j-api.version is also used for the test-scoped slf4j-log4j12 binding -->
        <slf4j-api.version>2.0.17</slf4j-api.version>
        <commons-lang3.version>3.20.0</commons-lang3.version>
        <!-- auto-value.version is used for the annotation processor path and for both auto-value dependencies -->
        <auto-value.version>1.11.1</auto-value.version>

    </properties>


    <repositories>
        <repository>
            <id>confluent</id>
            <name>Confluent</name>
            <url>https://packages.confluent.io/maven/</url>
        </repository>

        <repository>
            <id>cloudera-repo</id>
            <url>
                https://repository.cloudera.com/content/repositories/releases/
            </url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-site-plugin</artifactId>
                <version>3.12.1</version>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-project-info-reports-plugin</artifactId>
                <version>3.3.0</version>
            </plugin>
            <plugin>
                <groupId>org.sonatype.central</groupId>
                <artifactId>central-publishing-maven-plugin</artifactId>
                <version>0.10.0</version>
                <extensions>true</extensions>
                <configuration>
                    <autoPublish>true</autoPublish>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>${maven-surefire-plugin.version}</version>
                <configuration>
                    <skipTests>${skipUnitTests}</skipTests>
                    <useModulePath>false</useModulePath>
                    <argLine>--add-opens java.base/java.util=ALL-UNNAMED</argLine>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.15.0</version>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>com.google.auto.value</groupId>
                            <artifactId>auto-value</artifactId>
                            <version>${auto-value.version}</version>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>

            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.8.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <finalName>${project.artifactId}-${project.version}
                    </finalName>
                    <appendAssemblyId>false</appendAssemblyId>
                </configuration>
            </plugin>
            <!-- Unbinds the built-in default-jar execution so the plain project jar is not built.
                 NOTE(review): presumably because maven-assembly-plugin above already produces the
                 artifactId-version jar (jar-with-dependencies, appendAssemblyId=false); confirm. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.5.0</version>
                <executions>
                    <execution>
                        <id>default-jar</id>
                        <!-- phase "none" detaches this execution from the lifecycle -->
                        <phase>none</phase>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-gpg-plugin</artifactId>
                <version>3.2.8</version>
                <executions>
                    <execution>
                        <id>sign-artifacts</id>
                        <phase>install</phase>
                        <goals>
                            <goal>sign</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-source-plugin</artifactId>
                <version>3.4.0</version>
                <executions>
                    <execution>
                        <id>attach-sources</id>
                        <goals>
                            <goal>jar-no-fork</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-javadoc-plugin</artifactId>
                <version>3.12.0</version>
                <executions>
                    <execution>
                        <id>attach-javadocs</id>
                        <phase>prepare-package</phase>
                        <goals>
                            <goal>jar</goal>
                        </goals>
                        <configuration>
                            <failOnError>false</failOnError>
                            <failOnWarnings>false</failOnWarnings>
                            <doclint>none</doclint>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>3.2.0</version>
                <configuration>
                    <mainClass>com.snowflake.kafka.connector.internal.ResetProxyConfigExec</mainClass>
                    <classpathScope>test</classpathScope>
                </configuration>
            </plugin>
        </plugins>

        <!-- disable the default maven-deploy-plugin: publishing is done by central-publishing-maven-plugin and signing by maven-gpg-plugin (sign goal), both declared above -->
        <pluginManagement>
            <plugins>
                <plugin>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <configuration>
                        <skip>true</skip>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.bouncycastle/bcpkix-fips -->
        <dependency>
            <groupId>org.bouncycastle</groupId>
            <artifactId>bcpkix-fips</artifactId>
            <version>2.1.11</version>
            <scope>provided</scope>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.kafka/connect-api -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-api</artifactId>
            <version>${kafka.version}</version>
            <scope>provided</scope>
            <exclusions>
                <exclusion>
                    <groupId>javax.ws.rs</groupId>
                    <artifactId>javax.ws.rs-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.github.luben</groupId>
                    <artifactId>zstd-jni</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.lz4</groupId>
                    <artifactId>lz4-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.xerial.snappy</groupId>
                    <artifactId>snappy-java</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.lz4</groupId>
                    <artifactId>lz4-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.xerial.snappy</groupId>
                    <artifactId>snappy-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.github.luben</groupId>
                    <artifactId>zstd-jni</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!--JDBC driver for building connection with Snowflake-->
        <dependency>
            <groupId>net.snowflake</groupId>
            <artifactId>snowflake-jdbc</artifactId>
            <version>${snowflake-jdbc.version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
            <version>${protobuf.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java-util</artifactId>
            <version>${protobuf.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.12.1</version>
            <exclusions>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-databind</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>${commons-compress.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-lang3</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Direct dependency for commons-lang3 with latest version to fix CVE-2025-48924 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>${commons-lang3.version}</version>
        </dependency>
        
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/io.confluent/kafka-schema-registry-client?repo=confluent-packages -->
        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-schema-registry-client</artifactId>
            <version>${confluent.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>io.confluent</groupId>
                    <artifactId>common-utils</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.confluent</groupId>
                    <artifactId>common-config</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.swagger</groupId>
                    <artifactId>swagger-annotations</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.swagger</groupId>
                    <artifactId>swagger-core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- https://mvnrepository.com/artifact/io.confluent/kafka-avro-serializer -->
        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-avro-serializer</artifactId>
            <version>${confluent.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>io.confluent</groupId>
                    <artifactId>common-utils</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- https://mvnrepository.com/artifact/io.confluent/kafka-connect-avro-converter -->
        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-connect-avro-converter</artifactId>
            <version>${confluent.version}</version>
        </dependency>

        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-schema-registry-client-encryption</artifactId>
            <version>${confluent.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-core -->
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-core</artifactId>
            <version>4.2.33</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-jmx -->
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-jmx</artifactId>
            <version>4.2.33</version>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>

        <!-- Annotations (@AutoValue) visible to source; processor is on annotationProcessorPaths only -->
        <dependency>
            <groupId>com.google.auto.value</groupId>
            <artifactId>auto-value-annotations</artifactId>
            <version>${auto-value.version}</version>
        </dependency>
        <!-- Processor only needed at compile time (generates AutoValue_* classes) -->
        <dependency>
            <groupId>com.google.auto.value</groupId>
            <artifactId>auto-value</artifactId>
            <version>${auto-value.version}</version>
            <scope>provided</scope>
        </dependency>

        <!-- Caffeine cache library for client-side validation (copied from SSv1 SDK) -->
        <dependency>
            <groupId>com.github.ben-manes.caffeine</groupId>
            <artifactId>caffeine</artifactId>
            <version>3.2.4</version>
        </dependency>

        <!-- https://github.com/failsafe-lib/failsafe-->
        <dependency>
            <groupId>dev.failsafe</groupId>
            <artifactId>failsafe</artifactId>
            <version>3.3.2</version>
        </dependency>

        <!--junit for unit test-->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-engine</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-params</artifactId>
            <scope>test</scope>
        </dependency>
        <!--JUnit Jupiter Engine to depend on the JUnit4 engine and JUnit 4 API  -->
        <dependency>
            <groupId>org.junit.vintage</groupId>
            <artifactId>junit-vintage-engine</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Optional : override the JUnit 4 API version provided by junit-vintage-engine -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.1</version>
            <scope>test</scope>
        </dependency>

        <!--Mockito for unit test-->
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>5.23.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j-api.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j-api.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.25.4</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.github.stefanbirkner</groupId>
            <artifactId>system-rules</artifactId>
            <version>1.19.0</version>
            <scope>test</scope>
        </dependency>

        <!-- Apache Commons DBUtils for database operations in tests -->
        <dependency>
            <groupId>commons-dbutils</groupId>
            <artifactId>commons-dbutils</artifactId>
            <version>1.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- Apache Commons DBCP2 for connection pooling in tests -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-dbcp2</artifactId>
            <version>2.14.0</version>
            <scope>test</scope>
        </dependency>

        <!--Kafka JSON converter for SMT unit test-->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-json</artifactId>
            <version>0.9.0.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.13</artifactId>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.13</artifactId>
            <type>test-jar</type>
            <classifier>test</classifier>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-server-common</artifactId>
            <type>test-jar</type>
            <classifier>test</classifier>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-runtime</artifactId>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-runtime</artifactId>
            <version>${kafka.version}</version>
            <classifier>test</classifier>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
            <classifier>test</classifier>
            <type>test-jar</type>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.lz4</groupId>
                    <artifactId>lz4-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.awaitility</groupId>
            <artifactId>awaitility</artifactId>
            <version>${awaitility.version}</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.assertj/assertj-core -->
        <dependency>
            <groupId>org.assertj</groupId>
            <artifactId>assertj-core</artifactId>
            <version>${assertj-core.version}</version>
            <scope>test</scope>
        </dependency>

        <!-- Testcontainers for embedded proxy server in tests -->
        <dependency>
            <groupId>org.testcontainers</groupId>
            <artifactId>testcontainers</artifactId>
            <version>2.0.5</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>com.snowflake</groupId>
            <artifactId>snowpipe-streaming</artifactId>
            <version>1.4.0</version>
        </dependency>

    </dependencies>

    <profiles>
        <!--
        Profile "aws": binds maven-failsafe-plugin to the integration-test and verify goals and
        runs every class matching **/*IT.java.
        -->
        <profile>
            <id>aws</id>
            <build>
                <plugins>
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-failsafe-plugin</artifactId>
                        <version>3.5.5</version>
                        <configuration>
                            <useModulePath>false</useModulePath>
                            <includes>
                                <include>**/*IT.java</include>
                            </includes>
                            <!--
                            NOTE(review): "none" is presumably a placeholder pattern that matches no test class,
                            so that nothing is excluded in this profile; confirm.
                            -->
                            <excludes>
                                <exclude>none</exclude>
                            </excludes>
                            <!-- Integration tests are skipped when the skipIntegrationTests property is true -->
                            <skipTests>${skipIntegrationTests}</skipTests>
                        </configuration>
                        <executions>
                            <execution>
                                <goals>
                                    <goal>integration-test</goal>
                                    <goal>verify</goal>
                                </goals>
                            </execution>
                        </executions>
                    </plugin>
                </plugins>
            </build>
        </profile>

        <!--
        Profile "non-aws": same maven-failsafe-plugin setup as the "aws" profile (all **/*IT.java classes,
        integration-test and verify goals), minus the integration tests listed under <excludes>.
        -->
        <profile>
            <id>non-aws</id>
            <build>
                <plugins>
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-failsafe-plugin</artifactId>
                        <version>3.5.5</version>
                        <configuration>
                            <useModulePath>false</useModulePath>
                            <includes>
                                <include>**/*IT.java</include>
                            </includes>
                            <excludes>
                                <!-- External volume configured only on AWS env -->
                                <exclude>**/*Iceberg*IT.java</exclude>
                                <!-- Rowset API is disabled on Azure and GCP -->
                                <exclude>**/*RowSchemaProvider*IT.java</exclude>
                                <exclude>**/*StreamingIngestClientV2Provider*IT.java</exclude>
                                <exclude>**/*SnowflakeSinkServiceV2*IT.java</exclude>
                            </excludes>
                            <!-- Integration tests are skipped when the skipIntegrationTests property is true -->
                            <skipTests>${skipIntegrationTests}</skipTests>
                        </configuration>
                        <executions>
                            <execution>
                                <goals>
                                    <goal>integration-test</goal>
                                    <goal>verify</goal>
                                </goals>
                            </execution>
                        </executions>
                    </plugin>
                </plugins>
            </build>
        </profile>
    </profiles>

    <!--
    Imports the JUnit BOM (type pom, scope import) so that JUnit artifacts declared without an explicit
    <version> resolve to the version managed by the BOM.
    -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.junit</groupId>
                <artifactId>junit-bom</artifactId>
                <version>5.14.4</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>
</project>


================================================
FILE: pom_confluent.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ /*
  ~  * Copyright (c) 2019 - 2024 Snowflake Computing Inc. All rights reserved.
  ~  */
  -->

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.snowflake</groupId>
    <artifactId>snowflake-kafka-connector</artifactId>
    <version>4.1.0</version>
    <packaging>jar</packaging>
    <name>Snowflake Kafka Connector</name>
    <description>Snowflake Kafka Connect Sink Connector</description>
    <url>https://www.snowflake.com/</url>

    <developers>
        <developer>
            <name>Snowflake Support Team</name>
            <email>snowflake-java@snowflake.com</email>
            <organization>Snowflake Computing</organization>
            <organizationUrl>https://www.snowflake.com</organizationUrl>
        </developer>
    </developers>

    <licenses>
        <license>
            <name>Apache License, Version 2.0</name>
            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
            <distribution>repo</distribution>
        </license>
    </licenses>

    <scm>
        <connection>
            scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git
        </connection>
        <developerConnection>
            scm:git:https://github.com/snowflakedb/snowflake-kafka-connector.git
        </developerConnection>
        <url>https://github.com/snowflakedb/snowflake-kafka-connector</url>
    </scm>

    <!-- Set our Language Level to Java 11 -->
    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <!--
            Test skipping: skipUnitTests and skipIntegrationTests both default to the value of skipTests,
            and each can be overridden independently. maven-surefire-plugin in this file reads skipUnitTests.
        -->
        <skipTests>false</skipTests>
        <skipUnitTests>${skipTests}</skipUnitTests>
        <skipIntegrationTests>${skipTests}</skipIntegrationTests>
        <!-- Locations used by the license processing steps (dependency plugin executions and Python script) -->
        <license.processing.dependencyJarsDir>${project.build.directory}/dependency-jars
        </license.processing.dependencyJarsDir>
        <license.processing.dependencyListFile>${project.build.directory}/dependency-list.txt
        </license.processing.dependencyListFile>
        <!--
            /licenses folder is copied into zip file distribution by kafka-connect-maven-plugin
            It is an undocumented behaviour that originates from the zip file structure https://docs.confluent.io/platform/current/connect/confluent-hub/component-archive.html
            There is no way to change the path for something inside /target, so it needs to be cleaned up independently
        -->
        <license.processing.targetDir>${project.build.directory}/../licenses</license.processing.targetDir>

        <!-- Versions of dependencies and plugins referenced through ${...} placeholders below -->
        <kafka.version>3.9.1</kafka.version>
        <awaitility.version>4.2.2</awaitility.version>
        <assertj-core.version>3.26.3</assertj-core.version>
        <confluent.version>7.9.2</confluent.version>
        <!--Compatible protobuf version https://github.com/confluentinc/common/blob/v7.7.0/pom.xml#L91 -->
        <!-- NOTE(review): the link above points at the v7.7.0 tag while confluent.version is 7.9.2; confirm the protobuf version is still the compatible one -->
        <protobuf.version>3.25.5</protobuf.version>
        <guava.version>33.4.0-jre</guava.version>

        <jackson.version>2.18.2</jackson.version>
        <commons-compress.version>1.27.1</commons-compress.version>
        <maven-surefire-plugin.version>3.5.2</maven-surefire-plugin.version>
        <snowflake-jdbc.version>4.0.2</snowflake-jdbc.version>
        <slf4j-api.version>2.0.17</slf4j-api.version>
        <parquet.version>1.14.4</parquet.version>
        <commons-lang3.version>3.18.0</commons-lang3.version>
        <auto-value.version>1.10.4</auto-value.version>

    </properties>


    <repositories>
        <repository>
            <id>confluent</id>
            <name>Confluent</name>
            <url>https://packages.confluent.io/maven/</url>
        </repository>

        <repository>
            <id>cloudera-repo</id>
            <url>
                https://repository.cloudera.com/content/repositories/releases/
            </url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>

    <build>
        <plugins>
            <!--
            maven-dependency-plugin prepares the inputs of the license processing Python script. It is declared
            once with two executions (Maven warns when the same plugin is declared twice inside <plugins>), both
            bound to the generate-resources phase:
              1. copy-dependencies: copies project dependencies to the target/dependency-jars directory. The license
                 processing Python script will check only this directory for the license files of SDK dependencies.
              2. list: compiles the list of SDK dependencies in 'compile' and 'runtime' scopes. This list is an
                 entry point for the license processing Python script.
            The plugin-level <configuration> below applies to both executions.
            -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <includeScope>runtime</includeScope>
                    <outputFile>${license.processing.dependencyListFile}</outputFile>
                </configuration>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <phase>generate-resources</phase>
                        <configuration>
                            <outputDirectory>${license.processing.dependencyJarsDir}</outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>false</overWriteSnapshots>
                            <overWriteIfNewer>true</overWriteIfNewer>
                        </configuration>
                    </execution>
                    <execution>
                        <goals>
                            <goal>list</goal>
                        </goals>
                        <phase>generate-resources</phase>
                    </execution>
                </executions>
            </plugin>
            <!--
            Plugin executes the license processing Python script, which copies third party license files into the
            "licenses" directory, which is later copied into the zip file distribution.
            -->
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <id>process-third-party-licenses</id>
                        <goals>
                            <goal>exec</goal>
                        </goals>
                        <phase>generate-resources</phase>
                        <configuration>
                            <executable>python3</executable>
                            <arguments>
                                <argument>${project.basedir}/scripts/process_licenses.py</argument>
                                <argument>${license.processing.dependencyListFile}</argument>
                                <argument>${license.processing.dependencyJarsDir}</argument>
                                <argument>${license.processing.targetDir}</argument>
                            </arguments>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>io.confluent</groupId>
                <version>0.11.1</version>
                <artifactId>kafka-connect-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <goals>
                            <goal>kafka-connect</goal>
                        </goals>
                        <configuration>
                            <title>Snowflake Sink Connector</title>
                            <documentationUrl>
                                https://docs.snowflake.com/en/connectors/kafkahp/about
                            </documentationUrl>
                            <description>
                                The Snowflake High Performance Kafka connector lets you
                                quickly and easily move messages in formats
                                such as Avro, JSON, and Protobuf from Kafka topics
                                into Snowflake tables. It uses the Snowpipe Streaming High Performance architecture.
                            </description>
                            <logo>logo/snowflake.png</logo>

                            <supportProviderName>Snowflake Inc.
                            </supportProviderName>
                            <supportSummary>Some features of the connector may be in preview
                                as mentioned in the documentation, and
                                provided primarily for evaluation and testing
                                purposes. If you decide to use a preview feature
                                in production, please contact Snowflake Support
                                before doing so.
                            </supportSummary>
                            <ownerUsername>snowflakeinc</ownerUsername>
                            <ownerType>organization</ownerType>
                            <ownerName>Snowflake Inc.</ownerName>
                            <ownerUrl>https://snowflake.com/</ownerUrl>
                            <ownerLogo>logo/snowflake.png</ownerLogo>

                            <componentTypes>
                                <componentType>sink</componentType>
                            </componentTypes>
                            <tags>
                                <tag>Snowflake</tag>
                                <tag>jdbc</tag>
                                <tag>database</tag>
                                <tag>dbms</tag>
                                <tag>rdbms</tag>
                                <tag>sql</tag>
                                <tag>data warehouse</tag>
                                <tag>high performance</tag>
                            </tags>
                            <confluentControlCenterIntegration>true
                            </confluentControlCenterIntegration>
                            <singleMessageTransforms>true
                            </singleMessageTransforms>
                            <supportedEncodings>any</supportedEncodings>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Clean additional files created outside of target -->
            <plugin>
                <artifactId>maven-clean-plugin</artifactId>
                <version>3.3.2</version>
                <configuration>
                    <failOnError>false</failOnError>
                    <filesets>
                        <fileset>
                            <directory>${project.basedir}/licenses</directory>
                            <includes>
                                <include>**</include>
                            </includes>
                        </fileset>
                    </filesets>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-site-plugin</artifactId>
                <version>3.12.1</version>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-project-info-reports-plugin</artifactId>
                <version>3.3.0</version>
            </plugin>
            <plugin>
                <groupId>org.sonatype.central</groupId>
                <artifactId>central-publishing-maven-plugin</artifactId>
                <version>0.8.0</version>
                <extensions>true</extensions>
                <configuration>
                    <autoPublish>true</autoPublish>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>${maven-surefire-plugin.version}</version>
                <configuration>
                    <skipTests>${skipUnitTests}</skipTests>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>com.google.auto.value</groupId>
                            <artifactId>auto-value</artifactId>
                            <version>${auto-value.version}</version>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>

            <!--
            Builds the jar-with-dependencies assembly during the package phase. appendAssemblyId is false and
            finalName is artifactId-version, so the assembly is written without the "jar-with-dependencies" suffix.
            -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.4.2</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <finalName>${project.artifactId}-${project.version}
                    </finalName>
                    <appendAssemblyId>false</appendAssemblyId>
                </configuration>
            </plugin>
            <!--
            Binds the default-jar execution to phase "none", which disables the regular (thin) jar.
            NOTE(review): presumably done so that the maven-assembly-plugin output is the only jar produced
            under the artifactId-version name; confirm.
            -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <id>default-jar</id>
                        <phase>none</phase>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-gpg-plugin</artifactId>
                <version>3.0.1</version>
                <executions>
                    <execution>
                        <id>sign-artifacts</id>
                        <phase>package</phase>
                        <goals>
                            <goal>sign</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-source-plugin</artifactId>
                <version>3.3.1</version>
                <executions>
                    <execution>
                        <id>attach-sources</id>
                        <goals>
                            <goal>jar-no-fork</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-javadoc-plugin</artifactId>
                <version>3.11.2</version>
                <executions>
                    <execution>
                        <id>attach-javadocs</id>
                        <goals>
                            <goal>jar</goal>
                        </goals>
                        <configuration>
                            <failOnError>false</failOnError>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <!-- disable default maven deploy plugin since we are using gpg:sign-and-deploy-file -->
        <!--
        NOTE(review): this file also configures central-publishing-maven-plugin with autoPublish enabled;
        confirm which mechanism actually performs the deployment and whether the comment above is still accurate.
        -->
        <pluginManagement>
            <plugins>
                <plugin>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <configuration>
                        <skip>true</skip>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.bouncycastle/bcpkix-fips -->
        <dependency>
            <!-- Do not match this with pom.xml-->
            <!-- This dependency won't be present in the kafka connect runtime, hence we are packaging this in an uber jar -->
            <groupId>org.bouncycastle</groupId>
            <artifactId>bcpkix-fips</artifactId>
            <version>2.1.8</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.kafka/connect-api -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-api</artifactId>
            <version>${kafka.version}</version>
            <scope>provided</scope>
            <exclusions>
                <exclusion>
                    <groupId>javax.ws.rs</groupId>
                    <artifactId>javax.ws.rs-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.github.luben</groupId>
                    <artifactId>zstd-jni</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.lz4</groupId>
                    <artifactId>lz4-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.xerial.snappy</groupId>
                    <artifactId>snappy-java</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.lz4</groupId>
                    <artifactId>lz4-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!--JDBC driver for building connection with Snowflake-->
        <dependency>
            <groupId>net.snowflake</groupId>
            <artifactId>snowflake-jdbc</artifactId>
            <version>${snowflake-jdbc.version}</version>
        </dependency>

        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
            <version>${protobuf.version}</version>
        </dependency>
        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java-util</artifactId>
            <version>${protobuf.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.11.4</version>
            <exclusions>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.fasterxml.jackson.core</groupId>
                    <artifactId>jackson-databind</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>${commons-compress.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-lang3</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Direct dependency for commons-lang3 with latest version to fix CVE-2025-48924 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>${commons-lang3.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-databind -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/io.confluent/kafka-schema-registry-client?repo=confluent-packages -->
        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-schema-registry-client</artifactId>
            <version>${confluent.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>io.confluent</groupId>
                    <artifactId>common-utils</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.confluent</groupId>
                    <artifactId>common-config</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.swagger</groupId>
                    <artifactId>swagger-annotations</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>io.swagger</groupId>
                    <artifactId>swagger-core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- https://mvnrepository.com/artifact/io.confluent/kafka-avro-serializer -->
        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-avro-serializer</artifactId>
            <version>${confluent.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>io.confluent</groupId>
                    <artifactId>common-utils</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- https://mvnrepository.com/artifact/io.confluent/kafka-connect-avro-converter -->
        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-connect-avro-converter</artifactId>
            <version>${confluent.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>io.confluent</groupId>
            <artifactId>kafka-schema-registry-client-encryption</artifactId>
            <version>${confluent.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-core -->
        <!--
        NOTE(review): metrics-core (4.2.26) and metrics-jmx (4.2.3) below are pinned to different versions of
        the same io.dropwizard.metrics group; confirm whether the mismatch is intentional or metrics-jmx
        should be aligned with metrics-core.
        -->
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-core</artifactId>
            <version>4.2.26</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/io.dropwizard.metrics/metrics-jmx -->
        <dependency>
            <groupId>io.dropwizard.metrics</groupId>
            <artifactId>metrics-jmx</artifactId>
            <version>4.2.3</version>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>

        <!-- Annotations (@AutoValue) visible to source; the processor is configured on annotationProcessorPaths of maven-compiler-plugin and is also declared below with provided scope -->
        <dependency>
            <groupId>com.google.auto.value</groupId>
            <artifactId>auto-value-annotations</artifactId>
            <version>${auto-value.version}</version>
        </dependency>
        <!-- Processor only needed at compile time (generates AutoValue_* classes) -->
        <dependency>
            <groupId>com.google.auto.value</groupId>
            <artifactId>auto-value</artifactId>
            <version>${auto-value.version}</version>
            <scope>provided</scope>
        </dependency>

        <!-- Caffeine cache library for client-side validation (copied from SSv1 SDK) -->
        <dependency>
            <groupId>com.github.ben-manes.caffeine</groupId>
            <artifactId>caffeine</artifactId>
            <version>2.9.3</version>
        </dependency>

        <!-- https://github.com/failsafe-lib/failsafe-->
        <dependency>
            <groupId>dev.failsafe</groupId>
            <artifactId>failsafe</artifactId>
            <version>3.3.2</version>
        </dependency>

        <dependency>
            <groupId>org.apache.parquet</groupId>
            <artifactId>parquet-column</artifactId>
            <version>${parquet.version}</version>
        </dependency>

        <!--junit for unit test-->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-engine</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-params</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- JUnit Vintage engine, which lets JUnit 4 based tests run on the JUnit Platform alongside JUnit Jupiter tests -->
        <dependency>
            <groupId>org.junit.vintage</groupId>
            <artifactId>junit-vintage-engine</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Optional : override the JUnit 4 API version provided by junit-vintage-engine -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.1</version>
            <scope>test</scope>
        </dependency>

        <!--Mockito for unit test-->
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>2.20.1</version>
            <scope>test</scope>
        </dependency>

        <!-- SLF4J API; its version comes from the slf4j-api.version property -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j-api.version}</version>
        </dependency>
        <!--
        Test-only SLF4J binding, pinned to the same slf4j-api.version property as the API above.
        NOTE(review): upstream SLF4J relocated slf4j-log4j12 to slf4j-reload4j; confirm this coordinate
        still resolves as intended for the 2.0.x line.
        -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j-api.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.17.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>com.github.stefanbirkner</groupId>
            <artifactId>system-rules</artifactId>
            <version>1.19.0</version>
            <scope>test</scope>
        </dependency>

        <!-- Apache Commons DBUtils for database operations in tests -->
        <dependency>
            <groupId>commons-dbutils</groupId>
            <artifactId>commons-dbutils</artifactId>
            <version>1.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- Apache Commons DBCP2 for connection pooling in tests -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-dbcp2</artifactId>
            <version>2.12.0</version>
            <scope>test</scope>
        </dependency>

        <!--Kafka JSON converter for SMT unit test-->
        <!--
        NOTE(review): the version is the literal 0.9.0.0 rather than ${kafka.version} (3.9.1), which the other
        org.apache.kafka dependencies in this file use; confirm whether the old version is intentional.
        -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-json</artifactId>
            <version>0.9.0.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.13</artifactId>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.13</artifactId>
            <type>test-jar</type>
            <classifier>test</classifier>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-server-common</artifactId>
            <type>test-jar</type>
            <classifier>test</classifier>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-runtime</artifactId>
            <version>${kafka.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>connect-runtime</artifactId>
            <version>${kafka.version}</version>
            <classifier>test</classifier>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
            <classifier>test</classifier>
            <type>test-jar</type>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.lz4</groupId>
                    <artifactId>lz4-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.xerial.snappy</groupId>
                    <artifactId>snappy-java</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.awaitility</groupId>
            <artifactId>awaitility</artifactId>
            <version>${awaitility.version}</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.assertj/assertj-core -->
        <dependency>
            <groupId>org.assertj</groupId>
            <artifactId>assertj-core</artifactId>
            <version>${assertj-core.version}</version>
            <scope>test</scope>
        </dependency>

        <!-- Testcontainers for embedded proxy server in tests -->
        <dependency>
            <groupId>org.testcontainers</groupId>
            <artifactId>testcontainers</artifactId>
            <version>1.19.3</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>com.snowflake</groupId>
            <artifactId>snowpipe-streaming</artifactId>
            <version>1.2.0</version>
        </dependency>

    </dependencies>

    <!--
      Build profiles. Both bind maven-failsafe-plugin's integration-test and verify goals and
      differ only in which *IT.java classes are excluded. Test execution can be switched off
      through the skipIntegrationTests property.
    -->
    <profiles>
        <!-- aws: runs every class matching **/*IT.java. -->
        <profile>
            <id>aws</id>
            <build>
                <plugins>
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-failsafe-plugin</artifactId>
                        <version>2.22.2</version>
                        <configuration>
                            <includes>
                                <include>**/*IT.java</include>
                            </includes>
                            <excludes>
                                <!-- "none" is presumably a placeholder pattern that matches no test class, i.e. nothing is excluded -->
                                <exclude>none</exclude>
                            </excludes>
                            <skipTests>${skipIntegrationTests}</skipTests>
                        </configuration>
                        <executions>
                            <execution>
                                <goals>
                                    <goal>integration-test</goal>
                                    <goal>verify</goal>
                                </goals>
                            </execution>
                        </executions>
                    </plugin>
                </plugins>
            </build>
        </profile>

        <!-- non-aws: same as aws, minus the test classes listed under <excludes>. -->
        <profile>
            <id>non-aws</id>
            <build>
                <plugins>
                    <plugin>
                        <groupId>org.apache.maven.plugins</groupId>
                        <artifactId>maven-failsafe-plugin</artifactId>
                        <version>2.22.2</version>
                        <configuration>
                            <includes>
                                <include>**/*IT.java</include>
                            </includes>
                            <excludes>
                                <!-- External volume configured only on AWS env -->
                                <exclude>**/*Iceberg*IT.java</exclude>
                                <!-- Rowset API is disabled on Azure and GCP -->
                                <exclude>**/*RowSchemaProvider*IT.java</exclude>
                                <exclude>**/*StreamingIngestClientV2Provider*IT.java</exclude>
                                <exclude>**/*SnowflakeSinkServiceV2*IT.java</exclude>
                            </excludes>
                            <skipTests>${skipIntegrationTests}</skipTests>
                        </configuration>
                        <executions>
                            <execution>
                                <goals>
                                    <goal>integration-test</goal>
                                    <goal>verify</goal>
                                </goals>
                            </execution>
                        </executions>
                    </plugin>
                </plugins>
            </build>
        </profile>
    </profiles>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.junit</groupId>
                <artifactId>junit-bom</artifactId>
                <version>5.11.2</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>
</project>


================================================
FILE: profile.json.example
================================================
{
  "user": "user name",
  "private_key": "private key",
  "host": "accountname.snowflakecomputing.com:443",
  "schema": "schema name",
  "database": "database name",
  "warehouse": "warehouse name"
}

================================================
FILE: scripts/process_licenses.py
================================================
#!/usr/bin/env python

# This script processes licenses of 3rd party dependencies and stores them in the JAR. The rules are:
# 1. Dependencies that contain a license file have that file put into the shaded JAR as-is.
# 2. Dependencies that do not contain a license file are listed in the file ADDITIONAL_LICENCES, together with the name of their license.
#
#
# The script accepts the following arguments:
# * DEPENDENCY_LIST_FILE_PATH
#     * Can be obtained by running mvn dependency:list -DincludeScope=runtime -DoutputFile=target/dependency_list.txt
# * DEPENDENCIES_DIR
#     * Directory containing the JAR files of all SDK dependencies. Automatically generated by `mvn clean package` in target/dependency-jars
# * TARGET_DIR
#     * Where to save all output, should be target/generated-sources/META-INF/third-party-licenses
#
#
# Useful mvn commands:
# * mvn clean license:add-third-party
#     * Generate dependency report; useful to find out licenses for dependencies that don't ship with a license file
# * mvn dependency:list -DincludeScope=runtime -DoutputFile=target/dependency_list.txt
#     * Used as input of this script (DEPENDENCY_LIST_FILE_PATH)
import sys
from pathlib import Path
from zipfile import ZipFile

# License name constants.
# These are the human-readable license names used as values of ADDITIONAL_LICENSES_MAP;
# main() writes them next to the dependency coordinates in the ADDITIONAL_LICENCES file.
APACHE_LICENSE = "Apache License 2.0"
BSD_2_CLAUSE_LICENSE = "2-Clause BSD License"
BSD_3_CLAUSE_LICENSE = "3-Clause BSD License"
EDL_10_LICENSE = "EDL 1.0"
MIT_LICENSE = "The MIT License"
GO_LICENSE = "The Go license"
BOUNCY_CASTLE_LICENSE = "Bouncy Castle License"
LGPL = "LGPL License"

# Dependencies, keyed as "groupId:artifactId", for which main() neither extracts a license
# file nor writes an ADDITIONAL_LICENCES entry (they are only counted as ignored).
# The SDK does not need to include licenses of dependencies that aren't shaded.
IGNORED_DEPENDENCIES = {"net.snowflake:snowflake-jdbc", "org.slf4j:slf4j-api"}

# List of dependencies, which don't ship with a license file.
# Only add a new record here after verifying that the dependency JAR does not contain a license!
#
# Keys are "groupId:artifactId" (no version); values are one of the license name constants.
# main() consults this map only when a JAR has none of META-INF/LICENSE, META-INF/LICENSE.txt
# or META-INF/LICENSE.md, and raises if such a dependency has no entry here.
ADDITIONAL_LICENSES_MAP = {
    "com.eclipsesource.minimal-json:minimal-json": MIT_LICENSE,
    "com.fasterxml.jackson.dataformat:jackson-dataformat-protobuf": APACHE_LICENSE,
    "com.github.ben-manes.caffeine:caffeine": APACHE_LICENSE,
    "com.github.luben:zstd-jni": BSD_2_CLAUSE_LICENSE,
    "com.google.auto.value:auto-value-annotations": APACHE_LICENSE,
    "com.google.code.findbugs:jsr305": APACHE_LICENSE,
    "com.google.crypto.tink:tink": APACHE_LICENSE,
    "com.google.errorprone:error_prone_annotations": APACHE_LICENSE,
    "com.google.code.findbugs:annotations": LGPL,
    "com.google.code.gson:gson": APACHE_LICENSE,
    "com.google.guava:failureaccess": APACHE_LICENSE,
    "com.google.guava:listenablefuture": APACHE_LICENSE,
    "com.google.j2objc:j2objc-annotations": APACHE_LICENSE,
    "com.google.protobuf:protobuf-java": BSD_3_CLAUSE_LICENSE,
    "com.google.protobuf:protobuf-java-util": BSD_3_CLAUSE_LICENSE,
    "com.google.re2j:re2j": GO_LICENSE,
    "com.hubspot.jackson:jackson-datatype-protobuf": APACHE_LICENSE,
    "com.ibm.jsonata4java:JSONata4Java": APACHE_LICENSE,
    "com.snowflake:snowpipe-streaming": APACHE_LICENSE,
    "com.squareup:protoparser": APACHE_LICENSE,
    "dev.failsafe:failsafe": APACHE_LICENSE,
    "info.picocli:picocli": APACHE_LICENSE,
    "io.confluent:common-utils": APACHE_LICENSE,
    "io.confluent:dek-registry-client": APACHE_LICENSE,
    "io.confluent:kafka-avro-serializer": APACHE_LICENSE,
    "io.confluent:kafka-connect-avro-converter": APACHE_LICENSE,
    "io.confluent:kafka-connect-avro-data": APACHE_LICENSE,
    "io.confluent:kafka-schema-converter": APACHE_LICENSE,
    "io.confluent:kafka-schema-registry-client": APACHE_LICENSE,
    "io.confluent:kafka-schema-registry-client-encryption": APACHE_LICENSE,
    "io.confluent:kafka-schema-registry-client-encryption-tink": APACHE_LICENSE,
    "io.confluent:kafka-schema-rules": APACHE_LICENSE,
    "io.confluent:kafka-schema-serializer": APACHE_LICENSE,
    "io.confluent:logredactor": APACHE_LICENSE,
    "io.confluent:logredactor-metrics": APACHE_LICENSE,
    "io.dropwizard.metrics:metrics-core": APACHE_LICENSE,
    "io.dropwizard.metrics:metrics-jmx": APACHE_LICENSE,
    "io.dropwizard.metrics:metrics-jvm": APACHE_LICENSE,
    "io.swagger.core.v3:swagger-annotations": APACHE_LICENSE,
    "net.snowflake:snowflake-kafka-connector": APACHE_LICENSE,
    "net.snowflake:snowflake-ingest-sdk": APACHE_LICENSE,
    "org.agrona:agrona": APACHE_LICENSE,
    "org.antlr:antlr4-runtime": BSD_3_CLAUSE_LICENSE,
    "org.apache.kafka:kafka-clients": APACHE_LICENSE,
    "org.apache.parquet:parquet-common": APACHE_LICENSE,
    "org.apache.parquet:parquet-format-structures": APACHE_LICENSE,
    "org.bouncycastle:bc-fips": BOUNCY_CASTLE_LICENSE,
    "org.bouncycastle:bcpkix-fips": BOUNCY_CASTLE_LICENSE,
    "org.projectnessie.cel:cel-core": APACHE_LICENSE,
    "org.projectnessie.cel:cel-generated-antlr": APACHE_LICENSE,
    "org.projectnessie.cel:cel-generated-pb": APACHE_LICENSE,
    "org.projectnessie.cel:cel-jackson": APACHE_LICENSE,
    "org.projectnessie.cel:cel-tools": APACHE_LICENSE,
    "org.xerial.snappy:snappy-java": APACHE_LICENSE,
    "org.yaml:snakeyaml": APACHE_LICENSE,
    "org.roaringbitmap:RoaringBitmap": APACHE_LICENSE,
    "org.jspecify:jspecify": APACHE_LICENSE,
}


def parse_cmdline_args():
    """Read and validate the three positional command-line arguments.

    Expects ``sys.argv`` to hold the script name followed by
    DEPENDENCY_LIST_FILE_PATH, DEPENDENCIES_DIR and TARGET_DIR.

    Returns:
        A tuple ``(dependency_list_file, dependencies_dir, target_dir)`` of
        absolute ``Path`` objects. The target directory is not checked here,
        so it may not exist yet.

    Raises:
        Exception: if the argument count is wrong, the dependency list is not
            an existing regular file, or the dependencies directory is not an
            existing directory.
    """
    if len(sys.argv) != 4:
        raise Exception(
            "usage: process_licenses.py DEPENDENCY_LIST_FILE_PATH DEPENDENCIES_DIR TARGET_DIR"
        )

    # Skip the script name and resolve each argument to an absolute path.
    list_file, jars_dir, output_dir = (Path(arg).absolute() for arg in sys.argv[1:])

    if not (list_file.exists() and list_file.is_file()):
        raise Exception(f"File {list_file} does not exist")

    if not (jars_dir.exists() and jars_dir.is_dir()):
        raise Exception(f"Directory {jars_dir} does not exist")

    return list_file, jars_dir, output_dir


def main():
    """Collect third-party license information for every listed dependency.

    For each dependency in the ``mvn dependency:list`` output file:

    * dependencies in ``IGNORED_DEPENDENCIES`` are only counted;
    * if the JAR contains ``META-INF/LICENSE``, ``META-INF/LICENSE.txt`` or
      ``META-INF/LICENSE.md``, the first such entry is copied to
      ``TARGET_DIR/LICENSE_<groupId>__<artifactId>``;
    * otherwise the license name is looked up in ``ADDITIONAL_LICENSES_MAP``
      and appended to ``TARGET_DIR/ADDITIONAL_LICENCES``.

    Raises:
        ValueError: if a dependency line does not have 5 or 6 colon-separated
            coordinates.
        Exception: if an expected JAR is missing, a dependency has neither a
            license file nor a map entry, or fewer than 30 dependencies were
            listed (treated as a sign of a broken input).
    """
    dependency_list_path, dependency_jars_path, target_dir = parse_cmdline_args()

    dependency_count = 0
    dependency_with_license_count = 0
    dependency_without_license_count = 0
    dependency_ignored_count = 0

    # One "groupId:artifactId: license name" line per dependency without a license file.
    missing_license_lines = []

    target_dir.mkdir(parents=True, exist_ok=True)

    with open(dependency_list_path, "r") as dependency_file_handle:
        for line in dependency_file_handle:
            line = line.strip()
            if line == "" or line == "The following files have been resolved:":
                continue
            dependency_count += 1

            # Line is a string like: "commons-codec:commons-codec:jar:1.15:compile -- module org.apache.commons.codec [auto]"
            # Coordinates are groupId:artifactId:type[:classifier]:version:scope.
            artifact_details = line.split()[0]
            coordinates = artifact_details.split(":")
            if len(coordinates) == 5:
                group_id, artifact_id, _, version, _ = coordinates
                jar_name = f"{artifact_id}-{version}.jar"
            elif len(coordinates) == 6:
                # Classified artifact: the classifier follows the version in the JAR file name.
                group_id, artifact_id, _, classifier, version, _ = coordinates
                jar_name = f"{artifact_id}-{version}-{classifier}.jar"
            else:
                raise ValueError(f"Unexpected dependency coordinates: {artifact_details}")

            # Ignored dependencies need no license, so do not require or open their JAR.
            dependency_lookup_key = f"{group_id}:{artifact_id}"
            if dependency_lookup_key in IGNORED_DEPENDENCIES:
                dependency_ignored_count += 1
                continue

            current_jar = Path(dependency_jars_path, jar_name)
            if not current_jar.is_file():
                raise Exception(f"Expected JAR file does not exist: {current_jar}")

            license_found = False
            # The context manager closes the archive even if processing fails.
            with ZipFile(current_jar) as current_jar_as_zip:
                for zip_info in current_jar_as_zip.infolist():
                    if zip_info.is_dir():
                        continue
                    if zip_info.filename in (
                        "META-INF/LICENSE.txt",
                        "META-INF/LICENSE",
                        "META-INF/LICENSE.md",
                    ):
                        license_found = True
                        dependency_with_license_count += 1
                        # Copy the license into the target directory under a per-dependency name.
                        license_target = Path(
                            target_dir, f"LICENSE_{group_id}__{artifact_id}"
                        )
                        license_target.write_bytes(current_jar_as_zip.read(zip_info))
                        break
                    if (
                        "license" in zip_info.filename.lower()
                    ):  # Log potential license matches
                        print(f"Potential license match: {current_jar} {zip_info}")

            if not license_found:
                print(
                    f"License not found {current_jar}; using value from ADDITIONAL_LICENSES_MAP"
                )
                license_name = ADDITIONAL_LICENSES_MAP.get(dependency_lookup_key)
                if license_name:
                    dependency_without_license_count += 1
                    missing_license_lines.append(
                        f"{dependency_lookup_key}: {license_name}\n"
                    )
                else:
                    err_msg = f"The dependency {dependency_lookup_key} does not ship a license file, nor is it defined in ADDITIONAL_LICENSES_MAP"
                    raise Exception(err_msg)

    with open(
        Path(target_dir, "ADDITIONAL_LICENCES"), "w"
    ) as additional_licenses_handle:
        additional_licenses_handle.write("".join(missing_license_lines))

    # Sanity check: a very short list most likely means the input file is wrong.
    if dependency_count < 30:
        raise Exception(
            f"Suspiciously low number of dependency JARs detected in {dependency_jars_path}: {dependency_count}"
        )
    print("License generation finished")
    print(f"\tTotal dependencies: {dependency_count}")
    print(f"\tTotal dependencies (with license): {dependency_with_license_count}")
    print(f"\tTotal dependencies (without license): {dependency_without_license_count}")
    print(f"\tIgnored dependencies: {dependency_ignored_count}")


if __name__ == "__main__":
    main()


================================================
FILE: src/main/java/com/snowflake/ingest/streaming/internal/TimestampWrapper.java
================================================
package com.snowflake.ingest.streaming.internal;

import com.snowflake.kafka.connector.internal.validation.Power10Util;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.time.OffsetDateTime;

/**
 * Copy of {@code net.snowflake.ingest.streaming.internal.TimestampWrapper} from
 * snowflake-ingest-sdk used by {@code PkgDataValidationUtil} to serialize timestamp values without
 * depending on the legacy SDK artifact.
 */
public class TimestampWrapper {

  private final long epoch;
  private final int fraction;
  private final int timezoneOffsetSeconds;
  private final int scale;

  private static final int BITS_FOR_TIMEZONE = 14;
  private static final int MASK_OF_TIMEZONE = (1 << BITS_FOR_TIMEZONE) - 1;

  public TimestampWrapper(OffsetDateTime offsetDateTime, int scale) {
    if (scale < 0 || scale > 9) {
      throw new IllegalArgumentException(
          String.format("Scale must be between 0 and 9, actual: %d", scale));
    }
    this.epoch = offsetDateTime.toEpochSecond();
    this.fraction =
        offsetDateTime.getNano()
            / Power10Util.intTable[9 - scale]
            * Power10Util.intTable[9 - scale];
    this.timezoneOffsetSeconds = offsetDateTime.getOffset().getTotalSeconds();
    this.scale = scale;
  }

  /** Convert the timestamp to a binary representation. */
  public BigInteger toBinary(boolean includeTimezone) {
    BigDecimal timeInNs =
        BigDecimal.valueOf(epoch).scaleByPowerOfTen(9).add(new BigDecimal(fraction));
    BigDecimal scaledTime = timeInNs.scaleByPowerOfTen(scale - 9);
    scaledTime = scaledTime.setScale(0, RoundingMode.DOWN);
    BigInteger fcpInt = scaledTime.unscaledValue();
    if (includeTimezone) {
      int offsetMin = timezoneOffsetSeconds / 60;
      offsetMin += 1440;
      fcpInt = fcpInt.shiftLeft(BITS_FOR_TIMEZONE);
      fcpInt = fcpInt.add(BigInteger.valueOf(offsetMin & MASK_OF_TIMEZONE));
    }
    return fcpInt;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/ConnectorConfigTools.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.KCLogger;
import java.util.Locale;
import java.util.Map;
import org.apache.kafka.common.config.ConfigDef;

/**
 * Static helpers for connector configuration: validators for config values, default-value
 * population, and the enums listing the allowed values of some settings.
 */
public class ConnectorConfigTools {

  private static final KCLogger LOGGER = new KCLogger(ConnectorConfigTools.class.getName());

  /**
   * Validator that accepts the strings "true" and "false". String values are lower-cased first, so
   * the check is case-insensitive.
   */
  public static final ConfigDef.Validator BOOLEAN_VALIDATOR =
      new ConfigDef.Validator() {
        private final ConfigDef.ValidString validator =
            ConfigDef.ValidString.in(
                Boolean.TRUE.toString().toLowerCase(Locale.ROOT),
                Boolean.FALSE.toString().toLowerCase(Locale.ROOT));

        @Override
        public void ensureValid(String name, Object value) {
          if (value instanceof String) {
            value = ((String) value).toLowerCase(Locale.ROOT);
          }
          this.validator.ensureValid(name, value);
        }

        // Overridden, like the other validators in this class, so that the allowed values are
        // shown instead of the anonymous class name
        @Override
        public String toString() {
          return validator.toString();
        }
      };

  /**
   * Fills in the default value for every cache-related setting that is missing from the map and
   * logs each default that was applied. Keys that are already present are left untouched.
   *
   * @param config connector configuration; modified in place
   */
  public static void setDefaultValues(Map<String, String> config) {
    applyDefaultIfAbsent(
        config,
        KafkaConnectorConfigParams.CACHE_TABLE_EXISTS,
        KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT,
        "{} set to default {}");
    applyDefaultIfAbsent(
        config,
        KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS,
        KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT,
        "{} set to default {} ms");
    applyDefaultIfAbsent(
        config,
        KafkaConnectorConfigParams.CACHE_PIPE_EXISTS,
        KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT,
        "{} set to default {}");
    applyDefaultIfAbsent(
        config,
        KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS,
        KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT,
        "{} set to default {} ms");
  }

  /** Stores the stringified default under {@code key} and logs it, unless the key is present. */
  private static void applyDefaultIfAbsent(
      Map<String, String> config, String key, Object defaultValue, String logFormat) {
    if (config.containsKey(key)) {
      return;
    }
    config.put(key, String.valueOf(defaultValue));
    LOGGER.info(logFormat, key, defaultValue);
  }

  /**
   * Get a property from the config map
   *
   * @param config connector configuration
   * @param key name of the key to be retrieved
   * @return property value, or null when the key is absent or mapped to a null or empty value
   */
  public static String getProperty(final Map<String, String> config, final String key) {
    // A single lookup also covers keys that are present but mapped to null, which would
    // otherwise cause a NullPointerException on isEmpty().
    final String value = config.get(key);
    if (value == null || value.isEmpty()) {
      return null;
    }
    return value;
  }

  /* The allowed values for tombstone records. */
  public enum BehaviorOnNullValues {
    // Default as the name suggests, would be a default behavior which will not filter null values.
    // This will put an empty JSON string in corresponding snowflake table.
    // Using this means we will fall back to old behavior before introducing this config.
    DEFAULT,

    // Ignore would filter out records which has null value, but a valid key.
    IGNORE,
    ;

    /* Validator to validate behavior.on.null.values which says whether kafka should keep null value records or ignore them while ingesting into snowflake table. */
    public static final ConfigDef.Validator VALIDATOR =
        new ConfigDef.Validator() {
          private final ConfigDef.ValidString validator = ConfigDef.ValidString.in(names());

          @Override
          public void ensureValid(String name, Object value) {
            if (value instanceof String) {
              value = ((String) value).toLowerCase(Locale.ROOT);
            }
            validator.ensureValid(name, value);
          }

          // Overridden here so that ConfigDef.toEnrichedRst shows possible values correctly
          @Override
          public String toString() {
            return validator.toString();
          }
        };

    // All valid enum values
    public static String[] names() {
      BehaviorOnNullValues[] behaviors = values();
      String[] result = new String[behaviors.length];

      for (int i = 0; i < behaviors.length; i++) {
        result[i] = behaviors[i].toString();
      }

      return result;
    }

    /** Lower-case form of the constant name; this is the form accepted by the validator. */
    @Override
    public String toString() {
      return name().toLowerCase(Locale.ROOT);
    }
  }

  /* https://www.confluent.io/blog/kafka-connect-deep-dive-error-handling-dead-letter-queues/ */
  public enum ErrorTolerance {

    /** Tolerate no errors. */
    NONE,

    /** Tolerate all errors. */
    ALL;

    /**
     * Validator for error tolerance values: accepts the lower-case names of this enum ("none",
     * "all"). String values are lower-cased first, so the check is case-insensitive.
     */
    public static final ConfigDef.Validator VALIDATOR =
        new ConfigDef.Validator() {
          private final ConfigDef.ValidString validator =
              ConfigDef.ValidString.in(ErrorTolerance.names());

          @Override
          public void ensureValid(String name, Object value) {
            if (value instanceof String) {
              value = ((String) value).toLowerCase(Locale.ROOT);
            }
            validator.ensureValid(name, value);
          }

          // Overridden so that the allowed values are shown instead of the anonymous class name
          @Override
          public String toString() {
            return validator.toString();
          }
        };

    /**
     * @return All valid enum values
     */
    public static String[] names() {
      ErrorTolerance[] errorTolerances = values();
      String[] result = new String[errorTolerances.length];

      for (int i = 0; i < errorTolerances.length; i++) {
        result[i] = errorTolerances[i].toString();
      }

      return result;
    }

    /** Lower-case form of the constant name; this is the form accepted by the validator. */
    @Override
    public String toString() {
      return name().toLowerCase(Locale.ROOT);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/ConnectorConfigValidator.java
================================================
package com.snowflake.kafka.connector;

import java.util.Map;

/**
 * Contract for validating a connector configuration supplied as a string-to-string map. {@link
 * DefaultConnectorConfigValidator} is an implementation in this package.
 */
public interface ConnectorConfigValidator {

  /**
   * Validates the supplied connector configuration.
   *
   * <p>The method returns nothing and declares no checked exception; how an invalid configuration
   * is reported is left to the implementation.
   *
   * @param config connector configuration as a map of property name to property value
   */
  void validateConfig(Map<String, String> config);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/Constants.java
================================================
package com.snowflake.kafka.connector;

/** Holder for string and default-value constants shared across the connector. */
public final class Constants {
  public static final String DEFAULT_PIPE_NAME_SUFFIX = "-STREAMING";

  /**
   * Names (keys) of connector configuration properties, together with the default value of some
   * of them. Constants ending in {@code _DEFAULT} hold the default for the property of the same
   * name; constants ending in {@code _MIN} hold a lower bound.
   */
  public static final class KafkaConnectorConfigParams {

    // connector parameter list
    public static final String NAME = "name";
    public static final String TOPICS = "topics";
    public static final String SNOWFLAKE_TOPICS2TABLE_MAP = "snowflake.topic2table.map";
    public static final String SNOWFLAKE_URL_NAME = "snowflake.url.name";
    public static final String SNOWFLAKE_USER_NAME = "snowflake.user.name";
    public static final String SNOWFLAKE_PRIVATE_KEY = "snowflake.private.key";
    public static final String SNOWFLAKE_DATABASE_NAME = "snowflake.database.name";
    public static final String SNOWFLAKE_SCHEMA_NAME = "snowflake.schema.name";
    public static final String SNOWFLAKE_PRIVATE_KEY_PASSPHRASE =
        "snowflake.private.key.passphrase";
    public static final String SNOWFLAKE_ROLE_NAME = "snowflake.role.name";
    public static final String SNOWFLAKE_AUTHENTICATOR = "snowflake.authenticator";
    public static final String SNOWFLAKE_OAUTH_CLIENT_ID = "snowflake.oauth.client.id";
    public static final String SNOWFLAKE_OAUTH_CLIENT_SECRET = "snowflake.oauth.client.secret";
    public static final String SNOWFLAKE_OAUTH_REFRESH_TOKEN = "snowflake.oauth.refresh.token";
    public static final String SNOWFLAKE_OAUTH_TOKEN_ENDPOINT = "snowflake.oauth.token.endpoint";

    // JDBC, metadata and streaming client settings
    public static final String SNOWFLAKE_JDBC_MAP = "snowflake.jdbc.map";
    public static final String SNOWFLAKE_METADATA_CREATETIME = "snowflake.metadata.createtime";
    public static final String SNOWFLAKE_METADATA_TOPIC = "snowflake.metadata.topic";
    public static final String SNOWFLAKE_METADATA_OFFSET_AND_PARTITION =
        "snowflake.metadata.offset.and.partition";
    public static final String SNOWFLAKE_METADATA_ALL = "snowflake.metadata.all";
    public static final String SNOWFLAKE_METADATA_ALL_DEFAULT = "true";
    public static final String SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME =
        "snowflake.streaming.metadata.connectorPushTime";
    public static final boolean SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT = true;
    public static final String SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP =
        "snowflake.streaming.client.provider.override.map";
    public static final String SNOWFLAKE_OPEN_CHANNEL_IO_THREADS =
        "snowflake.open.channel.io.threads";
    public static final int SNOWFLAKE_OPEN_CHANNEL_IO_THREADS_DEFAULT = 50;

    // Validation
    public static final String SNOWFLAKE_VALIDATION = "snowflake.validation";
    public static final String SNOWFLAKE_VALIDATION_DEFAULT = "server_side";

    // Snowpipe Streaming Classic (SSv1) offset migration
    public static final String SNOWFLAKE_SSV1_OFFSET_MIGRATION =
        "snowflake.streaming.classic.offset.migration";
    public static final String SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT = "skip";
    public static final String SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME =
        "snowflake.streaming.classic.offset.migration.include.connector.name";
    public static final boolean SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME_DEFAULT =
        false;

    // Caching
    // The *_EXPIRE_MS defaults are 5 * 60 * 1000 ms, i.e. five minutes.
    public static final String CACHE_TABLE_EXISTS = "snowflake.cache.table.exists";
    public static final boolean CACHE_TABLE_EXISTS_DEFAULT = true;
    public static final String CACHE_TABLE_EXISTS_EXPIRE_MS =
        "snowflake.cache.table.exists.expire.ms";
    public static final long CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT = 5 * 60 * 1000L;
    public static final long CACHE_TABLE_EXISTS_EXPIRE_MS_MIN = 1L;
    public static final String CACHE_PIPE_EXISTS = "snowflake.cache.pipe.exists";
    public static final boolean CACHE_PIPE_EXISTS_DEFAULT = true;
    public static final String CACHE_PIPE_EXISTS_EXPIRE_MS =
        "snowflake.cache.pipe.exists.expire.ms";
    public static final long CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT = 5 * 60 * 1000L;
    public static final long CACHE_PIPE_EXISTS_EXPIRE_MS_MIN = 1L;

    public static final String BEHAVIOR_ON_NULL_VALUES = "behavior.on.null.values";
    public static final String VALUE_CONVERTER_SCHEMAS_ENABLE = "value.converter.schemas.enable";

    // metrics
    public static final String JMX_OPT = "jmx";
    public static final boolean JMX_OPT_DEFAULT = true;

    // Error handling (errors.* keys); the tolerance default is the lower-case name of
    // ConnectorConfigTools.ErrorTolerance.NONE
    public static final String ERRORS_TOLERANCE_CONFIG = "errors.tolerance";
    public static final String ERRORS_TOLERANCE_DEFAULT =
        ConnectorConfigTools.ErrorTolerance.NONE.toString();
    public static final String ERRORS_LOG_ENABLE_CONFIG = "errors.log.enable";
    public static final boolean ERRORS_LOG_ENABLE_DEFAULT = false;
    public static final String ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG =
        "errors.deadletterqueue.topic.name";
    public static final String ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_DEFAULT = "";
    public static final String ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS =
        "enable.task.fail.on.authorization.errors";
    public static final boolean ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT = false;
    // Compatibility validation
    public static final String SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC =
        "snowflake.streaming.validate.compatibility.with.classic";
    public static final boolean SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC_DEFAULT =
        true;

    public static final String
        SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION =
            "snowflake.compatibility.enable.autogenerated.table.name.sanitization";
    public static final boolean
        SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT = false;
    public static final String SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION =
        "snowflake.compatibility.enable.column.identifier.normalization";
    public static final boolean
        SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT = false;
    public static final String SNOWFLAKE_ENABLE_SCHEMATIZATION = "snowflake.enable.schematization";
    public static final boolean SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT = true;

    // MDC logging header
    public static final String ENABLE_MDC_LOGGING_CONFIG = "enable.mdc.logging";
    public static final String ENABLE_MDC_LOGGING_DEFAULT = "false";
    // Converter settings
    public static final String KEY_CONVERTER = "key.converter";
    public static final String VALUE_CONVERTER = "value.converter";
    public static final String VALUE_CONVERTER_SCHEMA_REGISTRY_URL =
        "value.converter.schema.registry.url";
    // Proxy Info
    public static final String JVM_PROXY_HOST = "jvm.proxy.host";
    public static final String JVM_PROXY_PORT = "jvm.proxy.port";
    public static final String JVM_NON_PROXY_HOSTS = "jvm.nonProxy.hosts";
    public static final String JVM_PROXY_USERNAME = "jvm.proxy.username";
    public static final String JVM_PROXY_PASSWORD = "jvm.proxy.password";

    // jvm proxy
    public static final String HTTP_USE_PROXY = "http.useProxy";
    public static final String HTTPS_PROXY_HOST = "https.proxyHost";
    public static final String HTTPS_PROXY_PORT = "https.proxyPort";
    public static final String HTTP_PROXY_HOST = "http.proxyHost";
    public static final String HTTP_PROXY_PORT = "http.proxyPort";
    public static final String HTTP_NON_PROXY_HOSTS = "http.nonProxyHosts";
    public static final String HTTPS_PROXY_USER = "https.proxyUser";
    public static final String HTTPS_PROXY_PASSWORD = "https.proxyPassword";
    public static final String HTTP_PROXY_USER = "http.proxyUser";
    public static final String HTTP_PROXY_PASSWORD = "http.proxyPassword";
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/DefaultConnectorConfigValidator.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.ConnectorConfigTools.BehaviorOnNullValues.VALIDATOR;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JMX_OPT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY;
import static com.snowflake.kafka.connector.Utils.isValidSnowflakeApplicationName;
import static com.snowflake.kafka.connector.Utils.validateProxySettings;

import com.google.common.collect.ImmutableMap;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.AuthenticatorType;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.streaming.StreamingConfigValidator;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.config.ConfigException;

public class DefaultConnectorConfigValidator implements ConnectorConfigValidator {

  private static final KCLogger LOGGER =
      new KCLogger(DefaultConnectorConfigValidator.class.getName());

  private final StreamingConfigValidator streamingConfigValidator;

  public DefaultConnectorConfigValidator(StreamingConfigValidator streamingConfigValidator) {
    this.streamingConfigValidator = streamingConfigValidator;
  }

  public void validateConfig(Map<String, String> config) {
    Map<String, String> invalidConfigParams = new HashMap<String, String>();

    // define the input parameters / keys in one place as static constants,
    // instead of using them directly
    // define the thresholds statically in one place as static constants,
    // instead of using the values directly

    // unique name of this connector instance
    String connectorName = config.getOrDefault(KafkaConnectorConfigParams.NAME, "");
    if (connectorName.isEmpty() || !isValidSnowflakeApplicationName(connectorName)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.NAME,
          Utils.formatString(
              "{} is empty or invalid. It should match Snowflake object identifier syntax. Please"
                  + " see the documentation.",
              KafkaConnectorConfigParams.NAME));
    }

    if (config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP)) {
      try {
        TopicToTableParser.parse(config.get(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP));
      } catch (IllegalArgumentException e) {
        invalidConfigParams.put(
            KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, e.getMessage());
      }
    }

    // sanity check
    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME,
          Utils.formatString(
              "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME));
    }

    // sanity check
    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME,
          Utils.formatString(
              "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME));
    }

    AuthenticatorType authenticator;
    try {
      authenticator =
          AuthenticatorType.fromConfig(
              config.getOrDefault(
                  KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR,
                  AuthenticatorType.SNOWFLAKE_JWT.toConfigValue()));
    } catch (IllegalArgumentException e) {
      invalidConfigParams.put(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR, e.getMessage());
      authenticator = null;
    }
    if (authenticator != null) {
      switch (authenticator) {
        case OAUTH:
          validateOAuthConfig(config, invalidConfigParams);
          break;
        case SNOWFLAKE_JWT:
          if (!config.containsKey(SNOWFLAKE_PRIVATE_KEY)) {
            invalidConfigParams.put(
                SNOWFLAKE_PRIVATE_KEY,
                Utils.formatString("{} cannot be empty", SNOWFLAKE_PRIVATE_KEY));
          }
          break;
        default:
          throw new IllegalStateException("Unhandled authenticator type: " + authenticator);
      }
    }

    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME,
          Utils.formatString(
              "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME));
    }

    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
          Utils.formatString("{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME));
    }

    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME,
          Utils.formatString(
              "{} cannot be empty.", KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME));
    }
    // jvm proxy settings
    invalidConfigParams.putAll(validateProxySettings(config));

    if (config.containsKey(BEHAVIOR_ON_NULL_VALUES)) {
      try {
        // This throws an exception if config value is invalid.
        VALIDATOR.ensureValid(BEHAVIOR_ON_NULL_VALUES, config.get(BEHAVIOR_ON_NULL_VALUES));
      } catch (ConfigException exception) {
        invalidConfigParams.put(
            BEHAVIOR_ON_NULL_VALUES,
            Utils.formatString(
                "Kafka config: {} error: {}", BEHAVIOR_ON_NULL_VALUES, exception.getMessage()));
      }
    }

    if (config.containsKey(JMX_OPT)) {
      if (!(config.get(JMX_OPT).equalsIgnoreCase("true")
          || config.get(JMX_OPT).equalsIgnoreCase("false"))) {
        invalidConfigParams.put(
            JMX_OPT,
            Utils.formatString("Kafka config: {} should either be true or false", JMX_OPT));
      }
    }

    validateCacheConfig(config, invalidConfigParams);

    validateCompatibilitySettings(config, invalidConfigParams);

    // Check all config values for ingestion method == IngestionMethodConfig.SNOWPIPE_STREAMING
    invalidConfigParams.putAll(streamingConfigValidator.validate(config));

    // logs and throws exception if there are invalid params
    handleInvalidParameters(ImmutableMap.copyOf(invalidConfigParams));
  }

  private void validateOAuthConfig(
      Map<String, String> config, Map<String, String> invalidConfigParams) {
    String clientId = config.getOrDefault(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID, "");
    if (clientId.isEmpty()) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID,
          Utils.formatString(
              "{} must be non-empty when using oauth authenticator",
              KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID));
    }

    String clientSecret =
        config.getOrDefault(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET, "");
    if (clientSecret.isEmpty()) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET,
          Utils.formatString(
              "{} must be non-empty when using oauth authenticator",
              KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET));
    }
  }

  private void validateCompatibilitySettings(
      Map<String, String> config, Map<String, String> invalidConfigParams) {
    boolean validateCompat =
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC,
                String.valueOf(
                    KafkaConnectorConfigParams
                        .SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC_DEFAULT)));
    if (!validateCompat) {
      return;
    }

    String optOutHint =
        " To skip this check, set "
            + KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
            + "=false.";

    // snowflake.validation must be client_side
    String validation =
        config.getOrDefault(
            KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION,
            KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT);
    if (!"client_side".equals(validation)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION,
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
              + " is enabled but "
              + KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION
              + " is set to '"
              + validation
              + "'. For KC v3 compatibility, set "
              + KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION
              + "=client_side."
              + optOutHint);
    }

    // snowflake.compatibility.enable.column.identifier.normalization must be true
    String columnNormalization =
        config.getOrDefault(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
            String.valueOf(
                KafkaConnectorConfigParams
                    .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT));
    if (!"true".equalsIgnoreCase(columnNormalization)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
              + " is enabled but "
              + KafkaConnectorConfigParams
                  .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION
              + " is set to '"
              + columnNormalization
              + "'. For KC v3 compatibility, set "
              + KafkaConnectorConfigParams
                  .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION
              + "=true."
              + optOutHint);
    }

    // snowflake.compatibility.enable.autogenerated.table.name.sanitization must be true
    String tableSanitization =
        config.getOrDefault(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
            String.valueOf(
                KafkaConnectorConfigParams
                    .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT));
    if (!"true".equalsIgnoreCase(tableSanitization)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams
              .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
              + " is enabled but "
              + KafkaConnectorConfigParams
                  .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION
              + " is set to '"
              + tableSanitization
              + "'. For KC v3 compatibility, set "
              + KafkaConnectorConfigParams
                  .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION
              + "=true."
              + optOutHint);
    }

    // snowflake.enable.schematization must be explicitly set (any value)
    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION,
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
              + " is enabled but "
              + KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION
              + " is not explicitly set. The default changed from false (KC v3) to true (KC v4)."
              + " Please set "
              + KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION
              + " explicitly to confirm your intended behavior."
              + optOutHint);
    }

    // snowflake.streaming.classic.offset.migration must be explicitly set
    if (!config.containsKey(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION,
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
              + " is enabled but "
              + KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION
              + " is not explicitly set. If migrating from KC v3, set it to 'strict' or"
              + " 'best_effort' so that committed offsets from the previous connector version are"
              + " carried over. If migrating from file-based Snowpipe, set it to 'skip'."
              + optOutHint);
    }

    // snowflake.streaming.classic.offset.migration.include.connector.name is only relevant
    // when offset migration is active (strict or best_effort), not when skipped.
    String offsetMigration =
        config.getOrDefault(
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION,
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT);
    boolean offsetMigrationActive =
        "strict".equalsIgnoreCase(offsetMigration)
            || "best_effort".equalsIgnoreCase(offsetMigration);
    if (offsetMigrationActive
        && !config.containsKey(
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME)) {
      invalidConfigParams.put(
          KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME,
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC
              + " is enabled but "
              + KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME
              + " is not explicitly set. Whether the SSv1 channel name included the connector"
              + " name depends on the KC v3 configuration that was used. Please set "
              + KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME
              + " explicitly to match how the previous connector was configured."
              + optOutHint);
    }
  }

  private void validateCacheConfig(
      Map<String, String> config, Map<String, String> invalidConfigParams) {
    // Validate table exists cache boolean flag
    if (config.containsKey(CACHE_TABLE_EXISTS)) {
      String value = config.get(CACHE_TABLE_EXISTS);
      if (!isValidBooleanString(value)) {
        invalidConfigParams.put(
            CACHE_TABLE_EXISTS,
            Utils.formatString(
                "{} must be either 'true' or 'false', got: {}", CACHE_TABLE_EXISTS, value));
      }
    }

    // Validate table exists cache expiration
    if (config.containsKey(CACHE_TABLE_EXISTS_EXPIRE_MS)) {
      try {
        long value = Long.parseLong(config.get(CACHE_TABLE_EXISTS_EXPIRE_MS));
        if (value <= 0) {
          invalidConfigParams.put(
              CACHE_TABLE_EXISTS_EXPIRE_MS,
              Utils.formatString(
                  "{} must be a positive number, got: {}", CACHE_TABLE_EXISTS_EXPIRE_MS, value));
        }
      } catch (NumberFormatException e) {
        invalidConfigParams.put(
            CACHE_TABLE_EXISTS_EXPIRE_MS,
            Utils.formatString(
                "{} must be a valid long number, got: {}",
                CACHE_TABLE_EXISTS_EXPIRE_MS,
                config.get(CACHE_TABLE_EXISTS_EXPIRE_MS)));
      }
    }

    // Validate pipe exists cache boolean flag
    if (config.containsKey(CACHE_PIPE_EXISTS)) {
      String value = config.get(CACHE_PIPE_EXISTS);
      if (!isValidBooleanString(value)) {
        invalidConfigParams.put(
            CACHE_PIPE_EXISTS,
            Utils.formatString(
                "{} must be either 'true' or 'false', got: {}", CACHE_PIPE_EXISTS, value));
      }
    }

    // Validate pipe exists cache expiration
    if (config.containsKey(CACHE_PIPE_EXISTS_EXPIRE_MS)) {
      try {
        long value = Long.parseLong(config.get(CACHE_PIPE_EXISTS_EXPIRE_MS));
        if (value <= 0) {
          invalidConfigParams.put(
              CACHE_PIPE_EXISTS_EXPIRE_MS,
              Utils.formatString(
                  "{} must be a positive number, got: {}", CACHE_PIPE_EXISTS_EXPIRE_MS, value));
        }
      } catch (NumberFormatException e) {
        invalidConfigParams.put(
            CACHE_PIPE_EXISTS_EXPIRE_MS,
            Utils.formatString(
                "{} must be a valid long number, got: {}",
                CACHE_PIPE_EXISTS_EXPIRE_MS,
                config.get(CACHE_PIPE_EXISTS_EXPIRE_MS)));
      }
    }
  }

  private static boolean isValidBooleanString(String value) {
    return "true".equalsIgnoreCase(value) || "false".equalsIgnoreCase(value);
  }

  private void handleInvalidParameters(ImmutableMap<String, String> invalidConfigParams) {
    // log all invalid params and throw exception
    if (!invalidConfigParams.isEmpty()) {
      String invalidParamsMessage = "";

      for (String invalidKey : invalidConfigParams.keySet()) {
        String invalidValue = invalidConfigParams.get(invalidKey);
        String errorMessage =
            Utils.formatString(
                "Config value '{}' is invalid. Error message: '{}'", invalidKey, invalidValue);
        invalidParamsMessage += errorMessage + "\n";
      }

      LOGGER.error("Invalid config: " + invalidParamsMessage);
      throw SnowflakeErrors.ERROR_0001.getException(invalidParamsMessage);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/SemanticVersion.java
================================================
package com.snowflake.kafka.connector;

import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Represents a parsed semantic version.
 *
 * <p>The first {@code major.minor.patch} triple found anywhere in the input is used, so
 * surrounding text is tolerated. A version counts as a release candidate when its text contains
 * {@code -rc} in any letter case.
 *
 * <p>Ordering, equality and hashing look only at the numeric triple; the release-candidate flag
 * and the original text are ignored.
 */
public class SemanticVersion implements Comparable<SemanticVersion> {
  // Pattern to match versions like "3.1.0" or "4.0.0-rc" or "4.0.0-RC1". Compiled once instead of
  // on every construction.
  private static final Pattern VERSION_PATTERN =
      Pattern.compile("(\\d+)\\.(\\d+)\\.(\\d+)(?:-[rR][cC]\\d*)?");

  private final int major;
  private final int minor;
  private final int patch;
  private final boolean isReleaseCandidate;
  private final String originalVersion;

  /**
   * Parses {@code version}.
   *
   * @param version text containing a {@code major.minor.patch} triple
   * @throws IllegalArgumentException if {@code version} is null or contains no such triple; a
   *     component too large for an {@code int} surfaces as {@link NumberFormatException}, which is
   *     a subclass of it
   */
  public SemanticVersion(String version) {
    if (version == null) {
      // report null like any other unparsable input instead of failing with an NPE
      throw new IllegalArgumentException("Invalid version format: " + version);
    }
    this.originalVersion = version;
    Matcher matcher = VERSION_PATTERN.matcher(version);
    if (!matcher.find()) {
      throw new IllegalArgumentException("Invalid version format: " + version);
    }
    this.major = Integer.parseInt(matcher.group(1));
    this.minor = Integer.parseInt(matcher.group(2));
    this.patch = Integer.parseInt(matcher.group(3));
    this.isReleaseCandidate = version.toLowerCase().contains("-rc");
  }

  /** @return the text this version was parsed from */
  public String originalVersion() {
    return originalVersion;
  }

  /** @return true if the original text contains {@code -rc} (case-insensitive) */
  public boolean isReleaseCandidate() {
    return isReleaseCandidate;
  }

  public int major() {
    return major;
  }

  public int minor() {
    return minor;
  }

  public int patch() {
    return patch;
  }

  /** Compares by major, then minor, then patch. */
  @Override
  public int compareTo(SemanticVersion other) {
    int byMajor = Integer.compare(this.major, other.major);
    if (byMajor != 0) {
      return byMajor;
    }
    int byMinor = Integer.compare(this.minor, other.minor);
    if (byMinor != 0) {
      return byMinor;
    }
    return Integer.compare(this.patch, other.patch);
  }

  /** Two versions are equal when their major, minor and patch numbers are equal. */
  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof SemanticVersion)) {
      return false;
    }
    SemanticVersion other = (SemanticVersion) obj;
    return this.major == other.major && this.minor == other.minor && this.patch == other.patch;
  }

  @Override
  public int hashCode() {
    return Objects.hash(major, minor, patch);
  }

  /** @return the original version text */
  @Override
  public String toString() {
    return originalVersion;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/SnowflakeSinkTask.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.SnowflakeSinkTaskMetrics;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.SnowflakeSinkServiceV2;
import com.snowflake.kafka.connector.internal.streaming.telemetry.PeriodicTelemetryReporter;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
import java.util.function.Supplier;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.RetriableException;
import org.apache.kafka.connect.sink.ErrantRecordReporter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;

/**
 * SnowflakeSinkTask implements SinkTask for Kafka Connect framework.
 *
 * <p>Expects configuration from SnowflakeStreamingSinkConnector
 *
 * <p>Creates sink service instance, takes records loaded from those Kafka partitions and ingests to
 * Snowflake via Sink service
 */
public class SnowflakeSinkTask extends SinkTask {
  // NOTE(review): WAIT_TIME / REPEAT_TIME are presumably the polling interval and the number of
  // polls used by waitFor (12 x 5 sec = 60 sec in total) - confirm against waitFor.
  private static final long WAIT_TIME = 5 * 1000; // 5 sec, expressed in milliseconds
  private static final int REPEAT_TIME = 12; // 12 repetitions of WAIT_TIME = 60 sec

  // the dynamic logger is intended to be attached per task instance. the instance id will be set
  // during task start, however if it is not set, it falls back to the static logger
  private static final KCLogger STATIC_LOGGER =
      new KCLogger(SnowflakeSinkTask.class.getName() + "_STATIC");
  // created in the constructors of this task
  private KCLogger DYNAMIC_LOGGER;

  // sink service doing the actual ingestion; (re)created in start()
  private volatile SnowflakeSinkService sink = null;

  // snowflake JDBC connection provides methods to interact with user's
  // snowflake
  // account and execute queries
  private SnowflakeConnectionService conn = null;

  // id of this task, taken from the task configuration in start(); "-1" until the task is started
  private String taskConfigId = "-1";

  // wall-clock time (System.currentTimeMillis()) captured in start()
  private long taskStartTime;

  private final SnowflakeSinkTaskAuthorizationExceptionTracker authorizationExceptionTracker =
      new SnowflakeSinkTaskAuthorizationExceptionTracker();

  // Stores channel error exception detected in preCommit to fail on next put() call
  private volatile SnowflakeKafkaConnectorException channelErrorToFailOn = null;

  // Periodic telemetry reporter for channel status; created and started in start()
  private PeriodicTelemetryReporter telemetryReporter = null;

  // Task-level JMX metrics (lifecycle, throughput, duration); a no-op implementation until
  // start() replaces it
  private TaskMetrics taskMetrics = TaskMetrics.noop();

  /**
   * No-argument constructor used by the Kafka Connect framework. Only the per-instance logger is
   * set up here; the sink and the connection are created later in {@code start}.
   */
  public SnowflakeSinkTask() {
    this.DYNAMIC_LOGGER = new KCLogger(getClass().getName());
  }

  /**
   * Test-only constructor that injects a ready-made sink service and connection service.
   *
   * @param service sink service the task should use
   * @param connectionService connection service the task should use
   */
  @VisibleForTesting
  public SnowflakeSinkTask(
      SnowflakeSinkService service, SnowflakeConnectionService connectionService) {
    this.conn = connectionService;
    this.sink = service;
    this.DYNAMIC_LOGGER = new KCLogger(getClass().getName());
  }

  /**
   * Waits (through {@code waitFor}) until the connection service has been assigned and returns
   * it.
   *
   * @return the connection service of this task
   * @throws RuntimeException the exception created from {@code SnowflakeErrors.ERROR_5013} when
   *     waiting fails for any reason
   */
  private SnowflakeConnectionService getConnection() {
    try {
      waitFor(() -> this.conn != null);
    } catch (Exception waitFailure) {
      // every failure of the wait is reported as the same connector error
      throw SnowflakeErrors.ERROR_5013.getException();
    }
    return this.conn;
  }

  /**
   * Returns the connection service of this task wrapped in an {@link Optional}.
   *
   * <p>The lookup is delegated to {@code getConnection()}, which throws the {@code ERROR_5013}
   * exception when its wait fails. NOTE(review): an empty Optional is therefore only possible if
   * that wait can complete while the connection is still null - confirm against {@code waitFor}.
   *
   * @return Optional of SnowflakeConnectionService
   */
  public Optional<SnowflakeConnectionService> getSnowflakeConnection() {
    final SnowflakeConnectionService connection = getConnection();
    return Optional.ofNullable(connection);
  }

  /**
   * Waits (through {@code waitFor}) until a sink service exists and is not closed, then returns
   * it.
   *
   * @return the sink service of this task
   * @throws RuntimeException the exception created from {@code SnowflakeErrors.ERROR_5014} when
   *     waiting fails for any reason
   */
  protected SnowflakeSinkService getSink() {
    try {
      waitFor(() -> this.sink != null && !this.sink.isClosed());
    } catch (Exception waitFailure) {
      // every failure of the wait is reported as the same connector error
      throw SnowflakeErrors.ERROR_5014.getException();
    }
    return this.sink;
  }

  /**
   * Parses the task configuration and performs the one-time setup of the task: enables the JVM
   * proxy, builds the Snowflake connection, closes a previously created sink (if any), sets up the
   * task metrics, creates the sink service and starts the periodic telemetry reporter.
   *
   * @param parsedConfig - has the configuration settings
   */
  @Override
  public void start(final Map<String, String> parsedConfig) {
    this.DYNAMIC_LOGGER.info("starting task...");
    final long startNanos = System.nanoTime();

    // Parse raw config once into typed structure; validates required fields and applies defaults
    final SinkTaskConfig config = SinkTaskConfig.from(parsedConfig);

    // get task id and start time
    this.taskStartTime = System.currentTimeMillis();
    this.taskConfigId = config.getTaskId();

    this.authorizationExceptionTracker.updateStateOnTaskStart(parsedConfig);

    // enable jvm proxy
    Utils.enableJVMProxy(parsedConfig);

    KafkaRecordErrorReporter kafkaRecordErrorReporter = createKafkaRecordErrorReporter();

    conn =
        SnowflakeConnectionServiceFactory.builder()
            .setProperties(config)
            .setTaskID(this.taskConfigId)
            .build();

    // a sink left over from an earlier start() is closed before it is replaced below
    if (this.sink != null) {
      this.sink.closeAll();
    }

    String connectorName = config.getConnectorName();

    Optional<MetricsJmxReporter> metricsJmxReporter =
        config.isJmxEnabled()
            ? Optional.of(
                new MetricsJmxReporter(new com.codahale.metrics.MetricRegistry(), connectorName))
            : Optional.empty();

    // Initialize task-level metrics (real JMX or noop depending on config)
    this.taskMetrics =
        metricsJmxReporter
            .<TaskMetrics>map(
                reporter ->
                    new SnowflakeSinkTaskMetrics(
                        connectorName,
                        this.taskConfigId,
                        reporter,
                        () ->
                            (int)
                                StreamingClientPools.getClientCountForTask(
                                    connectorName, this.taskConfigId)))
            .orElse(TaskMetrics.noop());
    // NOTE(review): the start duration is recorded here, i.e. before the sink service and the
    // telemetry reporter below are created, so their setup time is not part of this metric.
    this.taskMetrics.recordStartDuration(System.nanoTime() - startNanos);

    this.sink =
        new SnowflakeSinkServiceV2(
            conn,
            config,
            kafkaRecordErrorReporter,
            this.context,
            metricsJmxReporter,
            this.taskMetrics);

    // Initialize and start periodic telemetry reporter for channel status
    this.telemetryReporter =
        new PeriodicTelemetryReporter(
            conn.getTelemetryClient(), sink::getPartitionChannels, config);
    this.telemetryReporter.start();

    // The message has a single placeholder, so only the duration is passed. Passing the task id
    // first made the id show up as the execution time.
    DYNAMIC_LOGGER.info(
        "task started, execution time: {} milliseconds",
        getDurationFromStartMs(this.taskStartTime));
  }

  /**
   * Shuts the task down. According to the SinkTask contract this is invoked only once outstanding
   * calls to other methods have completed, e.g. after the current put and a final preCommit.
   *
   * <p>Shutdown order: the telemetry reporter is stopped first, then the task metrics are
   * unregistered, and finally the sink service is stopped.
   *
   * <p>Note that calling this method does not perform synchronous cleanup in Snowpipe based
   * implementation
   */
  @Override
  public void stop() {
    DYNAMIC_LOGGER.info("stopping task {}", taskConfigId);

    // Stop telemetry reporter first
    if (telemetryReporter != null) {
      telemetryReporter.stop();
    }

    taskMetrics.unregister();

    if (sink != null) {
      sink.stop();
    }

    final long totalRuntimeMs = getDurationFromStartMs(taskStartTime);
    DYNAMIC_LOGGER.info("task stopped, total task runtime: {} milliseconds", totalRuntimeMs);
  }

  /**
   * Starts ingestion for the given partitions in the sink service and updates the open-related
   * task metrics (timing, open count, number of assigned partitions).
   *
   * @param partitions - The list of all partitions that are now assigned to the task
   */
  @Override
  public void open(final Collection<TopicPartition> partitions) {
    final long openStartedAt = System.currentTimeMillis();
    try (TaskMetrics.TimingContext ignored = this.taskMetrics.timeOpen()) {
      sink.startPartitions(partitions);
      this.taskMetrics.incOpenCount();
      this.taskMetrics.setAssignedPartitions(partitions.size());
    }
    DYNAMIC_LOGGER.info(
        "task opened with {} partitions, execution time: {} milliseconds",
        partitions.size(),
        getDurationFromStartMs(openStartedAt));
  }

  /**
   * Closes the given partitions in the sink service (when a sink exists) and updates the
   * close-related task metrics: timing, close count, and the assigned-partition gauge, which is
   * reset to 0.
   *
   * <p>Closes all running task because the parameter of open function contains all partition info
   * but not only the new partition
   *
   * @param partitions - The list of all partitions that were assigned to the task
   */
  @Override
  public void close(final Collection<TopicPartition> partitions) {
    long startTime = System.currentTimeMillis();
    try (TaskMetrics.TimingContext ignored = taskMetrics.timeClose()) {
      this.DYNAMIC_LOGGER.info(
          "closing task {} with {} partitions", this.taskConfigId, partitions.size());
      if (this.sink != null) {
        this.sink.close(partitions);
      }
      taskMetrics.incCloseCount();
      taskMetrics.setAssignedPartitions(0);
    }

    // The message has a single placeholder, so only the duration is passed. Passing the task id
    // first made the id show up as the execution time.
    this.DYNAMIC_LOGGER.info(
        "task closed, execution time: {} milliseconds", getDurationFromStartMs(startTime));
  }

  /**
   * Hands a batch of records to the sink service for ingestion into Snowflake.
   *
   * <p>Before any record is processed the call fails if the authorization tracker reports a
   * failure, or if a channel error was stored for this task; the stored error is cleared and
   * rethrown wrapped in a {@link ConnectException}.
   *
   * @param records - collection of records from kafka topic/partitions for this connector
   */
  @Override
  public void put(final Collection<SinkRecord> records) {
    authorizationExceptionTracker.throwExceptionIfAuthorizationFailed();

    // Check for channel errors detected in preCommit and fail the task
    if (channelErrorToFailOn != null) {
      final SnowflakeKafkaConnectorException pendingError = channelErrorToFailOn;
      channelErrorToFailOn = null; // Clear so we don't throw again on restart
      throw new ConnectException(pendingError.getMessage(), pendingError);
    }

    final long batchSize = records.size();
    this.DYNAMIC_LOGGER.debug("Calling PUT with {} records", batchSize);

    final long putStartedAt = System.currentTimeMillis();
    try (TaskMetrics.TimingContext ignored = this.taskMetrics.timePut()) {
      getSink().insert(records);
      this.taskMetrics.markPutRecords(batchSize);
    }

    final String summary = Utils.formatString("Executed PUT with {} records", batchSize);
    logWarningForPutAndPrecommit(putStartedAt, summary, false);
  }

  /**
   * Reports which offsets are safe for Kafka Connect to commit.
   *
   * <p>Failures while querying the sink are logged and reported to the authorization tracker but
   * never thrown from here; in that case the offsets collected so far (possibly none) are
   * returned. A channel error with code {@code ERROR_5030} is additionally stored so that the next
   * {@code put()} call can fail the task.
   *
   * @param offsets - the current map of offsets as of the last call to put
   * @return an empty map if Connect-managed offset commit is not desired, otherwise a map of
   *     offsets by topic-partition that are safe to commit. If we return the same offsets that was
   *     passed in, Kafka Connect assumes that all offsets that are already passed to put() are safe
   *     to commit.
   * @throws RetriableException when meet any issue during processing
   */
  @Override
  public Map<TopicPartition, OffsetAndMetadata> preCommit(
      Map<TopicPartition, OffsetAndMetadata> offsets) throws RetriableException {
    DYNAMIC_LOGGER.info("Precommit started for {} partitions", offsets.size());

    // Rendering every entry is only worth the cost when debug output is actually enabled.
    if (DYNAMIC_LOGGER.isDebugEnabled()) {
      DYNAMIC_LOGGER.debug(
          "Precommit partitions and offsets: {}", Arrays.toString(offsets.entrySet().toArray()));
    }

    final long preCommitStartMs = System.currentTimeMillis();
    try (TaskMetrics.TimingContext ignored = taskMetrics.timePreCommit()) {
      // Guard clauses: an empty result tells Kafka Connect that nothing should be committed.
      if (sink == null || sink.isClosed()) {
        this.DYNAMIC_LOGGER.warn(
            "sink not initialized or closed before preCommit", this.taskConfigId);
        return new HashMap<>();
      }
      if (sink.getPartitionCount() == 0) {
        this.DYNAMIC_LOGGER.warn("no partition is assigned", this.taskConfigId);
        return new HashMap<>();
      }

      final Map<TopicPartition, OffsetAndMetadata> safeToCommit = new HashMap<>();
      try {
        final Map<TopicPartition, Long> sinkOffsets = sink.getCommittedOffsets(offsets.keySet());
        for (Map.Entry<TopicPartition, Long> sinkOffset : sinkOffsets.entrySet()) {
          safeToCommit.put(sinkOffset.getKey(), new OffsetAndMetadata(sinkOffset.getValue()));
        }
      } catch (SnowflakeKafkaConnectorException e) {
        // Logging is sufficient here because preCommit is invoked again later.
        this.authorizationExceptionTracker.reportPrecommitException(e);
        this.DYNAMIC_LOGGER.error("PreCommit error: {} ", e.getMessage());
        // Channel error count exceeded - remember it so the next put() call fails the task.
        if (e.checkErrorCode(SnowflakeErrors.ERROR_5030)) {
          this.channelErrorToFailOn = e;
        }
      } catch (Exception e) {
        this.authorizationExceptionTracker.reportPrecommitException(e);
        this.DYNAMIC_LOGGER.error("PreCommit error: {} ", e.getMessage());
      }

      final String summary =
          Utils.formatString(
              "Executed PRECOMMIT on all {} partitions, safe to commit {} partitions",
              offsets.size(),
              safeToCommit.size());
      logWarningForPutAndPrecommit(preCommitStartMs, summary, true);
      return safeToCommit;
    }
  }

  /**
   * Reports the version of this connector, taken from {@code Utils.VERSION}.
   *
   * @return connector version
   */
  @Override
  public String version() {
    final String connectorVersion = Utils.VERSION;
    return connectorVersion;
  }

  /**
   * Polls a status check until it reports success or the attempt budget is used up.
   *
   * <p>The check is evaluated up to {@code REPEAT_TIME} times, sleeping {@code WAIT_TIME} after
   * every unsuccessful attempt (including the last one).
   *
   * @param func status checker; polling stops as soon as it yields {@code true}
   * @throws InterruptedException if the thread is interrupted while sleeping between attempts
   * @throws TimeoutException if no attempt succeeded
   */
  private static void waitFor(Supplier<Boolean> func)
      throws InterruptedException, TimeoutException {
    int attempt = 0;
    while (attempt < REPEAT_TIME) {
      final boolean ready = func.get();
      if (ready) {
        return;
      }
      Thread.sleep(WAIT_TIME);
      attempt++;
    }
    throw new TimeoutException();
  }

  /**
   * Computes how many milliseconds have passed since the given start time.
   *
   * @param startTime a timestamp previously obtained from {@link System#currentTimeMillis()}
   * @return current wall-clock time minus {@code startTime}, in milliseconds
   */
  private static long getDurationFromStartMs(long startTime) {
    return System.currentTimeMillis() - startTime;
  }

  /**
   * Logs the outcome of a put or preCommit call together with its execution time.
   *
   * <p>Calls that took longer than 300 seconds are logged as a warning with a troubleshooting
   * link. Faster calls are logged at info level for preCommit and at debug level for put.
   *
   * @param startTime timestamp (from {@link System#currentTimeMillis()}) taken when the call began
   * @param logContent already formatted description of what was executed
   * @param isPrecommit {@code true} when reporting a preCommit call, {@code false} for put
   */
  void logWarningForPutAndPrecommit(long startTime, String logContent, boolean isPrecommit) {
    // Upper bound for what is considered a normally fast call.
    final long slowCallThresholdMs = 300000;

    final long executionTimeMs = getDurationFromStartMs(startTime);
    String logExecutionContent =
        Utils.formatString("{}, executionTime: {} ms", logContent, executionTimeMs);

    if (executionTimeMs > slowCallThresholdMs) {
      // This won't be frequently printed. It is very rare to have execution greater than 300
      // seconds.
      // But having this warning helps customer to debug their Kafka Connect config.
      // The second placeholder must show the expected limit; the measured time is already part
      // of logExecutionContent.
      this.DYNAMIC_LOGGER.warn(
          "{}. Expected call to be under {} ms. If there is CommitFailedException in the log or"
              + " there is duplicated records, refer to this link for solution: "
              + "https://docs.snowflake.com/en/user-guide/kafka-connector-ts.html#resolving-specific-issues",
          logExecutionContent,
          slowCallThresholdMs);
    } else {
      if (isPrecommit) {
        this.DYNAMIC_LOGGER.info(logExecutionContent);
      } else {
        this.DYNAMIC_LOGGER.debug(logExecutionContent);
      }
    }
  }

  /**
   * Builds the reporter used to send a record back to the DLQ if error tolerance is specified.
   *
   * <p>Falls back to the no-op reporter when the task context is not set, when the context has no
   * errant record reporter configured, or when the Connect runtime does not provide the errant
   * record reporter API at all.
   *
   * @return a reporter that blocks until the record is delivered to the DLQ, or the no-op reporter
   */
  private KafkaRecordErrorReporter createKafkaRecordErrorReporter() {
    KafkaRecordErrorReporter result = noOpKafkaRecordErrorReporter();
    if (context != null) {
      try {
        ErrantRecordReporter errantRecordReporter = context.errantRecordReporter();
        if (errantRecordReporter != null) {
          result =
              (record, error) -> {
                try {
                  // Blocking this until record is delivered to DLQ
                  DYNAMIC_LOGGER.debug(
                      "Sending Sink Record to DLQ with recordOffset:{}, partition:{}",
                      record.kafkaOffset(),
                      record.kafkaPartition());
                  errantRecordReporter.report(record, error).get();
                } catch (InterruptedException | ExecutionException e) {
                  if (e instanceof InterruptedException) {
                    // Catching InterruptedException clears the interrupt status; restore it so
                    // code further up the stack can still observe the interruption.
                    Thread.currentThread().interrupt();
                  }
                  final String errMsg = "ERROR reporting records to ErrantRecordReporter";
                  this.DYNAMIC_LOGGER.error(errMsg, e);
                  throw new ConnectException(errMsg, e);
                }
              };
        } else {
          this.DYNAMIC_LOGGER.info("Errant record reporter is not configured.");
        }
      } catch (NoClassDefFoundError | NoSuchMethodError e) {
        // Will occur in Connect runtimes earlier than 2.6
        this.DYNAMIC_LOGGER.info(
            "Kafka versions prior to 2.6 do not support the errant record reporter.");
      }
    } else {
      DYNAMIC_LOGGER.warn("SinkTaskContext is not set");
    }
    return result;
  }

  /**
   * Delegates to the sink's {@code awaitInitialization()}, which blocks until all partition
   * channels have finished initialization.
   */
  @VisibleForTesting
  public void awaitInitialization() {
    getSink().awaitInitialization();
  }

  /**
   * Fallback reporter for Connect runtimes older than 2.6: it does not deliver anything to a DLQ
   * and only logs a warning for every record it is asked to report.
   *
   * @return a reporter that ignores the record and the error
   * @see <a
   *     href="https://javadoc.io/doc/org.apache.kafka/connect-api/2.6.0/org/apache/kafka/connect/sink/ErrantRecordReporter.html">
   *     link </a>
   */
  @VisibleForTesting
  static KafkaRecordErrorReporter noOpKafkaRecordErrorReporter() {
    final KafkaRecordErrorReporter warnOnly =
        (ignoredRecord, ignoredError) ->
            STATIC_LOGGER.warn(
                "DLQ Kafka Record Error Reporter is not set, requires Kafka Version to be >= 2.6");
    return warnOnly;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/SnowflakeSinkTaskAuthorizationExceptionTracker.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_1005;

import java.util.Map;

/**
 * When the user rotates Snowflake key that is stored in an external file the Connector hangs and
 * does not mark its tasks as failed. To fix this corner case we need to track the authorization
 * exception thrown during preCommit() and stop tasks during put().
 *
 * <p>Note that exceptions thrown during preCommit() are swallowed by Kafka Connect and will not
 * cause task failure.
 */
public class SnowflakeSinkTaskAuthorizationExceptionTracker {

  // Message fragment that identifies an authorization failure in an exception message.
  private static final String AUTHORIZATION_EXCEPTION_MESSAGE = "Authorization failed after retry";

  // Whether a reported authorization failure should make the task fail.
  private boolean authorizationTaskFailureEnabled;
  // Set once an authorization failure was seen; never reset by this class.
  private boolean authorizationErrorReported;

  /** Creates a tracker with task failure enabled and no authorization error reported yet. */
  public SnowflakeSinkTaskAuthorizationExceptionTracker() {
    this.authorizationTaskFailureEnabled = true;
    this.authorizationErrorReported = false;
  }

  /**
   * Reads from the task configuration whether authorization errors should fail the task.
   *
   * @param taskConfig task configuration; when the setting is absent the configured default
   *     applies
   */
  public void updateStateOnTaskStart(Map<String, String> taskConfig) {
    authorizationTaskFailureEnabled =
        Boolean.parseBoolean(
            taskConfig.getOrDefault(
                ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS,
                Boolean.toString(ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT)));
  }

  /**
   * Check if the thrown exception is related to authorization
   *
   * <p>An exception without a message (or a {@code null} exception) is ignored. This method is
   * called from catch blocks, so it must not raise an exception of its own for such input.
   *
   * @param ex - any exception that occurred during preCommit
   */
  public void reportPrecommitException(Exception ex) {
    final String message = ex == null ? null : ex.getMessage();
    if (message != null && message.contains(AUTHORIZATION_EXCEPTION_MESSAGE)) {
      authorizationErrorReported = true;
    }
  }

  /** Throw exception if authorization has failed before */
  public void throwExceptionIfAuthorizationFailed() {
    if (authorizationTaskFailureEnabled && authorizationErrorReported) {
      throw ERROR_1005.getException();
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/SnowflakeStreamingSinkConnector.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.ConnectorConfigDefinition;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.kafka.common.config.Config;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.sink.SinkConnector;

/**
 * SnowflakeStreamingSinkConnector implements SinkConnector for Kafka Connect framework.
 *
 * <p>Expected configuration: including topic names, partition numbers, snowflake connection info
 * and credentials info
 *
 * <p>On start it validates the configuration and opens a Snowflake connection; it then provides
 * configuration to SinkTasks running on Kafka Connect Workers.
 */
public class SnowflakeStreamingSinkConnector extends SinkConnector {
  // create logger without correlationId for now
  private static final KCLogger LOGGER =
      new KCLogger(SnowflakeStreamingSinkConnector.class.getName());

  private Map<String, String> config; // connector configuration, provided by
  // user through kafka connect framework

  // SnowflakeJDBCWrapper provides methods to interact with user's snowflake
  // account and executes queries
  private SnowflakeConnectionService conn;

  // Snowflake Telemetry provides methods to report usage statistics
  private SnowflakeTelemetryService telemetryClient;
  private long connectorStartTime;

  // Kafka Connect starts sink tasks without waiting for setup in
  // SnowflakeStreamingSinkConnector to finish.
  // This causes race conditions for: config validation, tables and stages
  // creation, etc.
  // Using setupComplete to synchronize.
  // The flag is polled in taskConfigs(), so it is volatile: that guarantees the polling thread
  // sees the update, and also sees the fields start() assigned before setting the flag.
  private volatile boolean setupComplete;

  private final ConnectorConfigValidator connectorConfigValidator =
      new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator());

  /** No-Arg constructor. Required by Kafka Connect framework */
  public SnowflakeStreamingSinkConnector() {
    setupComplete = false;
  }

  /**
   * start method will only be called on a clean connector, i.e. it has either just been
   * instantiated and initialized or stop () has been invoked. loads configuration and validates.
   *
   * <p>Applies default values, validates the configuration, toggles MDC logging, enables the JVM
   * proxy, opens the Snowflake connection and reports the start through telemetry. Setup is
   * marked complete only after all of these steps succeeded.
   *
   * @param parsedConfig has the configuration settings
   */
  @Override
  public void start(final Map<String, String> parsedConfig) {
    LOGGER.info("SnowflakeStreamingSinkConnector:starting...");

    Utils.checkConnectorVersion();

    setupComplete = false;
    connectorStartTime = System.currentTimeMillis();
    // Work on a private copy so the steps below do not modify the caller's map.
    config = new HashMap<>(parsedConfig);

    ConnectorConfigTools.setDefaultValues(config);

    // modify invalid connector name
    Utils.convertAppName(config);

    connectorConfigValidator.validateConfig(config);

    // enable mdc logging if needed
    KCLogger.toggleGlobalMdcLoggingContext(
        Boolean.parseBoolean(
            config.getOrDefault(
                KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_CONFIG,
                KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_DEFAULT)));

    // enable proxy
    Utils.enableJVMProxy(config);

    // create a persisted connection, and validate snowflake connection
    // config as a side effect
    conn = SnowflakeConnectionServiceFactory.builder().setProperties(config).build();

    telemetryClient = conn.getTelemetryClient();

    telemetryClient.reportKafkaConnectStart(connectorStartTime, this.config);

    setupComplete = true;

    LOGGER.info("SnowflakeStreamingSinkConnector:started");
  }

  /**
   * Stop method will be called to stop a connector. It marks setup as not complete and, when a
   * telemetry client is available, reports the stop together with the connector start time.
   */
  @Override
  public void stop() {
    LOGGER.info("SnowflakeStreamingSinkConnector connector stopping...");
    setupComplete = false;

    if (telemetryClient != null) {
      telemetryClient.reportKafkaConnectStop(connectorStartTime);
    }
  }

  /**
   * @return Sink task class
   */
  @Override
  public Class<? extends Task> taskClass() {
    return SnowflakeSinkTask.class;
  }

  /**
   * taskConfigs method returns a set of configurations for SinkTasks based on the current
   * configuration, producing at most 'maxTasks' configurations
   *
   * <p>The method first waits (up to 10 minutes) for {@link #start(Map)} to finish. If the
   * waiting thread is interrupted, its interrupt status is restored and the wait ends early.
   *
   * @param maxTasks maximum number of SinkTasks for this instance of
   *     SnowflakeStreamingSinkConnector
   * @return a list containing 'maxTasks' copies of the configuration, each carrying its own task
   *     id
   * @throws SnowflakeKafkaConnectorException (ERROR_5007) when setup did not complete in time
   */
  @Override
  public List<Map<String, String>> taskConfigs(final int maxTasks) {
    LOGGER.info("taskConfigs called with maxTasks: {}", maxTasks);
    // wait for setup to complete
    int counter = 0;
    while (counter < 120) // poll for 120*5 seconds (10 mins) maximum
    {
      if (setupComplete) {
        break;
      } else {
        counter++;
        try {
          LOGGER.info("Sleeping 5000ms to allow setup to complete.");
          Thread.sleep(5000);
        } catch (InterruptedException ex) {
          LOGGER.warn("Waiting for setup to complete got interrupted");
          // Restore the interrupt status and stop waiting: with the flag set every further
          // sleep would fail immediately, and swallowing it hides the interruption from callers.
          Thread.currentThread().interrupt();
          break;
        }
      }
    }
    if (!setupComplete) {
      throw SnowflakeErrors.ERROR_5007.getException(telemetryClient);
    }

    List<Map<String, String>> taskConfigs = new ArrayList<>(maxTasks);
    for (int i = 0; i < maxTasks; i++) {
      Map<String, String> conf = new HashMap<>(config);
      conf.put(Utils.TASK_ID, i + "");
      taskConfigs.add(conf);
    }
    return taskConfigs;
  }

  /**
   * @return ConfigDef with original configuration properties
   */
  @Override
  public ConfigDef config() {
    return ConnectorConfigDefinition.getConfig();
  }

  /**
   * Validates the connector configuration beyond the single-field checks of the base class.
   *
   * <p>After the base validation and the proxy checks, a test connection to Snowflake is opened
   * and the configured database and schema are checked for existence. Known failures are attached
   * to the matching configuration fields of the result; unknown ones are rethrown. The live
   * checks are skipped when the private key or its passphrase is supplied through a config
   * provider.
   *
   * @param connectorConfigs configuration to validate; it is not modified by this method
   * @return the validation result, including any error messages added here
   */
  @Override
  public Config validate(Map<String, String> connectorConfigs) {
    LOGGER.debug("Validating connector Config: Start");
    // cross-fields validation here
    Config result = super.validate(connectorConfigs);

    // Validate ensure that url, user, db, schema, private key exist in config and is not empty
    // and there is no single field validation error
    if (!Utils.isSingleFieldValid(result)) {
      return result;
    }

    // Verify proxy config is valid
    Map<String, String> invalidProxyParams = Utils.validateProxySettings(connectorConfigs);

    for (String invalidKey : invalidProxyParams.keySet()) {
      Utils.updateConfigErrorMessage(result, invalidKey, invalidProxyParams.get(invalidKey));
    }

    // If private key or private key passphrase is
    // provided through a config provider, skip validation
    if (isUsingConfigProviderForPrivateKey(connectorConfigs)) {
      return result;
    }

    // We don't validate name, since it is not included in the return value
    // so just put a test connector here.
    // The placeholder goes into a copy: overwriting the name in the caller's map would leak the
    // test name to the caller and would fail outright on an unmodifiable map.
    Map<String, String> testConnectionConfigs = new HashMap<>(connectorConfigs);
    testConnectionConfigs.put(KafkaConnectorConfigParams.NAME, "TEST_CONNECTOR");
    SnowflakeConnectionService testConnection;
    try {
      testConnection =
          SnowflakeConnectionServiceFactory.builder().setProperties(testConnectionConfigs).build();
    } catch (SnowflakeKafkaConnectorException e) {
      LOGGER.error(
          "Validate: Error connecting to snowflake:{}, errorCode:{}", e.getMessage(), e.getCode());
      // Since url, user, db, schema, exist in config and is not empty,
      // the exceptions here would be invalid URL, and cannot connect, and no private key
      switch (e.getCode()) {
        case "1001":
          // Could be caused by invalid url, invalid user name, invalid password.
          Utils.updateConfigErrorMessage(
              result,
              KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
              ": Cannot connect to Snowflake");
          Utils.updateConfigErrorMessage(
              result,
              KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY,
              ": Cannot connect to Snowflake");
          Utils.updateConfigErrorMessage(
              result,
              KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME,
              ": Cannot connect to Snowflake");
          break;
        case "0007":
          Utils.updateConfigErrorMessage(
              result,
              KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
              " is not a valid snowflake url");
          break;
        case "0018":
          Utils.updateConfigErrorMessage(
              result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, " is not valid");
          Utils.updateConfigErrorMessage(
              result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, " is not valid");
          break;
        case "0013":
          Utils.updateConfigErrorMessage(
              result, KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, " must be non-empty");
          break;
        case "0002":
          Utils.updateConfigErrorMessage(
              result,
              KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY,
              " must be a valid PEM RSA private key");
          break;
        default:
          throw e; // Shouldn't reach here, so crash.
      }
      return result;
    }

    try {
      testConnection.databaseExists(
          connectorConfigs.get(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME));
    } catch (SnowflakeKafkaConnectorException e) {
      LOGGER.error("Validate Error msg:{}, errorCode:{}", e.getMessage(), e.getCode());
      if (e.getCode().equals("2001")) {
        Utils.updateConfigErrorMessage(
            result, KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME, " database does not exist");
      } else {
        throw e;
      }
      return result;
    }

    try {
      testConnection.schemaExists(
          connectorConfigs.get(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME));
    } catch (SnowflakeKafkaConnectorException e) {
      LOGGER.error("Validate Error msg:{}, errorCode:{}", e.getMessage(), e.getCode());
      if (e.getCode().equals("2001")) {
        Utils.updateConfigErrorMessage(
            result, KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, " schema does not exist");
      } else {
        throw e;
      }
      return result;
    }

    LOGGER.info("Validated config with no error");
    return result;
  }

  /**
   * Tells whether the private key or its passphrase looks like a config provider reference, i.e.
   * contains a prefix of the form <code>${name:</code>.
   *
   * @param connectorConfigs configuration to inspect; missing values are treated as empty
   * @return {@code true} if either value contains such a prefix
   */
  private static boolean isUsingConfigProviderForPrivateKey(Map<String, String> connectorConfigs) {
    Pattern configProviderPrefix = Pattern.compile("[$][{][a-zA-Z]+:");

    return configProviderPrefix
            .matcher(
                connectorConfigs.getOrDefault(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, ""))
            .find()
        || configProviderPrefix
            .matcher(
                connectorConfigs.getOrDefault(
                    KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, ""))
            .find();
  }

  /**
   * @return connector version
   */
  @Override
  public String version() {
    return Utils.VERSION;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/TopicToTableParser.java
================================================
package com.snowflake.kafka.connector;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Hand-written parser for topic-to-table mappings of the form {@code
 * <topic-1>:<table-1>,<topic-2>:"<table-2>",...}.
 *
 * <p>Tokens may be double-quoted, in which case they are taken verbatim. Unquoted table names are
 * upper-cased; topics are never changed. Whitespace around tokens and separators is ignored.
 */
public class TopicToTableParser {
  private final String input;
  // Position of the next unread character in input.
  private int index;

  TopicToTableParser(String input) {
    this.input = input;
  }

  /**
   * Parses a mapping string into an insertion-ordered topic-to-table map.
   *
   * @param input the mapping string
   * @return map from topic (as written) to table name
   * @throws IllegalArgumentException on malformed input, on a repeated topic, or when one topic,
   *     read as a regular expression, matches another topic
   */
  public static Map<String, String> parse(String input) {
    Map<String, String> topicToTable = new LinkedHashMap<>();
    for (Entry candidate : new TopicToTableParser(input).parseEntries()) {
      final String candidateTopic = candidate.getTopic();
      if (topicToTable.containsKey(candidateTopic)) {
        throw new IllegalArgumentException("Duplicate topic: " + candidateTopic);
      }

      // Each topic may be a regex, so compare the new one against every accepted one both ways.
      for (String knownTopic : topicToTable.keySet()) {
        final boolean overlaps =
            knownTopic.matches(candidateTopic) || candidateTopic.matches(knownTopic);
        if (overlaps) {
          throw new IllegalArgumentException(
              "Topic regexes cannot overlap. Overlapping regexes: "
                  + knownTopic
                  + ", "
                  + candidateTopic);
        }
      }

      topicToTable.put(candidateTopic, candidate.getTable());
    }
    return topicToTable;
  }

  /**
   * Reads all {@code topic:table} pairs from the current position to the end of the input.
   *
   * @return the entries in the order they appear; empty for blank input
   * @throws IllegalArgumentException when the input does not follow the expected format
   */
  public List<Entry> parseEntries() {
    List<Entry> parsed = new ArrayList<>();

    skipWhitespace();
    while (!isAtEnd()) {
      String topic = parseToken(false);
      skipWhitespace();
      expect(':');
      skipWhitespace();
      String table = parseToken(true);
      parsed.add(new Entry(topic, table));

      skipWhitespace();
      if (isAtEnd()) {
        break;
      }
      expect(',');
      // Position on the next entry (or on the end, which permits a trailing comma).
      skipWhitespace();
    }
    return parsed;
  }

  /** Reads one quoted or unquoted token, upper-casing it only if unquoted and requested. */
  private String parseToken(boolean uppercaseIfUnquoted) {
    if (isAtEnd()) {
      throw error("Expected token, found end of input");
    }
    if (input.charAt(index) == '"') {
      return parseQuotedToken();
    }

    final String raw = parseUnquotedToken();
    return uppercaseIfUnquoted ? raw.toUpperCase(Locale.ROOT) : raw;
  }

  /** Reads a non-empty token enclosed in double quotes and returns the text between them. */
  private String parseQuotedToken() {
    final int contentStart = index + 1; // step over the opening quote
    final int closingQuote = input.indexOf('"', contentStart);

    // Leave the cursor where a character-by-character scan would have stopped, because the
    // position is part of the error message.
    index = closingQuote < 0 ? input.length() : closingQuote;
    if (closingQuote < 0) {
      throw error("Unterminated quoted token");
    }
    if (closingQuote == contentStart) {
      throw error("Empty quoted token");
    }

    index = closingQuote + 1; // step over the closing quote
    return input.substring(contentStart, closingQuote);
  }

  /** Reads a non-empty run of characters up to the next delimiter or the end of the input. */
  private String parseUnquotedToken() {
    final int tokenStart = index;
    while (!isAtEnd() && !isDelimiter(input.charAt(index))) {
      index++;
    }

    if (index == tokenStart) {
      throw error("Expected token");
    }
    return input.substring(tokenStart, index);
  }

  /** Characters that end an unquoted token: whitespace, colon, comma and double quote. */
  private static boolean isDelimiter(char character) {
    return Character.isWhitespace(character)
        || character == ':'
        || character == ','
        || character == '"';
  }

  /** Advances the cursor past any whitespace. */
  private void skipWhitespace() {
    for (; !isAtEnd(); index++) {
      if (!Character.isWhitespace(input.charAt(index))) {
        return;
      }
    }
  }

  /** Consumes the given character or fails if something else (or nothing) is there. */
  private void expect(char expectedCharacter) {
    final boolean found = !isAtEnd() && input.charAt(index) == expectedCharacter;
    if (!found) {
      throw error("Expected '" + expectedCharacter + "'");
    }
    index++;
  }

  private boolean isAtEnd() {
    return input.length() <= index;
  }

  /** Builds a parse error that includes the cursor position, the input and the format hint. */
  private IllegalArgumentException error(String message) {
    final String details =
        message
            + " at position "
            + index
            + ": \""
            + input
            + "\". Format: <topic-1>:<table-1>,<topic-2>:\"<table-2>\",...";
    return new IllegalArgumentException(details);
  }

  /** One parsed {@code topic:table} pair. */
  public static final class Entry {
    private final String topic;
    private final String table;

    private Entry(String topic, String table) {
      this.topic = topic;
      this.table = table;
    }

    /** @return the topic as written in the input */
    public String getTopic() {
      return topic;
    }

    /** @return the table name; upper-cased if it was not quoted in the input */
    public String getTable() {
      return table;
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/Utils.java
================================================
/*
 * Copyright (c) 2024 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector;

import com.google.common.collect.ImmutableMap;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.PasswordAuthentication;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.kafka.common.config.Config;
import org.apache.kafka.common.config.ConfigValue;

/** Various arbitrary helper functions */
public class Utils {

  // Connector version, change every release
  public static final String VERSION = "4.1.0";

  // task id
  // NOTE(review): presumably the key under which a task's id is stored; usage is not visible in
  // this file -- confirm against callers.
  public static final String TASK_ID = "task_id";

  // Name of a JVM system property. enableJVMProxy sets it to an empty string when proxy
  // credentials (username and password) are configured.
  public static final String JDK_HTTP_AUTH_TUNNELING = "jdk.http.auth.tunneling.disabledSchemes";

  // mvn repo
  // URL that fetchAvailableVersionsFromMaven reads line by line, looking for version strings.
  private static final String MVN_REPO =
      "https://repo1.maven.org/maven2/com/snowflake/snowflake-kafka-connector/";

  // NOTE(review): these look like column names of the target table; they are not referenced by
  // any method in this class -- confirm against callers.
  public static final String TABLE_COLUMN_CONTENT = "RECORD_CONTENT";
  public static final String TABLE_COLUMN_METADATA = "RECORD_METADATA";

  // Logger shared by all static helpers in this class.
  private static final KCLogger LOGGER = new KCLogger(Utils.class.getName());

  /**
   * Check the connector version from Maven repo, report if any update version is available.
   *
   * <p>A URL connection timeout is set in case the Maven repo is not reachable from a proxied
   * environment. Returning false from this method has no side effects on connector startup.
   *
   * <p>Version upgrade logic:
   *
   * <ul>
   *   <li>Suggest only version that is newer than current version. If many new versions available
   *       suggest the most recent one.
   *   <li>Never suggest RC (release candidate) versions
   * </ul>
   */
  public static boolean checkConnectorVersion() {
    // Collect the published versions, then delegate to the overload that takes explicit inputs.
    final List<String> publishedVersions = fetchAvailableVersionsFromMaven();
    return checkConnectorVersion(VERSION, publishedVersions);
  }

  /**
   * Check connector version with provided current version and available versions.
   *
   * @param currentVersionString current version string
   * @param availableVersions list of available version strings from Maven
   */
  static boolean checkConnectorVersion(
      String currentVersionString, List<String> availableVersions) {
    LOGGER.info("Current Snowflake Kafka Connector Version: {}", currentVersionString);
    try {
      // Parsing the current version may throw; that is reported below and yields false.
      final String upgradeTarget =
          findRecommendedVersion(new SemanticVersion(currentVersionString), availableVersions);

      if (upgradeTarget == null) {
        // Nothing newer was found: the check itself still succeeded.
        return true;
      }

      LOGGER.warn(
          "Connector update is available, please upgrade Snowflake Kafka Connector ({} -> {})."
              + " Please check release notes for breaking changes and upgrade procedures before"
              + " installing.",
          currentVersionString,
          upgradeTarget);
      return true;
    } catch (Exception versionCheckFailure) {
      LOGGER.warn("can't verify latest connector version\n{}", versionCheckFailure.getMessage());
      return false;
    }
  }

  /**
   * Fetch available versions from Maven repository.
   *
   * @return list of available version strings
   */
  static List<String> fetchAvailableVersionsFromMaven() {
    List<String> versions = new ArrayList<>();
    // Matches "major.minor.patch" with an optional "-rc<N>" suffix (any letter case).
    Pattern pattern = Pattern.compile("(\\d+\\.\\d+\\.\\d+(?:-[rR][cC]\\d*)?)");
    try {
      URLConnection urlConnection = new URL(MVN_REPO).openConnection();
      // Bound both the connect and the read phase so an unreachable repo cannot hang the caller.
      urlConnection.setConnectTimeout(5000);
      urlConnection.setReadTimeout(5000);

      // try-with-resources closes the reader and the underlying stream on every exit path;
      // previously neither was closed. The charset is pinned so decoding does not depend on the
      // platform default.
      try (InputStream input = urlConnection.getInputStream();
          BufferedReader bufferedReader =
              new BufferedReader(
                  new InputStreamReader(input, java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
          Matcher matcher = pattern.matcher(line);
          // Only the first version-looking token of each line is recorded.
          if (matcher.find()) {
            versions.add(matcher.group(1));
          }
        }
      }
    } catch (Exception e) {
      // Best effort: versions collected before the failure are still returned.
      LOGGER.warn("Failed to fetch versions from Maven: {}", e.getMessage());
    }
    return versions;
  }

  /**
   * Find the recommended version to upgrade to based on current version and available versions.
   * Package-private for testing.
   *
   * @param currentVersion the current connector version
   * @param availableVersions list of available version strings
   * @return recommended version string, or null if no upgrade is recommended
   */
  static String findRecommendedVersion(
      SemanticVersion currentVersion, List<String> availableVersions) {
    SemanticVersion best = null;

    for (String candidateText : availableVersions) {
      try {
        final SemanticVersion candidate = new SemanticVersion(candidateText);

        // Eligible means: not a release candidate and strictly newer than the current version.
        final boolean eligible =
            !candidate.isReleaseCandidate() && candidate.compareTo(currentVersion) > 0;

        // Keep the greatest eligible version seen so far.
        if (eligible && (best == null || candidate.compareTo(best) > 0)) {
          best = candidate;
        }
      } catch (IllegalArgumentException parseFailure) {
        // An unparsable entry is logged and skipped; the scan continues.
        LOGGER.warn("Could not parse version string {}", candidateText, parseFailure);
      }
    }

    if (best == null) {
      return null;
    }
    return best.toString();
  }

  /**
   * Validates the proxy settings in the connector config.
   *
   * @param config connector configuration
   * @return map from each invalid config parameter name to its error message; empty if valid
   */
  public static ImmutableMap<String, String> validateProxySettings(Map<String, String> config) {
    final Map<String, String> problems = new HashMap<>();

    final String proxyHost =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_HOST);
    final String proxyPort =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PORT);
    final boolean hostGiven = proxyHost != null;
    final boolean portGiven = proxyPort != null;

    // Host and port are only meaningful as a pair: exactly one of them present is an error.
    if (hostGiven != portGiven) {
      problems.put(
          KafkaConnectorConfigParams.JVM_PROXY_HOST,
          "proxy host and port must be provided together");
      problems.put(
          KafkaConnectorConfigParams.JVM_PROXY_PORT,
          "proxy host and port must be provided together");
      return ImmutableMap.copyOf(problems);
    }

    // No proxy configured at all: credentials are not inspected.
    if (!hostGiven) {
      return ImmutableMap.copyOf(problems);
    }

    final String proxyUser =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_USERNAME);
    final String proxyPassword =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PASSWORD);

    // The same pairing rule applies to the credentials.
    if ((proxyUser != null) != (proxyPassword != null)) {
      problems.put(
          KafkaConnectorConfigParams.JVM_PROXY_USERNAME,
          "proxy username and password must be provided together");
      problems.put(
          KafkaConnectorConfigParams.JVM_PROXY_PASSWORD,
          "proxy username and password must be provided together");
    }

    return ImmutableMap.copyOf(problems);
  }

  /**
   * Enable JVM proxy
   *
   * @param config connector configuration
   */
  public static void enableJVMProxy(Map<String, String> config) {
    final String proxyHost =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_HOST);
    final String proxyPort =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PORT);
    final String configuredNonProxyHosts =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS);

    // Nothing is changed unless both halves of the proxy address are present.
    if (proxyHost == null || proxyPort == null) {
      return;
    }

    LOGGER.info(
        "enable jvm proxy: {}:{} and bypass proxy for hosts: {}",
        proxyHost,
        proxyPort,
        configuredNonProxyHosts);

    // The HTTP_* and HTTPS_* proxy properties all receive the same host and port.
    System.setProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY, "true");
    System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST, proxyHost);
    System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT, proxyPort);
    System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST, proxyHost);
    System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT, proxyPort);

    // A configured non-proxy host list is appended (with '|') to whatever value the JVM
    // property already holds, rather than replacing it.
    if (configuredNonProxyHosts != null) {
      final String existing = System.getProperty(KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS);
      final String merged =
          (existing == null) ? configuredNonProxyHosts : existing + "|" + configuredNonProxyHosts;
      System.setProperty(KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS, merged);
    }

    final String proxyUser =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_USERNAME);
    final String proxyPassword =
        ConnectorConfigTools.getProperty(config, KafkaConnectorConfigParams.JVM_PROXY_PASSWORD);

    // Credentials are applied only when both are present.
    if (proxyUser == null || proxyPassword == null) {
      return;
    }

    // Installs a JVM-wide default authenticator that answers with the proxy credentials.
    Authenticator.setDefault(
        new Authenticator() {
          @Override
          public PasswordAuthentication getPasswordAuthentication() {
            return new PasswordAuthentication(proxyUser, proxyPassword.toCharArray());
          }
        });
    System.setProperty(JDK_HTTP_AUTH_TUNNELING, "");
    System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER, proxyUser);
    System.setProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD, proxyPassword);
    System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER, proxyUser);
    System.setProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD, proxyPassword);
  }

  /**
   * validates that given name is a valid snowflake object identifier
   *
   * @param objName snowflake object name
   * @return true if given object name is valid
   */
  static boolean isValidSnowflakeObjectIdentifier(String objName) {
    // First character: letter or underscore. Remaining characters: letter, digit, '_' or '$'.
    // NOTE(review): the trailing '+' means a one-character name is rejected.
    final String identifierRegex = "^[_a-zA-Z]{1}[_$a-zA-Z0-9]+$";
    return Pattern.matches(identifierRegex, objName);
  }

  /**
   * validates that given name is a valid snowflake application name, support '-'
   *
   * @param appName snowflake application name
   * @return true if given application name is valid
   */
  public static boolean isValidSnowflakeApplicationName(String appName) {
    // Same shape as an object identifier, except '-' is accepted in every position.
    // NOTE(review): the trailing '+' means a one-character name is rejected.
    final String applicationNameRegex = "^[-_a-zA-Z]{1}[-_$a-zA-Z0-9]+$";
    return Pattern.matches(applicationNameRegex, appName);
  }

  /**
   * Replaces the application name stored in the config with a generated valid name. The config
   * map is modified in place; nothing is returned.
   *
   * @param config input config object
   */
  public static void convertAppName(Map<String, String> config) {
    // A missing name defaults to "", which generateValidNameFromMap rejects with an exception.
    final String configuredName = config.getOrDefault(KafkaConnectorConfigParams.NAME, "");

    // Application names are always sanitized for backward compatibility, and no
    // topic-to-table overrides apply to them, hence the empty map.
    final Map<String, String> noOverrides = new HashMap<>();
    config.put(
        KafkaConnectorConfigParams.NAME,
        generateValidNameFromMap(configuredName, noOverrides, true));
  }

  /**
   * verify topic name, and generate valid table name with optional sanitization
   *
   * @param topic input topic name
   * @param topic2table topic to table map
   * @param enableSanitization if true, sanitize invalid identifiers; if false, pass through
   * @return valid table name
   */
  public static String getTableName(
      String topic, Map<String, String> topic2table, boolean enableSanitization) {
    // Public entry point; all of the work happens in the shared private name generator.
    final String tableName = generateValidNameFromMap(topic, topic2table, enableSanitization);
    return tableName;
  }

  /**
   * verify topic name, and generate valid table/application name with optional sanitization
   *
   * @param topic input topic name
   * @param topic2table topic to table map
   * @param enableSanitization if true, sanitize invalid identifiers; if false, pass through
   * @return valid generated table/application name
   */
  private static String generateValidNameFromMap(
      String topic, Map<String, String> topic2table, boolean enableSanitization) {
    final String PLACE_HOLDER = "_";
    if (topic == null || topic.isEmpty()) {
      throw SnowflakeErrors.ERROR_0020.getException("topic name: " + topic);
    }

    // Map entries always bypass sanitization
    if (topic2table.containsKey(topic)) {
      return topic2table.get(topic);
    }

    // Otherwise treat every map key as a regex and return the table of the first key that
    // matches the whole topic name.
    for (Map.Entry<String, String> mapping : topic2table.entrySet()) {
      if (topic.matches(mapping.getKey())) {
        return mapping.getValue();
      }
    }

    // If sanitization is disabled, pass through the topic name as is
    if (!enableSanitization) {
      return topic;
    }

    // Valid identifiers are only uppercased when sanitization is enabled
    if (Utils.isValidSnowflakeObjectIdentifier(topic)) {
      return topic.toUpperCase(Locale.ROOT);
    }

    // The suffix is the absolute hash of the ORIGINAL topic. Widening to long first matters:
    // Math.abs(int) returns a negative number for Integer.MIN_VALUE, which would put a '-'
    // into the generated identifier. Every other hash value yields the same digits as before.
    final long hash = Math.abs((long) topic.hashCode());

    // remove wildcard regex from topic name to generate table name
    final String stripped = topic.replaceAll("\\.\\*", "");

    final StringBuilder result = new StringBuilder(stripped.length() + 24);

    // Prefix a placeholder when the first character cannot start an identifier. This also covers
    // a topic that consisted only of ".*" and is now empty, which previously threw
    // StringIndexOutOfBoundsException.
    if (stripped.isEmpty() || !isAsciiLetterOrUnderscore(stripped.charAt(0))) {
      result.append(PLACE_HOLDER);
    }

    // Copy allowed characters and replace everything else with the placeholder. Plain char
    // comparisons replace the per-character regex match; the accepted set is unchanged.
    for (int i = 0; i < stripped.length(); i++) {
      final char c = stripped.charAt(i);
      if (isAsciiLetterOrUnderscore(c) || (c >= '0' && c <= '9') || c == '$') {
        result.append(c);
      } else {
        result.append(PLACE_HOLDER);
      }
    }

    result.append(PLACE_HOLDER);
    result.append(hash);

    // Uppercase the sanitized result when sanitization is enabled
    return result.toString().toUpperCase(Locale.ROOT);
  }

  /** Returns true for the characters matched by the regex class {@code [_a-zA-Z]}. */
  private static boolean isAsciiLetterOrUnderscore(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
  }

  /**
   * Convert a Comma separated key value pairs into a Map
   *
   * @param input Provided in KC config
   * @return Map
   */
  public static Map<String, String> parseCommaSeparatedKeyValuePairs(String input) {
    final Map<String, String> parsed = new HashMap<>();
    final String[] entries = input.split(",");

    for (int i = 0; i < entries.length; i++) {
      final String[] keyAndValue = entries[i].split(":");

      // A well-formed entry has exactly one ':' with non-blank text on both sides.
      if (keyAndValue.length == 2) {
        final String key = keyAndValue[0].trim();
        final String value = keyAndValue[1].trim();
        if (!key.isEmpty() && !value.isEmpty()) {
          parsed.put(key, value);
          continue;
        }
      }

      // Anything else aborts the whole parse; the full input is logged for diagnosis.
      LOGGER.error(
          "Invalid {} config format: {}",
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
          input);
      throw SnowflakeErrors.ERROR_0030.getException();
    }
    return parsed;
  }

  // Config keys that isSingleFieldValid requires to have a non-null value: url, user, schema and
  // database.
  static final String[] loginPropList = {
    KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
    KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME,
    KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME,
    KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME
  };

  /**
   * Checks the per-field validation outcome of a config.
   *
   * <p>Returns false immediately if any field already carries an error message. Otherwise every
   * key in {@code loginPropList} must have a non-null value; each missing one gets a
   * "&lt;key&gt; must be provided" error message attached and makes the result false.
   *
   * @param result validated config whose values are inspected (and possibly annotated)
   * @return true if no field has an error and all login properties are present
   */
  public static boolean isSingleFieldValid(Config result) {
    // Any pre-existing single-field error fails the check without further inspection.
    final boolean anyFieldHasError =
        result.configValues().stream().anyMatch(field -> !field.errorMessages().isEmpty());
    if (anyFieldHasError) {
      return false;
    }

    // Require url, user, schema and database; record an error on each one that is missing.
    final Map<String, ConfigValue> valuesByName = validateConfigToMap(result);
    boolean allLoginPropsPresent = true;
    for (String loginProp : loginPropList) {
      if (valuesByName.get(loginProp).value() != null) {
        continue;
      }
      updateConfigErrorMessage(result, loginProp, " must be provided");
      allLoginPropsPresent = false;
    }

    return allLoginPropsPresent;
  }

  /**
   * Indexes the values of a config by name.
   *
   * @param result config whose values are indexed
   * @return new mutable map from config name to its value; a later duplicate name replaces an
   *     earlier one
   */
  public static Map<String, ConfigValue> validateConfigToMap(final Config result) {
    final Map<String, ConfigValue> valuesByName = new HashMap<>();
    result.configValues().forEach(value -> valuesByName.put(value.name(), value));
    return valuesByName;
  }

  /**
   * Attaches the error message {@code key + msg} to every config value named {@code key}.
   *
   * @param result config whose values are searched
   * @param key name of the config value to annotate; also used as the message prefix
   * @param msg text appended to the key to form the error message
   */
  public static void updateConfigErrorMessage(Config result, String key, String msg) {
    final String fullMessage = key + msg;
    result.configValues().stream()
        .filter(value -> value.name().equals(key))
        .forEach(value -> value.addErrorMessage(fullMessage));
  }

  // static elements
  // log message tag: formatLogMessage prepends it, followed by a space, to every message
  static final String SF_LOG_TAG = "[SF_KAFKA_CONNECTOR]";

  /**
   * the following method wraps log messages with Snowflake tag. For example,
   *
   * <p>[SF_KAFKA_CONNECTOR] this is a log message
   *
   * <p>[SF_KAFKA_CONNECTOR] this is the second line
   *
   * <p>All log messages should be wrapped by Snowflake tag. Then user can filter out log messages
   * output from Snowflake Kafka connector by these tags.
   *
   * @param format log message format string
   * @param vars variable list
   * @return log message wrapped by snowflake tag
   */
  public static String formatLogMessage(String format, Object... vars) {
    // Fill the placeholders first, then prefix the tag and a single separating space.
    final String body = formatString(format, vars);
    return SF_LOG_TAG + " " + body;
  }

  /**
   * Substitutes <code>{}</code> placeholders in {@code format} with the string form of {@code
   * vars}, left to right.
   *
   * <p>The format is scanned once, so argument text is inserted verbatim: backslashes and dollar
   * signs are kept as they are, and a <code>{}</code> that appears inside an argument is never
   * treated as a placeholder for a later argument. Surplus arguments are ignored; surplus
   * placeholders are left in the output unchanged.
   *
   * @param format message template; returned unchanged if null
   * @param vars values to insert, rendered with {@link Objects#toString(Object)}; may be null or
   *     empty
   * @return the formatted message
   */
  public static String formatString(String format, Object... vars) {
    if (format == null || vars == null || vars.length == 0) {
      return format;
    }

    final StringBuilder out = new StringBuilder(format.length() + 16 * vars.length);
    int cursor = 0;
    for (Object var : vars) {
      final int slot = format.indexOf("{}", cursor);
      if (slot < 0) {
        // More arguments than placeholders: the remaining arguments are dropped.
        break;
      }
      out.append(format, cursor, slot).append(Objects.toString(var));
      cursor = slot + 2;
    }
    // Copy whatever follows the last substituted placeholder.
    out.append(format, cursor, format.length());
    return out.toString();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/config/AuthenticatorType.java
================================================
package com.snowflake.kafka.connector.config;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR;

import java.util.Arrays;
import java.util.Locale;
import java.util.stream.Collectors;

/** Authentication method for Snowflake connections. */
public enum AuthenticatorType {
  /** Key-pair (JWT) authentication. This is the default. */
  SNOWFLAKE_JWT,

  /** External OAuth authentication. */
  OAUTH;

  /** Returns the config string for this type: the enum name in lowercase (root locale). */
  public String toConfigValue() {
    final String enumName = name();
    return enumName.toLowerCase(Locale.ROOT);
  }

  /**
   * Resolves a config string to an authenticator type, ignoring case and surrounding whitespace.
   * A null or blank value resolves to {@link #SNOWFLAKE_JWT}.
   *
   * @param value raw config value, may be null
   * @return the matching authenticator type
   * @throws IllegalArgumentException if the value names no known type; the message lists the
   *     accepted values
   */
  public static AuthenticatorType fromConfig(String value) {
    final String trimmed = (value == null) ? "" : value.trim();
    if (trimmed.isEmpty()) {
      return SNOWFLAKE_JWT;
    }

    try {
      return valueOf(trimmed.toUpperCase(Locale.ROOT));
    } catch (IllegalArgumentException e) {
      // Build "a, b, ..." from the config form of every constant, in declaration order.
      final StringBuilder validValues = new StringBuilder();
      for (AuthenticatorType type : values()) {
        if (validValues.length() > 0) {
          validValues.append(", ");
        }
        validValues.append(type.toConfigValue());
      }
      throw new IllegalArgumentException(
          "Invalid value '"
              + trimmed
              + "' for config '"
              + SNOWFLAKE_AUTHENTICATOR
              + "'. Valid values are: "
              + validValues,
          e);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/config/CommaSeparatedKeyValueValidator.java
================================================
package com.snowflake.kafka.connector.config;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigException;

/**
 * Validates key-value pairs in the format {@code <key-1>:<value-1>,<key-2>:<value-2>}.
 *
 * <p>It doesn't validate the type of values, only making sure the format is correct.
 */
class CommaSeparatedKeyValueValidator implements ConfigDef.Validator {
  public CommaSeparatedKeyValueValidator() {}

  public void ensureValid(String name, Object value) {
    String s = (String) value;
    // Validate the comma-separated key-value pairs string
    if (s != null && !s.isEmpty() && !isValidCommaSeparatedKeyValueString(s)) {
      throw new ConfigException(name, value, "Format: <key-1>:<value-1>,<key-2>:<value-2>,...");
    }
  }

  private boolean isValidCommaSeparatedKeyValueString(String input) {
    // Split the input string by commas
    String[] pairs = input.split(",");
    for (String pair : pairs) {
      // Trim the pair to remove leading and trailing whitespaces
      pair = pair.trim();
      // Split each pair by colon
      String[] keyValue = pair.split(":");
      // Check if the pair has exactly two elements after trimming
      if (keyValue.length != 2) {
        return false;
      }
      // Check if the key or value is empty after trimming
      if (keyValue[0].trim().isEmpty() || keyValue[1].trim().isEmpty()) {
        return false;
      }
    }
    return true;
  }

  public String toString() {
    return "Comma-separated key-value pairs format: <key-1>:<value-1>,<key-2>:<value-2>,...";
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/config/ConnectorConfigDefinition.java
================================================
package com.snowflake.kafka.connector.config;

import static org.apache.kafka.common.config.ConfigDef.Importance.*;
import static org.apache.kafka.common.config.ConfigDef.Range.*;
import static org.apache.kafka.common.config.ConfigDef.Type.*;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigDef.Width;

/** This class is a placeholder for config definition in Apache Kafka specific format */
public class ConnectorConfigDefinition {

  // Group labels passed to ConfigDef.define(...) in getConfig(), in the argument that follows the
  // documentation string.
  private static final String SNOWFLAKE_LOGIN_INFO_DOC = "Snowflake Login Info";
  private static final String PROXY_INFO_DOC = "Proxy Info";
  private static final String CONNECTOR_CONFIG_DOC = "Connector Config";
  private static final String SNOWFLAKE_METADATA_FLAGS_DOC = "Snowflake Metadata Flags";
  // NOTE(review): not referenced in the visible part of getConfig(); presumably another group
  // label -- confirm.
  private static final String ERRORS = "ERRORS";

  // Shared validator instances, created once and reused across config definitions.
  private static final ConfigDef.Validator NON_EMPTY_STRING_VALIDATOR =
      new ConfigDef.NonEmptyString();
  private static final ConfigDef.Validator TOPIC_TO_TABLE_VALIDATOR = new TopicToTableValidator();
  // NOTE(review): not referenced in the visible part of getConfig() -- confirm where it is used.
  private static final ConfigDef.Validator STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP_VALIDATOR =
      new CommaSeparatedKeyValueValidator();

  public static ConfigDef getConfig() {
    return new ConfigDef()
        // snowflake login info
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
            STRING,
            null,
            NON_EMPTY_STRING_VALIDATOR,
            HIGH,
            "Snowflake account url",
            SNOWFLAKE_LOGIN_INFO_DOC,
            0,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME,
            STRING,
            null,
            NON_EMPTY_STRING_VALIDATOR,
            HIGH,
            "Snowflake user name",
            SNOWFLAKE_LOGIN_INFO_DOC,
            1,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY,
            PASSWORD,
            "",
            HIGH,
            "Private key for Snowflake user",
            SNOWFLAKE_LOGIN_INFO_DOC,
            2,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE,
            PASSWORD,
            "",
            LOW,
            "Passphrase of private key if encrypted",
            SNOWFLAKE_LOGIN_INFO_DOC,
            3,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME,
            STRING,
            null,
            NON_EMPTY_STRING_VALIDATOR,
            HIGH,
            "Snowflake database name",
            SNOWFLAKE_LOGIN_INFO_DOC,
            4,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME,
            STRING,
            null,
            NON_EMPTY_STRING_VALIDATOR,
            HIGH,
            "Snowflake database schema name",
            SNOWFLAKE_LOGIN_INFO_DOC,
            5,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME,
            STRING,
            null,
            NON_EMPTY_STRING_VALIDATOR,
            HIGH,
            "Snowflake role: snowflake.role.name",
            SNOWFLAKE_LOGIN_INFO_DOC,
            6,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME)
        // OAuth
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR,
            STRING,
            AuthenticatorType.SNOWFLAKE_JWT.toConfigValue(),
            LOW,
            "Authenticator for JDBC and streaming ingest SDK."
                + " Valid values: snowflake_jwt, oauth.",
            SNOWFLAKE_LOGIN_INFO_DOC,
            7,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID,
            STRING,
            "",
            HIGH,
            "Client id of target OAuth integration",
            SNOWFLAKE_LOGIN_INFO_DOC,
            8,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET,
            PASSWORD,
            "",
            HIGH,
            "Client secret of target OAuth integration",
            SNOWFLAKE_LOGIN_INFO_DOC,
            9,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN,
            PASSWORD,
            "",
            HIGH,
            "Refresh token for OAuth. If empty, client_credentials grant is used.",
            SNOWFLAKE_LOGIN_INFO_DOC,
            10,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT,
            STRING,
            null,
            HIGH,
            "OAuth token endpoint URL. If not set, defaults to the Snowflake account URL.",
            SNOWFLAKE_LOGIN_INFO_DOC,
            11,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT)
        // proxy
        .define(
            KafkaConnectorConfigParams.JVM_PROXY_HOST,
            STRING,
            "",
            LOW,
            "JVM option: https.proxyHost",
            PROXY_INFO_DOC,
            0,
            Width.NONE,
            KafkaConnectorConfigParams.JVM_PROXY_HOST)
        .define(
            KafkaConnectorConfigParams.JVM_PROXY_PORT,
            STRING,
            "",
            LOW,
            "JVM option: https.proxyPort",
            PROXY_INFO_DOC,
            1,
            Width.NONE,
            KafkaConnectorConfigParams.JVM_PROXY_PORT)
        .define(
            KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS,
            STRING,
            "",
            LOW,
            "JVM option: http.nonProxyHosts",
            PROXY_INFO_DOC,
            2,
            Width.NONE,
            KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS)
        .define(
            KafkaConnectorConfigParams.JVM_PROXY_USERNAME,
            STRING,
            "",
            LOW,
            "JVM proxy username",
            PROXY_INFO_DOC,
            3,
            Width.NONE,
            KafkaConnectorConfigParams.JVM_PROXY_USERNAME)
        .define(
            KafkaConnectorConfigParams.JVM_PROXY_PASSWORD,
            PASSWORD,
            "",
            LOW,
            "JVM proxy password",
            PROXY_INFO_DOC,
            4,
            Width.NONE,
            KafkaConnectorConfigParams.JVM_PROXY_PASSWORD)
        // Metadata
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL,
            BOOLEAN,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT,
            LOW,
            "Flag to control whether there is metadata collected. If set to false, all metadata"
                + " will be dropped",
            SNOWFLAKE_METADATA_FLAGS_DOC,
            0,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME,
            BOOLEAN,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT,
            LOW,
            "Flag to control whether createtime is collected in snowflake metadata",
            SNOWFLAKE_METADATA_FLAGS_DOC,
            1,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC,
            BOOLEAN,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT,
            LOW,
            "Flag to control whether kafka topic name is collected in snowflake metadata",
            SNOWFLAKE_METADATA_FLAGS_DOC,
            2,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION,
            BOOLEAN,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT,
            LOW,
            "Flag to control whether kafka partition and offset are collected in snowflake"
                + " metadata",
            SNOWFLAKE_METADATA_FLAGS_DOC,
            3,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME,
            BOOLEAN,
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT,
            LOW,
            "Flag to control whether ConnectorPushTime is collected in snowflake metadata for"
                + " Snowpipe Streaming",
            SNOWFLAKE_METADATA_FLAGS_DOC,
            4,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME)

        // Connector Config
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC,
            BOOLEAN,
            KafkaConnectorConfigParams
                .SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC_DEFAULT,
            HIGH,
            "When true (default), the connector validates that all settings required for KC v3"
                + " backward compatibility are configured. Set to false to use v4-optimized"
                + " defaults without compatibility checks.",
            CONNECTOR_CONFIG_DOC,
            0,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP,
            STRING,
            "",
            TOPIC_TO_TABLE_VALIDATOR,
            LOW,
            "Map of topics to tables (optional). Format : comma-separated tuples, e.g."
                + " <topic-1>:<table-1>,<topic-2>:<table-2>,... ",
            CONNECTOR_CONFIG_DOC,
            1,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION,
            STRING,
            KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT,
            ConfigDef.ValidString.in("client_side", "server_side"),
            HIGH,
            "Data validation mode. 'client_side' enables client-side data validation and schema"
                + " evolution before sending to Snowflake. 'server_side' defers validation and"
                + " schema evolution to the backend for maximum throughput; requires that error"
                + " logging is enabled on the target table.",
            CONNECTOR_CONFIG_DOC,
            2,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION,
            STRING,
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT,
            ConfigDef.ValidString.in("skip", "best_effort", "strict"),
            HIGH,
            "Controls offset migration from KC v3 (Snowpipe Streaming Classic) channels. 'skip'"
                + " (default): do not consult Classic channels. 'best_effort': migrate the offset"
                + " if the Classic channel exists, otherwise fall through to the Kafka consumer"
                + " group offset. 'strict': migrate the offset if the Classic channel exists, fail"
                + " if it does not.",
            CONNECTOR_CONFIG_DOC,
            3,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME,
            BOOLEAN,
            KafkaConnectorConfigParams
                .SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME_DEFAULT,
            HIGH,
            "Whether the KC v3 connector included the connector name in its channel names."
                + " Set to true if the v3 connector had"
                + " 'snowflake.streaming.channel.name.include.connector.name=true'."
                + " Only relevant when offset migration is not 'skip'.",
            CONNECTOR_CONFIG_DOC,
            4,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME)
        .define(
            KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES,
            STRING,
            ConnectorConfigTools.BehaviorOnNullValues.DEFAULT.toString(),
            ConnectorConfigTools.BehaviorOnNullValues.VALIDATOR,
            LOW,
            "How to handle records with a null value (i.e. Kafka tombstone records)."
                + " Valid options are 'DEFAULT' and 'IGNORE'.",
            CONNECTOR_CONFIG_DOC,
            5,
            Width.NONE,
            KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES)
        .define(
            KafkaConnectorConfigParams.JMX_OPT,
            BOOLEAN,
            KafkaConnectorConfigParams.JMX_OPT_DEFAULT,
            HIGH,
            "Whether to enable JMX MBeans for custom SF metrics")
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
            STRING,
            "",
            STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP_VALIDATOR,
            LOW,
            "Map of Key value pairs representing Streaming Client Properties to Override. These are"
                + " optional and recommended to use ONLY after consulting Snowflake Support. Format"
                + " : comma-separated tuples, e.g.: key1:value1,key2:value2",
            CONNECTOR_CONFIG_DOC,
            6,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP)
        .define(
            KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG,
            STRING,
            KafkaConnectorConfigParams.ERRORS_TOLERANCE_DEFAULT,
            ConnectorConfigTools.ErrorTolerance.VALIDATOR,
            LOW,
            "Behavior for tolerating errors during Sink connector's operation. 'NONE' is set as"
                + " default and denotes that it will be fail fast. i.e any error will result in an"
                + " immediate task failure. 'ALL'  skips over problematic records.",
            ERRORS,
            0,
            Width.NONE,
            "Error Tolerance")
        .define(
            KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG,
            BOOLEAN,
            KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_DEFAULT,
            LOW,
            "If true, write/log each error along with details of the failed operation and record"
                + " properties to the Connect log. Default is 'false', so that only errors that are"
                + " not tolerated are reported.",
            ERRORS,
            1,
            Width.NONE,
            "Log Errors")
        .define(
            KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG,
            STRING,
            KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_DEFAULT,
            LOW,
            "Whether to output conversion errors to the dead letter queue "
                + "By default messages are not sent to the dead letter queue. "
                + "Requires property `errors.tolerance=all`.",
            ERRORS,
            2,
            Width.NONE,
            "Send error records to the Dead Letter Queue (DLQ)")
        .define(
            KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_CONFIG,
            BOOLEAN,
            KafkaConnectorConfigParams.ENABLE_MDC_LOGGING_DEFAULT,
            LOW,
            "Enable MDC context to prepend log messages. Note that this is only available after"
                + " Apache Kafka 2.3",
            CONNECTOR_CONFIG_DOC,
            7,
            Width.NONE,
            "Enable MDC logging")
        .define(
            KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS,
            BOOLEAN,
            KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS_DEFAULT,
            LOW,
            "If set to true the Connector will fail its tasks when authorization error from"
                + " Snowflake occurred")
        .define(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
            BOOLEAN,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT,
            LOW,
            "When enabled, auto-generated table names are sanitized (special characters replaced)"
                + " and uppercased for v3 compatibility. When disabled, topic names are passed"
                + " through as-is. Use topic2table.map with quoted identifiers for special"
                + " characters when disabled.",
            CONNECTOR_CONFIG_DOC,
            8,
            Width.NONE,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION)
        .define(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
            BOOLEAN,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT,
            LOW,
            "When enabled, column identifiers are normalized to uppercase for v3 compatibility.",
            CONNECTOR_CONFIG_DOC,
            9,
            Width.NONE,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION)
        .define(
            KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION,
            BOOLEAN,
            KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT,
            MEDIUM,
            "When true (default), records are schematized into individual columns. When false,"
                + " records are wrapped into legacy RECORD_CONTENT and RECORD_METADATA VARIANT"
                + " columns for backward compatibility with KC v3.",
            CONNECTOR_CONFIG_DOC,
            10,
            Width.NONE,
            KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION)
        .define(
            KafkaConnectorConfigParams.CACHE_TABLE_EXISTS,
            BOOLEAN,
            KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT,
            LOW,
            "Enable caching for Snowflake table existence checks to reduce database queries",
            CONNECTOR_CONFIG_DOC,
            11,
            Width.NONE,
            KafkaConnectorConfigParams.CACHE_TABLE_EXISTS)
        .define(
            KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS,
            LONG,
            KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT,
            atLeast(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_MIN),
            LOW,
            "Cache expiration time in milliseconds for table existence checks. Must be a positive"
                + " number.",
            CONNECTOR_CONFIG_DOC,
            12,
            Width.NONE,
            KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS)
        .define(
            KafkaConnectorConfigParams.CACHE_PIPE_EXISTS,
            BOOLEAN,
            KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT,
            LOW,
            "Enable caching for pipe existence checks to reduce database queries",
            CONNECTOR_CONFIG_DOC,
            13,
            Width.NONE,
            KafkaConnectorConfigParams.CACHE_PIPE_EXISTS)
        .define(
            KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS,
            LONG,
            KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT,
            atLeast(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_MIN),
            LOW,
            "Cache expiration time in milliseconds for pipe existence checks. Must be a positive"
                + " number.",
            CONNECTOR_CONFIG_DOC,
            14,
            Width.NONE,
            KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/config/SinkTaskConfig.java
================================================
package com.snowflake.kafka.connector.config;

import com.google.auto.value.AutoValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.TopicToTableParser;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.CachingConfig;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nullable;
import org.apache.kafka.common.config.types.Password;

/**
 * Parsed, typed configuration for the sink task. Built once from the raw connector config map in
 * {@link com.snowflake.kafka.connector.SnowflakeSinkTask#start(Map)} and passed through the task
 * and streaming layer so call sites use accessors instead of string keys and repeated defaults.
 *
 * <p>When built through {@link #builderFrom(Map, boolean)}, boolean, numeric and enum-like
 * settings are trimmed before being parsed, and a key that is present but mapped to {@code null}
 * is handled like a missing key. Free-form values (names, URLs, credentials, proxy settings) are
 * stored exactly as they appear in the raw map.
 */
@AutoValue
public abstract class SinkTaskConfig {

  public abstract String getConnectorName();

  public abstract String getTaskId();

  /** Returns an unmodifiable view of the topic-to-table mapping. */
  public abstract Map<String, String> getTopicToTableMap();

  public abstract ConnectorConfigTools.BehaviorOnNullValues getBehaviorOnNullValues();

  public abstract boolean isJmxEnabled();

  /** True when the configured error tolerance resolves to {@code ErrorTolerance.ALL}. */
  public abstract boolean isTolerateErrors();

  public abstract boolean isErrorsLogEnable();

  @Nullable
  public abstract String getDlqTopicName();

  public abstract boolean isEnableSanitization();

  public abstract boolean isEnableSchematization();

  public abstract boolean isEnableColumnIdentifierNormalization();

  public abstract SnowflakeValidation getValidation();

  public abstract int getOpenChannelIoThreads();

  @Nullable
  public abstract String getStreamingClientProviderOverrideMap();

  public abstract CachingConfig getCachingConfig();

  public abstract SnowflakeMetadataConfig getMetadataConfig();

  @Nullable
  public abstract String getSnowflakeUrl();

  @Nullable
  public abstract String getSnowflakeUser();

  @Nullable
  public abstract String getSnowflakeRole();

  @Nullable
  public abstract Password getSnowflakePrivateKey();

  @Nullable
  public abstract Password getSnowflakePrivateKeyPassphrase();

  public abstract AuthenticatorType getAuthenticator();

  @Nullable
  public abstract String getOauthClientId();

  @Nullable
  public abstract Password getOauthClientSecret();

  @Nullable
  public abstract Password getOauthRefreshToken();

  @Nullable
  public abstract String getOauthTokenEndpoint();

  @Nullable
  public abstract String getSnowflakeDatabase();

  @Nullable
  public abstract String getSnowflakeSchema();

  @Nullable
  public abstract String getProxyHost();

  @Nullable
  public abstract String getProxyPort();

  @Nullable
  public abstract String getNonProxyHosts();

  @Nullable
  public abstract String getProxyUsername();

  @Nullable
  public abstract String getProxyPassword();

  @Nullable
  public abstract String getJdbcMap();

  public abstract Ssv1MigrationMode getSsv1MigrationMode();

  public abstract boolean isSsv1MigrationIncludeConnectorName();

  /** Convenience overload that calls {@link #from(Map, boolean)} with {@code false}. */
  public static SinkTaskConfig from(Map<String, String> raw) {
    return from(raw, false);
  }

  /**
   * Parses the raw connector config map into an immutable SinkTaskConfig. Applies defaults for
   * missing optional keys.
   *
   * @param raw raw config from the connector (typically after setDefaultValues)
   * @param skipTaskSpecificConfig if true, task ID and connector name default to "" when absent
   *     instead of throwing. Use this when building a config outside of task startup -- e.g. in
   *     {@code validate()} or connection factory setup -- where task ID is not yet assigned.
   * @return parsed config
   * @throws IllegalArgumentException if required fields are missing or invalid
   */
  public static SinkTaskConfig from(Map<String, String> raw, boolean skipTaskSpecificConfig) {
    return builderFrom(raw, skipTaskSpecificConfig).build();
  }

  /** Convenience overload that calls {@link #builderFrom(Map, boolean)} with {@code false}. */
  @VisibleForTesting
  public static Builder builderFrom(Map<String, String> raw) {
    return builderFrom(raw, false);
  }

  /**
   * Parses the raw connector config map into a pre-populated {@link Builder}, so callers (tests)
   * can override individual properties before calling {@link Builder#build()}.
   *
   * @param raw raw config map; {@code null} is treated as an empty map
   * @param skipTaskSpecificConfig if true, a missing (or null) connector name / task ID becomes ""
   *     instead of raising an error
   * @return builder with every property set from {@code raw} or from its default
   * @throws IllegalArgumentException if the connector name or task ID is required but blank, or
   *     if an enum-like or numeric setting cannot be parsed
   */
  @VisibleForTesting
  public static Builder builderFrom(Map<String, String> raw, boolean skipTaskSpecificConfig) {
    // Defensive copy; a null input is normalized to an empty map.
    Map<String, String> config = raw == null ? new HashMap<>() : new HashMap<>(raw);

    String connectorName = config.get(KafkaConnectorConfigParams.NAME);
    String taskId = config.get(Utils.TASK_ID);

    if (skipTaskSpecificConfig) {
      // Both properties are non-nullable on the AutoValue, so a missing key and a key mapped to
      // null must both end up as the empty string.
      connectorName = connectorName == null ? "" : connectorName;
      taskId = taskId == null ? "" : taskId;
    } else {
      if (connectorName == null || connectorName.trim().isEmpty()) {
        throw new IllegalArgumentException(
            "Connector name ('"
                + KafkaConnectorConfigParams.NAME
                + "') must be set and cannot be empty");
      }
      if (taskId == null || taskId.trim().isEmpty()) {
        throw new IllegalArgumentException(
            "Task ID ('" + Utils.TASK_ID + "') must be set and cannot be null or empty");
      }
    }

    ImmutableMap<String, String> topicToTableMap = ImmutableMap.of();
    // The mapping is optional: a null value is skipped here just like a missing key.
    String rawTopicToTableMap = config.get(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP);
    if (rawTopicToTableMap != null) {
      try {
        Map<String, String> parsed = TopicToTableParser.parse(rawTopicToTableMap);
        if (parsed != null) {
          topicToTableMap = ImmutableMap.copyOf(parsed);
        }
      } catch (IllegalArgumentException e) {
        throw SnowflakeErrors.ERROR_0021.getException(e.getMessage());
      }
    }

    ConnectorConfigTools.BehaviorOnNullValues behaviorOnNullValues =
        ConnectorConfigTools.BehaviorOnNullValues.DEFAULT;
    String rawBehaviorOnNullValues =
        trimmedOrNull(config, KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES);
    if (rawBehaviorOnNullValues != null) {
      behaviorOnNullValues =
          ConnectorConfigTools.BehaviorOnNullValues.valueOf(
              rawBehaviorOnNullValues.toUpperCase(java.util.Locale.ROOT));
    }

    boolean jmxEnabled =
        booleanOrDefault(
            config, KafkaConnectorConfigParams.JMX_OPT, KafkaConnectorConfigParams.JMX_OPT_DEFAULT);

    String errorsTolerance =
        stringOrDefault(
            config,
            KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG,
            KafkaConnectorConfigParams.ERRORS_TOLERANCE_DEFAULT);
    boolean tolerateErrors =
        ConnectorConfigTools.ErrorTolerance.valueOf(
                errorsTolerance.toUpperCase(java.util.Locale.ROOT))
            .equals(ConnectorConfigTools.ErrorTolerance.ALL);

    boolean errorsLogEnable =
        booleanOrDefault(
            config,
            KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG,
            KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_DEFAULT);

    String dlqTopicName =
        config.get(KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG);

    boolean enableSanitization =
        booleanOrDefault(
            config,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT);

    boolean enableSchematization =
        booleanOrDefault(
            config,
            KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION,
            KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT);

    boolean enableColumnIdentifierNormalization =
        booleanOrDefault(
            config,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT);

    SnowflakeValidation validation =
        SnowflakeValidation.fromConfig(
            stringOrDefault(
                config,
                KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION,
                KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT));

    int openChannelIoThreads =
        Optional.ofNullable(
                trimmedOrNull(config, KafkaConnectorConfigParams.SNOWFLAKE_OPEN_CHANNEL_IO_THREADS))
            .map(Integer::parseInt)
            .orElse(KafkaConnectorConfigParams.SNOWFLAKE_OPEN_CHANNEL_IO_THREADS_DEFAULT);

    String streamingClientProviderOverrideMap =
        config.get(KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP);

    CachingConfig cachingConfig = CachingConfig.fromConfig(config);
    SnowflakeMetadataConfig metadataConfig = new SnowflakeMetadataConfig(config);

    Ssv1MigrationMode ssv1MigrationMode =
        Ssv1MigrationMode.fromConfig(
            stringOrDefault(
                config,
                KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION,
                KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT));

    boolean ssv1MigrationIncludeConnectorName =
        booleanOrDefault(
            config,
            KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME,
            KafkaConnectorConfigParams
                .SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME_DEFAULT);

    String snowflakeUrl = config.get(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME);
    String snowflakeUser = config.get(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME);
    String snowflakeRole = config.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
    Password snowflakePrivateKey =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY));
    Password snowflakePrivateKeyPassphrase =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE));
    String snowflakeDatabase = config.get(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME);
    String snowflakeSchema = config.get(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME);

    AuthenticatorType authenticator =
        AuthenticatorType.fromConfig(
            config.get(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR));
    String oauthClientId = config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID);
    Password oauthClientSecret =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET));
    Password oauthRefreshToken =
        passwordOrNull(config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN));
    String oauthTokenEndpoint =
        config.get(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT);

    String proxyHost = config.get(KafkaConnectorConfigParams.JVM_PROXY_HOST);
    String proxyPort = config.get(KafkaConnectorConfigParams.JVM_PROXY_PORT);
    String nonProxyHosts = config.get(KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS);
    String proxyUsername = config.get(KafkaConnectorConfigParams.JVM_PROXY_USERNAME);
    String proxyPassword = config.get(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD);
    String jdbcMap = config.get(KafkaConnectorConfigParams.SNOWFLAKE_JDBC_MAP);

    return builder()
        .connectorName(connectorName)
        .taskId(taskId)
        .topicToTableMap(topicToTableMap)
        .behaviorOnNullValues(behaviorOnNullValues)
        .jmxEnabled(jmxEnabled)
        .tolerateErrors(tolerateErrors)
        .errorsLogEnable(errorsLogEnable)
        .dlqTopicName(dlqTopicName)
        .enableSanitization(enableSanitization)
        .enableSchematization(enableSchematization)
        .enableColumnIdentifierNormalization(enableColumnIdentifierNormalization)
        .validation(validation)
        .openChannelIoThreads(openChannelIoThreads)
        .streamingClientProviderOverrideMap(streamingClientProviderOverrideMap)
        .cachingConfig(cachingConfig)
        .metadataConfig(metadataConfig)
        .snowflakeUrl(snowflakeUrl)
        .snowflakeUser(snowflakeUser)
        .snowflakeRole(snowflakeRole)
        .snowflakePrivateKey(snowflakePrivateKey)
        .snowflakePrivateKeyPassphrase(snowflakePrivateKeyPassphrase)
        .authenticator(authenticator)
        .oauthClientId(oauthClientId)
        .oauthClientSecret(oauthClientSecret)
        .oauthRefreshToken(oauthRefreshToken)
        .oauthTokenEndpoint(oauthTokenEndpoint)
        .snowflakeDatabase(snowflakeDatabase)
        .snowflakeSchema(snowflakeSchema)
        .proxyHost(proxyHost)
        .proxyPort(proxyPort)
        .nonProxyHosts(nonProxyHosts)
        .proxyUsername(proxyUsername)
        .proxyPassword(proxyPassword)
        .jdbcMap(jdbcMap)
        .ssv1MigrationMode(ssv1MigrationMode)
        .ssv1MigrationIncludeConnectorName(ssv1MigrationIncludeConnectorName);
  }

  /**
   * Returns the value stored under {@code key} with surrounding whitespace removed, or {@code
   * null} when the key is missing or mapped to {@code null}.
   */
  @Nullable
  private static String trimmedOrNull(Map<String, String> config, String key) {
    String value = config.get(key);
    return value == null ? null : value.trim();
  }

  /** Returns the trimmed value for {@code key}, or {@code defaultValue} when there is none. */
  private static String stringOrDefault(
      Map<String, String> config, String key, String defaultValue) {
    String value = trimmedOrNull(config, key);
    return value == null ? defaultValue : value;
  }

  /**
   * Parses the trimmed value for {@code key} with {@link Boolean#parseBoolean(String)}. When there
   * is no value, the string form of {@code defaultValue} is parsed instead, so the default may be
   * declared either as a boolean or as a string constant.
   */
  private static boolean booleanOrDefault(
      Map<String, String> config, String key, Object defaultValue) {
    String value = trimmedOrNull(config, key);
    return Boolean.parseBoolean(value == null ? String.valueOf(defaultValue) : value);
  }

  /** Wraps {@code value} in a {@link Password}, passing {@code null} through unchanged. */
  @Nullable
  private static Password passwordOrNull(@Nullable String value) {
    return value == null ? null : new Password(value);
  }

  /** Creates a new builder. Used by {@link #from(Map)} and by tests. */
  public static Builder builder() {
    return new AutoValue_SinkTaskConfig.Builder();
  }

  /**
   * AutoValue-generated builder. When using directly (e.g. in tests), set connectorName and taskId.
   * Setters whose parameter is marked {@code @Nullable} correspond to {@code @Nullable} accessors
   * on {@link SinkTaskConfig}.
   */
  @AutoValue.Builder
  public abstract static class Builder {
    public abstract Builder connectorName(String connectorName);

    public abstract Builder taskId(String taskId);

    public abstract Builder topicToTableMap(Map<String, String> topicToTableMap);

    public abstract Builder behaviorOnNullValues(
        ConnectorConfigTools.BehaviorOnNullValues behaviorOnNullValues);

    public abstract Builder jmxEnabled(boolean jmxEnabled);

    public abstract Builder tolerateErrors(boolean tolerateErrors);

    public abstract Builder errorsLogEnable(boolean errorsLogEnable);

    public abstract Builder dlqTopicName(@Nullable String dlqTopicName);

    public abstract Builder enableSanitization(boolean enableSanitization);

    public abstract Builder enableSchematization(boolean enableSchematization);

    public abstract Builder enableColumnIdentifierNormalization(
        boolean enableColumnIdentifierNormalization);

    public abstract Builder validation(SnowflakeValidation validation);

    public abstract Builder openChannelIoThreads(int openChannelIoThreads);

    public abstract Builder streamingClientProviderOverrideMap(
        @Nullable String streamingClientProviderOverrideMap);

    public abstract Builder cachingConfig(CachingConfig cachingConfig);

    public abstract Builder metadataConfig(SnowflakeMetadataConfig metadataConfig);

    public abstract Builder snowflakeUrl(@Nullable String snowflakeUrl);

    public abstract Builder snowflakeUser(@Nullable String snowflakeUser);

    public abstract Builder snowflakeRole(@Nullable String snowflakeRole);

    public abstract Builder snowflakePrivateKey(@Nullable Password snowflakePrivateKey);

    public abstract Builder snowflakePrivateKeyPassphrase(
        @Nullable Password snowflakePrivateKeyPassphrase);

    public abstract Builder authenticator(AuthenticatorType authenticator);

    public abstract Builder oauthClientId(@Nullable String oauthClientId);

    public abstract Builder oauthClientSecret(@Nullable Password oauthClientSecret);

    public abstract Builder oauthRefreshToken(@Nullable Password oauthRefreshToken);

    public abstract Builder oauthTokenEndpoint(@Nullable String oauthTokenEndpoint);

    public abstract Builder snowflakeDatabase(@Nullable String snowflakeDatabase);

    public abstract Builder snowflakeSchema(@Nullable String snowflakeSchema);

    public abstract Builder proxyHost(@Nullable String proxyHost);

    public abstract Builder proxyPort(@Nullable String proxyPort);

    public abstract Builder nonProxyHosts(@Nullable String nonProxyHosts);

    public abstract Builder proxyUsername(@Nullable String proxyUsername);

    public abstract Builder proxyPassword(@Nullable String proxyPassword);

    public abstract Builder jdbcMap(@Nullable String jdbcMap);

    public abstract Builder ssv1MigrationMode(Ssv1MigrationMode ssv1MigrationMode);

    public abstract Builder ssv1MigrationIncludeConnectorName(
        boolean ssv1MigrationIncludeConnectorName);

    public abstract SinkTaskConfig build();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/config/SnowflakeValidation.java
================================================
package com.snowflake.kafka.connector.config;

import java.util.Locale;

/**
 * Validation mode used by the connector during ingestion: either the connector checks records
 * itself before sending them to Snowflake, or it leaves validation to the server.
 */
public enum SnowflakeValidation {

  /**
   * The connector validates data types and schema compatibility on the client before sending data
   * to Snowflake. Validation errors can be routed to a DLQ or abort the task.
   */
  CLIENT_SIDE,

  /**
   * Client-side validation is turned off and invalid records are handled by the SSv2 Error Table.
   * Use when throughput is critical and an Error Table is configured.
   */
  SERVER_SIDE;

  /**
   * Resolves a config string to a validation mode, ignoring case and surrounding whitespace.
   *
   * @param value raw config value; {@code null} or blank selects {@link #CLIENT_SIDE}
   * @return the matching mode
   * @throws IllegalArgumentException if the value names no known mode
   */
  public static SnowflakeValidation fromConfig(String value) {
    final String normalized = value == null ? "" : value.trim();
    return normalized.isEmpty() ? CLIENT_SIDE : valueOf(normalized.toUpperCase(Locale.ROOT));
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/config/TopicToTableValidator.java
================================================
package com.snowflake.kafka.connector.config;

import com.snowflake.kafka.connector.TopicToTableParser;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigException;

class TopicToTableValidator implements ConfigDef.Validator {
  public TopicToTableValidator() {}

  public void ensureValid(String name, Object value) {
    String s = (String) value;
    if (s != null && !s.isEmpty()) // this value is optional and can be empty
    {
      try {
        TopicToTableParser.parse(s);
      } catch (IllegalArgumentException e) {
        throw new ConfigException(name, value, e.getMessage());
      }
    }
  }

  public String toString() {
    return "Topic to table map format : comma-separated tuples, e.g."
        + " <topic-1>:<table-1>,<topic-2>:<table-2>,... ";
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/dlq/KafkaRecordErrorReporter.java
================================================
package com.snowflake.kafka.connector.dlq;

import org.apache.kafka.connect.sink.ErrantRecordReporter;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * Connector-owned wrapper around Kafka Connect's {@link ErrantRecordReporter}. Coding against this
 * interface instead of {@link ErrantRecordReporter} directly lets the connector tolerate
 * situations where that class is not available, because it was added to Kafka Connect only
 * recently and backported to older versions.
 *
 * @see <a
 *     href="https://javadoc.io/doc/org.apache.kafka/connect-api/2.6.0/org/apache/kafka/connect/sink/ErrantRecordReporter.html">
 *     Documentation </a>
 */
public interface KafkaRecordErrorReporter {
  /**
   * Reports a sink record together with the error associated with it. What happens to the report
   * is decided by the implementation (presumably forwarding to the dead letter queue, given the
   * package name -- confirm against the implementations).
   *
   * @param record the record being reported
   * @param e the error associated with {@code record}
   */
  void reportError(SinkRecord record, Exception e);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/CachingConfig.java
================================================
package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import java.util.Map;
import java.util.Optional;

/**
 * Configuration class for table and pipe existence caching. Contains all cache-related settings
 * with proper types. The values are coming from the connector config map. If you have any cache
 * related configuration parameters add them here.
 *
 * <p>Instances are immutable and compare by value ({@link #equals(Object)} / {@link
 * #hashCode()}), so value classes that hold a CachingConfig -- such as the AutoValue-based
 * SinkTaskConfig -- do not fall back to identity comparison for this property.
 */
public final class CachingConfig {
  private final boolean tableExistsCacheEnabled;
  private final long tableExistsCacheExpireMs;
  private final boolean pipeExistsCacheEnabled;
  private final long pipeExistsCacheExpireMs;

  private CachingConfig(
      boolean tableExistsCacheEnabled,
      long tableExistsCacheExpireMs,
      boolean pipeExistsCacheEnabled,
      long pipeExistsCacheExpireMs) {
    this.tableExistsCacheEnabled = tableExistsCacheEnabled;
    this.tableExistsCacheExpireMs = tableExistsCacheExpireMs;
    this.pipeExistsCacheEnabled = pipeExistsCacheEnabled;
    this.pipeExistsCacheExpireMs = pipeExistsCacheExpireMs;
  }

  public boolean isTableExistsCacheEnabled() {
    return tableExistsCacheEnabled;
  }

  public long getTableExistsCacheExpireMs() {
    return tableExistsCacheExpireMs;
  }

  public boolean isPipeExistsCacheEnabled() {
    return pipeExistsCacheEnabled;
  }

  public long getPipeExistsCacheExpireMs() {
    return pipeExistsCacheExpireMs;
  }

  /**
   * Builds the caching configuration from the raw connector config map. Missing (or null) entries
   * fall back to the defaults declared in {@link KafkaConnectorConfigParams}. Values are trimmed
   * before parsing so that surrounding whitespace neither turns a boolean into {@code false} nor
   * makes a numeric value unparsable.
   *
   * @param config raw connector config map; must not be null
   * @return parsed caching configuration
   * @throws IllegalArgumentException if an expiration time is not a valid number (as {@link
   *     NumberFormatException}) or is not positive
   */
  public static CachingConfig fromConfig(final Map<String, String> config) {

    boolean tableExistsCacheEnabled =
        Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS))
            .map(String::trim)
            .map(Boolean::parseBoolean)
            .orElse(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_DEFAULT);

    long tableExistsCacheExpireMs =
        Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS))
            .map(String::trim)
            .map(Long::parseLong)
            .orElse(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT);

    boolean pipeExistsCacheEnabled =
        Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS))
            .map(String::trim)
            .map(Boolean::parseBoolean)
            .orElse(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_DEFAULT);

    long pipeExistsCacheExpireMs =
        Optional.ofNullable(config.get(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS))
            .map(String::trim)
            .map(Long::parseLong)
            .orElse(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT);

    // Validate expiration times are positive
    if (tableExistsCacheExpireMs <= 0) {
      throw new IllegalArgumentException(
          "Cache expiration for table existence must be positive, got: "
              + tableExistsCacheExpireMs);
    }
    if (pipeExistsCacheExpireMs <= 0) {
      throw new IllegalArgumentException(
          "Cache expiration for pipe existence must be positive, got: " + pipeExistsCacheExpireMs);
    }

    return new CachingConfig(
        tableExistsCacheEnabled,
        tableExistsCacheExpireMs,
        pipeExistsCacheEnabled,
        pipeExistsCacheExpireMs);
  }

  /** Two configurations are equal when all four cache settings match. */
  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    if (!(other instanceof CachingConfig)) {
      return false;
    }
    CachingConfig that = (CachingConfig) other;
    return tableExistsCacheEnabled == that.tableExistsCacheEnabled
        && tableExistsCacheExpireMs == that.tableExistsCacheExpireMs
        && pipeExistsCacheEnabled == that.pipeExistsCacheEnabled
        && pipeExistsCacheExpireMs == that.pipeExistsCacheExpireMs;
  }

  /** Hash code derived from the same four settings that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    int result = Boolean.hashCode(tableExistsCacheEnabled);
    result = 31 * result + Long.hashCode(tableExistsCacheExpireMs);
    result = 31 * result + Boolean.hashCode(pipeExistsCacheEnabled);
    result = 31 * result + Long.hashCode(pipeExistsCacheExpireMs);
    return result;
  }

  @Override
  public String toString() {
    return "CacheConfig{"
        + "tableExistsCacheEnabled="
        + tableExistsCacheEnabled
        + ", tableExistsCacheExpireMs="
        + tableExistsCacheExpireMs
        + ", pipeExistsCacheEnabled="
        + pipeExistsCacheEnabled
        + ", pipeExistsCacheExpireMs="
        + pipeExistsCacheExpireMs
        + '}';
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/CachingSnowflakeConnectionService.java
================================================
package com.snowflake.kafka.connector.internal;

import static java.util.concurrent.TimeUnit.MINUTES;

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheStats;
import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.sql.Connection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

/**
 * {@link SnowflakeConnectionService} decorator that remembers the answers of {@code tableExist()},
 * {@code pipeExist()} and {@code hasErrorLoggingEnabled()} in bounded, time-limited caches. All
 * remaining calls go straight to the wrapped service; {@code createTableWithOnlyMetadataColumn()}
 * and {@code executeQueryWithParameters()} additionally drop cached entries after delegating.
 */
public class CachingSnowflakeConnectionService implements SnowflakeConnectionService {

  private static final KCLogger LOGGER =
      new KCLogger(CachingSnowflakeConnectionService.class.getName());

  /** Minimum time between two periodic statistics dumps. */
  private static final long CACHE_STATS_LOG_INTERVAL_MS = MINUTES.toMillis(5);

  /** Upper bound on the number of entries held by each cache. */
  private static final int CACHE_SIZE = 100;

  private final SnowflakeConnectionService delegate;
  private final Cache<String, Boolean> tableExistsCache;
  private final Cache<String, Boolean> pipeExistsCache;
  private final Cache<String, Boolean> errorLoggingCache;
  private final boolean tableExistsCacheEnabled;
  private final boolean pipeExistsCacheEnabled;

  /** Wall-clock time (ms) of the most recent periodic statistics dump. */
  private final AtomicLong lastStatsLogTimestamp = new AtomicLong(System.currentTimeMillis());

  /**
   * Wraps the given connection service with caching.
   *
   * @param delegate the service that performs the real work
   * @param cachingConfig which caches are enabled and how long their entries live
   */
  public CachingSnowflakeConnectionService(
      SnowflakeConnectionService delegate, CachingConfig cachingConfig) {
    this.delegate = delegate;
    this.tableExistsCacheEnabled = cachingConfig.isTableExistsCacheEnabled();
    this.pipeExistsCacheEnabled = cachingConfig.isPipeExistsCacheEnabled();

    final long tableTtlMs = cachingConfig.getTableExistsCacheExpireMs();
    final long pipeTtlMs = cachingConfig.getPipeExistsCacheExpireMs();

    this.tableExistsCache = buildCache(tableTtlMs);
    this.pipeExistsCache = buildCache(pipeTtlMs);
    // error_logging is a per-table property as well, so it shares the table-exists TTL.
    this.errorLoggingCache = buildCache(tableTtlMs);

    LOGGER.info(
        "Initialized cached connection service - tableExists: {} ({}ms), pipeExists: {} ({}ms)",
        tableExistsCacheEnabled,
        tableTtlMs,
        pipeExistsCacheEnabled,
        pipeTtlMs);
  }

  /** Creates a stats-recording cache of at most CACHE_SIZE entries expiring after the given TTL. */
  private static Cache<String, Boolean> buildCache(long ttlMs) {
    return CacheBuilder.newBuilder()
        .expireAfterWrite(ttlMs, TimeUnit.MILLISECONDS)
        .maximumSize(CACHE_SIZE)
        .recordStats()
        .build();
  }

  // ---------------------------------------------------------------------------------------------
  // Cached lookups
  // ---------------------------------------------------------------------------------------------

  /** Table existence check, answered from the cache when table caching is enabled. */
  @Override
  public boolean tableExist(final String tableName) {
    if (tableExistsCacheEnabled) {
      return lookup(
          tableExistsCache,
          tableName,
          () -> delegate.tableExist(tableName),
          "Error accessing table exists cache for table: ");
    }
    return delegate.tableExist(tableName);
  }

  /** Pipe existence check, answered from the cache when pipe caching is enabled. */
  @Override
  public boolean pipeExist(final String pipeName) {
    if (pipeExistsCacheEnabled) {
      return lookup(
          pipeExistsCache,
          pipeName,
          () -> delegate.pipeExist(pipeName),
          "Error accessing pipe exists cache for pipe: ");
    }
    return delegate.pipeExist(pipeName);
  }

  /** Error-logging check; cached under the same switch as the table existence check. */
  @Override
  public boolean hasErrorLoggingEnabled(String tableName) {
    if (tableExistsCacheEnabled) {
      return lookup(
          errorLoggingCache,
          tableName,
          () -> delegate.hasErrorLoggingEnabled(tableName),
          "Error accessing error logging cache for table: ");
    }
    return delegate.hasErrorLoggingEnabled(tableName);
  }

  /**
   * Reads {@code key} from {@code cache}, computing it through {@code loader} on a miss, and gives
   * the periodic statistics dump a chance to run.
   *
   * @param failurePrefix message prefix for the RuntimeException raised when anything fails; the
   *     key is appended to it
   * @throws RuntimeException wrapping whatever the cache access or the stats logging threw
   */
  private boolean lookup(
      Cache<String, Boolean> cache,
      String key,
      java.util.concurrent.Callable<Boolean> loader,
      String failurePrefix) {
    try {
      final boolean answer = cache.get(key, loader);
      logStatsIfNeeded();
      return answer;
    } catch (Exception e) {
      throw new RuntimeException(failurePrefix + key, e);
    }
  }

  // ---------------------------------------------------------------------------------------------
  // Delegating calls that also drop cached entries
  // ---------------------------------------------------------------------------------------------

  /** Delegates, then forgets what was cached about {@code tableName}. */
  @Override
  public void createTableWithOnlyMetadataColumn(String tableName) {
    delegate.createTableWithOnlyMetadataColumn(tableName);
    tableExistsCache.invalidate(tableName);
    errorLoggingCache.invalidate(tableName);
  }

  /** Delegates, then clears all three caches. */
  @Override
  public void executeQueryWithParameters(String query, String... parameters) {
    delegate.executeQueryWithParameters(query, parameters);
    pipeExistsCache.invalidateAll();
    tableExistsCache.invalidateAll();
    errorLoggingCache.invalidateAll();
  }

  // ---------------------------------------------------------------------------------------------
  // Plain pass-through calls
  // ---------------------------------------------------------------------------------------------

  @Override
  public boolean isTableCompatible(String tableName) {
    return delegate.isTableCompatible(tableName);
  }

  @Override
  public void databaseExists(String databaseName) {
    delegate.databaseExists(databaseName);
  }

  @Override
  public void schemaExists(String schemaName) {
    delegate.schemaExists(schemaName);
  }

  @Override
  public SnowflakeTelemetryService getTelemetryClient() {
    return delegate.getTelemetryClient();
  }

  @Override
  public boolean isClosed() {
    return delegate.isClosed();
  }

  @Override
  public String getConnectorName() {
    return delegate.getConnectorName();
  }

  @Override
  public Connection getConnection() {
    return delegate.getConnection();
  }

  @Override
  public Optional<List<DescribeTableRow>> describeTable(String tableName) {
    return delegate.describeTable(tableName);
  }

  @Override
  public void appendColumnsToTable(String tableName, Map<String, ColumnInfos> columnInfosMap) {
    delegate.appendColumnsToTable(tableName, columnInfosMap);
  }

  @Override
  public void alterNonNullableColumns(String tableName, List<String> columnNames) {
    delegate.alterNonNullableColumns(tableName, columnNames);
  }

  @Override
  public boolean shouldEvolveSchema(String tableName, String role) {
    return delegate.shouldEvolveSchema(tableName, role);
  }

  @Override
  public boolean isIcebergTable(String tableName) {
    return delegate.isIcebergTable(tableName);
  }

  @Override
  public Ssv1MigrationResponse migrateSsv1ChannelOffset(
      String tableName, String ssv1ChannelName, String ssv2ChannelName, String pipeName) {
    return delegate.migrateSsv1ChannelOffset(tableName, ssv1ChannelName, ssv2ChannelName, pipeName);
  }

  /** Logs the final cache statistics, then closes the wrapped service. */
  @Override
  public void close() {
    LOGGER.info("Closing CachedSnowflakeConnectionService, final cache statistics:");
    logCacheStatistics();
    delegate.close();
  }

  // ---------------------------------------------------------------------------------------------
  // Statistics
  // ---------------------------------------------------------------------------------------------

  /**
   * Writes the statistics of every enabled cache to the log: table cache first, then pipe cache,
   * then the error-logging cache (which follows the table cache switch).
   */
  public void logCacheStatistics() {
    if (tableExistsCacheEnabled) {
      logLoadStats(
          "Table cache stats - Requests: {}, Hits: {}, Misses: {}, Hit Rate: {}%, "
              + "Evictions: {}, Load Success: {}, Load Failures: {}, Avg Load Time: {}ms, Size: {}",
          tableExistsCache);
    }

    if (pipeExistsCacheEnabled) {
      logLoadStats(
          "Pipe cache stats - Requests: {}, Hits: {}, Misses: {}, Hit Rate: {}%, "
              + "Evictions: {}, Load Success: {}, Load Failures: {}, Avg Load Time: {}ms, Size: {}",
          pipeExistsCache);
    }

    if (tableExistsCacheEnabled) {
      final CacheStats stats = errorLoggingCache.stats();
      LOGGER.info(
          "Error logging cache stats - Requests: {}, Hits: {}, Misses: {}, Hit Rate: {}%,"
              + " Size: {}",
          stats.requestCount(),
          stats.hitCount(),
          stats.missCount(),
          String.format("%.2f", stats.hitRate() * 100),
          errorLoggingCache.size());
    }
  }

  /** Logs the full statistics line (nine placeholders in {@code format}) for one cache. */
  private static void logLoadStats(String format, Cache<String, Boolean> cache) {
    final CacheStats stats = cache.stats();
    LOGGER.info(
        format,
        stats.requestCount(),
        stats.hitCount(),
        stats.missCount(),
        String.format("%.2f", stats.hitRate() * 100),
        stats.evictionCount(),
        stats.loadSuccessCount(),
        stats.loadExceptionCount(),
        // averageLoadPenalty() is in nanoseconds; report milliseconds
        String.format("%.2f", stats.averageLoadPenalty() / 1_000_000.0),
        cache.size());
  }

  /**
   * Dumps the statistics when at least CACHE_STATS_LOG_INTERVAL_MS passed since the last dump. The
   * compare-and-set lets only one of several racing callers do the logging.
   */
  private void logStatsIfNeeded() {
    final long currentMs = System.currentTimeMillis();
    final long previousMs = lastStatsLogTimestamp.get();
    final boolean intervalElapsed = currentMs - previousMs >= CACHE_STATS_LOG_INTERVAL_MS;
    if (intervalElapsed && lastStatsLogTimestamp.compareAndSet(previousMs, currentMs)) {
      logCacheStatistics();
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/DescribeTableRow.java
================================================
package com.snowflake.kafka.connector.internal;

import java.util.Objects;

/**
 * One row of a describe table result: column name, type, comment, nullability and the optional
 * default / autoincrement metadata.
 */
public class DescribeTableRow {
  private final String column;
  private final String type;
  private final String comment;
  private final String nullable;
  private final String defaultValue;
  private final String autoincrement;

  /** Creates a row carrying every attribute, including default and autoincrement metadata. */
  public DescribeTableRow(
      String column,
      String type,
      String comment,
      String nullable,
      String defaultValue,
      String autoincrement) {
    this.column = column;
    this.type = type;
    this.comment = comment;
    this.nullable = nullable;
    this.defaultValue = defaultValue;
    this.autoincrement = autoincrement;
  }

  /** Four-argument form kept for older callers; default and autoincrement are set to null. */
  public DescribeTableRow(String column, String type, String comment, String nullable) {
    this(column, type, comment, nullable, null, null);
  }

  public String getColumn() {
    return this.column;
  }

  public String getType() {
    return this.type;
  }

  public String getComment() {
    return this.comment;
  }

  public String getNullable() {
    return this.nullable;
  }

  public String getDefaultValue() {
    return this.defaultValue;
  }

  public String getAutoincrement() {
    return this.autoincrement;
  }

  /** Whether a non-empty default value was reported for the column. */
  public boolean hasDefault() {
    return isPresent(defaultValue);
  }

  /** Whether a non-empty autoincrement description was reported for the column. */
  public boolean isAutoincrement() {
    return isPresent(autoincrement);
  }

  /** A value counts as present when it is neither null nor the empty string. */
  private static boolean isPresent(String value) {
    return !(value == null || value.isEmpty());
  }

  /** Two rows are equal when their column name and type match; other attributes are ignored. */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    final DescribeTableRow other = (DescribeTableRow) o;
    if (!Objects.equals(this.column, other.column)) {
      return false;
    }
    return Objects.equals(this.type, other.type);
  }

  /** Hash over column name and type only; same value as {@code Objects.hash(column, type)}. */
  @Override
  public int hashCode() {
    int hash = 31 + Objects.hashCode(column);
    hash = 31 * hash + Objects.hashCode(type);
    return hash;
  }

  /** Leading space, column name, space, type, then the optional default and autoincrement. */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append(' ').append(column).append(' ').append(type);
    if (hasDefault()) {
      text.append(" DEFAULT=").append(defaultValue);
    }
    if (isAutoincrement()) {
      text.append(' ').append(autoincrement);
    }
    return text.toString();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/InternalUtils.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.apache.commons.lang3.StringUtils.isBlank;

import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Optional;
import java.util.Properties;
import org.apache.kafka.common.config.types.Password;

/**
 * Static helpers used inside the connector: result-set counting, argument checks, timestamp
 * formatting and assembly of the JDBC / proxy property sets.
 */
public class InternalUtils {
  // authenticator type
  public static final String SNOWFLAKE_JWT = "snowflake_jwt";
  // JDBC parameter list
  static final String JDBC_DATABASE = "db";
  static final String JDBC_SCHEMA = "schema";
  static final String JDBC_USER = "user";
  static final String JDBC_PRIVATE_KEY = "privateKey";
  static final String JDBC_SSL = "ssl";
  static final String JDBC_SESSION_KEEP_ALIVE = "client_session_keep_alive";
  static final String JDBC_WAREHOUSE = "warehouse"; // for test only
  static final String JDBC_TOKEN = JdbcPropertyKeys.TOKEN;
  static final String JDBC_QUERY_RESULT_FORMAT = "JDBC_QUERY_RESULT_FORMAT";
  // internal parameters

  private static final KCLogger LOGGER = new KCLogger(InternalUtils.class.getName());

  /** UTC formatter producing values such as {@code 1970-01-01T00:00:00Z}. */
  private static final DateTimeFormatter ISO_DATE_TIME_FORMAT =
      DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").withZone(ZoneOffset.UTC);

  /**
   * Counts the rows that remain in a result set by advancing it to the end.
   *
   * @param resultSet sql result set
   * @return number of rows stepped over
   * @throws SQLException when reading the result set fails
   */
  static int resultSize(ResultSet resultSet) throws SQLException {
    int rowCount = 0;
    for (boolean hasRow = resultSet.next(); hasRow; hasRow = resultSet.next()) {
      rowCount += 1;
    }
    return rowCount;
  }

  /**
   * Throws when {@code value} is null or an empty String. The error raised depends on the
   * lower-cased parameter name: "tablename", "pipename" and "conf" have dedicated errors, any
   * other name gets the generic one with the name included in the message.
   *
   * @param name parameter name, used to select the error
   * @param value value to check
   */
  static void assertNotEmpty(String name, Object value) {
    final boolean emptyString = value instanceof String && value.toString().isEmpty();
    if (value != null && !emptyString) {
      return;
    }

    final String key = name.toLowerCase();
    if ("tablename".equals(key)) {
      throw SnowflakeErrors.ERROR_0005.getException();
    } else if ("pipename".equals(key)) {
      throw SnowflakeErrors.ERROR_0006.getException();
    } else if ("conf".equals(key)) {
      throw SnowflakeErrors.ERROR_0001.getException();
    } else {
      throw SnowflakeErrors.ERROR_0003.getException("parameter name: " + name);
    }
  }

  /**
   * Formats an epoch-millisecond timestamp as a UTC date-time string.
   *
   * @param time timestamp in milliseconds since the epoch
   * @return formatted date string
   */
  static String timestampToDate(long time) {
    final Instant instant = Instant.ofEpochMilli(time);
    final String formatted = ISO_DATE_TIME_FORMAT.format(instant);
    LOGGER.debug("converted date: {}", formatted);
    return formatted;
  }

  /**
   * Assembles the JDBC driver properties for a parsed {@link SinkTaskConfig}.
   *
   * <p>Checks happen in this order: blank private key (ERROR_0013), private key parsing, missing
   * schema (ERROR_0014), missing database (ERROR_0015), missing user (ERROR_0016).
   *
   * @param config parsed sink task configuration
   * @param url target server url
   * @return a Properties instance ready for JDBC
   */
  static Properties makeJdbcDriverProperties(SinkTaskConfig config, SnowflakeURL url) {
    final Properties jdbc = new Properties();

    putIfNotBlank(jdbc, JDBC_DATABASE, config.getSnowflakeDatabase());
    putIfNotBlank(jdbc, JDBC_SCHEMA, config.getSnowflakeSchema());
    putIfNotBlank(jdbc, JDBC_USER, config.getSnowflakeUser());
    putIfNotBlank(jdbc, JdbcPropertyKeys.ROLE, config.getSnowflakeRole());

    jdbc.put(JdbcPropertyKeys.AUTHENTICATOR, SNOWFLAKE_JWT);

    final Password keyHolder = config.getSnowflakePrivateKey();
    final String keyText = keyHolder == null ? null : keyHolder.value();
    if (isBlank(keyText)) {
      throw SnowflakeErrors.ERROR_0013.getException();
    }
    final Password passphraseHolder = config.getSnowflakePrivateKeyPassphrase();
    final String passphraseText = passphraseHolder == null ? null : passphraseHolder.value();
    jdbc.put(JDBC_PRIVATE_KEY, PrivateKeyTool.parsePrivateKey(keyText, passphraseText));

    if (url.sslEnabled()) {
      jdbc.put(JDBC_SSL, "on");
    } else {
      jdbc.put(JDBC_SSL, "off");
    }
    // put values for optional parameters
    jdbc.put(JDBC_SESSION_KEEP_ALIVE, "true");
    // SNOW-989387 - Set query resultset format to JSON as a workaround
    jdbc.put(JDBC_QUERY_RESULT_FORMAT, "json");
    jdbc.put(JdbcPropertyKeys.ALLOW_UNDERSCORES_IN_HOST, "true");

    // A key is absent here exactly when the corresponding config value was blank.
    if (!jdbc.containsKey(JDBC_SCHEMA)) {
      throw SnowflakeErrors.ERROR_0014.getException();
    }
    if (!jdbc.containsKey(JDBC_DATABASE)) {
      throw SnowflakeErrors.ERROR_0015.getException();
    }
    if (!jdbc.containsKey(JDBC_USER)) {
      throw SnowflakeErrors.ERROR_0016.getException();
    }

    return jdbc;
  }

  /** Stores {@code value} under {@code key} unless the value is blank. */
  private static void putIfNotBlank(Properties properties, String key, String value) {
    if (isBlank(value)) {
      return;
    }
    properties.put(key, value);
  }

  /**
   * Builds the proxy-related properties. Nothing is set unless both proxy host and proxy port are
   * configured; the non-proxy-hosts entry and the user/password pair are added only when present
   * (user and password must both be non-null).
   *
   * @param config parsed connector configuration
   * @return proxy parameters, possibly empty
   */
  protected static Properties generateProxyParametersIfRequired(SinkTaskConfig config) {
    final Properties proxy = new Properties();
    if (config.getProxyHost() == null || config.getProxyPort() == null) {
      return proxy;
    }

    proxy.put(JdbcPropertyKeys.USE_PROXY, "true");
    proxy.put(JdbcPropertyKeys.PROXY_HOST, config.getProxyHost());
    proxy.put(JdbcPropertyKeys.PROXY_PORT, config.getProxyPort());

    // nonProxyHosts is optional
    if (config.getNonProxyHosts() != null) {
      proxy.put(JdbcPropertyKeys.NON_PROXY_HOSTS, config.getNonProxyHosts());
    }

    // credentials are only used as a pair
    final boolean hasCredentials =
        config.getProxyUsername() != null && config.getProxyPassword() != null;
    if (hasCredentials) {
      proxy.put(JdbcPropertyKeys.PROXY_USER, config.getProxyUsername());
      proxy.put(JdbcPropertyKeys.PROXY_PASSWORD, config.getProxyPassword());
    }
    return proxy;
  }

  /**
   * Parses the comma separated key/value pairs of the configured JDBC map into Properties.
   *
   * @param config parsed connector configuration
   * @return parsed properties; empty when no JDBC map is configured
   */
  protected static Properties parseJdbcPropertiesMap(SinkTaskConfig config) {
    final Properties parsed = new Properties();
    if (config.getJdbcMap() != null) {
      parsed.putAll(Utils.parseCommaSeparatedKeyValuePairs(config.getJdbcMap()));
    }
    return parsed;
  }

  /** Interfaces to define the lambda function to be used by backoffAndRetry */
  public interface backoffFunction {
    Object apply() throws Exception;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/JdbcProperties.java
================================================
package com.snowflake.kafka.connector.internal;

import java.util.Properties;

/** Holds the complete set of snowflake jdbc properties together with the proxy-only subset. */
public class JdbcProperties {

  /** Connection, proxy and jdbc-map properties merged together */
  private final Properties properties;

  /** Only the proxy related properties */
  private final Properties proxyProperties;

  private JdbcProperties(Properties combinedProperties, Properties proxyProperties) {
    this.proxyProperties = proxyProperties;
    this.properties = combinedProperties;
  }

  public Properties getProperties() {
    return this.properties;
  }

  public Properties getProxyProperties() {
    return this.proxyProperties;
  }

  /**
   * Merges all jdbc related properties into one instance. Fails when jdbcPropertiesMap contains a
   * key that is already defined by connectionProperties or proxyProperties.
   *
   * @param connectionProperties snowflake.database.name, snowflake.schema.name,
   *     snowflake.private.key etc.; must not be null
   * @param proxyProperties jvm.proxy.xxx; null is treated as empty
   * @param jdbcPropertiesMap snowflake.jdbc.map; null is treated as empty
   * @return wrapper exposing the merged properties and the proxy properties
   */
  static JdbcProperties create(
      Properties connectionProperties, Properties proxyProperties, Properties jdbcPropertiesMap) {
    InternalUtils.assertNotEmpty("connectionProperties", connectionProperties);

    final Properties proxy = setEmptyIfNull(proxyProperties);
    final Properties userSupplied = setEmptyIfNull(jdbcPropertiesMap);

    final Properties base = mergeProperties(connectionProperties, proxy);
    detectOverrides(base, userSupplied);

    return new JdbcProperties(mergeProperties(base, userSupplied), proxy);
  }

  /** Raises ERROR_0031 for the first key of {@code jdbcPropertiesMap} already present in base. */
  private static void detectOverrides(Properties proxyAndConnection, Properties jdbcPropertiesMap) {
    jdbcPropertiesMap.forEach(
        (key, ignoredValue) -> {
          final boolean alreadyDefined = proxyAndConnection.containsKey(key);
          if (alreadyDefined) {
            throw SnowflakeErrors.ERROR_0031.getException("Duplicated property: " + key);
          }
        });
  }

  /** Copies {@code first} and then {@code second} into a fresh Properties instance. */
  private static Properties mergeProperties(Properties first, Properties second) {
    final Properties merged = new Properties();
    merged.putAll(first);
    merged.putAll(second);
    return merged;
  }

  /** Parsing methods should not return null; this guards against it anyway. */
  private static Properties setEmptyIfNull(Properties properties) {
    return properties == null ? new Properties() : properties;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/JdbcPropertyKeys.java
================================================
package com.snowflake.kafka.connector.internal;

/**
 * Names of the Snowflake JDBC connection properties used by the connector. The values mirror the
 * connection parameters of the official JDBC driver (see the Snowflake JDBC documentation) and
 * replace the driver-internal SFSessionProperty so that only the JDBC 4.x public API is needed.
 */
public final class JdbcPropertyKeys {

  // --- session / authentication ---
  public static final String AUTHENTICATOR = "authenticator";
  public static final String TOKEN = "token";
  public static final String ROLE = "role";

  // --- host handling ---
  public static final String ALLOW_UNDERSCORES_IN_HOST = "allowUnderscoresInHost";

  // --- proxy ---
  public static final String USE_PROXY = "useProxy";
  public static final String PROXY_HOST = "proxyHost";
  public static final String PROXY_PORT = "proxyPort";
  public static final String PROXY_USER = "proxyUser";
  public static final String PROXY_PASSWORD = "proxyPassword";
  public static final String NON_PROXY_HOSTS = "nonProxyHosts";

  /** Constants holder; not meant to be instantiated. */
  private JdbcPropertyKeys() {}
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/KCLogger.java
================================================
package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

/** Logger for Snowflake Sink Connector. Attaches MDC's connector context if available */
public class KCLogger {
  public static final String MDC_CONN_CTX_KEY = "connector.context";
  private static boolean prependMdcContext;
  private static final Logger META_LOGGER = LoggerFactory.getLogger(KCLogger.class.getName());
  private Logger logger;

  /**
   * Enable or disables the MDC context. Only available for apache kafka versions after 2.3.0.
   * https://cwiki.apache.org/confluence/display/KAFKA/KIP-449%3A+Add+connector+contexts+to+Connect+worker+logs
   *
   * @param shouldPrependMdcContext If all KC loggers should enable or disable MDC context
   */
  public static void toggleGlobalMdcLoggingContext(boolean shouldPrependMdcContext) {
    prependMdcContext = shouldPrependMdcContext;
    META_LOGGER.debug(
        "Setting MDC context enablement to: {}. MDC context is only available for Apache Kafka"
            + " versions after 2.3.0",
        shouldPrependMdcContext);
  }

  /**
   * Create and return a new logging handler
   *
   * @param name The class name passed for initializing the logger
   */
  public KCLogger(String name) {
    this.logger = LoggerFactory.getLogger(name);
  }

  /**
   * Logs an info level message
   *
   * @param format The message format without variables
   * @param vars The variables to insert into the format. These variables will be toString()'ed
   */
  public void info(String format, Object... vars) {
    if (this.logger.isInfoEnabled()) {
      this.logger.info(this.getFormattedLogMessage(format, vars));
    }
  }

  /**
   * @return true when the underlying logger has the info level enabled
   */
  public boolean isInfoEnabled() {
    return logger.isInfoEnabled();
  }

  /**
   * Logs a trace level message
   *
   * @param format The message format without variables
   * @param vars The variables to insert into the format. These variables will be toString()'ed
   */
  public void trace(String format, Object... vars) {
    if (this.logger.isTraceEnabled()) {
      this.logger.trace(this.getFormattedLogMessage(format, vars));
    }
  }

  /**
   * Logs a debug level message
   *
   * @param format The message format without variables
   * @param vars The variables to insert into the format. These variables will be toString()'ed
   */
  public void debug(String format, Object... vars) {
    if (this.logger.isDebugEnabled()) {
      this.logger.debug(this.getFormattedLogMessage(format, vars));
    }
  }

  /**
   * Logs a warn level message
   *
   * @param format The message format without variables
   * @param vars The variables to insert into the format. These variables will be toString()'ed
   */
  public void warn(String format, Object... vars) {
    if (this.logger.isWarnEnabled()) {
      this.logger.warn(this.getFormattedLogMessage(format, vars));
    }
  }

  /**
   * Logs an error level message
   *
   * @param format The message format without variables
   * @param vars The variables to insert into the format. These variables will be toString()'ed
   */
  public void error(String format, Object... vars) {
    if (this.logger.isErrorEnabled()) {
      this.logger.error(this.getFormattedLogMessage(format, vars));
    }
  }

  /**
   * Logs an error level message together with a throwable. The message is handed to the underlying
   * logger as-is: it is neither run through the connector's message formatting nor prefixed with
   * the MDC context.
   *
   * @param s the message
   * @param throwable the throwable to log alongside the message
   */
  public void error(String s, Throwable throwable) {
    if (this.logger.isErrorEnabled()) {
      logger.error(s, throwable);
    }
  }

  /**
   * @return true when the underlying logger has the debug level enabled
   */
  public boolean isDebugEnabled() {
    return logger.isDebugEnabled();
  }

  /**
   * @return true when the underlying logger has the trace level enabled
   */
  public boolean isTraceEnabled() {
    return logger.isTraceEnabled();
  }

  /**
   * Formats a message through {@link Utils#formatLogMessage}, prefixing the format with the MDC
   * connector context when prepending is enabled and a context value exists for the current
   * thread.
   *
   * @param format The message format without variables
   * @param vars The variables to insert into the format
   * @return the formatted message
   */
  private String getFormattedLogMessage(String format, Object... vars) {
    if (prependMdcContext) {
      String connCtx = MDC.get(MDC_CONN_CTX_KEY);
      // MDC.get() yields null when the key is not set for the current thread. Concatenating that
      // would prefix the message with the literal text "null", so fall through to the plain
      // format instead.
      if (connCtx != null) {
        return Utils.formatLogMessage(connCtx + format, vars);
      }
    }

    return Utils.formatLogMessage(format, vars);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/PrivateKeyTool.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.apache.commons.lang3.StringUtils.isBlank;

import java.io.StringReader;
import java.security.KeyFactory;
import java.security.PrivateKey;
import java.security.Security;
import java.security.spec.PKCS8EncodedKeySpec;
import java.util.Base64;
import org.bouncycastle.asn1.pkcs.PrivateKeyInfo;
import org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider;
import org.bouncycastle.openssl.PEMParser;
import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter;
import org.bouncycastle.openssl.jcajce.JceOpenSSLPKCS8DecryptorProviderBuilder;
import org.bouncycastle.operator.InputDecryptorProvider;
import org.bouncycastle.pkcs.PKCS8EncryptedPrivateKeyInfo;

/** Parses the connector's private key, either plain PKCS#8 or passphrase-encrypted PKCS#8. */
public final class PrivateKeyTool {

  private static final KCLogger LOGGER = new KCLogger(PrivateKeyTool.class.getName());

  /**
   * Parses a private key given as text.
   *
   * @param privateKeyStr key material, with or without PEM header/footer and line breaks
   * @param privateKeyPassword passphrase; when blank the key is treated as not encrypted
   * @return the parsed private key
   */
  public static PrivateKey parsePrivateKey(String privateKeyStr, String privateKeyPassword) {
    if (isBlank(privateKeyPassword)) {
      return parseNonEncryptedPrivateKey(privateKeyStr);
    } else {
      return parseEncryptedPrivateKey(privateKeyStr, privateKeyPassword);
    }
  }

  /** Removes the PEM header, footer and all whitespace, leaving only the base64 payload. */
  private static String stripPemArmor(String key) {
    return key.replaceAll("-+[A-Za-z ]+-+", "").replaceAll("\\s", "");
  }

  /**
   * Decodes an unencrypted key as base64 PKCS#8 and builds an RSA private key from it.
   *
   * @param key key text, PEM armor optional
   * @return the parsed private key
   * @throws RuntimeException the ERROR_0002 exception when the text is not valid base64 or not a
   *     valid RSA PKCS#8 key
   */
  private static PrivateKey parseNonEncryptedPrivateKey(String key) {
    LOGGER.info("Not using passphrase for private key, not specified");
    key = stripPemArmor(key);

    byte[] encoded;
    try {
      encoded = Base64.getDecoder().decode(key);
    } catch (IllegalArgumentException e) {
      throw SnowflakeErrors.ERROR_0002.getException(e);
    }
    try {
      KeyFactory kf = KeyFactory.getInstance("RSA");
      PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded);
      return kf.generatePrivate(keySpec);
    } catch (Exception e) {
      throw SnowflakeErrors.ERROR_0002.getException(e);
    }
  }

  /**
   * Decrypts a passphrase-protected PKCS#8 key.
   *
   * @param key key text, PEM armor optional
   * @param passphrase passphrase protecting the key
   * @return the decrypted private key
   * @throws RuntimeException the ERROR_0018 exception when reading or decrypting the key fails
   */
  private static PrivateKey parseEncryptedPrivateKey(String key, String passphrase) {
    LOGGER.info("Using passphrase for private key");
    key = stripPemArmor(key);

    // Rebuild a canonical PEM document (64 characters per line) for the PEM parser.
    StringBuilder builder = new StringBuilder();
    builder.append("-----BEGIN ENCRYPTED PRIVATE KEY-----");
    for (int i = 0; i < key.length(); i++) {
      if (i % 64 == 0) {
        builder.append("\n");
      }
      builder.append(key.charAt(i));
    }
    builder.append("\n-----END ENCRYPTED PRIVATE KEY-----");
    key = builder.toString();
    Security.addProvider(new BouncyCastleFipsProvider());
    try {
      PKCS8EncryptedPrivateKeyInfo encryptedPrivateKeyInfo;
      // try-with-resources closes the parser even when readObject() fails
      try (PEMParser pemParser = new PEMParser(new StringReader(key))) {
        encryptedPrivateKeyInfo = (PKCS8EncryptedPrivateKeyInfo) pemParser.readObject();
      }
      InputDecryptorProvider pkcs8Prov =
          new JceOpenSSLPKCS8DecryptorProviderBuilder().build(passphrase.toCharArray());
      JcaPEMKeyConverter converter =
          new JcaPEMKeyConverter().setProvider(BouncyCastleFipsProvider.PROVIDER_NAME);
      PrivateKeyInfo decryptedPrivateKeyInfo =
          encryptedPrivateKeyInfo.decryptPrivateKeyInfo(pkcs8Prov);
      return converter.getPrivateKey(decryptedPrivateKeyInfo);
    } catch (Exception e) {
      throw SnowflakeErrors.ERROR_0018.getException(e);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeConnectionService.java
================================================
package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.sql.Connection;
import java.util.List;
import java.util.Map;
import java.util.Optional;

public interface SnowflakeConnectionService {
  /**
   * Check whether a table exists.
   *
   * @param tableName table name
   * @return true if table exists, false otherwise
   */
  boolean tableExist(String tableName);

  /**
   * Check whether a pipe exists.
   *
   * @param pipeName pipe name
   * @return true if pipe exists, false otherwise
   */
  boolean pipeExist(String pipeName);

  /**
   * Check whether the given table has a compatible schema. A compatible schema contains a
   * record_metadata column of type VARIANT.
   *
   * @param tableName table name
   * @return true if schema is correct, false if schema is incorrect or table does not exist
   */
  boolean isTableCompatible(String tableName);

  /**
   * Check if a given database exists.
   *
   * <p>The method returns nothing; NOTE(review): a missing database is presumably signalled by an
   * exception thrown from the implementation — confirm against the implementing class.
   *
   * @param databaseName database name
   */
  void databaseExists(String databaseName);

  /**
   * Check if a given schema exists.
   *
   * <p>The method returns nothing; NOTE(review): a missing schema is presumably signalled by an
   * exception thrown from the implementation — confirm against the implementing class.
   *
   * @param schemaName schema name
   */
  void schemaExists(String schemaName);

  /**
   * @return telemetry client
   */
  SnowflakeTelemetryService getTelemetryClient();

  /** Close the connection. */
  void close();

  /**
   * @return true if the connection is closed
   */
  boolean isClosed();

  /**
   * @return name of Kafka Connector instance
   */
  String getConnectorName();

  /**
   * @return the raw jdbc connection
   */
  Connection getConnection();

  /**
   * Create a table with only the RECORD_METADATA column. The rest of the columns might be added
   * through schema evolution
   *
   * <p>In the beginning of the function we will check if we have the permission to do schema
   * evolution, and we will error out if we don't
   *
   * <p>NOTE(review): verify that implementations still perform the permission check described
   * above; this contract text may be out of date.
   *
   * @param tableName table name
   */
  void createTableWithOnlyMetadataColumn(String tableName);

  /**
   * Calls describe table statement and returns all columns and corresponding types.
   *
   * @param tableName - table name
   * @return Optional.empty() if table does not exist. List of all table columns and their types
   *     otherwise.
   */
  Optional<List<DescribeTableRow>> describeTable(String tableName);

  /**
   * Execute a SQL query with bound parameters.
   *
   * @param query sql query string
   * @param parameters query parameters
   */
  void executeQueryWithParameters(String query, String... parameters);

  /**
   * Add columns to an existing table via ALTER TABLE ... ADD COLUMN IF NOT EXISTS.
   *
   * @param tableName table name
   * @param columnInfosMap map of column name to ColumnInfos (type + comment)
   */
  void appendColumnsToTable(String tableName, Map<String, ColumnInfos> columnInfosMap);

  /**
   * Drop NOT NULL constraints on columns via ALTER TABLE ... ALTER ... DROP NOT NULL.
   *
   * @param tableName table name
   * @param columnNames list of column names to make nullable
   */
  void alterNonNullableColumns(String tableName, List<String> columnNames);

  /**
   * Check whether the user has the role privilege to do schema evolution and whether the schema
   * evolution option is enabled on the table.
   *
   * @param tableName table name
   * @param role the role of the user
   * @return whether schema evolution has the required permission to be performed
   */
  boolean shouldEvolveSchema(String tableName, String role);

  /**
   * Check whether the given table is an iceberg table.
   *
   * @param tableName table name
   * @return true if the table is an iceberg table, false otherwise
   */
  boolean isIcebergTable(String tableName);

  /**
   * Check whether the given table has ERROR_LOGGING enabled via SHOW TABLES.
   *
   * @param tableName table name
   * @return true if error_logging is "Y", false otherwise or if the column is not present
   */
  boolean hasErrorLoggingEnabled(String tableName);

  /**
   * Calls SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET to migrate the committed offset from an SSv1 channel
   * to an SSv2 channel. The system function reads the SSv1 offset and writes it directly to the
   * SSv2 channel in FDB.
   *
   * @param tableName unqualified table name (the JDBC session's database/schema are used)
   * @param ssv1ChannelName SSv1 channel name ({topic}_{partition} or
   *     {connectorName}_{topic}_{partition})
   * @param ssv2ChannelName SSv2 channel name ({connectorName}_{topic}_{partition})
   * @param pipeName SSv2 pipe name
   * @return the parsed {@link Ssv1MigrationResponse} indicating whether the channel was found and
   *     (if so) the migrated offset value
   * @throws RuntimeException if the system function call fails (SQL error, unexpected response)
   */
  Ssv1MigrationResponse migrateSsv1ChannelOffset(
      String tableName, String ssv1ChannelName, String ssv2ChannelName, String pipeName);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeConnectionServiceFactory.java
================================================
package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import java.util.Map;
import java.util.Properties;

/** Entry point for assembling {@link SnowflakeConnectionService} instances through a builder. */
public class SnowflakeConnectionServiceFactory {
  /**
   * @return a fresh builder with no connection properties set
   */
  public static SnowflakeConnectionServiceBuilder builder() {
    return new SnowflakeConnectionServiceBuilder();
  }

  /** Collects connection settings and produces a caching connection service. */
  public static class SnowflakeConnectionServiceBuilder {

    private JdbcProperties jdbcProperties;
    private SnowflakeURL url;
    private String connectorName;
    // Default task id used when the caller never supplies one.
    private String taskID = "-1";
    private CachingConfig cachingConfig;

    // For testing only
    public Properties getProperties() {
      return jdbcProperties.getProperties();
    }

    /**
     * @param taskID task identifier forwarded to the connection service
     * @return this builder
     */
    public SnowflakeConnectionServiceBuilder setTaskID(String taskID) {
      this.taskID = taskID;
      return this;
    }

    /**
     * Parses the raw connector configuration and delegates to {@link
     * #setProperties(SinkTaskConfig)}.
     *
     * @param conf raw connector configuration
     * @return this builder
     */
    public SnowflakeConnectionServiceBuilder setProperties(Map<String, String> conf) {
      SinkTaskConfig parsed = SinkTaskConfig.from(conf, true);
      return setProperties(parsed);
    }

    /**
     * Derives the Snowflake URL, connector name, caching config and JDBC properties from an
     * already parsed configuration.
     *
     * @param parsedConfig parsed connector configuration
     * @return this builder
     * @throws SnowflakeKafkaConnectorException (ERROR_0017) if the Snowflake URL is null or empty
     */
    public SnowflakeConnectionServiceBuilder setProperties(SinkTaskConfig parsedConfig) {
      final String rawUrl = parsedConfig.getSnowflakeUrl();
      final boolean urlMissing = rawUrl == null || rawUrl.isEmpty();
      if (urlMissing) {
        throw SnowflakeErrors.ERROR_0017.getException();
      }

      this.url = new SnowflakeURL(rawUrl);
      this.connectorName = parsedConfig.getConnectorName();
      this.cachingConfig = parsedConfig.getCachingConfig();

      // The three property sets are computed separately and merged by JdbcProperties.create.
      final Properties driverProps = InternalUtils.makeJdbcDriverProperties(parsedConfig, this.url);
      final Properties proxyProps = InternalUtils.generateProxyParametersIfRequired(parsedConfig);
      final Properties userJdbcProps = InternalUtils.parseJdbcPropertiesMap(parsedConfig);
      this.jdbcProperties = JdbcProperties.create(driverProps, proxyProps, userJdbcProps);
      return this;
    }

    /**
     * Validates that the required settings are present and builds the service.
     *
     * @return a {@link CachingSnowflakeConnectionService} wrapping a {@link
     *     StandardSnowflakeConnectionService}
     */
    public SnowflakeConnectionService build() {
      InternalUtils.assertNotEmpty("jdbcProperties", jdbcProperties);
      InternalUtils.assertNotEmpty("url", url);
      InternalUtils.assertNotEmpty("connectorName", connectorName);

      final SnowflakeConnectionService delegate =
          new StandardSnowflakeConnectionService(jdbcProperties, url, connectorName, taskID);
      return new CachingSnowflakeConnectionService(delegate, cachingConfig);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeErrors.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;

/**
 * Catalogue of connector error conditions. Each constant carries a four-character code, a short
 * name and a detail text, and can produce a {@link SnowflakeKafkaConnectorException} that
 * optionally reports itself to telemetry.
 */
public enum SnowflakeErrors {

  // connector configuration issues 0---
  ERROR_0001(
      "0001",
      "Invalid input connector configuration",
      "input kafka connector configuration is null, missing required values, "
          + "or is invalid. Check logs for list of invalid parameters."),
  ERROR_0002("0002", "Invalid private key", "private key should be a valid PEM RSA private key"),
  ERROR_0003(
      "0003",
      "Missing required parameter",
      "one or multiple required parameters haven't be provided"),
  ERROR_0005("0005", "Empty Table name", "Input Table name is empty string or null"),
  ERROR_0006("0006", "Empty Pipe name", "Input Pipe name is empty String or null"),
  ERROR_0007(
      "0007",
      "Invalid Snowflake URL",
      "Snowflake URL format: 'https://<account_name>.<region_name>"
          + ".snowflakecomputing.com:443', 'https://' and ':443' are optional."),
  ERROR_0013(
      "0013",
      "Missed private key in connector config",
      "private key must be provided with "
          + KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY
          + " parameter"),
  ERROR_0014(
      "0014",
      "Missed snowflake schema name in connector config",
      "snowflake schema name must be provided with "
          + KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME
          + " "
          + "parameter"),
  ERROR_0015(
      "0015",
      "Missed snowflake database name in connector config ",
      "snowflake database name must be provided with "
          + KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME
          + " "
          + "parameter"),
  ERROR_0016(
      "0016",
      "Missed snowflake user name in connector config ",
      "snowflake user name must be provided with "
          + KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME
          + " parameter"),
  ERROR_0017(
      "0017",
      "Missed snowflake url in connector config ",
      "snowflake URL must be provided with "
          + KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME
          + " parameter, e.g. 'accountname.snowflakecomputing.com'"),
  ERROR_0018(
      "0018",
      "Invalid encrypted private key or passphrase",
      "failed to decrypt private key. Please verify input private key and passphrase. Snowflake"
          + " Kafka Connector only supports encryption algorithms in FIPS 140-2"),
  ERROR_0020("0020", "Invalid topic name", "Topic name is empty String or null"),
  ERROR_0021("0021", "Invalid topic2table map", "Failed to parse topic2table map"),
  ERROR_0022(
      "0022",
      "Invalid proxy host or port",
      "Both host and port need to be provided if one of them is provided"),
  ERROR_0023(
      "0023",
      "Invalid proxy username or password",
      "Both username and password need to be provided if one of them is provided"),
  ERROR_0030(
      "0030",
      String.format(
          "Invalid %s map",
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP),
      String.format(
          "Failed to parse %s map",
          KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP)),
  ERROR_0031(
      "0031",
      "Failed to combine JDBC properties",
      "One of snowflake.jdbc.map property overrides other jdbc property"),
  ERROR_1001(
      "1001",
      "Failed to connect to Snowflake Server",
      "Snowflake connection issue, reported by Snowflake JDBC"),
  ERROR_1003(
      "1003",
      "Snowflake connection is closed",
      "Either the current connection is closed or hasn't connect to snowflake" + " server"),
  ERROR_1005(
      "1005",
      "Task failed due to authorization error",
      "Set `enable.task.fail.on.authorization.errors=false` to avoid this behavior"),
  // SQL issues 2---
  ERROR_2001(
      "2001", "Failed to prepare SQL statement", "SQL Exception, reported by Snowflake JDBC"),

  ERROR_2005("2005", "Failed to close connection", "Failed to close snowflake JDBC connection"),
  ERROR_2006(
      "2006", "Failed to connection status", "Failed to retrieve Snowflake JDBC connection Status"),
  ERROR_2007(
      "2007",
      "Failed to create table",
      "Failed to create table on Snowflake, please check that you have permission to do so."),
  ERROR_2015(
      "2015", "Failed to append columns", "Failed to append columns during schema evolution"),
  ERROR_2016("2016", "Failed to drop NOT NULL", "Failed to drop NOT NULL during schema evolution"),
  ERROR_5007(
      "5007",
      "SnowflakeStreamingSinkConnector timeout",
      "SnowflakeStreamingSinkConnector timed out. Tables or stages are not yet "
          + "available for data ingestion to start. If this persists, please "
          + "contact Snowflake support."),
  ERROR_5010(
      "5010",
      "Connection is null or closed",
      "Connection is closed or null when starting sink service"),
  ERROR_5013(
      "5013",
      "Failed to initialize SinkTask",
      "SinkTask hasn't been started before calling OPEN function"),
  ERROR_5014(
      "5014",
      "Failed to put records",
      "SinkTask hasn't been initialized before calling PUT function"),
  ERROR_5015(
      "5015", "Invalid SinkRecord received", "Error parsing SinkRecord value or SinkRecord header"),
  ERROR_5020("5020", "Failed to register MBean in MbeanServer", "Object Name is invalid"),
  ERROR_5021(
      "5021",
      "Failed to get data schema",
      "Failed to get data schema. Unrecognizable data type in JSON object"),
  ERROR_5022("5022", "Invalid column name", "Failed to find column in the schema"),
  ERROR_5027(
      "5027",
      "Data verification failed",
      "Connector couldn't verify that all data was committed to Snowflake. Stopping to avoid data"
          + " loss."),
  ERROR_5028(
      "5028",
      "Failed to open Snowpipe Streaming v2 channel",
      "Failed to open Snowpipe Streaming v2 channel"),
  ERROR_5030(
      "5030",
      "Channel error count threshold exceeded",
      "Channel has reported errors during data ingestion. Check the channel history for details."),
  // Declared last (out of numeric order); the position is kept so existing ordinals do not shift.
  ERROR_0032(
      "0032",
      "Non-default pipe not supported with client-side validation",
      "Client-side validation only supports default pipes ({table}-STREAMING). Either disable"
          + " client-side validation (snowflake.validation=server_side) or drop the"
          + " existing pipe so the connector uses the default pipe.");

  /** Maximum number of message characters forwarded to telemetry. */
  private static final int TELEMETRY_MESSAGE_LIMIT = 500;

  // properties

  private final String name;
  private final String detail;
  private final String code;

  SnowflakeErrors(String code, String name, String detail) {
    this.code = code;
    this.name = name;
    this.detail = detail;
  }

  /**
   * @return an exception for this error with no extra message and no telemetry report
   */
  public SnowflakeKafkaConnectorException getException() {
    return getException("", null);
  }

  /**
   * @param msg additional message appended to the error description; may be null or empty
   * @return an exception for this error, without a telemetry report
   */
  public SnowflakeKafkaConnectorException getException(String msg) {
    return getException(msg, null);
  }

  /**
   * @param e source exception whose message and stack trace are embedded in the new message
   * @return an exception for this error, without a telemetry report
   */
  public SnowflakeKafkaConnectorException getException(Exception e) {
    return getException(e, null);
  }

  /**
   * Builds an exception whose message contains the source exception's message followed by one
   * line per stack trace element.
   *
   * @param e source exception; must not be null
   * @param telemetryService can be null
   * @return an exception for this error
   */
  public SnowflakeKafkaConnectorException getException(
      Exception e, SnowflakeTelemetryService telemetryService) {
    StringBuilder str = new StringBuilder();
    str.append(e.getMessage());
    for (StackTraceElement element : e.getStackTrace()) {
      str.append("\n").append(element.toString());
    }
    return getException(str.toString(), telemetryService);
  }

  /**
   * @param telemetryService can be null
   * @return an exception for this error with no extra message
   */
  public SnowflakeKafkaConnectorException getException(SnowflakeTelemetryService telemetryService) {
    return getException("", telemetryService);
  }

  /**
   * Convert a given message into SnowflakeKafkaConnectorException.
   *
   * <p>If message is null or empty, we use Enum's toString() method to wrap inside
   * SnowflakeKafkaConnectorException
   *
   * @param msg Message to send to Telemetry Service. Only the first 500 characters are reported;
   *     a null message is reported as the bare error code
   * @param telemetryService can be null
   * @return Exception wrapped in Snowflake Connector Exception
   */
  public SnowflakeKafkaConnectorException getException(
      String msg, SnowflakeTelemetryService telemetryService) {
    final boolean hasMessage = msg != null && !msg.isEmpty();

    if (telemetryService != null) {
      // Guard against null before truncating; previously a null msg caused an NPE here.
      final String reported =
          hasMessage ? msg.substring(0, Math.min(msg.length(), TELEMETRY_MESSAGE_LIMIT)) : "";
      telemetryService.reportKafkaConnectFatalError(getCode() + reported);
    }

    if (!hasMessage) {
      return new SnowflakeKafkaConnectorException(toString(), code);
    } else {
      return new SnowflakeKafkaConnectorException(
          Utils.formatLogMessage(
              "Exception: {}\nError Code: {}\nDetail: {}\nMessage: {}", name, code, detail, msg),
          code);
    }
  }

  /**
   * @return the error code string
   */
  public String getCode() {
    return code;
  }

  /**
   * @return the detail text of this error
   */
  public String getDetail() {
    return this.detail;
  }

  @Override
  public String toString() {
    return Utils.formatLogMessage("Exception: {}\nError Code: {}\nDetail: {}", name, code, detail);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeKafkaConnectorException.java
================================================
package com.snowflake.kafka.connector.internal;

/** Unchecked connector exception that carries the code of the error which produced it. */
public class SnowflakeKafkaConnectorException extends RuntimeException {
  private final String code;

  /**
   * @param msg exception message
   * @param code error code associated with this exception
   */
  public SnowflakeKafkaConnectorException(String msg, String code) {
    super(msg);
    this.code = code;
  }

  /**
   * @return the error code supplied at construction
   */
  public String getCode() {
    return this.code;
  }

  /**
   * @param error error to compare against
   * @return true when this exception's code equals the code of the given error
   */
  public boolean checkErrorCode(SnowflakeErrors error) {
    final String expectedCode = error.getCode();
    return code.equals(expectedCode);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeSinkService.java
================================================
package com.snowflake.kafka.connector.internal;

import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import java.util.Collection;
import java.util.Map;
import java.util.Optional;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * Background service of data sink, responsible to create/drop pipe and ingest/purge files.
 *
 * <p>NOTE(review): the method set below is partition/channel oriented (start, insert, offsets,
 * close); confirm whether the pipe/file wording above still describes the implementations.
 */
public interface SnowflakeSinkService {
  /**
   * Start the Task. This should handle any configuration parsing and one-time setup of the task.
   *
   * @param topicPartition TopicPartition passed from Kafka
   */
  void startPartition(TopicPartition topicPartition);

  /**
   * Start a collection of TopicPartition. This should handle any configuration parsing and one-time
   * setup of the task.
   *
   * @param partitions collection of topic partitions
   */
  void startPartitions(Collection<TopicPartition> partitions);

  /**
   * Insert a collection of JSON records. This call will trigger time based flush.
   *
   * @param records record content
   */
  void insert(final Collection<SinkRecord> records);

  /**
   * Insert a single JSON record. This call will not trigger time based flush.
   *
   * @param record record content
   * @return true if the record was processed successfully, false if recovery was triggered and the
   *     caller should stop feeding records to this partition for the remainder of the batch
   */
  boolean insert(final SinkRecord record);

  /**
   * Retrieve the offset of the last loaded record for the given topic partition.
   *
   * @param topicPartition topic and partition
   * @return offset, or -1 for empty
   */
  long getOffset(TopicPartition topicPartition);

  /**
   * Fetches committed offsets for all given partitions using the SDK's batch channel-status API.
   * Makes at most one network call per SDK client (i.e. per topic/pipe), regardless of the number
   * of partitions.
   *
   * @param partitions the partitions to query
   * @return map of TopicPartition to the offset safe to commit to Kafka (committed + 1), only
   *     containing entries where a valid offset was found
   */
  Map<TopicPartition, Long> getCommittedOffsets(Collection<TopicPartition> partitions);

  /**
   * Get the number of partitions assigned to this sink service.
   *
   * @return number of partitions
   */
  int getPartitionCount();

  /** Terminate all tasks and close this service instance. */
  void closeAll();

  /**
   * Terminate the given topic partitions.
   *
   * @param partitions a list of topic partition
   */
  void close(Collection<TopicPartition> partitions);

  /**
   * Close all cleaner threads; has no effect on the sink service context.
   *
   * <p>Note that calling this method does not perform synchronous cleanup in Snowpipe based
   * implementation
   */
  void stop();

  /**
   * Retrieve sink service status.
   *
   * @return true if closed
   */
  boolean isClosed();

  /**
   * @return the partition channels held by this service, keyed by a String — presumably the
   *     channel or partition identifier; TODO confirm against the implementation
   */
  Map<String, TopicPartitionChannel> getPartitionChannels();

  /** Blocks until all partition channels have finished initialization. No-op by default. */
  default void awaitInitialization() {}

  /* Get metric registry of an associated partition */
  @VisibleForTesting
  Optional<MetricRegistry> getMetricRegistry(final String partitionIdentifier);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/SnowflakeURL.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.snowflake.kafka.connector.internal;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Snowflake URL Object https://account.region.snowflakecomputing.com:443 */
public class SnowflakeURL implements URL {

  private final KCLogger LOGGER = new KCLogger(SnowflakeURL.class.getName());

  private String jdbcUrl;

  private final String url;

  private final boolean ssl;

  private final String account;

  private final int port;

  /**
   * There are several matching groups here. Matching groups numbers are identified as the opening
   * braces start and are indexed from number 1.
   *
   * <p>Group 1: The scheme ("http://" or "https://"), if present. (Not required)
   *
   * <p>Group 2: The entire host including the optional port number
   *
   * <p>Group 3: The host without scheme and port (at least three dot-separated labels)
   *
   * <p>Group 4: The first host label, used as the account name
   *
   * <p>Group 5: The last of the remaining ".label" parts of the host
   *
   * <p>Group 6: The port including the leading colon, if present
   *
   * <p>Group 7: port number
   */
  private static final String SNOWFLAKE_URL_REGEX_PATTERN =
      "^(https?://)?((([\\w\\d-]+)(\\.[\\w\\d-]+){2,})(:(\\d+))?)/?$";

  /** Compiled once; Pattern instances are immutable and safe to share. */
  private static final Pattern SNOWFLAKE_URL_PATTERN =
      Pattern.compile(SNOWFLAKE_URL_REGEX_PATTERN);

  /**
   * Parses a Snowflake URL string into scheme, host, account and port.
   *
   * <p>When no port is given, 443 is used unless the scheme is "http://", in which case 80 is
   * used.
   *
   * @param urlStr URL text; leading/trailing whitespace and letter case are ignored
   * @throws SnowflakeKafkaConnectorException (ERROR_0007) if the text is null, does not match the
   *     expected URL format, or carries a port that does not fit in an int
   */
  public SnowflakeURL(String urlStr) {
    if (urlStr == null) {
      throw SnowflakeErrors.ERROR_0007.getException("input url: " + urlStr);
    }

    // Locale.ROOT keeps lower-casing locale independent (e.g. "I" must not become a dotless i
    // under a Turkish default locale, which the regex would then reject).
    Matcher matcher =
        SNOWFLAKE_URL_PATTERN.matcher(urlStr.trim().toLowerCase(java.util.Locale.ROOT));

    if (!matcher.find()) {
      throw SnowflakeErrors.ERROR_0007.getException("input url: " + urlStr);
    }

    ssl = !"http://".equals(matcher.group(1));

    url = matcher.group(3);

    account = matcher.group(4);

    final String portText = matcher.group(7);
    if (portText != null) {
      port = parsePort(portText, urlStr);
    } else if (ssl) {
      port = 443;
    } else {
      port = 80;
    }

    jdbcUrl = "jdbc:snowflake://" + url + ":" + port;
    LOGGER.debug("parsed Snowflake URL: {}", urlStr);
  }

  /**
   * Converts the captured port digits to an int, reporting overflow as an invalid URL instead of
   * leaking a NumberFormatException.
   */
  private static int parsePort(String portText, String urlStr) {
    try {
      return Integer.parseInt(portText);
    } catch (NumberFormatException e) {
      throw SnowflakeErrors.ERROR_0007.getException("input url: " + urlStr);
    }
  }

  /**
   * @return JDBC connection string of the form jdbc:snowflake://host:port
   */
  String getJdbcUrl() {
    return jdbcUrl;
  }

  /**
   * @return the first label of the host name
   */
  public String getAccount() {
    return account;
  }

  /**
   * @return false only when the URL was given with an explicit "http://" scheme
   */
  public boolean sslEnabled() {
    return ssl;
  }

  /**
   * @return "https" when SSL is enabled, "http" otherwise
   */
  public String getScheme() {
    if (ssl) {
      return "https";
    } else {
      return "http";
    }
  }

  /**
   * @return host and port joined by a colon
   */
  String getFullUrl() {
    return url + ":" + port;
  }

  /**
   * @return host without scheme and port
   */
  public String getUrlWithoutPort() {
    return url;
  }

  /**
   * @return the explicit port, or the scheme default when none was given
   */
  int getPort() {
    return port;
  }

  @Override
  public String toString() {
    return getFullUrl();
  }

  @Override
  public String hostWithPort() {
    return getFullUrl();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/StandardSnowflakeConnectionService.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.Utils.TABLE_COLUMN_METADATA;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryServiceFactory;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import net.snowflake.client.api.driver.SnowflakeDriver;

/**
 * Implementation of Snowflake Connection Service interface which includes all handshake between KC
 * and SF through JDBC connection.
 */
public class StandardSnowflakeConnectionService implements SnowflakeConnectionService {

  // Shared Jackson mapper; its call sites are outside this excerpt.
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  // Comment attached to the record_metadata column when the connector creates a table.
  private static final String COLUMN_COMMENT =
      "created by automatic table creation from Snowflake Kafka Connector High Performance";

  // Parameterized lookup of a single iceberg table by name pattern.
  private static final String SHOW_ICEBERG_TABLES_QUERY = "show iceberg tables like ? limit 1";
  private final KCLogger LOGGER = new KCLogger(StandardSnowflakeConnectionService.class.getName());
  // JDBC connection opened in the constructor and used by every query in this service.
  private final Connection conn;
  // Telemetry service built on top of the JDBC connection.
  private final SnowflakeTelemetryService telemetry;
  private final String connectorName;
  private final String taskID;

  /**
   * Opens the JDBC connection and builds the telemetry service on top of it.
   *
   * @param jdbcProperties source of the proxy properties and of the combined driver properties
   * @param url parsed Snowflake URL providing the JDBC connection string
   * @param connectorName connector name, also used as telemetry app name
   * @param taskID task id forwarded to telemetry
   * @throws SnowflakeKafkaConnectorException (ERROR_1001) if the driver fails to connect
   */
  StandardSnowflakeConnectionService(
      JdbcProperties jdbcProperties, SnowflakeURL url, String connectorName, String taskID) {
    this.connectorName = connectorName;
    this.taskID = taskID;

    final Properties proxyProps = jdbcProperties.getProxyProperties();
    final Properties driverProps = jdbcProperties.getProperties();
    try {
      // The JDBC url is only logged when no proxy is configured.
      if (proxyProps.isEmpty()) {
        LOGGER.info("Establishing a JDBC connection with url:{}", url.getJdbcUrl());
      } else {
        LOGGER.debug("Proxy properties are set, passing in JDBC while creating the connection");
      }
      this.conn = new SnowflakeDriver().connect(url.getJdbcUrl(), driverProps);
    } catch (SQLException connectFailure) {
      throw SnowflakeErrors.ERROR_1001.getException(connectFailure);
    }

    this.telemetry =
        SnowflakeTelemetryServiceFactory.builder(conn)
            .setAppName(this.connectorName)
            .setTaskID(this.taskID)
            .build();
    LOGGER.info("initialized the snowflake connection");
  }

  /**
   * Issues CREATE TABLE IF NOT EXISTS for a table holding only the record_metadata VARIANT column,
   * with schema evolution and error logging enabled.
   *
   * <p>If Snowflake reports that the name is already taken by an Iceberg table, the method logs a
   * warning and returns without creating anything.
   *
   * @param tableName table name; must not be empty
   * @throws SnowflakeKafkaConnectorException (ERROR_2007) on any other SQL failure
   */
  @Override
  public void createTableWithOnlyMetadataColumn(final String tableName) {
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);
    String createTableQuery =
        "create table if not exists identifier(?) (record_metadata variant comment '"
            + COLUMN_COMMENT
            + "') enable_schema_evolution = true error_logging = true";

    // try-with-resources releases the statement even when binding or execution fails.
    try (PreparedStatement stmt = conn.prepareStatement(createTableQuery)) {
      stmt.setString(1, quoteIdentifier(tableName));
      stmt.execute();
    } catch (SQLException e) {
      // Snowflake rejects CREATE TABLE IF NOT EXISTS when the name is already taken by an
      // ICEBERG TABLE (cross-type conflict is not suppressed by IF NOT EXISTS). KCv4 only
      // supports pre-created Iceberg tables; error_logging is not available for them.
      // We match on the error message text because Snowflake does not provide a stable SQL
      // error code that distinguishes this cross-type conflict from other CREATE TABLE errors.
      if (e.getMessage() != null && e.getMessage().contains("already exists as ICEBERG_TABLE")) {
        LOGGER.warn(
            "Table '{}' is a pre-created Iceberg table. Skipping auto-creation."
                + " Error table functionality is not available for Iceberg tables.",
            tableName);
        return;
      }
      throw SnowflakeErrors.ERROR_2007.getException(e);
    }

    LOGGER.info(
        "Created table {} with RECORD_METADATA column and ERROR_LOGGING enabled", tableName);
  }

  /**
   * @param tableName table name
   * @return true when describing the table yields a result, false otherwise
   */
  @Override
  public boolean tableExist(final String tableName) {
    final Optional<List<DescribeTableRow>> description = describeTable(tableName);
    return description.isPresent();
  }

  /**
   * Checks pipe existence by running DESC PIPE.
   *
   * <p>Any SQLException raised while preparing or executing the statement is interpreted as "the
   * pipe does not exist". A failure to close the statement is logged and does not affect the
   * result.
   *
   * @param pipeName pipe name; must not be empty
   * @return true if the statement executed successfully, false otherwise
   */
  @Override
  public boolean pipeExist(final String pipeName) {
    LOGGER.info("Calling DESCRIBE PIPE {}", pipeName);
    checkConnection();
    InternalUtils.assertNotEmpty("pipeName", pipeName);
    String query = "desc pipe identifier(?)";
    PreparedStatement stmt = null;
    boolean exist;
    try {
      stmt = conn.prepareStatement(query);
      stmt.setString(1, pipeName);
      stmt.execute();
      exist = true;
    } catch (SQLException e) {
      LOGGER.debug("pipe {} doesn't exist", pipeName);
      exist = false;
    } finally {
      if (stmt != null) {
        try {
          stmt.close();
        } catch (SQLException e) {
          // Route the failure through the connector logger instead of stderr; the existence
          // result computed above is still returned.
          LOGGER.warn(
              "Failed to close statement after checking pipe {}: {}", pipeName, e.getMessage());
        }
      }
    }
    return exist;
  }

  /**
   * Checks whether a table can be used by the connector by inspecting DESC TABLE output.
   *
   * <p>The table is compatible when the metadata column (column 1 equal to {@code
   * TABLE_COLUMN_METADATA}) has "VARIANT" in column 2, and no other column has "N" in column 4 of
   * the DESC TABLE output.
   *
   * <p>A SQLException is treated as "table does not exist" and yields false. Failures while closing
   * the result set or statement are logged and do not affect the result.
   *
   * @param tableName table name; must not be empty
   * @return true if the table is compatible, false otherwise
   */
  @Override
  public boolean isTableCompatible(final String tableName) {
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);
    String query = "desc table identifier(?)";
    PreparedStatement stmt = null;
    ResultSet result = null;
    boolean compatible;
    try {
      stmt = conn.prepareStatement(query);
      stmt.setString(1, quoteIdentifier(tableName));
      result = stmt.executeQuery();
      boolean hasMeta = false;
      boolean allNullable = true;
      while (result.next()) {
        // Constant-first equals keeps the checks safe if the driver returns a null cell.
        if (TABLE_COLUMN_METADATA.equals(result.getString(1))) {
          if ("VARIANT".equals(result.getString(2))) {
            hasMeta = true;
          }
        } else if ("N".equals(result.getString(4))) {
          allNullable = false;
        }
      }
      compatible = hasMeta && allNullable;
    } catch (SQLException e) {
      LOGGER.debug("Table {} doesn't exist. Exception {}", tableName, e.getStackTrace());
      compatible = false;
    } finally {
      try {
        if (result != null) {
          result.close();
        }
      } catch (Exception e) {
        LOGGER.warn(
            "Failed to close result set after describing table {}: {}", tableName, e.getMessage());
      }

      try {
        if (stmt != null) {
          stmt.close();
        }
      } catch (Exception e) {
        LOGGER.warn(
            "Failed to close statement after describing table {}: {}", tableName, e.getMessage());
      }
    }
    LOGGER.info("Table {} compatibility is {}", tableName, compatible);
    return compatible;
  }

  /**
   * Verifies that a database exists by executing USE DATABASE on it.
   *
   * @param databaseName database name
   * @throws SnowflakeKafkaConnectorException (ERROR_2001) if the statement fails
   */
  @Override
  public void databaseExists(String databaseName) {
    checkConnection();
    String query = "use database identifier(?)";
    // try-with-resources releases the statement even when binding or execution fails.
    try (PreparedStatement stmt = conn.prepareStatement(query)) {
      stmt.setString(1, databaseName);
      stmt.execute();
    } catch (SQLException e) {
      throw SnowflakeErrors.ERROR_2001.getException(e);
    }

    LOGGER.info("database {} exists", databaseName);
  }

  /**
   * Verifies that a schema exists by executing USE SCHEMA on it.
   *
   * @param schemaName schema name
   * @throws SnowflakeKafkaConnectorException (ERROR_2001) if the statement fails
   */
  @Override
  public void schemaExists(String schemaName) {
    checkConnection();
    String query = "use schema identifier(?)";
    // try-with-resources releases the statement even when binding or execution fails.
    try (PreparedStatement stmt = conn.prepareStatement(query)) {
      stmt.setString(1, schemaName);
      stmt.execute();
    } catch (SQLException e) {
      throw SnowflakeErrors.ERROR_2001.getException(e);
    }

    LOGGER.info("schema {} exists", schemaName);
  }

  /**
   * @return the telemetry service created in the constructor
   */
  @Override
  public SnowflakeTelemetryService getTelemetryClient() {
    return telemetry;
  }

  /**
   * Closes the underlying JDBC connection and logs the event. A {@link SQLException} raised by the
   * driver is rethrown as the unchecked exception built by {@code SnowflakeErrors.ERROR_2005}.
   */
  @Override
  public void close() {
    try {
      conn.close();
      // only reached when the driver closed the connection without error
      LOGGER.info("snowflake connection closed");
    } catch (SQLException closeFailure) {
      throw SnowflakeErrors.ERROR_2005.getException(closeFailure, this.telemetry);
    }
  }

  /**
   * Reports whether the underlying JDBC connection is closed.
   *
   * @return the value of {@code conn.isClosed()}
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_2006}
   *     when the driver cannot determine the connection state
   */
  @Override
  public boolean isClosed() {
    final boolean closed;
    try {
      closed = conn.isClosed();
    } catch (SQLException stateFailure) {
      throw SnowflakeErrors.ERROR_2006.getException(stateFailure, this.telemetry);
    }
    return closed;
  }

  /** @return the connector name stored in this connection service */
  @Override
  public String getConnectorName() {
    return connectorName;
  }

  /**
   * Guard used by the query methods: fails fast when the JDBC connection is no longer open.
   *
   * <p>Both a closed connection and a driver error while probing the state surface as the
   * unchecked exception built by {@code SnowflakeErrors.ERROR_1003}; only the latter carries the
   * original {@link SQLException} and is reported with the telemetry service.
   */
  private void checkConnection() {
    final boolean connectionClosed;
    try {
      connectionClosed = conn.isClosed();
    } catch (SQLException probeFailure) {
      throw SnowflakeErrors.ERROR_1003.getException(probeFailure, this.telemetry);
    }
    if (connectionClosed) {
      throw SnowflakeErrors.ERROR_1003.getException();
    }
  }

  /**
   * Builds the {@code COPY INTO} statement used as a pipe definition: it loads the {@code meta} and
   * {@code content} attributes of each staged JSON document into the {@code RECORD_METADATA} and
   * {@code RECORD_CONTENT} columns of the target table.
   *
   * <p>Both names are concatenated into the statement verbatim (no quoting or escaping happens
   * here).
   *
   * @param tableName table name
   * @param stageName stage name
   * @return pipe definition string
   */
  private String pipeDefinition(String tableName, String stageName) {
    StringBuilder definition = new StringBuilder("copy into ");
    definition.append(tableName);
    definition.append("(RECORD_METADATA, RECORD_CONTENT) from (select $1:meta, $1:content from @");
    definition.append(stageName);
    definition.append(" t) file_format = (type = 'json')");
    return definition.toString();
  }

  /** @return the JDBC connection held by this service (the same instance, not a copy) */
  @Override
  public Connection getConnection() {
    return conn;
  }

  /**
   * Runs {@code DESC TABLE} for the given table and returns one {@link DescribeTableRow} per
   * result row.
   *
   * <p>The table name is wrapped with {@link #quoteIdentifier(String)} before it is bound, so it is
   * matched case-sensitively. The {@code default} and {@code autoincrement} result columns are
   * optional: when reading them fails they are left {@code null} for that row.
   *
   * @param tableName raw (unquoted) table name
   * @return the described rows, or {@link Optional#empty()} when the statement fails for any
   *     reason (typically because the table does not exist)
   */
  @Override
  public Optional<List<DescribeTableRow>> describeTable(String tableName) {
    LOGGER.info("Calling DESCRIBE TABLE {}", tableName);
    checkConnection();
    String query = "desc table identifier(?)";
    PreparedStatement stmt = null;
    List<DescribeTableRow> rows = new ArrayList<>();

    try {
      stmt = conn.prepareStatement(query);
      stmt.setString(1, quoteIdentifier(tableName));
      // The result set is released together with the statement in the finally block.
      ResultSet result = stmt.executeQuery();

      while (result.next()) {
        String columnName = result.getString("name");
        String type = result.getString("type");
        String comment = result.getString("comment");
        String nullable = result.getString("null?");
        String defaultValue = null;
        String autoincrement = null;
        try {
          defaultValue = result.getString("default");
          autoincrement = result.getString("autoincrement");
        } catch (SQLException e) {
          LOGGER.debug(
              "default/autoincrement columns not available in DESCRIBE TABLE for {}", tableName);
        }
        rows.add(
            new DescribeTableRow(columnName, type, comment, nullable, defaultValue, autoincrement));
      }
      return Optional.of(rows);
    } catch (Exception e) {
      // Any failure is reported as "no such table" to the caller; keep the cause in the log so
      // that permission or connectivity problems are not mistaken for a missing table.
      LOGGER.debug(
          "table {} doesn't exist or could not be described: {}", tableName, e.getMessage());
      return Optional.empty();
    } finally {
      if (stmt != null) {
        try {
          stmt.close();
        } catch (SQLException e) {
          // Best effort: a failed close must not change the outcome, but it belongs in the
          // connector log rather than on stderr.
          LOGGER.warn(
              "Failed to close DESCRIBE TABLE statement for table {}: {}",
              tableName,
              e.getMessage());
        }
      }
    }
  }

  /**
   * Decides whether the connector may evolve the schema of {@code tableName} when acting as {@code
   * role}. Two conditions must both hold:
   *
   * <ol>
   *   <li>{@code SHOW GRANTS ON TABLE} lists {@code EVOLVE SCHEMA}, {@code ALL} or {@code
   *       OWNERSHIP} for the role, and
   *   <li>the table reports {@code enable_schema_evolution = Y} in {@code SHOW TABLES} or, failing
   *       that, in the output of {@code SHOW_ICEBERG_TABLES_QUERY}.
   * </ol>
   *
   * <p>A role wrapped in double quotes is compared verbatim (quotes stripped); any other role is
   * upper-cased first.
   *
   * @param tableName raw (unquoted) table name; must not be empty
   * @param role role name, optionally double-quoted; must not be empty
   * @return {@code true} only when both the grant and the table option are present
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_2001}
   *     when one of the SHOW statements fails
   */
  @Override
  public boolean shouldEvolveSchema(String tableName, String role) {
    LOGGER.info("Checking schema evolution permission for table {}", tableName);
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);
    InternalUtils.assertNotEmpty("role", role);

    String query = "show grants on table identifier(?)";
    List<String> schemaEvolutionAllowedPrivilegeList =
        Arrays.asList("EVOLVE SCHEMA", "ALL", "OWNERSHIP");
    boolean hasRolePrivilege = false;
    // The length check keeps a role consisting of a single '"' from reaching substring(1, 0).
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
    boolean quotedRole =
        role.length() >= 2 && role.charAt(0) == '"' && role.charAt(role.length() - 1) == '"';
    String myRole =
        quotedRole ? role.substring(1, role.length() - 1) : role.toUpperCase(Locale.ROOT);

    // try-with-resources releases the statement and result set on every path, including when
    // executeQuery() or a column read throws.
    try (PreparedStatement stmt = conn.prepareStatement(query)) {
      stmt.setString(1, quoteIdentifier(tableName));
      try (ResultSet result = stmt.executeQuery()) {
        while (result.next()) {
          // constant-first equals: a NULL grantee simply does not match
          if (!myRole.equals(result.getString("grantee_name"))) {
            continue;
          }
          String privilege = result.getString("privilege");
          if (privilege != null
              && schemaEvolutionAllowedPrivilegeList.contains(
                  privilege.toUpperCase(Locale.ROOT))) {
            hasRolePrivilege = true;
          }
        }
      }
    } catch (SQLException e) {
      throw SnowflakeErrors.ERROR_2001.getException(e);
    }

    boolean hasTableOptionEnabled = false;
    // '_' and '%' are wildcards in a LIKE pattern; escape them (and the escape character itself)
    // so that only the exact table name matches.
    String escapedTableName =
        tableName.replace("\\", "\\\\").replace("_", "\\_").replace("%", "\\%");
    for (String showQuery :
        new String[] {"show tables like ? limit 1", SHOW_ICEBERG_TABLES_QUERY}) {
      // the second query is only a fallback for when the first did not report the option
      if (hasTableOptionEnabled) break;
      try (PreparedStatement stmt = conn.prepareStatement(showQuery)) {
        stmt.setString(1, escapedTableName);
        try (ResultSet result = stmt.executeQuery()) {
          while (result.next()) {
            String enableSchemaEvolution = "N";
            try {
              enableSchemaEvolution = result.getString("enable_schema_evolution");
            } catch (SQLException e) {
              LOGGER.warn(
                  "enable_schema_evolution column not found in SHOW output for table {}: {}",
                  tableName,
                  e.getMessage());
            }
            // constant-first equals: a NULL column value counts as "not enabled"
            if ("Y".equals(enableSchemaEvolution)) {
              hasTableOptionEnabled = true;
            }
          }
        }
      } catch (SQLException e) {
        throw SnowflakeErrors.ERROR_2001.getException(e);
      }
    }

    boolean hasPermission = hasRolePrivilege && hasTableOptionEnabled;
    LOGGER.info(
        "Table: {} has schema evolution permission: {} (hasRolePrivilege={},"
            + " hasTableOptionEnabled={})",
        tableName,
        hasPermission,
        hasRolePrivilege,
        hasTableOptionEnabled);
    return hasPermission;
  }

  /**
   * Checks whether {@code tableName} is an Iceberg table by running {@code
   * SHOW_ICEBERG_TABLES_QUERY} with the table name as its single bound pattern; the table counts as
   * Iceberg when that query returns at least one row.
   *
   * @param tableName raw table name; must not be empty
   * @return {@code true} when the query produced a row
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_2001}
   *     when the query fails
   */
  @Override
  public boolean isIcebergTable(String tableName) {
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);
    try (PreparedStatement showStmt = conn.prepareStatement(SHOW_ICEBERG_TABLES_QUERY)) {
      // Escape the backslash first, then the pattern wildcards '_' and '%', so the name is
      // matched literally.
      final String literalPattern =
          tableName.replace("\\", "\\\\").replace("_", "\\_").replace("%", "\\%");
      showStmt.setString(1, literalPattern);
      try (ResultSet matches = showStmt.executeQuery()) {
        final boolean found = matches.next();
        LOGGER.info("Table {} isIcebergTable={}", tableName, found);
        return found;
      }
    } catch (SQLException queryFailure) {
      throw SnowflakeErrors.ERROR_2001.getException(queryFailure);
    }
  }

  /**
   * Reports whether {@code SHOW TABLES} lists {@code error_logging = Y} for the given table.
   *
   * <p>The answer is {@code false} when the table is not listed, when the column holds any other
   * value, or when the {@code error_logging} column is missing from the result set (a warning is
   * logged in that last case).
   *
   * @param tableName raw table name; must not be empty
   * @return {@code true} only when the first matching row has {@code error_logging} equal to
   *     {@code "Y"}
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_2001}
   *     when the SHOW statement itself fails
   */
  @Override
  public boolean hasErrorLoggingEnabled(String tableName) {
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);

    try (PreparedStatement showTables = conn.prepareStatement("show tables like ? limit 1")) {
      // Escape the backslash first, then the LIKE wildcards '_' and '%'.
      final String literalPattern =
          tableName.replace("\\", "\\\\").replace("_", "\\_").replace("%", "\\%");
      showTables.setString(1, literalPattern);
      try (ResultSet rows = showTables.executeQuery()) {
        if (rows.next()) {
          final String errorLogging;
          try {
            errorLogging = rows.getString("error_logging");
          } catch (SQLException missingColumn) {
            // error_logging column absent in result set — treat as disabled to surface a warning
            LOGGER.warn(
                "error_logging column not found in SHOW TABLES output for table {} —"
                    + " treating as disabled",
                tableName);
            return false;
          }
          if ("Y".equals(errorLogging)) {
            LOGGER.debug("Table {} has ERROR_LOGGING enabled", tableName);
            return true;
          }
        }
      }
    } catch (SQLException queryFailure) {
      throw SnowflakeErrors.ERROR_2001.getException(queryFailure);
    }
    LOGGER.debug("Table {} does not have ERROR_LOGGING enabled", tableName);
    return false;
  }

  /**
   * Executes an arbitrary statement with positional string parameters and discards any result.
   *
   * <p>Unlike the other query methods of this class, this one does not call {@link
   * #checkConnection()} first.
   *
   * @param query SQL text containing one {@code ?} placeholder per entry of {@code parameters}
   * @param parameters values bound in order, starting at JDBC index 1
   * @throws RuntimeException wrapping any failure, with the query text in the message
   */
  @Override
  public void executeQueryWithParameters(String query, String... parameters) {
    // try-with-resources closes the statement on failure too; the previous explicit close() was
    // only reached on the success path.
    try (PreparedStatement stmt = conn.prepareStatement(query)) {
      for (int i = 0; i < parameters.length; i++) {
        stmt.setString(i + 1, parameters[i]);
      }
      stmt.execute();
    } catch (Exception e) {
      throw new RuntimeException("Error executing query: " + query, e);
    }
  }

  /**
   * Adds the given columns to a table with a single {@code ALTER ... ADD COLUMN IF NOT EXISTS}
   * statement. Does nothing when the map is {@code null} or empty.
   *
   * <p>The table name is bound through {@code identifier(?)}; column names are quoted with {@link
   * #quoteIdentifier(String)} and written inline, followed by the column type and DDL comment taken
   * from the corresponding {@link ColumnInfos}.
   *
   * @param tableName raw table name; must not be empty
   * @param columnInfosMap column name to type/comment, in the map's iteration order
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_2015}
   *     when the ALTER statement fails
   */
  @Override
  public void appendColumnsToTable(String tableName, Map<String, ColumnInfos> columnInfosMap) {
    final boolean nothingToAdd = columnInfosMap == null || columnInfosMap.isEmpty();
    if (nothingToAdd) {
      return;
    }
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);

    // identifier(?) works for the table name but NOT for column names in ADD COLUMN.
    // Column names are quoted inline to preserve case (e.g. "age" vs "AGE").
    // Iceberg tables require ALTER ICEBERG TABLE instead of ALTER TABLE.
    final String alterKeyword;
    if (isIcebergTable(tableName)) {
      alterKeyword = "alter iceberg table";
    } else {
      alterKeyword = "alter table";
    }

    StringBuilder ddl = new StringBuilder(alterKeyword);
    ddl.append(" identifier(?) add column if not exists ");
    // Empty before the first column, then the clause that introduces every further column.
    String separator = "";
    for (Map.Entry<String, ColumnInfos> column : columnInfosMap.entrySet()) {
      ddl.append(separator);
      ddl.append(quoteIdentifier(column.getKey()));
      ddl.append(" ");
      ddl.append(column.getValue().getColumnType());
      ddl.append(column.getValue().getDdlComments());
      separator = ", if not exists ";
    }

    try (PreparedStatement alterStmt = conn.prepareStatement(ddl.toString())) {
      alterStmt.setString(1, quoteIdentifier(tableName));
      alterStmt.execute();
      LOGGER.info("Added columns to table {}: {}", tableName, columnInfosMap.keySet());
    } catch (SQLException alterFailure) {
      LOGGER.warn(
          "ALTER TABLE/ICEBERG TABLE ADD COLUMN failed for table {} (may be concurrent race"
              + " condition): {}",
          tableName,
          alterFailure.getMessage());
      throw SnowflakeErrors.ERROR_2015.getException(alterFailure);
    }
  }

  /**
   * Calls {@code SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET} and deserializes the JSON string it returns
   * into an {@link Ssv1MigrationResponse}.
   *
   * <p>The table name is passed quoted via {@link #quoteIdentifier(String)}, the SSv1 channel name
   * upper-cased (see the inline note), and the SSv2 channel and pipe names unchanged.
   *
   * @param tableName raw table name
   * @param ssv1ChannelName name of the SSv1 channel
   * @param ssv2ChannelName name of the SSv2 channel
   * @param pipeName pipe name
   * @return the parsed response of the system function
   * @throws RuntimeException when the call fails, returns no row, or its response cannot be parsed
   */
  @Override
  public Ssv1MigrationResponse migrateSsv1ChannelOffset(
      String tableName, String ssv1ChannelName, String ssv2ChannelName, String pipeName) {
    checkConnection();
    LOGGER.info(
        "Calling SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET for table={}, ssv1Channel={}, ssv2Channel={},"
            + " pipe={}",
        tableName,
        ssv1ChannelName,
        ssv2ChannelName,
        pipeName);

    final String migrationCall = "SELECT SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET(?, ?, ?, ?)";
    try (PreparedStatement call = conn.prepareStatement(migrationCall)) {
      call.setString(1, quoteIdentifier(tableName));
      // The backend should unquote/uppercase the channel name, but that fix is not yet rolled out.
      // Uppercase here as a workaround
      // TODO(SNOW-3360048): Remove once the backend fix is rolled out.
      call.setString(2, ssv1ChannelName.toUpperCase(Locale.ROOT));
      call.setString(3, ssv2ChannelName);
      call.setString(4, pipeName);
      try (ResultSet response = call.executeQuery()) {
        if (response.next()) {
          final String payload = response.getString(1);
          try {
            return OBJECT_MAPPER.readValue(payload, Ssv1MigrationResponse.class);
          } catch (Exception parseFailure) {
            throw new RuntimeException(
                "Failed to parse SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET response for channel "
                    + ssv1ChannelName,
                parseFailure);
          }
        }
        // No row at all: this is not an SQLException, so it bypasses the catch below.
        throw new RuntimeException(
            "SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET returned no result for table " + tableName);
      }
    } catch (SQLException sqlFailure) {
      final String failureMessage =
          "SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET failed for ssv1Channel="
              + ssv1ChannelName
              + ", ssv2Channel="
              + ssv2ChannelName
              + ": "
              + sqlFailure.getMessage();
      throw new RuntimeException(failureMessage, sqlFailure);
    }
  }

  /**
   * Drops the NOT NULL constraint from the given columns with a single {@code ALTER ... ALTER}
   * statement and sets a comment on each of them recording that schema evolution made the column
   * nullable. Does nothing when the list is {@code null} or empty.
   *
   * @param tableName raw table name; must not be empty
   * @param columnNames raw column names; each is quoted with {@link #quoteIdentifier(String)}
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_2016}
   *     when the ALTER statement fails
   */
  @Override
  public void alterNonNullableColumns(String tableName, List<String> columnNames) {
    final boolean nothingToAlter = columnNames == null || columnNames.isEmpty();
    if (nothingToAlter) {
      return;
    }
    checkConnection();
    InternalUtils.assertNotEmpty("tableName", tableName);

    // identifier(?) works for the table name but NOT for column names in ALTER ... DROP NOT NULL.
    // Column names are quoted inline to preserve case.
    // Iceberg tables require ALTER ICEBERG TABLE instead of ALTER TABLE.
    final String alterKeyword;
    if (isIcebergTable(tableName)) {
      alterKeyword = "alter iceberg table";
    } else {
      alterKeyword = "alter table";
    }

    StringBuilder ddl = new StringBuilder(alterKeyword);
    ddl.append(" identifier(?) alter ");
    // Empty before the first column, a comma afterwards.
    String separator = "";
    for (String columnName : columnNames) {
      final String quotedColumn = quoteIdentifier(columnName);
      ddl.append(separator);
      ddl.append(quotedColumn);
      ddl.append(" drop not null, ");
      ddl.append(quotedColumn);
      ddl.append(
          " comment 'column altered to be nullable by schema evolution from Snowflake Kafka"
              + " Connector'");
      separator = ", ";
    }

    try (PreparedStatement alterStmt = conn.prepareStatement(ddl.toString())) {
      alterStmt.setString(1, quoteIdentifier(tableName));
      alterStmt.execute();
      LOGGER.info("Dropped NOT NULL constraints on table {}: {}", tableName, columnNames);
    } catch (SQLException alterFailure) {
      LOGGER.warn(
          "ALTER TABLE/ICEBERG TABLE DROP NOT NULL failed for table {} (may be concurrent race"
              + " condition): {}",
          tableName,
          alterFailure.getMessage());
      throw SnowflakeErrors.ERROR_2016.getException(alterFailure);
    }
  }

  /**
   * Turns a raw identifier into a double-quoted one so that its case is preserved. Snowflake
   * upper-cases unquoted identifiers, so quoting is what distinguishes {@code "age"} from {@code
   * "AGE"}. Every double quote inside the name is doubled, as the SQL standard requires. Within
   * this class the helper is applied to both column names and table names.
   *
   * @param name raw identifier; must not be {@code null}
   * @return {@code name} with embedded quotes doubled, surrounded by double quotes
   */
  private static String quoteIdentifier(String name) {
    final String escaped = name.replace("\"", "\"\"");
    return '"' + escaped + '"';
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/URL.java
================================================
package com.snowflake.kafka.connector.internal;

/**
 * Minimal view of a URL used inside the connector: it exposes only a scheme and a combined
 * host/port string. Implementations live outside this file.
 */
public interface URL {
  /**
   * @return the host together with its port. NOTE(review): presumably formatted as {@code
   *     host:port} — confirm against the implementing classes.
   */
  String hostWithPort();

  /**
   * @return the URL scheme. NOTE(review): presumably values such as {@code https} — confirm
   *     against the implementing classes.
   */
  String getScheme();
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/MetricsJmxReporter.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.JMX_METRIC_PREFIX;

import com.codahale.metrics.MetricFilter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.jmx.JmxReporter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Splitter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.concurrent.TimeUnit;
import javax.management.MalformedObjectNameException;
import javax.management.ObjectName;

/**
 * Helper class for creation of JMX Metrics from metrics registry, also includes a definition to
 * create an ObjectName used to register a {@link com.codahale.metrics.Metric}
 *
 * <p>The reporter wraps the {@link MetricRegistry} it is given; metrics are registered in and
 * removed from that registry by name (see {@link #removeMetricsFromRegistry(String)} and {@link
 * #removeMetric(String)}).
 */
public class MetricsJmxReporter {
  static final KCLogger LOGGER = new KCLogger(MetricsJmxReporter.class.getName());

  // The registry which will hold pool of all metrics for this instance
  private final MetricRegistry metricRegistry;

  /**
   * Wrapper on top of listeners and metricRegistry for Codahale. This will be useful to start the
   * jmx metrics when time is appropriate. (Check {@link MetricsJmxReporter#start()})
   */
  private final JmxReporter jmxReporter;

  /**
   * @param metricRegistry registry whose metrics are exposed over JMX
   * @param connectorName connector name; becomes the {@code connector} key of every MBean name
   */
  public MetricsJmxReporter(MetricRegistry metricRegistry, final String connectorName) {
    this.metricRegistry = metricRegistry;
    this.jmxReporter = createJMXReporter(connectorName);
  }

  /** @return the registry backing this reporter (the same instance passed to the constructor) */
  public MetricRegistry getMetricRegistry() {
    return metricRegistry;
  }

  /**
   * This function will internally register all metrics present inside metric registry and will
   * register mbeans to the mbeanserver
   */
  public void start() {
    jmxReporter.start();
  }

  /**
   * This method is called to fetch an object name for all registered metrics. It can be called
   * during registration or unregistration. (Internal implementation of Codahale)
   *
   * <p>The metric name must have the form {@code scope:scopeValue/category/name}; tokens after the
   * third are ignored. The resulting ObjectName carries the keys {@code connector}, {@code
   * <scope>}, {@code category} and {@code name}.
   *
   * @param connectorName name of the connector. (From Config)
   * @param jmxDomain JMX Domain
   * @param metricName metric name used while registering the metric. (Check {@link
   *     MetricsUtil#channelMetricName(String, String, String)})
   * @return Object Name constructed from above three args
   * @throws RuntimeException the unchecked exception built by {@code SnowflakeErrors.ERROR_5020}
   *     when the metric name does not have the expected form or JMX rejects the resulting name;
   *     the underlying exception is attached as the cause
   */
  @VisibleForTesting
  static ObjectName getObjectName(String connectorName, String jmxDomain, String metricName) {
    try {
      // each metric name is scope:scopeValue/subDomain/metricName
      // e.g. "task:task-0/lifecycle/open-count" or "channel:conn_topic_0/offsets/processed-offset"
      Iterator<String> tokens = Splitter.on("/").split(metricName).iterator();

      // First token is always scope:value -- split on colon to get the MBean key and value.
      // Splitter yields at least one token for any non-null input, so next() is safe here.
      String firstToken = tokens.next();
      int colonIndex = firstToken.indexOf(':');
      if (colonIndex < 0) {
        // Without this check substring(0, -1) would escape as StringIndexOutOfBoundsException.
        throw new IllegalArgumentException(
            "metric name does not start with 'scope:value': " + metricName);
      }

      Hashtable<String, String> keys = new Hashtable<>();
      keys.put("connector", connectorName);
      keys.put(firstToken.substring(0, colonIndex), firstToken.substring(colonIndex + 1));
      keys.put("category", nextTokenOrFail(tokens, "category", metricName));
      keys.put("name", nextTokenOrFail(tokens, "name", metricName));

      return new ObjectName(jmxDomain, keys);
    } catch (MalformedObjectNameException | IllegalArgumentException e) {
      LOGGER.warn("Could not create Object name for MetricName:{}", metricName);
      // keep the cause so the log shows which part of the name was rejected
      throw SnowflakeErrors.ERROR_5020.getException(e);
    }
  }

  /** Returns the next path token, or fails with a message naming the missing part. */
  private static String nextTokenOrFail(Iterator<String> tokens, String part, String metricName) {
    if (!tokens.hasNext()) {
      throw new IllegalArgumentException(
          "metric name is missing its '" + part + "' part: " + metricName);
    }
    return tokens.next();
  }

  /**
   * Unregister every metric whose registered name starts with the given prefix.
   *
   * <p>Matching is a plain string prefix test, so {@code "task:task-1"} also matches {@code
   * "task:task-10/..."}; callers that need an exact scope should end the prefix with {@code "/"}.
   *
   * @param prefixFilter name prefix of the metrics to remove
   */
  public void removeMetricsFromRegistry(final String prefixFilter) {
    if (metricRegistry.getMetrics().size() != 0) {
      LOGGER.debug("Unregistering all metrics matching prefix '{}'", prefixFilter);
      metricRegistry.removeMatching(MetricFilter.startsWith(prefixFilter));
      LOGGER.debug(
          "Metric registry size after removing '{}' is:{}, names:{}",
          prefixFilter,
          metricRegistry.getMetrics().size(),
          metricRegistry.getMetrics().keySet().toString());
    }
  }

  /**
   * Remove a single metric by its exact registered name, instead of the prefix scan done by {@link
   * #removeMetricsFromRegistry}.
   *
   * @param exactName full registered metric name
   * @return the result of {@link MetricRegistry#remove(String)}
   */
  public boolean removeMetric(final String exactName) {
    return metricRegistry.remove(exactName);
  }

  /**
   * Create JMXReporter Instance, which internally handles the mbean server fetching and
   * registration of Mbeans. We use the Codahale metrics library to achieve this. More details
   * here: @see <a href="https://metrics.dropwizard.io/4.2.0/getting-started.html">DropWizard</a>
   *
   * <p>We will convert all duration to SECONDS and prefix our metrics with {@link
   * MetricsUtil#JMX_METRIC_PREFIX}
   *
   * @param connectorName connectorName passed inside configuration
   * @return JMXReporter instance.
   */
  private JmxReporter createJMXReporter(final String connectorName) {
    return JmxReporter.forRegistry(this.metricRegistry)
        .inDomain(JMX_METRIC_PREFIX)
        .convertDurationsTo(TimeUnit.SECONDS)
        .createsObjectNamesWith(
            (ignoreMeterType, jmxDomain, metricName) ->
                getObjectName(connectorName, jmxDomain, metricName))
        .build();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/MetricsUtil.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

import java.util.Collection;

/**
 * Metric-related constants and name builders, mainly for JMX.
 *
 * <p>Registered metric names have the shape {@code scope:scopeValue/subDomain/metricName}, where
 * the scope is either {@code channel} or {@code task}.
 */
public class MetricsUtil {
  public static final String JMX_METRIC_PREFIX = "snowflake.kafka.connector";

  // offset related constants
  public static final String OFFSET_SUB_DOMAIN = "offsets";

  /**
   * Offset number that is most recent inside the buffer (In memory buffer)
   *
   * <p>This is updated every time an offset is sent as put API of SinkTask {@link
   * org.apache.kafka.connect.sink.SinkTask#put(Collection)}
   */
  public static final String PROCESSED_OFFSET = "processed-offset";

  public static final String OFFSET_PERSISTED_IN_SNOWFLAKE = "persisted-in-snowflake-offset";

  public static final String LATEST_CONSUMER_OFFSET = "latest-consumer-offset";

  // Scope markers that open every metric name, and the separator between its parts.
  private static final String CHANNEL_SCOPE = "channel:";
  private static final String TASK_SCOPE = "task:";
  private static final String PART_SEPARATOR = "/";

  /**
   * Builds the registry-key prefix shared by all metrics of one channel, for example {@code
   * "channel:myConn_t_0"}. Intended for bulk removal through {@link
   * MetricsJmxReporter#removeMetricsFromRegistry}, which matches by plain string prefix (so the
   * prefix of one channel also matches any channel whose name merely starts with the same text).
   */
  public static String channelMetricPrefix(final String channelName) {
    return CHANNEL_SCOPE + channelName;
  }

  /**
   * Builds the full name of a channel-level metric, <b>channel:channelName/subDomain/metricName</b>.
   * {@link MetricsJmxReporter#getObjectName} parses the {@code channel:} part into the first key
   * property ({@code channel=}) of the MBean.
   *
   * @param channelName channel or partition identifier
   * @param subDomain category of the metric (e.g. "offsets")
   * @param metricName name of the individual Gauge, Meter, Histogram, ...
   * @return the three parts joined with {@code /}
   */
  public static String channelMetricName(
      final String channelName, final String subDomain, final String metricName) {
    return scopedName(channelMetricPrefix(channelName), subDomain, metricName);
  }

  /**
   * Builds the registry-key prefix shared by all metrics of one task, for example {@code
   * "task:task-0"}. Intended for bulk removal through {@link
   * MetricsJmxReporter#removeMetricsFromRegistry}, which matches by plain string prefix.
   */
  public static String taskMetricPrefix(final String taskPrefix) {
    return TASK_SCOPE + taskPrefix;
  }

  /**
   * Builds the full name of a task-level metric, <b>task:taskPrefix/subDomain/metricName</b>. The
   * resulting MBean uses {@code task=} as its first key property.
   *
   * @param taskPrefix task identifier (e.g. "task-0")
   * @param subDomain category of the metric (e.g. "task", "lifecycle")
   * @param metricName name of the individual metric
   * @return the three parts joined with {@code /}
   */
  public static String taskMetricName(
      final String taskPrefix, final String subDomain, final String metricName) {
    return scopedName(taskMetricPrefix(taskPrefix), subDomain, metricName);
  }

  /** Joins an already scoped prefix with the sub domain and the metric name. */
  private static String scopedName(
      final String scopedPrefix, final String subDomain, final String metricName) {
    return String.join(PART_SEPARATOR, scopedPrefix, subDomain, metricName);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/NoopTaskMetrics.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

/**
 * Null-object implementation of {@link TaskMetrics}. Every method is a no-op.
 *
 * <p>Implemented as a single-constant enum, so exactly one instance ({@link #INSTANCE}) exists.
 * All timing methods hand out the shared {@code TimingContext.NOOP}; all recording methods have an
 * empty body.
 */
enum NoopTaskMetrics implements TaskMetrics {
  INSTANCE;

  // ---- timing: always the shared no-op context ----

  @Override
  public TimingContext timePut() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timePreCommit() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeOpen() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeClose() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeSdkClientCreate() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeChannelOpen() {
    return TimingContext.NOOP;
  }

  @Override
  public TimingContext timeOffsetFetch() {
    return TimingContext.NOOP;
  }

  // ---- recording: intentionally empty ----

  @Override
  public void recordStartDuration(long nanos) {}

  @Override
  public void incOpenCount() {}

  @Override
  public void incCloseCount() {}

  @Override
  public void incChannelOpenCount() {}

  @Override
  public void incPreCommitPartitionsSkipped() {}

  @Override
  public void incBackpressureRewindCount() {}

  @Override
  public void markPutRecords(long count) {}

  @Override
  public void setAssignedPartitions(int count) {}

  // Nothing was registered, so there is nothing to unregister.
  @Override
  public void unregister() {}
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/SnowflakeSinkTaskMetrics.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.taskMetricName;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.taskMetricPrefix;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.snowflake.kafka.connector.internal.KCLogger;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;

/**
 * Manages task-level JMX metrics for a single {@link
 * com.snowflake.kafka.connector.SnowflakeSinkTask} instance. All metrics are registered in the
 * constructor and removed again by {@link #unregister()}.
 *
 * <p>MBean ObjectNames follow the pattern:
 *
 * <pre>snowflake.kafka.connector:connector=X,task=task-N,category=task|lifecycle,name=metric</pre>
 */
public class SnowflakeSinkTaskMetrics implements TaskMetrics {

  private static final KCLogger LOGGER = new KCLogger(SnowflakeSinkTaskMetrics.class.getName());

  static final String TASK_SUB_DOMAIN = "task";
  static final String LIFECYCLE_SUB_DOMAIN = "lifecycle";

  // Method duration timers
  static final String PUT_DURATION = "put-duration";
  static final String PRECOMMIT_DURATION = "precommit-duration";
  static final String OPEN_DURATION = "open-duration";
  static final String CLOSE_DURATION = "close-duration";
  static final String START_DURATION = "start-duration";

  // Channel and SDK timers
  static final String CHANNEL_OPEN_DURATION = "channel-open-duration";
  static final String SDK_CLIENT_CREATE_DURATION = "sdk-client-create-duration";
  static final String PRECOMMIT_OFFSET_FETCH_DURATION = "precommit-offset-fetch-duration";

  // Throughput
  static final String PUT_RECORDS = "put-records";

  // Counters
  static final String PRECOMMIT_PARTITIONS_SKIPPED = "precommit-partitions-skipped";
  static final String OPEN_COUNT = "open-count";
  static final String CLOSE_COUNT = "close-count";
  static final String CHANNEL_OPEN_COUNT = "channel-open-count";
  static final String BACKPRESSURE_REWIND_COUNT = "backpressure-rewind-count";

  // Gauges
  static final String ASSIGNED_PARTITIONS = "assigned-partitions";
  static final String SDK_CLIENT_COUNT = "sdk-client-count";

  // Task identifier of the form "task-<taskId>"; the scope value of every metric name built here.
  private final String taskMetricPrefix;
  private final MetricsJmxReporter metricsJmxReporter;

  // Method duration timers
  private final Timer putDuration;
  private final Timer preCommitDuration;
  private final Timer openDuration;
  private final Timer closeDuration;
  private final Timer startDuration;

  // Channel/SDK timers (aggregated across all channels in this task)
  private final Timer channelOpenDuration;
  private final Timer sdkClientCreateDuration;
  private final Timer preCommitOffsetFetchDuration;

  // Throughput
  private final Meter putRecords;

  // Counters
  private final Counter preCommitPartitionsSkipped;
  private final Counter openCount;
  private final Counter closeCount;
  private final Counter channelOpenCount;
  private final Counter backpressureRewindCount;

  // Gauges (backed by atomics)
  private final AtomicInteger assignedPartitions;

  /**
   * Creates the task metrics without the {@code sdk-client-count} gauge.
   *
   * @param connectorName connector name, used for logging only
   * @param taskId task id; metrics are registered under {@code task-<taskId>}
   * @param metricsJmxReporter reporter whose registry receives the metrics
   */
  public SnowflakeSinkTaskMetrics(
      String connectorName, String taskId, MetricsJmxReporter metricsJmxReporter) {
    this(connectorName, taskId, metricsJmxReporter, null);
  }

  /**
   * Registers all task-level timers, meters, counters and gauges in the reporter's registry and
   * then starts the reporter.
   *
   * @param connectorName connector name, used for logging only
   * @param taskId task id; metrics are registered under {@code task-<taskId>}
   * @param metricsJmxReporter reporter whose registry receives the metrics
   * @param sdkClientCountSupplier source of the {@code sdk-client-count} gauge value; when {@code
   *     null} that gauge is not registered
   */
  public SnowflakeSinkTaskMetrics(
      String connectorName,
      String taskId,
      MetricsJmxReporter metricsJmxReporter,
      Supplier<Integer> sdkClientCountSupplier) {
    this.taskMetricPrefix = "task-" + taskId;
    this.metricsJmxReporter = metricsJmxReporter;
    this.assignedPartitions = new AtomicInteger(0);

    MetricRegistry registry = metricsJmxReporter.getMetricRegistry();

    // Method duration timers
    this.putDuration =
        registry.timer(taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PUT_DURATION));
    this.preCommitDuration =
        registry.timer(taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PRECOMMIT_DURATION));
    this.openDuration =
        registry.timer(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, OPEN_DURATION));
    this.closeDuration =
        registry.timer(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CLOSE_DURATION));
    this.startDuration =
        registry.timer(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, START_DURATION));

    // Channel/SDK timers
    this.channelOpenDuration =
        registry.timer(
            taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_DURATION));
    this.sdkClientCreateDuration =
        registry.timer(
            taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_CREATE_DURATION));
    this.preCommitOffsetFetchDuration =
        registry.timer(
            taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PRECOMMIT_OFFSET_FETCH_DURATION));

    // Throughput
    this.putRecords =
        registry.meter(taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PUT_RECORDS));

    // Counters
    this.preCommitPartitionsSkipped =
        registry.counter(
            taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, PRECOMMIT_PARTITIONS_SKIPPED));
    this.openCount =
        registry.counter(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, OPEN_COUNT));
    this.closeCount =
        registry.counter(taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CLOSE_COUNT));
    this.channelOpenCount =
        registry.counter(
            taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_COUNT));
    this.backpressureRewindCount =
        registry.counter(
            taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, BACKPRESSURE_REWIND_COUNT));

    // Gauges
    registry.register(
        taskMetricName(taskMetricPrefix, TASK_SUB_DOMAIN, ASSIGNED_PARTITIONS),
        (Gauge<Integer>) assignedPartitions::get);

    if (sdkClientCountSupplier != null) {
      registry.register(
          taskMetricName(taskMetricPrefix, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_COUNT),
          (Gauge<Integer>) sdkClientCountSupplier::get);
    }

    metricsJmxReporter.start();
    LOGGER.info(
        "Registered task-level JMX metrics for connector: {}, task: {}", connectorName, taskId);
  }

  // ---- TaskMetrics interface (timing) ----

  @Override
  public TimingContext timePut() {
    return wrap(putDuration);
  }

  @Override
  public TimingContext timePreCommit() {
    return wrap(preCommitDuration);
  }

  @Override
  public TimingContext timeOpen() {
    return wrap(openDuration);
  }

  @Override
  public TimingContext timeClose() {
    return wrap(closeDuration);
  }

  @Override
  public TimingContext timeSdkClientCreate() {
    return wrap(sdkClientCreateDuration);
  }

  @Override
  public TimingContext timeChannelOpen() {
    return wrap(channelOpenDuration);
  }

  @Override
  public TimingContext timeOffsetFetch() {
    return wrap(preCommitOffsetFetchDuration);
  }

  /** Records an already measured start duration, given in nanoseconds. */
  @Override
  public void recordStartDuration(long nanos) {
    startDuration.update(nanos, TimeUnit.NANOSECONDS);
  }

  // ---- TaskMetrics interface (counters) ----

  @Override
  public void incOpenCount() {
    openCount.inc();
  }

  @Override
  public void incCloseCount() {
    closeCount.inc();
  }

  @Override
  public void incChannelOpenCount() {
    channelOpenCount.inc();
  }

  @Override
  public void incPreCommitPartitionsSkipped() {
    preCommitPartitionsSkipped.inc();
  }

  @Override
  public void incBackpressureRewindCount() {
    backpressureRewindCount.inc();
  }

  // ---- TaskMetrics interface (throughput) ----

  @Override
  public void markPutRecords(long count) {
    putRecords.mark(count);
  }

  // ---- TaskMetrics interface (gauges) ----

  @Override
  public void setAssignedPartitions(int count) {
    assignedPartitions.set(count);
  }

  // ---- TaskMetrics interface (lifecycle) ----

  /** Removes every metric this instance registered from the reporter's registry. */
  @Override
  public void unregister() {
    // removeMetricsFromRegistry matches by plain string prefix. Every metric of this task is named
    // "task:task-N/<subDomain>/<name>", so the trailing '/' anchors the match to this task only;
    // the bare prefix "task:task-1" would also remove the metrics of task-10, task-11, ...
    metricsJmxReporter.removeMetricsFromRegistry(taskMetricPrefix(taskMetricPrefix) + "/");
    LOGGER.info("Unregistered task-level JMX metrics for prefix: {}", taskMetricPrefix);
  }

  // ---- raw accessors (package-private, for tests in the same package) ----

  Timer putDuration() {
    return putDuration;
  }

  Timer preCommitDuration() {
    return preCommitDuration;
  }

  Timer openDuration() {
    return openDuration;
  }

  Timer closeDuration() {
    return closeDuration;
  }

  Timer startDuration() {
    return startDuration;
  }

  Timer channelOpenDuration() {
    return channelOpenDuration;
  }

  Timer sdkClientCreateDuration() {
    return sdkClientCreateDuration;
  }

  Timer preCommitOffsetFetchDuration() {
    return preCommitOffsetFetchDuration;
  }

  Meter putRecords() {
    return putRecords;
  }

  Counter preCommitPartitionsSkipped() {
    return preCommitPartitionsSkipped;
  }

  Counter openCount() {
    return openCount;
  }

  Counter closeCount() {
    return closeCount;
  }

  Counter channelOpenCount() {
    return channelOpenCount;
  }

  Counter backpressureRewindCount() {
    return backpressureRewindCount;
  }

  int getAssignedPartitions() {
    return assignedPartitions.get();
  }

  // ---- internal ----

  /** Starts the timer and returns a context whose {@code close()} stops it. */
  private static TimingContext wrap(Timer timer) {
    Timer.Context ctx = timer.time();
    return ctx::stop;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/metrics/TaskMetrics.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

/**
 * Task-level metrics facade. Callers program against this interface; the connector wires in either
 * {@link SnowflakeSinkTaskMetrics} (real JMX) or the singleton returned by {@link #noop()} when
 * monitoring is disabled.
 *
 * <p>All methods are safe to call unconditionally -- the noop implementation is a no-op.
 */
public interface TaskMetrics {

  // ---- timing (try-with-resources) ----
  // Each time*() method hands back a TimingContext; because TimingContext.close() declares no
  // checked exception, callers can use it in try-with-resources without a catch clause.
  // NOTE(review): what exactly is measured between the call and close() is defined by the
  // implementation, not by this interface.

  /** Returns a timing context for a put operation. */
  TimingContext timePut();

  /** Returns a timing context for a pre-commit operation. */
  TimingContext timePreCommit();

  /** Returns a timing context for an open operation. */
  TimingContext timeOpen();

  /** Returns a timing context for a close operation. */
  TimingContext timeClose();

  /** Returns a timing context for SDK client creation. */
  TimingContext timeSdkClientCreate();

  /** Returns a timing context for a channel open. */
  TimingContext timeChannelOpen();

  /** Returns a timing context for an offset fetch. */
  TimingContext timeOffsetFetch();

  /**
   * Records an already-measured start duration.
   *
   * @param nanos the duration value; presumably nanoseconds, per the parameter name -- confirm
   *     against the implementation
   */
  void recordStartDuration(long nanos);

  // ---- counters ----

  /** Increments the open counter. */
  void incOpenCount();

  /** Increments the close counter. */
  void incCloseCount();

  /** Increments the channel-open counter. */
  void incChannelOpenCount();

  /** Increments the counter of partitions skipped during pre-commit. */
  void incPreCommitPartitionsSkipped();

  /** Increments the backpressure-rewind counter. */
  void incBackpressureRewindCount();

  // ---- throughput ----

  /**
   * Reports that {@code count} records were put.
   *
   * @param count number of records to report
   */
  void markPutRecords(long count);

  // ---- gauges ----

  /**
   * Sets the assigned-partitions value.
   *
   * @param count value to publish
   */
  void setAssignedPartitions(int count);

  // ---- lifecycle ----

  /** Unregisters the metrics owned by this instance. */
  void unregister();

  // ---- timing context ----

  /**
   * Handle returned by the {@code time*()} methods. It narrows {@link AutoCloseable#close()} so
   * that it throws no checked exception.
   */
  @FunctionalInterface
  interface TimingContext extends AutoCloseable {
    /** Shared context whose {@link #close()} does nothing. */
    TimingContext NOOP = () -> {};

    @Override
    void close();
  }

  // ---- factory ----

  /** Returns the shared {@code NoopTaskMetrics.INSTANCE}. */
  static TaskMetrics noop() {
    return NoopTaskMetrics.INSTANCE;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/ColumnInfos.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import java.util.Objects;
import java.util.Optional;

/**
 * Represents column type and DDL comment for schema evolution.
 *
 * <p>Instances are immutable. The column type is mandatory; the comment is optional and, when
 * absent, a fixed default comment is used in the generated DDL fragment.
 */
public class ColumnInfos {
  /** DDL fragment used when no explicit comment was supplied. */
  private static final String DEFAULT_DDL_COMMENT =
      " comment 'column created by schema evolution from Snowflake Kafka Connector' ";

  private final String columnType;
  private final String comments;

  /**
   * @param columnType column type; must not be null
   * @param comments optional column comment, may be null
   * @throws NullPointerException if {@code columnType} is null
   */
  public ColumnInfos(String columnType, String comments) {
    this.columnType = Objects.requireNonNull(columnType, "columnType cannot be null");
    this.comments = comments;
  }

  /**
   * Creates an instance without a comment.
   *
   * @param columnType column type; must not be null
   * @throws NullPointerException if {@code columnType} is null
   */
  public ColumnInfos(String columnType) {
    this(columnType, null);
  }

  public String getColumnType() {
    return columnType;
  }

  /** Returns the raw (unescaped) comment, or null when none was supplied. */
  public String getComments() {
    return comments;
  }

  /**
   * Builds the {@code comment '...'} DDL fragment (with a leading and trailing space).
   *
   * <p>The comment text is embedded in a single-quoted SQL string constant, so both backslashes
   * and single quotes are escaped. Without the backslash escaping, a comment such as {@code it\'s}
   * or one ending in a backslash would terminate the literal early or swallow the closing quote.
   *
   * @return the DDL comment fragment; the default comment when no comment was supplied
   */
  public String getDdlComments() {
    return Optional.ofNullable(comments)
        .map(ColumnInfos::escapeForSingleQuotedLiteral)
        .map(escaped -> String.format(" comment '%s' ", escaped))
        .orElse(DEFAULT_DDL_COMMENT);
  }

  /** Escapes text for use inside a single-quoted SQL string constant. */
  private static String escapeForSingleQuotedLiteral(String text) {
    // Backslashes must be doubled first so the quote handling below cannot be undone by a
    // preceding backslash in the input.
    return text.replace("\\", "\\\\").replace("'", "''");
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    ColumnInfos that = (ColumnInfos) o;
    return Objects.equals(columnType, that.columnType) && Objects.equals(comments, that.comments);
  }

  @Override
  public int hashCode() {
    return Objects.hash(columnType, comments);
  }

  @Override
  public String toString() {
    return "ColumnInfos{"
        + "columnType='"
        + columnType
        + '\''
        + ", comments='"
        + comments
        + '\''
        + '}';
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/ColumnTypeMapper.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.kafka.connect.data.Schema;

/** Abstract base for mapping Kafka Connect types to Snowflake DDL types. */
public abstract class ColumnTypeMapper {

  /**
   * Map a Kafka Connect type to a column type using the type and the schema name.
   *
   * @param kafkaType Kafka Connect type
   * @param schemaName name of the schema the type belongs to; may be null
   * @return column type
   */
  public abstract String mapToColumnType(Schema.Type kafkaType, String schemaName);

  /**
   * Map the JSON node type to Kafka type
   *
   * @param value JSON node
   * @return Kafka type
   */
  public abstract Schema.Type mapJsonNodeTypeToKafkaType(JsonNode value);

  /**
   * Convenience overload for callers that have no schema name; delegates to {@link
   * #mapToColumnType(Schema.Type, String)} with a null schema name.
   *
   * @param kafkaType Kafka Connect type
   * @return column type
   */
  public String mapToColumnType(Schema.Type kafkaType) {
    final String noSchemaName = null;
    return mapToColumnType(kafkaType, noSchemaName);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/SchemaEvolutionTargetItems.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import java.util.Collections;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import javax.annotation.Nonnull;

/**
 * Holds what a schema evolution has to act on: the table name, the columns whose NOT NULL
 * constraint should be dropped, and the columns that should be added.
 *
 * <p>Both column sets are copied on construction; a null argument is stored as an empty set.
 */
public class SchemaEvolutionTargetItems {
  private final String tableName;

  @Nonnull private final Set<String> columnsToDropNonNullability;
  @Nonnull private final Set<String> columnsToAdd;

  /** Creates target items that only add columns (no nullability changes). */
  public SchemaEvolutionTargetItems(String tableName, Set<String> columnsToAdd) {
    this(tableName, null, columnsToAdd);
  }

  /**
   * @param tableName target table name
   * @param columnsToDropNonNullability columns to drop NOT NULL from; null means none
   * @param columnsToAdd columns to add; null means none
   */
  public SchemaEvolutionTargetItems(
      String tableName, Set<String> columnsToDropNonNullability, Set<String> columnsToAdd) {
    this.tableName = tableName;
    this.columnsToDropNonNullability = copyOrEmpty(columnsToDropNonNullability);
    this.columnsToAdd = copyOrEmpty(columnsToAdd);
  }

  /** Returns a private copy of {@code source}, or the shared empty set when it is null. */
  private static Set<String> copyOrEmpty(Set<String> source) {
    if (source == null) {
      return Collections.emptySet();
    }
    return new HashSet<>(source);
  }

  /** True when at least one of the two column sets is non-empty. */
  public boolean hasDataForSchemaEvolution() {
    return !(columnsToDropNonNullability.isEmpty() && columnsToAdd.isEmpty());
  }

  public String getTableName() {
    return tableName;
  }

  /** Read-only view of the columns to drop NOT NULL from. */
  @Nonnull
  public Set<String> getColumnsToDropNonNullability() {
    return Collections.unmodifiableSet(columnsToDropNonNullability);
  }

  /** Read-only view of the columns to add. */
  @Nonnull
  public Set<String> getColumnsToAdd() {
    return Collections.unmodifiableSet(columnsToAdd);
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    SchemaEvolutionTargetItems other = (SchemaEvolutionTargetItems) o;
    if (!Objects.equals(tableName, other.tableName)) {
      return false;
    }
    if (!Objects.equals(columnsToDropNonNullability, other.columnsToDropNonNullability)) {
      return false;
    }
    return Objects.equals(columnsToAdd, other.columnsToAdd);
  }

  @Override
  public int hashCode() {
    return Objects.hash(tableName, columnsToDropNonNullability, columnsToAdd);
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("SchemaEvolutionTargetItems{");
    text.append("tableName='").append(tableName).append('\'');
    text.append(", nonNullableColumns=").append(columnsToDropNonNullability);
    text.append(", extraColNames=").append(columnsToAdd);
    text.append('}');
    return text.toString();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeColumnTypeMapper.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.apache.kafka.connect.data.Schema.Type.ARRAY;
import static org.apache.kafka.connect.data.Schema.Type.BOOLEAN;
import static org.apache.kafka.connect.data.Schema.Type.BYTES;
import static org.apache.kafka.connect.data.Schema.Type.FLOAT32;
import static org.apache.kafka.connect.data.Schema.Type.FLOAT64;
import static org.apache.kafka.connect.data.Schema.Type.INT16;
import static org.apache.kafka.connect.data.Schema.Type.INT32;
import static org.apache.kafka.connect.data.Schema.Type.INT64;
import static org.apache.kafka.connect.data.Schema.Type.STRING;
import static org.apache.kafka.connect.data.Schema.Type.STRUCT;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Maps Kafka Connect types to Snowflake DDL types. */
public class SnowflakeColumnTypeMapper extends ColumnTypeMapper {

  private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeColumnTypeMapper.class);

  /**
   * Translates a Kafka Connect type (plus optional logical schema name) into a column type name.
   *
   * <p>The schema name only matters for INT32 (Date / Time), INT64 (Timestamp) and BYTES
   * (Decimal). Any type without an explicit case falls back to VARIANT.
   *
   * @param kafkaType Kafka Connect type
   * @param schemaName logical schema name, may be null
   * @return column type name
   */
  @Override
  public String mapToColumnType(Schema.Type kafkaType, String schemaName) {
    switch (kafkaType) {
      case BOOLEAN:
        return "BOOLEAN";
      case STRING:
        return "VARCHAR";
      case ARRAY:
        return "ARRAY";
      case INT8:
        return "BYTEINT";
      case INT16:
        return "SMALLINT";
      case INT32:
        if (Date.LOGICAL_NAME.equals(schemaName)) {
          return "DATE";
        }
        if (Time.LOGICAL_NAME.equals(schemaName)) {
          return "TIME(6)";
        }
        return "INT";
      case INT64:
        return Timestamp.LOGICAL_NAME.equals(schemaName) ? "TIMESTAMP(6)" : "BIGINT";
      case FLOAT32:
        return "FLOAT";
      case FLOAT64:
        return "DOUBLE";
      case BYTES:
        // Decimal logical type is carried as text rather than raw bytes.
        return Decimal.LOGICAL_NAME.equals(schemaName) ? "VARCHAR" : "BINARY";
      default:
        // MAP and STRUCT will go here
        LOGGER.debug(
            "The corresponding kafka type is {}, so infer to VARIANT type", kafkaType.getName());
        return "VARIANT";
    }
  }

  /**
   * Derives a Kafka Connect type from the runtime kind of a JSON node.
   *
   * @param value JSON node; a null reference or JSON null is treated as STRING
   * @return the matching Kafka type, or null when the node kind is not recognized
   */
  @Override
  public Schema.Type mapJsonNodeTypeToKafkaType(JsonNode value) {
    if (value == null || value.isNull()) {
      return STRING;
    }
    if (value.isNumber()) {
      return numericKafkaType(value);
    }
    if (value.isTextual()) {
      return STRING;
    }
    if (value.isBoolean()) {
      return BOOLEAN;
    }
    if (value.isBinary()) {
      return BYTES;
    }
    if (value.isArray()) {
      return ARRAY;
    }
    if (value.isObject()) {
      return STRUCT;
    }
    return null;
  }

  /** Picks the Kafka type for a numeric node; anything not matched explicitly becomes INT64. */
  private static Schema.Type numericKafkaType(JsonNode number) {
    if (number.isShort()) {
      return INT16;
    }
    if (number.isInt()) {
      return INT32;
    }
    if (number.isFloat()) {
      return FLOAT32;
    }
    if (number.isDouble()) {
      return FLOAT64;
    }
    return INT64;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeSchemaEvolutionService.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.records.SnowflakeSinkRecord;
import java.util.ArrayList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Executes schema evolution DDL (ALTER TABLE) based on validation results. Handles adding columns
 * and dropping NOT NULL constraints.
 */
public class SnowflakeSchemaEvolutionService {

  private static final Logger LOGGER =
      LoggerFactory.getLogger(SnowflakeSchemaEvolutionService.class);

  private final SnowflakeConnectionService conn;
  private final TableSchemaResolver tableSchemaResolver;

  /** Creates a service that uses a default {@link TableSchemaResolver}. */
  public SnowflakeSchemaEvolutionService(SnowflakeConnectionService conn) {
    this(conn, new TableSchemaResolver());
  }

  /** Package-private constructor that lets the resolver be supplied explicitly. */
  SnowflakeSchemaEvolutionService(
      SnowflakeConnectionService conn, TableSchemaResolver tableSchemaResolver) {
    this.conn = conn;
    this.tableSchemaResolver = tableSchemaResolver;
  }

  /**
   * Execute ALTER TABLE commands if there are columns to add or NOT NULL constraints to drop.
   *
   * <p>Note: Columns must be added BEFORE dropping NOT NULL constraints, otherwise the constraint
   * modification will fail if the column doesn't exist yet.
   *
   * @param targetItems target items for schema evolution
   * @param record the sink record that contains the schema and content
   */
  public void evolveSchemaIfNeeded(
      SchemaEvolutionTargetItems targetItems, SnowflakeSinkRecord record) {
    if (!targetItems.hasDataForSchemaEvolution()) {
      return;
    }

    final String table = targetItems.getTableName();

    // Order matters: new columns first, nullability changes second.
    if (!targetItems.getColumnsToAdd().isEmpty()) {
      addColumns(table, targetItems, record);
    }
    if (!targetItems.getColumnsToDropNonNullability().isEmpty()) {
      dropNonNullability(table, targetItems);
    }
  }

  /**
   * Resolves the types of the columns to add from the record and appends them to the table. A
   * {@link SnowflakeKafkaConnectorException} from the ALTER is logged and swallowed; a failure while
   * resolving the schema is not caught here.
   */
  private void addColumns(
      String table, SchemaEvolutionTargetItems targetItems, SnowflakeSinkRecord record) {
    LOGGER.debug("Adding columns to table: {} columns: {}", table, targetItems.getColumnsToAdd());
    TableSchema resolved =
        tableSchemaResolver.resolveTableSchemaFromSnowflakeRecord(
            record, new ArrayList<>(targetItems.getColumnsToAdd()));
    try {
      conn.appendColumnsToTable(table, resolved.getColumnInfos());
    } catch (SnowflakeKafkaConnectorException alterFailure) {
      LOGGER.warn(
          "Failure altering table to add column: {}, this could happen when multiple"
              + " partitions try to alter the table at the same time and the warning could be"
              + " ignored",
          table,
          alterFailure);
    }
  }

  /**
   * Drops NOT NULL from the requested columns. A {@link SnowflakeKafkaConnectorException} from the
   * ALTER is logged and swallowed.
   */
  private void dropNonNullability(String table, SchemaEvolutionTargetItems targetItems) {
    LOGGER.debug(
        "Dropping nonNullability for table: {} columns: {}",
        table,
        targetItems.getColumnsToDropNonNullability());
    try {
      conn.alterNonNullableColumns(
          table, new ArrayList<>(targetItems.getColumnsToDropNonNullability()));
    } catch (SnowflakeKafkaConnectorException alterFailure) {
      LOGGER.warn(
          "Failure altering table to update nullability: {}, this could happen when multiple"
              + " partitions try to alter the table at the same time and the warning could be"
              + " ignored",
          table,
          alterFailure);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/TableSchema.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import java.util.Collections;
import java.util.Map;
import java.util.Objects;

/**
 * Wrapper around Map of column name to ColumnInfos.
 *
 * <p>The map passed to the constructor is stored as-is (no copy); the getter exposes it through an
 * unmodifiable view.
 */
public class TableSchema {
  private final Map<String, ColumnInfos> columnInfos;

  /** @param columnInfos column name to column information */
  public TableSchema(Map<String, ColumnInfos> columnInfos) {
    this.columnInfos = columnInfos;
  }

  /** Returns an unmodifiable view over the wrapped map. */
  public Map<String, ColumnInfos> getColumnInfos() {
    final Map<String, ColumnInfos> readOnlyView = Collections.unmodifiableMap(this.columnInfos);
    return readOnlyView;
  }

  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    return Objects.equals(this.columnInfos, ((TableSchema) o).columnInfos);
  }

  @Override
  public int hashCode() {
    return Objects.hash(this.columnInfos);
  }

  @Override
  public String toString() {
    return new StringBuilder("TableSchema{columnInfos=")
        .append(this.columnInfos)
        .append('}')
        .toString();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/TableSchemaResolver.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.collect.Streams;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.records.SnowflakeSinkRecord;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Resolves table schema from Kafka SinkRecord. Supports both schema-ful (Avro/Protobuf) and
 * schema-less (JSON) records.
 */
public class TableSchemaResolver {

  private static final Logger LOGGER = LoggerFactory.getLogger(TableSchemaResolver.class);
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  private final ColumnTypeMapper columnTypeMapper;

  /** Creates a resolver backed by {@link SnowflakeColumnTypeMapper}. */
  public TableSchemaResolver() {
    this(new SnowflakeColumnTypeMapper());
  }

  /** @param columnTypeMapper mapper used to translate Kafka / JSON types into column types */
  public TableSchemaResolver(ColumnTypeMapper columnTypeMapper) {
    this.columnTypeMapper = columnTypeMapper;
  }

  /**
   * Collect column data types from either the Kafka Connect schema or the content values. Column
   * names in {@code columnsToInclude} and the returned TableSchema keys are raw internal names (as
   * returned by DESCRIBE TABLE or as normalized at record creation time).
   *
   * @param record the SnowflakeSinkRecord containing schema and content
   * @param columnsToInclude the names of the columns to include in the schema
   * @return a TableSchema keyed by column name; empty when no columns were requested
   */
  public TableSchema resolveTableSchemaFromSnowflakeRecord(
      SnowflakeSinkRecord record, List<String> columnsToInclude) {
    boolean nothingRequested = columnsToInclude == null || columnsToInclude.isEmpty();
    if (nothingRequested) {
      return new TableSchema(ImmutableMap.of());
    }

    Set<String> requested = new HashSet<>(columnsToInclude);
    return hasSchema(record)
        ? getTableSchemaFromRecordSchema(record, requested)
        : getTableSchemaFromJson(record, requested);
  }

  /** True when the record carries a STRUCT schema with at least one field. */
  private boolean hasSchema(SnowflakeSinkRecord record) {
    Schema schema = record.getSchema();
    // The type check comes before fields() so fields() is only reached for STRUCT schemas.
    if (schema == null || schema.type() != Schema.Type.STRUCT) {
      return false;
    }
    List<Field> fields = schema.fields();
    return fields != null && !fields.isEmpty();
  }

  /**
   * Resolves the requested columns from the record's Connect schema. Only columns that are present
   * in the record content are considered; if any of them is absent from the schema, ERROR_5022 is
   * raised.
   */
  private TableSchema getTableSchemaFromRecordSchema(
      SnowflakeSinkRecord record, Set<String> columnNamesSet) {
    JsonNode contentTree = OBJECT_MAPPER.valueToTree(record.getContent());
    Map<String, ColumnInfos> schemaMap = getFullSchemaMapFromRecord(record);
    List<ColumnValuePair> selected = selectColumns(contentTree, columnNamesSet);

    List<String> namesMissingFromSchema =
        selected.stream()
            .map(ColumnValuePair::getColumnName)
            .filter(name -> !schemaMap.containsKey(name))
            .collect(Collectors.toList());
    if (!namesMissingFromSchema.isEmpty()) {
      throw SnowflakeErrors.ERROR_5022.getException(
          "Columns not found in schema: " + namesMissingFromSchema + ", schemaMap: " + schemaMap);
    }

    // Every selected column is known to the schema at this point.
    Map<String, ColumnInfos> resolved = new HashMap<>();
    for (ColumnValuePair pair : selected) {
      String name = pair.getColumnName();
      resolved.put(name, schemaMap.get(name));
    }
    return new TableSchema(resolved);
  }

  /** Resolves the requested columns by inferring each type from the JSON value in the content. */
  private TableSchema getTableSchemaFromJson(
      SnowflakeSinkRecord record, Set<String> columnNamesSet) {
    JsonNode contentTree = OBJECT_MAPPER.valueToTree(record.getContent());
    Map<String, ColumnInfos> inferred =
        selectColumns(contentTree, columnNamesSet).stream()
            .map(
                pair ->
                    Maps.immutableEntry(
                        pair.getColumnName(),
                        new ColumnInfos(inferDataTypeFromJsonObject(pair.getJsonNode()))))
            .collect(
                Collectors.toMap(
                    Map.Entry::getKey, Map.Entry::getValue, (previous, latest) -> latest));
    return new TableSchema(inferred);
  }

  /** Returns the content fields whose name is in {@code wanted}, in content order. */
  private static List<ColumnValuePair> selectColumns(JsonNode contentTree, Set<String> wanted) {
    return Streams.stream(contentTree.fields())
        .map(ColumnValuePair::from)
        .filter(pair -> wanted.contains(pair.getColumnName()))
        .collect(Collectors.toList());
  }

  /**
   * Build column type information from a Kafka Connect schema.
   *
   * @param record the SnowflakeSinkRecord containing the schema
   * @return a Map where the key is the field name and value is ColumnInfos
   */
  private Map<String, ColumnInfos> getFullSchemaMapFromRecord(SnowflakeSinkRecord record) {
    Map<String, ColumnInfos> byFieldName = new HashMap<>();
    Schema schema = record.getSchema();
    if (schema == null || schema.fields() == null) {
      return byFieldName;
    }
    for (Field field : schema.fields()) {
      Schema fieldSchema = field.schema();
      String columnType = columnTypeMapper.mapToColumnType(fieldSchema.type(), fieldSchema.name());
      LOGGER.info(
          "Got the data type for field:{}, schemaName:{}, schemaDoc: {} kafkaType:{},"
              + " columnType:{}",
          field.name(),
          fieldSchema.name(),
          fieldSchema.doc(),
          fieldSchema.type(),
          columnType);

      // The schema doc string becomes the column comment.
      byFieldName.put(field.name(), new ColumnInfos(columnType, fieldSchema.doc()));
    }
    return byFieldName;
  }

  /** Try to infer the data type from the data */
  private String inferDataTypeFromJsonObject(JsonNode value) {
    Schema.Type inferredType = columnTypeMapper.mapJsonNodeTypeToKafkaType(value);
    if (inferredType != null) {
      // Passing null to schemaName when there is no schema information
      return columnTypeMapper.mapToColumnType(inferredType);
    }
    // only when the type of the value is unrecognizable for JAVA
    throw SnowflakeErrors.ERROR_5021.getException("class: " + value.getClass());
  }

  // ---- ColumnValuePair ----

  /** Pairs a column name with the JSON value found for it in the record content. */
  private static class ColumnValuePair {
    private final String columnName;
    private final JsonNode jsonNode;

    private ColumnValuePair(String columnName, JsonNode jsonNode) {
      this.columnName = columnName;
      this.jsonNode = jsonNode;
    }

    /** Adapts a (name, node) entry into a pair. */
    public static ColumnValuePair from(Map.Entry<String, JsonNode> field) {
      return new ColumnValuePair(field.getKey(), field.getValue());
    }

    public String getColumnName() {
      return columnName;
    }

    public JsonNode getJsonNode() {
      return jsonNode;
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/schemaevolution/ValidationResultMapper.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import com.snowflake.kafka.connector.internal.validation.ValidationResult;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Maps {@link ValidationResult} to {@link SchemaEvolutionTargetItems}. Column names are raw
 * internal names (as returned by DESCRIBE TABLE / as normalized at record creation time). Quoting
 * for DDL is handled downstream in {@link
 * com.snowflake.kafka.connector.internal.StandardSnowflakeConnectionService}.
 */
public class ValidationResultMapper {

  /**
   * Convert ValidationResult to SchemaEvolutionTargetItems.
   *
   * <p>The columns to drop NOT NULL from are the union of the missing NOT NULL columns and the
   * NOT NULL columns that received a null value; the columns to add are the extra columns.
   *
   * @param result ValidationResult with structural error details (raw column names)
   * @param tableName Target table name
   * @return SchemaEvolutionTargetItems with raw column names to add and columns to drop NOT NULL
   */
  public static SchemaEvolutionTargetItems mapToSchemaEvolutionItems(
      ValidationResult result, String tableName) {
    Set<String> columnsToAdd = result.getExtraColNames();

    Stream<String> missingNotNull = result.getMissingNotNullColNames().stream();
    Stream<String> nullForNotNull = result.getNullValueForNotNullColNames().stream();
    Set<String> relaxNullability =
        Stream.concat(missingNotNull, nullForNotNull).collect(Collectors.toSet());

    return new SchemaEvolutionTargetItems(tableName, relaxNullability, columnsToAdd);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/DefaultStreamingConfigValidator.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.ConnectorConfigTools.BOOLEAN_VALIDATOR;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.VALUE_CONVERTER;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import org.apache.kafka.common.config.ConfigException;

/**
 * Validates the streaming-specific parts of a connector configuration: the role, the error
 * handling settings, and the value converter when schematization is enabled.
 */
public class DefaultStreamingConfigValidator implements StreamingConfigValidator {

  private static final String STRING_CONVERTER_KEYWORD = "StringConverter";
  private static final String BYTE_ARRAY_CONVERTER_KEYWORD = "ByteArrayConverter";

  /**
   * Runs all checks and collects the failures.
   *
   * @param inputConfig raw connector configuration
   * @return map from the name of each invalid configuration parameter to its error message; empty
   *     when everything is valid
   */
  @Override
  public ImmutableMap<String, String> validate(Map<String, String> inputConfig) {
    Map<String, String> invalidParams = new HashMap<>();

    validateRole(inputConfig)
        .ifPresent(errorEntry -> invalidParams.put(errorEntry.getKey(), errorEntry.getValue()));

    // Validate error handling configs
    if (inputConfig.containsKey(ERRORS_TOLERANCE_CONFIG)) {
      try {
        ConnectorConfigTools.ErrorTolerance.VALIDATOR.ensureValid(
            ERRORS_TOLERANCE_CONFIG, inputConfig.get(ERRORS_TOLERANCE_CONFIG));
      } catch (ConfigException e) {
        invalidParams.put(
            ERRORS_TOLERANCE_CONFIG,
            Utils.formatString(
                "{} configuration error: {}", ERRORS_TOLERANCE_CONFIG, e.getMessage()));
      }
    }
    if (inputConfig.containsKey(ERRORS_LOG_ENABLE_CONFIG)) {
      try {
        BOOLEAN_VALIDATOR.ensureValid(
            ERRORS_LOG_ENABLE_CONFIG, inputConfig.get(ERRORS_LOG_ENABLE_CONFIG));
      } catch (ConfigException e) {
        invalidParams.put(ERRORS_LOG_ENABLE_CONFIG, e.getMessage());
      }
    }

    // Validate schematization config
    invalidParams.putAll(validateSchematizationConfig(inputConfig));

    return ImmutableMap.copyOf(invalidParams);
  }

  /**
   * Checks that the role parameter is present and non-empty.
   *
   * @return an entry (parameter name to message) when the role is missing or empty, otherwise empty
   */
  private static Optional<Map.Entry<String, String>> validateRole(Map<String, String> inputConfig) {
    // Map.get returns null for an absent key, so isNullOrEmpty covers missing, null and "".
    String role = inputConfig.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
    if (Strings.isNullOrEmpty(role)) {
      String missingRole =
          String.format(
              "Config: %s should be present for Snowpipe Streaming",
              KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
      return Optional.of(Map.entry(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, missingRole));
    }
    return Optional.empty();
  }

  /**
   * Records an error for {@code param} when its value cannot be parsed as a long (a missing value
   * also fails to parse). Not referenced by the checks in this class at present.
   */
  private static void ensureValidLong(
      Map<String, String> inputConfig, String param, Map<String, String> invalidParams) {
    try {
      Long.parseLong(inputConfig.get(param));
    } catch (NumberFormatException exception) {
      invalidParams.put(
          param,
          Utils.formatString(
              param + " configuration must be a parsable long. Given configuration" + " was: {}",
              inputConfig.get(param)));
    }
  }

  /**
   * Validates if the configs are allowed values when schematization is enabled.
   *
   * <p>String and byte-array value converters are rejected because they are not supported together
   * with schematization. The error is keyed by the {@code VALUE_CONVERTER} parameter name, like
   * every other entry produced by this validator, so callers can attribute it to the right
   * configuration parameter.
   *
   * @return a map of invalid params
   */
  private static Map<String, String> validateSchematizationConfig(Map<String, String> inputConfig) {
    Map<String, String> invalidParams = new HashMap<>();

    boolean schematizationEnabled =
        Boolean.parseBoolean(
            inputConfig.getOrDefault(
                KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION,
                String.valueOf(
                    KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT)));

    String valueConverter = inputConfig.get(VALUE_CONVERTER);
    boolean unsupportedConverter =
        valueConverter != null
            && (valueConverter.contains(STRING_CONVERTER_KEYWORD)
                || valueConverter.contains(BYTE_ARRAY_CONVERTER_KEYWORD));

    if (schematizationEnabled && unsupportedConverter) {
      // Key by the parameter name, not by the converter class name held in its value.
      invalidParams.put(
          VALUE_CONVERTER,
          Utils.formatString(
              "The value converter:{} is not supported when schematization is enabled.",
              valueConverter));
    }

    return invalidParams;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/IngestionMethodConfig.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import java.util.Locale;

/**
 * Enum representing the ingestion method for Snowflake Kafka Connector.
 *
 * <p>Only SNOWPIPE_STREAMING is supported (SSv2). Legacy SNOWPIPE and SSv1 have been removed.
 */
public enum IngestionMethodConfig {

  /* Snowpipe streaming (SSv2) - the only supported ingestion method */
  SNOWPIPE_STREAMING;

  /** Returns the constant's name lower-cased with the locale-independent ROOT locale. */
  @Override
  public String toString() {
    final String constantName = this.name();
    return constantName.toLowerCase(Locale.ROOT);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/LatestCommitedOffsetTokenExecutor.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static java.time.temporal.ChronoUnit.SECONDS;

import com.snowflake.kafka.connector.internal.KCLogger;
import dev.failsafe.Failsafe;
import dev.failsafe.FailsafeExecutor;
import dev.failsafe.Fallback;
import dev.failsafe.RetryPolicy;
import dev.failsafe.function.CheckedSupplier;
import java.time.Duration;

/**
 * Class that separates Failsafe specific logic (retries and fallback) from the actual channel logic
 */
public class LatestCommitedOffsetTokenExecutor {

  private static final KCLogger LOGGER =
      new KCLogger(LatestCommitedOffsetTokenExecutor.class.getName());

  private static final Duration DURATION_BETWEEN_GET_OFFSET_TOKEN_RETRY = Duration.ofSeconds(1);
  protected static final int MAX_GET_OFFSET_TOKEN_RETRIES = 3;

  /**
   * Builds an executor that applies the retry policy first and, around it, the fallback.
   *
   * @param channelName channel name, used in log messages only
   * @param exceptionClass exception type handled by both the retry policy and the fallback
   * @param fallbackSupplier supplier invoked by the fallback
   * @return executor composed as fallback(retry(...)) with a failure listener that logs an error
   */
  public static FailsafeExecutor<Long> getExecutor(
      String channelName,
      Class<? extends Throwable> exceptionClass,
      CheckedSupplier<Long> fallbackSupplier) {
    RetryPolicy<Long> innerRetry = createRetryPolicy(exceptionClass);
    Fallback<Long> outerFallback = createFallback(channelName, exceptionClass, fallbackSupplier);

    FailsafeExecutor<Long> fallbackWithLogging =
        Failsafe.with(outerFallback)
            .onFailure(
                failed ->
                    LOGGER.error(
                        "[OFFSET_TOKEN_RETRY_FAILSAFE] Failure to fetch offsetToken even after"
                            + " retry and fallback from snowflake for channel:{},"
                            + " elapsedTimeSeconds:{}",
                        channelName,
                        failed.getElapsedTime().get(SECONDS),
                        failed.getException()));
    return fallbackWithLogging.compose(innerRetry);
  }

  /**
   * Retry policy for the given exception type: fixed delay between attempts and at most {@link
   * #MAX_GET_OFFSET_TOKEN_RETRIES} attempts, logging a warning on every retry.
   */
  private static RetryPolicy<Long> createRetryPolicy(
      Class<? extends Throwable> retryExceptionClass) {
    return RetryPolicy.<Long>builder()
        .handle(retryExceptionClass)
        .withDelay(DURATION_BETWEEN_GET_OFFSET_TOKEN_RETRY)
        .withMaxAttempts(MAX_GET_OFFSET_TOKEN_RETRIES)
        .onRetry(
            attempt ->
                LOGGER.warn(
                    "[OFFSET_TOKEN_RETRY_POLICY] retry for getLatestCommittedOffsetToken. Retry"
                        + " no:{}, message:{}",
                    attempt.getAttemptCount(),
                    attempt.getLastException().getMessage()))
        .build();
  }

  /**
   * Fallback for the given exception type that delegates to {@code fallbackSupplier} and logs an
   * error through its failure listener.
   */
  private static Fallback<Long> createFallback(
      String channelName,
      Class<? extends Throwable> exceptionClass,
      CheckedSupplier<Long> fallbackSupplier) {
    return Fallback.builder(fallbackSupplier)
        .handle(exceptionClass)
        .onFailure(
            failed ->
                LOGGER.error(
                    "[OFFSET_TOKEN_FALLBACK] Failed to open Channel/fetch offsetToken for"
                        + " channel:{}, exception:{}",
                    channelName,
                    failed.getException().toString()))
        .build();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/OpenChannelRetryPolicy.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import com.snowflake.kafka.connector.internal.KCLogger;
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import dev.failsafe.function.CheckedSupplier;
import java.time.Duration;

/**
 * Policy class that encapsulates retry logic for opening streaming channels with exponential
 * backoff and jitter.
 *
 * <p>This class provides a clean interface to execute channel opening operations with automatic
 * retry on HTTP 429 (rate limiting) errors from Snowflake streaming service.
 */
class OpenChannelRetryPolicy {

  private static final KCLogger LOGGER = new KCLogger(OpenChannelRetryPolicy.class.getName());

  /** Fragment of an {@link SFException} message that marks the failure as an HTTP 429 response. */
  private static final String RATE_LIMIT_MESSAGE_PART = "HTTP Status: 429";

  // Retry policy constants
  /** Initial delay before the first retry attempt. */
  private static final Duration INITIAL_DELAY = Duration.ofSeconds(2);

  /** Maximum delay between retry attempts. */
  private static final Duration MAX_DELAY = Duration.ofSeconds(8);

  /** Exponential backoff multiplier (retry delays: 2s, 4s, then capped at 8s). */
  private static final double BACKOFF_MULTIPLIER = 2.0;

  /** Random jitter added to retry delays to prevent thundering herd. */
  private static final Duration JITTER_DURATION = Duration.ofMillis(200);

  /**
   * Executes the provided channel opening action with retry handling.
   *
   * <p>When the action fails with an {@link SFException} whose message contains {@code "HTTP
   * Status: 429"} (rate limiting), it is retried with exponential backoff and jitter, with no
   * limit on the number of attempts. Any other failure is not retried and propagates out of the
   * Failsafe call.
   *
   * @param channelOpenAction the action to execute (typically openChannelForTable call)
   * @param channelName the channel name for logging purposes
   * @return the result of the channel opening operation
   */
  static SnowflakeStreamingIngestChannel executeWithRetry(
      CheckedSupplier<SnowflakeStreamingIngestChannel> channelOpenAction, String channelName) {

    RetryPolicy<SnowflakeStreamingIngestChannel> retryPolicy =
        RetryPolicy.<SnowflakeStreamingIngestChannel>builder()
            .handleIf(OpenChannelRetryPolicy::isRetryableError)
            .withBackoff(INITIAL_DELAY, MAX_DELAY, BACKOFF_MULTIPLIER)
            .withJitter(JITTER_DURATION)
            // -1 disables the attempt limit: keep retrying for as long as we are rate limited.
            .withMaxAttempts(-1)
            .onRetry(
                event ->
                    LOGGER.warn(
                        "Open channel {} retry attempt #{} due to: {}",
                        channelName,
                        event.getAttemptCount(),
                        event.getLastException().getMessage()))
            .build();

    return Failsafe.with(retryPolicy).get(channelOpenAction);
  }

  /**
   * Decides whether a failure should be retried.
   *
   * @param e the failure raised by the channel opening action
   * @return {@code true} only for an {@link SFException} whose message contains the rate-limit
   *     marker; an exception without a message is never retryable
   */
  private static boolean isRetryableError(Throwable e) {
    if (!(e instanceof SFException)) {
      return false;
    }
    // getMessage() may legitimately be null; without this guard the predicate itself would throw
    // a NullPointerException and hide the original failure.
    final String message = e.getMessage();
    return message != null && message.contains(RATE_LIMIT_MESSAGE_PART);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.Utils.getTableName;
import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static com.snowflake.kafka.connector.internal.streaming.v2.PipeNameProvider.buildDefaultPipeName;

import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.BackpressureException;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools;
import com.snowflake.kafka.connector.internal.streaming.v2.service.BatchOffsetFetcher;
import com.snowflake.kafka.connector.internal.streaming.v2.service.PartitionChannelManager;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.time.Duration;
import java.time.Instant;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;

/**
 * Per-task sink service. A task can be assigned multiple partitions. The major methods are
 * startPartitions, insert, getOffset and close.
 *
 * <p>startPartitions: Called when partitions are assigned. Responsible for generating the
 * per-partition POJOs.
 *
 * <p>insert and getOffset are called when the {@link
 * com.snowflake.kafka.connector.SnowflakeSinkTask#put(Collection)} and {@link
 * com.snowflake.kafka.connector.SnowflakeSinkTask#preCommit(Map)} APIs are called.
 *
 * <p>This implementation of SinkService uses Snowpipe Streaming (Streaming Ingestion).
 *
 * <p>Hence it initializes, opens and closes the channels. The StreamingIngestChannel resides
 * inside {@link TopicPartitionChannel}, which is per partition.
 */
public class SnowflakeSinkServiceV2 implements SnowflakeSinkService {

  private static final KCLogger LOGGER = new KCLogger(SnowflakeSinkServiceV2.class.getName());

  private final SnowflakeConnectionService conn;

  private final Optional<MetricsJmxReporter> metricsJmxReporter;
  private final String connectorName;

  private final SinkTaskConfig taskConfig;
  private final SinkTaskContext sinkTaskContext;

  // Set that keeps track of the channels that have been seen per input batch
  private final Set<String> channelsVisitedPerBatch = new HashSet<>();
  private final BatchOffsetFetcher batchOffsetFetcher;

  private final PartitionChannelManager channelManager;
  private final TaskMetrics taskMetrics;

  /** Cooldown duration after a backpressure event before retrying inserts. */
  static final Duration BACKPRESSURE_COOLDOWN = Duration.ofSeconds(1);

  /** Timestamp until which all inserts are skipped due to backpressure. */
  @VisibleForTesting Instant backpressureUntil = Instant.MIN;

  /**
   * Creates the sink service with the production {@link BatchOffsetFetcher} and {@link
   * PartitionChannelManager}.
   *
   * <p>Both collaborators are handed to the delegate constructor as suppliers, so they are only
   * instantiated when that constructor invokes them, which happens after it has called {@code
   * ThreadPools.registerTask}. NOTE(review): presumably {@code ThreadPools.getIoExecutor} requires
   * the task to be registered first — confirm against ThreadPools.
   *
   * @param conn open Snowflake connection; the delegate rejects {@code null} or closed connections
   * @param taskConfig parsed configuration of this task
   * @param recordErrorReporter reporter passed on to the channel manager
   * @param sinkTaskContext Kafka Connect task context, used to rewind offsets
   * @param metricsJmxReporter optional JMX reporter
   * @param taskMetrics task-level metrics shared with the offset fetcher and the channel manager
   */
  public SnowflakeSinkServiceV2(
      SnowflakeConnectionService conn,
      SinkTaskConfig taskConfig,
      KafkaRecordErrorReporter recordErrorReporter,
      SinkTaskContext sinkTaskContext,
      Optional<MetricsJmxReporter> metricsJmxReporter,
      TaskMetrics taskMetrics) {
    this(
        conn,
        taskConfig,
        sinkTaskContext,
        metricsJmxReporter,
        // Deferred: evaluated by the delegate constructor.
        () ->
            new BatchOffsetFetcher(
                taskConfig.getConnectorName(),
                taskConfig.getTaskId(),
                taskConfig,
                ThreadPools.getIoExecutor(taskConfig.getConnectorName()),
                taskMetrics),
        // Deferred: evaluated by the delegate constructor.
        () ->
            new PartitionChannelManager(
                conn.getTelemetryClient(),
                taskConfig,
                recordErrorReporter,
                sinkTaskContext,
                metricsJmxReporter,
                taskMetrics,
                conn),
        taskMetrics);
  }

  SnowflakeSinkServiceV2(
      SnowflakeConnectionService conn,
      SinkTaskConfig taskConfig,
      SinkTaskContext sinkTaskContext,
      Optional<MetricsJmxReporter> metricsJmxReporter,
      Supplier<BatchOffsetFetcher> batchOffsetFetcherFactory,
      Supplier<PartitionChannelManager> channelManagerFactory,
      TaskMetrics taskMetrics) {
    if (conn == null || conn.isClosed()) {
      throw SnowflakeErrors.ERROR_5010.getException();
    }
    this.conn = conn;
    this.taskConfig = taskConfig;
    this.sinkTaskContext = sinkTaskContext;
    this.metricsJmxReporter = metricsJmxReporter;

    this.connectorName = taskConfig.getConnectorName();

    ThreadPools.registerTask(this.connectorName, taskConfig);

    this.taskMetrics = taskMetrics;
    this.batchOffsetFetcher = batchOffsetFetcherFactory.get();
    this.channelManager = channelManagerFactory.get();

    // Log validation configuration for operator visibility
    logValidationConfiguration();

    LOGGER.info(
        "SnowflakeSinkServiceV2 initialized for connector: {}, task: {}, tolerateErrors: {},"
            + " enableSanitization: {}",
        this.connectorName,
        taskConfig.getTaskId(),
        taskConfig.isTolerateErrors(),
        taskConfig.isEnableSanitization());
  }

  /**
   * Logs how record errors are handled under the current configuration so that operators can spot
   * setups that may silently lose data. This method only logs; it never rejects a configuration.
   *
   * <ul>
   *   <li>Always: reports whether the legacy schematization flag is set.
   *   <li>Validation other than {@code CLIENT_SIDE}: for every distinct target table that already
   *       exists, reports whether it is an Iceberg table and whether ERROR_LOGGING is enabled.
   *   <li>Validation {@code CLIENT_SIDE}: reports whether the combination of error tolerance and
   *       DLQ topic is safe.
   * </ul>
   */
  private void logValidationConfiguration() {
    final String toleranceName =
        taskConfig.isTolerateErrors()
            ? ConnectorConfigTools.ErrorTolerance.ALL.toString()
            : ConnectorConfigTools.ErrorTolerance.NONE.toString();
    final String dlqTopic = taskConfig.getDlqTopicName();

    final boolean dlqConfigured = dlqTopic != null && !dlqTopic.trim().isEmpty();
    final boolean tolerateAll = "all".equalsIgnoreCase(toleranceName);

    // Legacy KC v3 flag: warn when it is still set, otherwise state the wrapping behavior.
    if (!taskConfig.isEnableSchematization()) {
      LOGGER.info(
          "Schematization is disabled — the connector wraps payloads into"
              + " RECORD_CONTENT/RECORD_METADATA.");
    } else {
      LOGGER.warn(
          "Config 'snowflake.enable.schematization' is not supported in KC v4. "
              + "Schema evolution is now handled server-side via table property "
              + "'ENABLE_SCHEMA_EVOLUTION'. For pre-created tables, run: "
              + "ALTER TABLE ... SET ENABLE_SCHEMA_EVOLUTION = TRUE");
    }

    final boolean clientSideValidation =
        taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE;

    if (!clientSideValidation) {
      // Up to three connection calls per table (tableExist, isIcebergTable,
      // hasErrorLoggingEnabled); this runs once, from the constructor.
      final Set<String> targetTables = new HashSet<>(taskConfig.getTopicToTableMap().values());
      for (String tableName : targetTables) {
        if (!conn.tableExist(tableName)) {
          // Nothing to inspect yet. Per the original author's note the table will be
          // auto-created with ERROR_LOGGING = TRUE (not verifiable from this class).
          continue;
        }
        if (conn.isIcebergTable(tableName)) {
          LOGGER.warn(
              "Table '{}' is an Iceberg table. Iceberg tables do not support ERROR_LOGGING."
                  + " In v4 high-throughput mode, invalid records targeting this table will be"
                  + " silently dropped. Error table functionality is not available for Iceberg"
                  + " tables.",
              tableName);
        } else if (conn.hasErrorLoggingEnabled(tableName)) {
          LOGGER.info("Table '{}' has ERROR_LOGGING enabled — error table is active.", tableName);
        } else {
          LOGGER.warn(
              "Table '{}' does not have ERROR_LOGGING enabled. In v4 high-throughput mode,"
                  + " invalid records will be silently dropped. Run: ALTER TABLE \"{}\" SET"
                  + " ERROR_LOGGING = TRUE",
              tableName,
              tableName);
        }
      }
      return;
    }

    // From here on client-side validation is enabled.
    if (!tolerateAll) {
      // Safe: the task aborts on the first validation failure (errors.tolerance=none).
      LOGGER.info(
          "Client-side validation enabled with errors.tolerance=none. "
              + "Validation failures will abort the task (safe - prevents data loss){}.",
          dlqConfigured ? " DLQ configured but only used when errors.tolerance=all" : "");
      return;
    }

    if (dlqConfigured) {
      // Safe: validation failures are routed to the DLQ.
      LOGGER.info(
          "Client-side validation enabled with errors.tolerance=all. "
              + "Validation failures will route to DLQ topic: {}",
          dlqTopic);
      return;
    }

    // Unsafe: failures are tolerated but there is nowhere to put them. Deliberately logged at
    // ERROR instead of thrown so the connector can still start; operators decide from the logs.
    LOGGER.error(
        "UNSAFE CONFIGURATION: Client-side validation enabled with errors.tolerance=all but NO"
            + " DLQ configured. "
            + "Invalid records will be SILENTLY DROPPED causing data loss. "
            + "Configure '{}' to preserve failed records, or set errors.tolerance=none to abort"
            + " on errors.",
        KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG);
  }

  /**
   * Starts a single partition by delegating to {@link #startPartitions(Collection)} with a
   * one-element set.
   *
   * @param topicPartition TopicPartition passed from Kafka
   */
  @Override
  public void startPartition(TopicPartition topicPartition) {
    final Set<TopicPartition> singlePartition = Set.of(topicPartition);
    startPartitions(singlePartition);
  }

  /**
   * Makes sure the target table of every topic exists, decides which pipe each table is ingested
   * through, and then hands channel creation over to the {@link PartitionChannelManager}.
   *
   * <p>Pipe selection per table: a pipe named exactly like the table counts as a user-created
   * (non-default) pipe. With client-side validation such a pipe is rejected; otherwise it is used
   * when present. In every other case the default pipe name is used.
   *
   * @param partitions collection of topic partition
   */
  @Override
  public void startPartitions(Collection<TopicPartition> partitions) {
    final Map<String, String> pipeByTable = new HashMap<>();

    final Collection<String> topics =
        partitions.stream().map(TopicPartition::topic).collect(Collectors.toSet());

    for (String topic : topics) {
      final String tableName =
          getTableName(topic, taskConfig.getTopicToTableMap(), taskConfig.isEnableSanitization());
      createTableIfNotExists(tableName);

      final boolean clientSideValidation =
          taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE;
      final boolean userPipeExists = this.conn.pipeExist(tableName);

      // Client-side validation only supports default pipes, because its assumptions may not
      // hold for user-created pipes.
      if (clientSideValidation && userPipeExists) {
        throw SnowflakeErrors.ERROR_0032.getException("table: " + tableName);
      }

      // Reaching this point with a user pipe means validation is disabled (high-performance
      // mode), where non-default pipes are allowed.
      final String targetPipeName = userPipeExists ? tableName : buildDefaultPipeName(tableName);

      pipeByTable.put(tableName, targetPipeName);
      LOGGER.info("Table: {}, using pipe: {}", tableName, targetPipeName);
    }

    channelManager.startPartitions(partitions, pipeByTable);
  }

  /**
   * Creates the table through {@code createTableWithOnlyMetadataColumn} unless the connection
   * reports that it already exists. Either outcome is logged at INFO level.
   *
   * @param tableName name of the target table
   */
  private void createTableIfNotExists(final String tableName) {
    final boolean alreadyPresent = this.conn.tableExist(tableName);
    if (!alreadyPresent) {
      LOGGER.info("Creating new table {}.", tableName);
      this.conn.createTableWithOnlyMetadataColumn(tableName);
      return;
    }
    LOGGER.info("Using existing table {}.", tableName);
  }

  /**
   * Selects the partitions whose channel exists and reports that it is still initializing.
   * Partitions without a channel are not part of the result.
   *
   * @param partitions partitions to inspect
   * @return the subset of {@code partitions} that is currently initializing
   */
  private Set<TopicPartition> currentlyInitializing(Collection<TopicPartition> partitions) {
    final Set<TopicPartition> stillInitializing = new HashSet<>();
    for (TopicPartition candidate : partitions) {
      final boolean initializing =
          channelManager
              .getChannel(candidate)
              .map(TopicPartitionChannel::isInitializing)
              .orElse(false);
      if (initializing) {
        stillInitializing.add(candidate);
      }
    }
    return stillInitializing;
  }

  /**
   * Inserts a batch of records, skipping whatever cannot be accepted right now and rewinding the
   * consumer offset of every skipped partition so those records are delivered again.
   *
   * <p>A record is skipped when its partition is still initializing, when a backpressure cooldown
   * is active, when {@link #insert(SinkRecord)} returns {@code false}, or when the insert raises a
   * {@link BackpressureException}. Once one record of a partition has been skipped, all later
   * records of that partition in this batch are skipped too. A {@link BackpressureException}
   * additionally causes every remaining record of the batch to be skipped and starts a new
   * cooldown of {@link #BACKPRESSURE_COOLDOWN}.
   *
   * @param records records coming from Kafka. Please note, they are not just from single topic and
   *     partition. It depends on the kafka connect worker node which can consume from multiple
   *     Topic and multiple Partitions
   */
  @Override
  public void insert(final Collection<SinkRecord> records) {
    // New batch: forget which channels were already visited.
    channelsVisitedPerBatch.clear();

    // Skip partitions for which the partition-channel bridge is currently being initialized.
    Set<TopicPartition> partitions =
        records.stream()
            .map(record -> new TopicPartition(record.topic(), record.kafkaPartition()))
            .collect(Collectors.toSet());

    Set<TopicPartition> initializingPartitions = currentlyInitializing(partitions);
    if (!initializingPartitions.isEmpty()) {
      LOGGER.debug(
          "Skipping put for {}/{} partitions that are currently being initialized: {}",
          initializingPartitions.size(),
          partitions.size(),
          initializingPartitions);
    }

    // If still in cooldown from a recent backpressure event, treat all partitions as
    // backpressured so we skip the entire batch and give the SDK time to drain.
    boolean skipAllPartitions = false;
    if (Instant.now().isBefore(backpressureUntil)) {
      LOGGER.debug(
          "Backpressure cooldown active until {}. Skipping entire batch.", backpressureUntil);
      skipAllPartitions = true;
    }

    // Offset of the first skipped record per partition; doubles as the "already skipping this
    // partition" marker inside the loop.
    Map<TopicPartition, Long> offsetsOfFirstSkippedRecord = new HashMap<>();
    boolean newBackpressure = false;
    for (SinkRecord record : records) {
      // check if it needs to handle null value records
      if (shouldSkipNullValue(record)) {
        continue;
      }

      TopicPartition tp = new TopicPartition(record.topic(), record.kafkaPartition());

      if (offsetsOfFirstSkippedRecord.containsKey(tp)) {
        // We've already skipped a record in this partition, so should also skip the remaining
        // records in this partition.
        continue;
      }
      if (skipAllPartitions || initializingPartitions.contains(tp)) {
        // Make sure we store the first record in each partition that we skipped so we can correctly
        // rewind the offset.
        offsetsOfFirstSkippedRecord.putIfAbsent(tp, record.kafkaOffset());
        continue;
      }

      try {
        // A false return means the record was not accepted; remember it as a rewind target.
        if (!insert(record)) {
          offsetsOfFirstSkippedRecord.putIfAbsent(tp, record.kafkaOffset());
        }
      } catch (BackpressureException e) {
        LOGGER.warn(
            "Backpressure on partition {}. Skipping remaining records for this partition."
                + " Exception: {}",
            tp,
            e.getMessage());
        taskMetrics.incBackpressureRewindCount();
        offsetsOfFirstSkippedRecord.putIfAbsent(tp, record.kafkaOffset());
        // NOTE: although the log line above mentions only this partition, setting this flag makes
        // the loop skip the remaining records of every partition in the batch.
        skipAllPartitions = true;
        newBackpressure = true;
      }
    }

    // Only a backpressure event raised in this batch (re)starts the cooldown; a batch skipped
    // because of an existing cooldown does not extend it.
    if (newBackpressure) {
      backpressureUntil = Instant.now().plus(BACKPRESSURE_COOLDOWN);
      LOGGER.info("Backpressure cooldown set until {}", backpressureUntil);
    }

    // Ask Kafka Connect to redeliver from the first skipped record of each affected partition.
    if (!offsetsOfFirstSkippedRecord.isEmpty()) {
      LOGGER.info("Rewinding offsets for skipped partitions: {}", offsetsOfFirstSkippedRecord);
      offsetsOfFirstSkippedRecord.forEach(sinkTaskContext::offset);
    }
  }

  /**
   * Inserts a single record through the channel of its topic partition.
   *
   * <p>If no channel is registered for the partition, or the registered channel reports that it is
   * closed, the partition is started first. The channel is also told whether this is the first
   * record it sees in the current batch.
   *
   * @param record record to insert
   * @return the value returned by {@code TopicPartitionChannel#insertRecord}
   * @throws IllegalStateException if no channel is available even after starting the partition
   */
  @Override
  public boolean insert(SinkRecord record) {
    LOGGER.trace("Inserting record: {}", record);

    final TopicPartition partition = new TopicPartition(record.topic(), record.kafkaPartition());

    // A missing channel is treated exactly like a closed one.
    final boolean missingOrClosed =
        channelManager
            .getChannel(partition)
            .map(TopicPartitionChannel::isChannelClosed)
            .orElse(true);
    if (missingOrClosed) {
      LOGGER.warn("Streaming channel doesn't exist or is closed for {}", partition);
      startPartition(partition);
    }

    final TopicPartitionChannel partitionChannel =
        channelManager
            .getChannel(partition)
            .orElseThrow(
                () ->
                    new IllegalStateException(
                        "Channel for " + partition + " not found after startPartition"));

    // Set#add returns true only the first time a channel name is seen in this batch.
    final String channelName = partitionChannel.getChannelName();
    final boolean firstRowInBatch = channelsVisitedPerBatch.add(channelName);
    return partitionChannel.insertRecord(record, firstRowInBatch);
  }

  /**
   * Tells whether a record must be dropped because its value is {@code null}.
   *
   * <p>With {@code BehaviorOnNullValues.DEFAULT} nothing is dropped. With any other setting a
   * record whose value is {@code null} is dropped and a DEBUG line is logged.
   *
   * @param record record to check
   * @return {@code true} if the record must be skipped
   */
  private boolean shouldSkipNullValue(SinkRecord record) {
    final boolean keepNullValues =
        taskConfig.getBehaviorOnNullValues() == ConnectorConfigTools.BehaviorOnNullValues.DEFAULT;
    if (keepNullValues || record.value() != null) {
      return false;
    }

    LOGGER.debug(
        "Null valued record from topic '{}', partition {} and offset {} was skipped.",
        record.topic(),
        record.kafkaPartition(),
        record.kafkaOffset());
    return true;
  }

  /**
   * Returns the committed offset of one partition, looked up through {@link
   * #getCommittedOffsets(Collection)}.
   *
   * @param topicPartition partition to look up
   * @return the committed offset, or {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} when the
   *     lookup yields no entry for the partition
   */
  @Override
  public long getOffset(TopicPartition topicPartition) {
    final Map<TopicPartition, Long> committedOffsets =
        getCommittedOffsets(Collections.singleton(topicPartition));
    return committedOffsets.getOrDefault(topicPartition, NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);
  }

  /**
   * Fetches the committed offsets of the given partitions through the {@link BatchOffsetFetcher}.
   *
   * <p>Partitions whose channel is still initializing are left out of the fetch and reported in an
   * INFO log line.
   *
   * @param partitions partitions to fetch offsets for
   * @return the map returned by the offset fetcher for the partitions that are not initializing
   */
  @Override
  public Map<TopicPartition, Long> getCommittedOffsets(
      final Collection<TopicPartition> partitions) {

    // Skip partitions for which the partition-channel bridge is currently being initialized.
    final Set<TopicPartition> stillInitializing = currentlyInitializing(partitions);
    final boolean anySkipped = !stillInitializing.isEmpty();
    if (anySkipped) {
      LOGGER.info(
          "Skipping preCommit for {}/{} partitions that are currently being initialized: {}",
          stillInitializing.size(),
          partitions.size(),
          stillInitializing);
    }

    final Set<TopicPartition> readyPartitions =
        partitions.stream()
            .filter(candidate -> !stillInitializing.contains(candidate))
            .collect(Collectors.toSet());

    return batchOffsetFetcher.getCommittedOffsets(readyPartitions, channelManager::getChannel);
  }

  /**
   * @return the number of entries in the channel manager's partition-channel map
   */
  @Override
  public int getPartitionCount() {
    final Map<String, TopicPartitionChannel> registeredChannels =
        channelManager.getPartitionChannels();
    return registeredChannels.size();
  }

  /** Delegates to {@code PartitionChannelManager#closeAll()}. */
  @Override
  public void closeAll() {
    channelManager.closeAll();
  }

  /**
   * This function is called during rebalance.
   *
   * <p>Closing is delegated to {@code PartitionChannelManager#close(Collection)} for the given
   * partitions. This method does not touch the streaming clients or the shared thread pool; those
   * are released in {@link #stop()}.
   *
   * <p>NOTE(review): the original documentation states that the channels are reopened and the
   * offset token is fetched again in {@link
   * com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)} after the rebalance; that
   * happens outside this class — confirm against the task and the channel manager.
   *
   * @param partitions a list of topic partition
   */
  @Override
  public void close(Collection<TopicPartition> partitions) {
    channelManager.close(partitions);
  }

  /**
   * Stops the service: waits for all channels to commit their data, then releases the streaming
   * clients of this task and the task's claim on the shared thread pool.
   *
   * <p>Both release steps run even if an earlier step throws, so a failure while waiting for the
   * channels does not leak the clients or keep the thread pool registered. The failure still
   * propagates to the caller; if a release step throws as well, that later exception is the one
   * that propagates.
   */
  @Override
  public void stop() {
    LOGGER.info(
        "Stopping SnowflakeSinkServiceV2 for connector: {}, task: {}",
        this.connectorName,
        taskConfig.getTaskId());

    try {
      channelManager.waitForAllChannelsToCommitData();
    } finally {
      try {
        // Release all streaming clients used by this service.
        // Clients will only be closed if no other tasks are using them.
        StreamingClientPools.closeTaskClients(connectorName, taskConfig.getTaskId());
      } finally {
        // Release this task's claim on the shared thread pool.
        // The pool is shut down when the last task for this connector unregisters.
        ThreadPools.closeForTask(connectorName);
      }
    }
  }

  /**
   * Not meaningfully implemented for this service: no closed state is tracked here.
   *
   * @return always {@code false}
   */
  @Override
  public boolean isClosed() {
    return false;
  }

  /**
   * @return the partition-channel map as returned by {@code
   *     PartitionChannelManager#getPartitionChannels()}
   */
  @Override
  public Map<String, TopicPartitionChannel> getPartitionChannels() {
    return channelManager.getPartitionChannels();
  }

  /**
   * Returns the metric registry of the JMX reporter, but only for a channel key that is known to
   * the channel manager.
   *
   * @param partitionChannelKey key identifying the partition channel
   * @return the registry, or empty when the key is unknown or no JMX reporter is configured
   */
  @Override
  public Optional<MetricRegistry> getMetricRegistry(String partitionChannelKey) {
    if (channelManager.getChannel(partitionChannelKey).isPresent()) {
      return metricsJmxReporter.map(MetricsJmxReporter::getMetricRegistry);
    }
    return Optional.empty();
  }

  /**
   * Blocks until all partition channels have finished initialization, by delegating to {@code
   * PartitionChannelManager#awaitAllPartitions()}.
   */
  @Override
  public void awaitInitialization() {
    channelManager.awaitAllPartitions();
  }

  /**
   * Exposes the channel manager created by this service; intended for tests only.
   *
   * @return the channel manager used by this service
   */
  @VisibleForTesting
  PartitionChannelManager getChannelManager() {
    return channelManager;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/StreamingClientProperties.java
================================================
/*
 * Copyright (c) 2023 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.snowflake.kafka.connector.internal.streaming;

import com.google.common.base.Strings;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.PrivateKeyTool;
import com.snowflake.kafka.connector.internal.SnowflakeURL;
import java.security.PrivateKey;
import java.util.Base64;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import org.apache.kafka.common.config.types.Password;

/**
 * Object to convert and store properties for {@code
 * net.snowflake.ingest.streaming.SnowflakeStreamingIngestClient}. This object is used to compare
 * equality between clients in {@code StreamingClientProvider}.
 */
public class StreamingClientProperties {
  public static final String STREAMING_CLIENT_V2_PREFIX_NAME = "KC_CLIENT_V2_";
  public static final String DEFAULT_CLIENT_NAME = "DEFAULT_CLIENT";

  private static final KCLogger LOGGER = new KCLogger(StreamingClientProperties.class.getName());

  /** Connection properties for the client; empty when no Snowflake URL is configured. */
  public final Properties clientProperties;

  /** {@link #STREAMING_CLIENT_V2_PREFIX_NAME} followed by the connector name or a default. */
  public final String clientNamePrefix;

  /** Client parameter overrides, keyed by lower-cased parameter name. */
  public final Map<String, Object> parameterOverrides;

  /** Constructor used by {@link #from(SinkTaskConfig)}. */
  private StreamingClientProperties(
      Properties clientProperties,
      String clientNamePrefix,
      Map<String, Object> parameterOverrides) {
    this.clientProperties = clientProperties;
    this.clientNamePrefix = clientNamePrefix;
    this.parameterOverrides = parameterOverrides;
  }

  /**
   * Creates streaming client properties from parsed {@link SinkTaskConfig}.
   *
   * <p>Connection properties (private key, user, role, account, host) are only populated when a
   * Snowflake URL is configured. Override keys are lower-cased with {@link Locale#ROOT} so the
   * result does not depend on the default locale of the JVM.
   *
   * @param config parsed task configuration
   * @return the derived client properties
   */
  public static StreamingClientProperties from(SinkTaskConfig config) {
    final Properties clientProperties = new Properties();
    if (!Strings.isNullOrEmpty(config.getSnowflakeUrl())) {
      SnowflakeURL url = new SnowflakeURL(config.getSnowflakeUrl());
      final String privateKeyStr =
          Optional.ofNullable(config.getSnowflakePrivateKey()).map(Password::value).orElse(null);
      final String privateKeyPassphrase =
          Optional.ofNullable(config.getSnowflakePrivateKeyPassphrase())
              .map(Password::value)
              .orElse(null);
      final PrivateKey privateKey =
          PrivateKeyTool.parsePrivateKey(privateKeyStr, privateKeyPassphrase);
      // The key is stored as the Base64 text of its encoded form.
      final String privateKeyEncoded = Base64.getEncoder().encodeToString(privateKey.getEncoded());
      clientProperties.put("private_key", privateKeyEncoded);

      clientProperties.put("user", config.getSnowflakeUser());
      clientProperties.put("role", config.getSnowflakeRole());
      clientProperties.put("account", url.getAccount());
      clientProperties.put("host", url.getUrlWithoutPort());
    }

    String clientNamePrefix =
        STREAMING_CLIENT_V2_PREFIX_NAME
            + (config.getConnectorName() != null ? config.getConnectorName() : DEFAULT_CLIENT_NAME);

    Map<String, Object> parameterOverrides = new HashMap<>();
    String overrideMap = config.getStreamingClientProviderOverrideMap();
    if (overrideMap != null && !overrideMap.isEmpty()) {
      // Locale.ROOT: with the default locale, a key such as "MAX_CLIENT_LAG" would be lower-cased
      // to a different string on JVMs running under e.g. a Turkish locale.
      Utils.parseCommaSeparatedKeyValuePairs(overrideMap)
          .forEach((key, value) -> parameterOverrides.put(key.toLowerCase(Locale.ROOT), value));
      LOGGER.info("Streaming Client config overrides: {}", parameterOverrides);
    }

    return new StreamingClientProperties(clientProperties, clientNamePrefix, parameterOverrides);
  }

  /**
   * Determines equality between StreamingClientProperties by looking at the parsed
   * clientProperties and the parameterOverrides; the client name prefix is not compared. This is
   * used in {@code StreamingClientProvider} to determine equality in registered clients.
   *
   * @param other other object to determine equality
   * @return {@code true} if {@code other} is a StreamingClientProperties with equal
   *     clientProperties and parameterOverrides; {@code false} otherwise, including for {@code
   *     null}
   */
  @Override
  public boolean equals(Object other) {
    // equals(null) must return false rather than throw.
    if (other == null || !other.getClass().equals(StreamingClientProperties.class)) {
      return false;
    }
    final StreamingClientProperties that = (StreamingClientProperties) other;
    return that.clientProperties.equals(this.clientProperties)
        && that.parameterOverrides.equals(this.parameterOverrides);
  }

  /**
   * Creates the hashcode for this object from the clientProperties and the parameterOverrides,
   * i.e. the same fields that {@link #equals(Object)} compares. This is used in {@code
   * StreamingClientProvider} to determine equality in registered clients.
   *
   * @return the combined hashcode of clientProperties and parameterOverrides
   */
  @Override
  public int hashCode() {
    return Objects.hash(this.clientProperties, this.parameterOverrides);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/StreamingConfigValidator.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.google.common.collect.ImmutableMap;
import java.util.Map;

/**
 * Validates connector config for Snowpipe Streaming.
 *
 * <p>The interface declares a single method; implementations decide which parameters are checked.
 */
// TODO (separate PR) - rename to ConfigValidator and return an ordinary Map
public interface StreamingConfigValidator {

  /**
   * Validates the given connector configuration.
   *
   * @param inputConfig connector configuration provided by the user
   * @return map of invalid parameters. NOTE(review): presumably keyed by parameter name with an
   *     error message as value, and empty when the configuration is valid — confirm against the
   *     implementations
   */
  ImmutableMap<String, String> validate(final Map<String, String> inputConfig);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/StreamingErrorHandler.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG;

import com.google.common.base.Strings;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.sink.SinkRecord;

/** Class encapsulating logic related to error handling e.g. DLQ. */
public class StreamingErrorHandler {

  private static final KCLogger LOGGER = new KCLogger(StreamingErrorHandler.class.getName());

  private final boolean logErrors;
  private final boolean isDLQTopicSet;
  private final boolean errorTolerance;
  private final KafkaRecordErrorReporter kafkaRecordErrorReporter;
  private final SnowflakeTelemetryService telemetryServiceV2;

  public StreamingErrorHandler(
      SinkTaskConfig config,
      KafkaRecordErrorReporter kafkaRecordErrorReporter,
      SnowflakeTelemetryService telemetryServiceV2) {
    this.logErrors = config.isErrorsLogEnable();
    this.isDLQTopicSet = !Strings.isNullOrEmpty(config.getDlqTopicName());
    this.errorTolerance = config.isTolerateErrors();
    this.kafkaRecordErrorReporter = kafkaRecordErrorReporter;
    this.telemetryServiceV2 = telemetryServiceV2;
  }

  public boolean isLogErrors() {
    return logErrors;
  }

  public void handleError(Exception error, SinkRecord originalRecordForReporting) {
    if (logErrors) {
      LOGGER.error("Insert Row Error message:{}", error.getMessage());
    }
    if (errorTolerance) {
      if (!isDLQTopicSet) {
        LOGGER.warn(
            "{} is set, however {} is not. The message will not be added to the Dead Letter Queue"
                + " topic.",
            ERRORS_TOLERANCE_CONFIG,
            ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG);
      } else {
        LOGGER.warn(
            "Adding the message to Dead Letter Queue topic: {}",
            ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG);
        // Wrap in DataException for KCv3 compatibility while preserving original exception
        DataException wrappedException =
            new DataException("Error converting record: " + error.getMessage(), error);
        this.kafkaRecordErrorReporter.reportError(originalRecordForReporting, wrappedException);
      }
    } else {
      // Preserve the record in DLQ before failing the task
      if (isDLQTopicSet && kafkaRecordErrorReporter != null) {
        LOGGER.warn(
            "Routing failed record to DLQ topic before aborting task (errors.tolerance=none)");
        DataException wrappedException =
            new DataException("Error converting record: " + error.getMessage(), error);
        this.kafkaRecordErrorReporter.reportError(originalRecordForReporting, wrappedException);
      }
      final String errMsg =
          String.format(
              "Error inserting Records using Streaming API with msg:%s", error.getMessage());
      this.telemetryServiceV2.reportKafkaConnectFatalError(errMsg);
      throw new DataException(errMsg, error);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/TopicPartitionChannelInsertionException.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

/**
 * Class for exceptions that occur while interacting with Snowflake through Snowpipe Streaming.
 *
 * <p>Please note: This exception is translated from SFException when the Client SDK determines
 * that this is an invalid insert operation. (For instance, the clientSequencer is bumped up, but
 * we are still calling with an older clientSequencer number.)
 *
 * <p>Use this exception when a particular channel (Topic Partition) fails to insert rows into a
 * Snowflake table; in this case we will reopen the channel and try to insert the same rows again.
 *
 * <p>(Note: This exception is not used when the Streaming Snowpipe API returns an error in its
 * response.)
 */
public class TopicPartitionChannelInsertionException extends RuntimeException {
  /**
   * @param msg detail message passed to {@link RuntimeException}
   * @param t underlying cause passed to {@link RuntimeException}
   */
  public TopicPartitionChannelInsertionException(String msg, Throwable t) {
    super(msg, t);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/channel/TopicPartitionChannel.java
================================================
package com.snowflake.kafka.connector.internal.streaming.channel;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import java.util.concurrent.CompletableFuture;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * Contract of a channel associated with a single Kafka topic partition: it accepts sink records,
 * exposes channel names and the pipe name, processes channel status into a committable offset,
 * and can be closed asynchronously.
 */
public interface TopicPartitionChannel {
  /** Sentinel offset meaning that no offset token has been committed in Snowflake yet. */
  long NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE = -1L;

  /**
   * Inserts the record into buffer
   *
   * <p>Step 1: Initializes this channel by fetching the offsetToken from Snowflake for the first
   * time this channel/partition has received offset after start/restart.
   *
   * <p>Step 2: Decides whether given offset from Kafka needs to be processed and whether it
   * qualifies for being added into buffer.
   *
   * @param kafkaSinkRecord input record from Kafka
   * @param isFirstRowPerPartitionInBatch indicates whether the given record is the first record per
   *     partition in a batch
   * @return true if the record was processed (or legitimately skipped as a duplicate), false if
   *     recovery was triggered and the caller should stop feeding records to this partition for the
   *     remainder of the batch
   */
  boolean insertRecord(SinkRecord kafkaSinkRecord, boolean isFirstRowPerPartitionInBatch);

  /**
   * Asynchronously closes a channel associated to this partition. Any {@link SFException} occurred
   * is swallowed and a successful {@link CompletableFuture} is returned instead.
   */
  CompletableFuture<Void> closeChannelAsync();

  /**
   * A channel which is initializing will be skipped in put and preCommit.
   *
   * <p>The default implementation always returns {@code false}.
   */
  default boolean isInitializing() {
    return false;
  }

  /**
   * Blocks until channel initialization is complete.
   *
   * <p>The default implementation does nothing and returns immediately.
   */
  default void awaitInitialization() {}

  /**
   * Returns true if the channel is closed. The caller should handle the logic for reopening the
   * channel if it is closed.
   */
  boolean isChannelClosed();

  /** Returns the fully qualified channel name in the format of "db.schema.channel". */
  String getChannelNameFormatV1();

  /** Returns the simple (unqualified) channel name, as expected by the SDK batch status API. */
  String getChannelName();

  /**
   * Sets the latest consumer group offset for this channel.
   *
   * <p>NOTE(review): how implementations use this value (e.g. for telemetry or offset tracking)
   * is not visible from this interface - confirm against implementations.
   *
   * @param consumerOffset the latest consumer group offset
   */
  void setLatestConsumerGroupOffset(long consumerOffset);

  /**
   * Processes a channel status: logs it, checks for ingestion errors, updates offset tracking, and
   * returns the offset safe to commit to Kafka.
   *
   * <p>If the committed offset token is null (no data committed yet), returns {@link
   * #NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE}. Otherwise returns (committedOffset + 1) so that
   * Kafka resumes from the next record after a restart.
   *
   * <p>When {@code tolerateErrors} is false and new ingestion errors are detected, throws a
   * connector exception to fail the task.
   *
   * @param status the channel status, typically from a batch status call
   * @param tolerateErrors whether to tolerate ingestion errors (maps to {@code errors.tolerance})
   * @return the offset safe to commit to Kafka, or {@link #NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE}
   */
  long processChannelStatus(ChannelStatus status, boolean tolerateErrors);

  /** Returns the pipe name associated with this channel's SDK client. */
  String getPipeName();

  /**
   * Returns a future for waiting on the last processed record being committed.
   *
   * <p>The default implementation returns an already-completed future. NOTE(review): presumably
   * overriding implementations complete the future once the last processed record has been
   * committed in Snowflake - confirm against implementations.
   */
  default CompletableFuture<Void> waitForLastProcessedRecordCommitted() {
    return CompletableFuture.completedFuture(null);
  }

  /** Returns the telemetry status object tracked for this channel. */
  @VisibleForTesting
  SnowflakeTelemetryChannelStatus getSnowflakeTelemetryChannelStatus();
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/PeriodicTelemetryReporter.java
================================================
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

/**
 * Publishes channel status telemetry to Snowflake on a fixed schedule. The work runs on a
 * dedicated single-threaded scheduled executor whose thread is marked as a daemon.
 */
public final class PeriodicTelemetryReporter {

  private static final KCLogger LOGGER = new KCLogger(PeriodicTelemetryReporter.class.getName());

  /** Default period between two telemetry reports, in milliseconds. */
  public static final long DEFAULT_REPORT_INTERVAL_MS = 120 * 1000L;

  /** Exclusive upper bound of the random delay added before the first report, in milliseconds. */
  public static final long MAX_INITIAL_JITTER_MS = 10 * 1000L;

  private final SnowflakeTelemetryService telemetryService;
  private final Supplier<Map<String, TopicPartitionChannel>> channelsSupplier;
  private final String connectorName;
  private final String taskId;
  private final long reportIntervalMs;
  private final ScheduledExecutorService executor;

  /**
   * Creates a reporter that uses {@link #DEFAULT_REPORT_INTERVAL_MS} and takes the connector name
   * and task id from the given task configuration.
   */
  public PeriodicTelemetryReporter(
      SnowflakeTelemetryService telemetryService,
      Supplier<Map<String, TopicPartitionChannel>> channelsSupplier,
      SinkTaskConfig taskConfig) {
    this(
        telemetryService,
        channelsSupplier,
        taskConfig.getConnectorName(),
        taskConfig.getTaskId(),
        DEFAULT_REPORT_INTERVAL_MS);
  }

  /** Variant with an explicit report interval, exposed for tests. */
  @VisibleForTesting
  PeriodicTelemetryReporter(
      SnowflakeTelemetryService telemetryService,
      Supplier<Map<String, TopicPartitionChannel>> channelsSupplier,
      String connectorName,
      String taskId,
      long reportIntervalMs) {
    this.telemetryService = telemetryService;
    this.channelsSupplier = channelsSupplier;
    this.connectorName = connectorName;
    this.taskId = taskId;
    this.reportIntervalMs = reportIntervalMs;
    this.executor = Executors.newSingleThreadScheduledExecutor(this::newReporterThread);
  }

  /** Thread factory for the executor: a named daemon thread per connector and task. */
  private Thread newReporterThread(Runnable task) {
    final Thread worker = new Thread(task);
    worker.setName("snowflake-telemetry-reporter-" + connectorName + "-" + taskId);
    worker.setDaemon(true);
    return worker;
  }

  /**
   * Schedules the recurring report. The first run happens after one full interval plus a random
   * jitter below {@link #MAX_INITIAL_JITTER_MS}, to prevent thundering herd.
   */
  public void start() {
    final long jitterMs = ThreadLocalRandom.current().nextLong(0, MAX_INITIAL_JITTER_MS);
    final long firstRunDelayMs = reportIntervalMs + jitterMs;

    executor.scheduleAtFixedRate(
        this::reportChannelStatusTelemetry,
        firstRunDelayMs,
        reportIntervalMs,
        TimeUnit.MILLISECONDS);

    LOGGER.info(
        "Started periodic telemetry reporter with interval {} ms (initial delay {} ms including {}"
            + " ms jitter) for connector: {}, task: {}",
        reportIntervalMs,
        firstRunDelayMs,
        jitterMs,
        connectorName,
        taskId);
  }

  /**
   * Shuts the executor down, waiting up to five seconds for it to terminate before forcing the
   * shutdown. Does nothing when the executor has already been shut down.
   */
  public void stop() {
    if (executor.isShutdown()) {
      return;
    }

    LOGGER.info("Stopping telemetry reporter for connector: {}, task: {}", connectorName, taskId);
    executor.shutdown();
    try {
      final boolean terminated = executor.awaitTermination(5, TimeUnit.SECONDS);
      if (!terminated) {
        LOGGER.warn("Telemetry reporter did not terminate gracefully, forcing shutdown");
        executor.shutdownNow();
      }
    } catch (InterruptedException e) {
      LOGGER.warn("Interrupted while waiting for telemetry reporter to terminate");
      executor.shutdownNow();
      // Restore the interrupt flag for the caller.
      Thread.currentThread().interrupt();
    }
  }

  /**
   * Scheduled entry point: reports telemetry for every channel returned by the supplier. Any
   * exception is logged and not rethrown.
   */
  private void reportChannelStatusTelemetry() {
    try {
      final Map<String, TopicPartitionChannel> activeChannels = channelsSupplier.get();
      final boolean nothingToReport = activeChannels == null || activeChannels.isEmpty();
      if (nothingToReport) {
        LOGGER.info("No active channels to report telemetry for");
        return;
      }

      LOGGER.debug(
          "Reporting telemetry for {} active channels for connector: {}, task: {}",
          activeChannels.size(),
          connectorName,
          taskId);

      for (Map.Entry<String, TopicPartitionChannel> channelEntry : activeChannels.entrySet()) {
        reportChannelTelemetry(channelEntry.getKey(), channelEntry.getValue());
      }
    } catch (Exception e) {
      LOGGER.error("Error during periodic telemetry reporting: {}", e.getMessage());
    }
  }

  /**
   * Reports telemetry for a single channel when its status is present and non-empty. Failures
   * are logged per channel so that the remaining channels are still reported.
   */
  private void reportChannelTelemetry(String channelKey, TopicPartitionChannel channel) {
    try {
      final SnowflakeTelemetryChannelStatus status = channel.getSnowflakeTelemetryChannelStatus();
      if (status == null || status.isEmpty()) {
        return;
      }

      telemetryService.reportKafkaPartitionUsage(status, false);
      LOGGER.trace("Reported telemetry for channel: {}", channelKey);
    } catch (Exception e) {
      LOGGER.warn(
          "Failed to report telemetry for channel: {}, error: {}", channelKey, e.getMessage());
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/SnowflakeTelemetryChannelCreation.java
================================================
/*
 * Copyright (c) 2023 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TABLE_NAME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME;

import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryBasicInfo;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;

/**
 * Telemetry record of type {@code KAFKA_CHANNEL_START}, sent only once when a channel starts. No
 * concurrent modification is made on this object, thus no lock is required.
 */
public class SnowflakeTelemetryChannelCreation extends SnowflakeTelemetryBasicInfo {
  /** Start time of the channel. */
  private final long tpChannelCreationTime;

  /** Name of the topic partition channel. */
  private final String tpChannelName;

  /** Whether the channel is reusing an existing table; not written by {@link #dumpTo}. */
  private boolean isReuseTable = false;

  /**
   * @param tableName table the channel belongs to
   * @param channelName name of the channel
   * @param startTime start time of the channel
   */
  public SnowflakeTelemetryChannelCreation(
      final String tableName, final String channelName, final long startTime) {
    super(tableName, SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_START);
    this.tpChannelCreationTime = startTime;
    this.tpChannelName = channelName;
  }

  /** Writes the table name, channel name and channel creation time into {@code msg}. */
  @Override
  public void dumpTo(ObjectNode msg) {
    // Identity of the channel first, then its creation time.
    msg.put(TABLE_NAME, this.tableName);
    msg.put(TOPIC_PARTITION_CHANNEL_NAME, this.tpChannelName);
    msg.put(TOPIC_PARTITION_CHANNEL_CREATION_TIME, this.tpChannelCreationTime);
  }

  /** Not supported for this telemetry type; always throws {@link IllegalStateException}. */
  @Override
  public boolean isEmpty() {
    final String typeName = this.getClass().getSimpleName();
    throw new IllegalStateException("Empty function doesnt apply to:" + typeName);
  }

  /** Records whether the channel is reusing an existing table. */
  public void setReuseTable(boolean reuseTable) {
    this.isReuseTable = reuseTable;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/SnowflakeTelemetryChannelStatus.java
================================================
/*
 * Copyright (c) 2023 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.channelMetricName;
import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.MetricsUtil;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryBasicInfo;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;

/**
 * Extension of {@link SnowflakeTelemetryBasicInfo} class used to send data to snowflake when the
 * TopicPartitionChannel closes. Also creates and registers various metrics with JMX
 *
 * <p>Most of the data sent to Snowflake is aggregated data.
 */
public class SnowflakeTelemetryChannelStatus extends SnowflakeTelemetryBasicInfo {
  public static final long NUM_METRICS = 4; // update when new metrics are added

  static final String CHANNEL_RECOVERY_COUNT = "channel-recovery-count";

  // channel properties
  private final String connectorName;
  private final String channelName;
  private final Optional<MetricsJmxReporter> metricsJmxReporter;
  private final long channelCreationTime;

  // offsets
  private final AtomicLong offsetPersistedInSnowflake;
  private final AtomicLong processedOffset;
  private final AtomicLong latestConsumerOffset;

  // channel recovery counter (always tracked; also registered as JMX gauge if enabled)
  private final AtomicLong recoveryCount = new AtomicLong(0);

  // Aggregated count of client-side validation failures for this channel.
  // Reported in channel status telemetry on close, avoiding per-record telemetry overhead.
  private final AtomicLong validationFailureCount = new AtomicLong(0);

  // Count of records where errors were tolerated (errors.tolerance=all) instead of failing the
  // task.
  private final AtomicLong errorToleratedCount = new AtomicLong(0);

  // Whether client-side validation was silently disabled due to initialization failure.
  private volatile boolean validationDisabled = false;

  // Latest SDK-reported metrics, updated on each processChannelStatus call.
  // Using volatile (not AtomicLong) since these are set, never atomically incremented.
  private volatile long rowsInsertedCount;
  private volatile long rowsParsedCount;
  private volatile long rowsErrorCount;
  private volatile long serverAvgProcessingLatencyMs = -1;

  // SDK ChannelStatus identity and error fields, updated on each processChannelStatus call.
  private volatile String databaseName;
  private volatile String schemaName;
  private volatile String pipeName;
  private volatile String statusCode;
  private volatile String lastErrorTimestamp;
  private volatile String lastErrorOffsetTokenUpperBound;

  // Counts of SDK backpressure retries and channel-reopen fallbacks during appendRow.
  private final AtomicLong backpressureRetryCount = new AtomicLong(0);
  private final AtomicLong appendRowFallbackCount = new AtomicLong(0);
  private final AtomicLong schemaEvolutionFailureCount = new AtomicLong(0);

  private volatile String[] registeredMetricNames;

  /**
   * Creates a new object tracking {@link
   * com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel} metrics with
   * JMX and send telemetry data to snowflake
   *
   * @param tableName the table the channel is ingesting to
   * @param channelName the name of the TopicPartitionChannel to track
   * @param metricsJmxReporter JMX reporter; present enables channel-level metrics, empty disables
   */
  public SnowflakeTelemetryChannelStatus(
      final String tableName,
      final String connectorName,
      final String channelName,
      final long startTime,
      final Optional<MetricsJmxReporter> metricsJmxReporter,
      final AtomicLong offsetPersistedInSnowflake,
      final AtomicLong processedOffset,
      final AtomicLong latestConsumerOffset) {
    super(tableName, SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_USAGE);

    this.channelCreationTime = startTime;
    this.connectorName = connectorName;
    this.channelName = channelName;
    this.metricsJmxReporter = metricsJmxReporter;

    this.offsetPersistedInSnowflake = offsetPersistedInSnowflake;
    this.processedOffset = processedOffset;
    this.latestConsumerOffset = latestConsumerOffset;

    metricsJmxReporter.ifPresent(reporter -> registerChannelJMXMetrics(reporter));
  }

  @Override
  public boolean isEmpty() {
    // Check that all properties are still at the default value.
    return this.offsetPersistedInSnowflake.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
        && this.processedOffset.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
        && this.latestConsumerOffset.get() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
  }

  @Override
  public void dumpTo(ObjectNode msg) {
    msg.put(TelemetryConstants.TABLE_NAME, this.tableName);
    msg.put(TelemetryConstants.CONNECTOR_NAME, this.connectorName);
    msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME, this.channelName);

    msg.put(
        TelemetryConstants.OFFSET_PERSISTED_IN_SNOWFLAKE, this.offsetPersistedInSnowflake.get());
    msg.put(TelemetryConstants.PROCESSED_OFFSET, this.processedOffset.get());
    msg.put(TelemetryConstants.LATEST_CONSUMER_OFFSET, this.latestConsumerOffset.get());

    msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME, this.channelCreationTime);
    msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CLOSE_TIME, System.currentTimeMillis());
    msg.put(TelemetryConstants.VALIDATION_FAILURE_COUNT, this.validationFailureCount.get());
    msg.put(TelemetryConstants.ERROR_TOLERATED_COUNT, this.errorToleratedCount.get());
    msg.put(TelemetryConstants.CHANNEL_RECOVERY_COUNT, this.recoveryCount.get());
    msg.put(TelemetryConstants.VALIDATION_DISABLED, this.validationDisabled);
    msg.put(TelemetryConstants.ROWS_INSERTED_COUNT, this.rowsInsertedCount);
    msg.put(TelemetryConstants.ROWS_PARSED_COUNT, this.rowsParsedCount);
    msg.put(TelemetryConstants.ROWS_ERROR_COUNT, this.rowsErrorCount);
    msg.put(TelemetryConstants.SERVER_AVG_PROCESSING_LATENCY_MS, this.serverAvgProcessingLatencyMs);

    putIfNotNull(msg, TelemetryConstants.DATABASE_NAME, this.databaseName);
    putIfNotNull(msg, TelemetryConstants.SCHEMA_NAME, this.schemaName);
    putIfNotNull(msg, TelemetryConstants.PIPE_NAME, this.pipeName);
    putIfNotNull(msg, TelemetryConstants.STATUS_CODE, this.statusCode);
    putIfNotNull(msg, TelemetryConstants.LAST_ERROR_TIMESTAMP, this.lastErrorTimestamp);
    putIfNotNull(
        msg,
        TelemetryConstants.LAST_ERROR_OFFSET_TOKEN_UPPER_BOUND,
        this.lastErrorOffsetTokenUpperBound);
    msg.put(TelemetryConstants.BACKPRESSURE_RETRY_COUNT, this.backpressureRetryCount.get());
    msg.put(TelemetryConstants.APPEND_ROW_FALLBACK_COUNT, this.appendRowFallbackCount.get());
    msg.put(
        TelemetryConstants.SCHEMA_EVOLUTION_FAILURE_COUNT, this.schemaEvolutionFailureCount.get());
  }

  private void registerChannelJMXMetrics(MetricsJmxReporter reporter) {
    MetricRegistry currentMetricRegistry = reporter.getMetricRegistry();

    registeredMetricNames =
        new String[] {
          channelMetricName(
              this.channelName,
              MetricsUtil.OFFSET_SUB_DOMAIN,
              MetricsUtil.OFFSET_PERSISTED_IN_SNOWFLAKE),
          channelMetricName(
              this.channelName, MetricsUtil.OFFSET_SUB_DOMAIN, MetricsUtil.PROCESSED_OFFSET),
          channelMetricName(
              this.channelName, MetricsUtil.OFFSET_SUB_DOMAIN, MetricsUtil.LATEST_CONSUMER_OFFSET),
          channelMetricName(
              this.channelName, MetricsUtil.OFFSET_SUB_DOMAIN, CHANNEL_RECOVERY_COUNT),
        };

    @SuppressWarnings("unchecked")
    Gauge<Long>[] gauges =
        new Gauge[] {
          (Gauge<Long>) this.offsetPersistedInSnowflake::get,
          (Gauge<Long>) this.processedOffset::get,
          (Gauge<Long>) this.latestConsumerOffset::get,
          (Gauge<Long>) this.recoveryCount::get,
        };

    for (int i = 0; i < registeredMetricNames.length; i++) {
      try {
        currentMetricRegistry.register(registeredMetricNames[i], gauges[i]);
      } catch (IllegalArgumentException ex) {
        // Safe: channel registration is serialized per task within open()
        LOGGER.warn(
            "Metric already present for channel {}, replacing: {}",
            this.channelName,
            registeredMetricNames[i]);
        reporter.removeMetric(registeredMetricNames[i]);
        currentMetricRegistry.register(registeredMetricNames[i], gauges[i]);
      }
    }

    // JmxReporter is started once at task level (SnowflakeSinkTaskMetrics constructor).
    // Its MetricRegistryListener auto-registers new MBeans as metrics are added.
    // Calling start() per-channel would re-process ALL metrics: O(N) unregister + register.
  }

  /** Unregisters the JMX metrics if possible */
  public void tryUnregisterChannelJMXMetrics() {
    metricsJmxReporter.ifPresent(
        reporter -> {
          if (registeredMetricNames != null) {
            for (String name : registeredMetricNames) {
              reporter.removeMetric(name);
            }
          }
        });
  }

  /** Increments the channel recovery counter. Thread-safe. */
  public void incRecoveryCount() {
    this.recoveryCount.incrementAndGet();
  }

  /** Increments the validation failure counter. Thread-safe. */
  public void incValidationFailureCount() {
    this.validationFailureCount.incrementAndGet();
  }

  /** Increments the error-tolerated counter. Thread-safe. */
  public void incErrorToleratedCount() {
    this.errorToleratedCount.incrementAndGet();
  }

  /** Marks that client-side validation was silently disabled due to initialization failure. */
  public void setValidationDisabled() {
    this.validationDisabled = true;
  }

  /** Increments the backpressure retry counter. Thread-safe. */
  public void incBackpressureRetryCount() {
    this.backpressureRetryCount.incrementAndGet();
  }

  /** Increments the append-row fallback counter. Thread-safe. */
  public void incAppendRowFallbackCount() {
    this.appendRowFallbackCount.incrementAndGet();
  }

  /** Increments the schema evolution failure counter. Thread-safe. */
  public void incSchemaEvolutionFailureCount() {
    this.schemaEvolutionFailureCount.incrementAndGet();
  }

  /** Updates SDK-reported metrics from a ChannelStatus response. */
  public void updateFromChannelStatus(ChannelStatus status) {
    this.rowsInsertedCount = status.getRowsInsertedCount();
    this.rowsParsedCount = status.getRowsParsedCount();
    this.rowsErrorCount = status.getRowsErrorCount();
    this.serverAvgProcessingLatencyMs =
        status.getServerAvgProcessingLatency() != null
            ? status.getServerAvgProcessingLatency().toMillis()
            : -1;
    this.databaseName = status.getDatabaseName();
    this.schemaName = status.getSchemaName();
    this.pipeName = status.getPipeName();
    this.statusCode = status.getStatusCode() != null ? status.getStatusCode().toString() : null;
    this.lastErrorTimestamp =
        status.getLastErrorTimestamp() != null ? status.getLastErrorTimestamp().toString() : null;
    this.lastErrorOffsetTokenUpperBound = status.getLastErrorOffsetTokenUpperBound();
  }

  private static void putIfNotNull(ObjectNode msg, String key, String value) {
    if (value != null) {
      msg.put(key, value);
    }
  }

  @VisibleForTesting
  public long getOffsetPersistedInSnowflake() {
    return this.offsetPersistedInSnowflake.get();
  }

  @VisibleForTesting
  public long getProcessedOffset() {
    return this.processedOffset.get();
  }

  @VisibleForTesting
  public long getLatestConsumerOffset() {
    return this.latestConsumerOffset.get();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/telemetry/SnowflakeTelemetrySsv1Migration.java
================================================
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_CHANNEL_NAME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_MIGRATED_OFFSET;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_MIGRATION_MODE;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.SSV1_MIGRATION_OUTCOME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TABLE_NAME;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME;

import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryBasicInfo;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Locale;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;

/**
 * One-shot telemetry event of type {@code KAFKA_SSV1_MIGRATION}, sent when SSv1 offset migration
 * is attempted for a channel. Only emitted when the migration mode is not SKIP and the SSv2
 * channel has no committed offset yet.
 */
public class SnowflakeTelemetrySsv1Migration extends SnowflakeTelemetryBasicInfo {
  private final String channelName;
  private final String ssv1ChannelName;
  private final Ssv1MigrationMode migrationMode;
  private final Ssv1MigrationResponse response;

  /**
   * @param tableName table the channel belongs to
   * @param channelName name of the channel the event is reported for
   * @param ssv1ChannelName name of the SSv1 channel
   * @param migrationMode migration mode that was applied
   * @param response response of the migration attempt
   */
  public SnowflakeTelemetrySsv1Migration(
      String tableName,
      String channelName,
      String ssv1ChannelName,
      Ssv1MigrationMode migrationMode,
      Ssv1MigrationResponse response) {
    super(tableName, SnowflakeTelemetryService.TelemetryType.KAFKA_SSV1_MIGRATION);
    this.channelName = channelName;
    this.ssv1ChannelName = ssv1ChannelName;
    this.migrationMode = migrationMode;
    this.response = response;
  }

  /**
   * Writes the table and channel names, the lower-cased migration mode, the derived outcome and,
   * when present, the migrated offset into {@code msg}.
   */
  @Override
  public void dumpTo(ObjectNode msg) {
    msg.put(TABLE_NAME, this.tableName);
    msg.put(TOPIC_PARTITION_CHANNEL_NAME, this.channelName);
    msg.put(SSV1_CHANNEL_NAME, this.ssv1ChannelName);

    final String modeLabel = this.migrationMode.name().toLowerCase(Locale.ROOT);
    msg.put(SSV1_MIGRATION_MODE, modeLabel);

    final String outcome = deriveOutcome();
    msg.put(SSV1_MIGRATION_OUTCOME, outcome);

    final Long migratedOffset = this.response.getMigratedOffset();
    if (migratedOffset != null) {
      msg.put(SSV1_MIGRATED_OFFSET, migratedOffset);
    }
  }

  /**
   * Maps the migration response to an outcome label: a migrated offset wins, then a found SSv1
   * channel without offset, otherwise "not found" with a dedicated label for STRICT mode.
   */
  private String deriveOutcome() {
    if (response.getMigratedOffset() != null) {
      return "migrated";
    }
    if (response.isSsv1ChannelFound()) {
      return "ssv1_no_offset";
    }
    if (migrationMode == Ssv1MigrationMode.STRICT) {
      return "ssv1_not_found_strict";
    }
    return "ssv1_not_found";
  }

  /** Not supported for this telemetry type; always throws {@link IllegalStateException}. */
  @Override
  public boolean isEmpty() {
    final String typeName = this.getClass().getSimpleName();
    throw new IllegalStateException("isEmpty does not apply to " + typeName);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/AppendRowWithFallbackPolicy.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import com.snowflake.ingest.streaming.SFException;
import com.snowflake.kafka.connector.internal.KCLogger;
import dev.failsafe.Failsafe;
import dev.failsafe.Fallback;
import dev.failsafe.function.CheckedRunnable;
import java.time.Duration;

/**
 * Policy class that encapsulates Failsafe logic for insert row operations with channel reopening
 * fallback functionality.
 *
 * <p>This class provides a clean interface to execute append row operations with automatic channel
 * recovery on non-retryable {@link SFException}. For retryable backpressure errors, it throws
 * {@link BackpressureException} to signal the batch-level insert loop to abandon the batch and
 * rewind offsets.
 */
class AppendRowWithFallbackPolicy {

  private static final KCLogger LOGGER = new KCLogger(AppendRowWithFallbackPolicy.class.getName());

  /** Delay before fallback attempt (channel reopening). */
  private static final Duration FALLBACK_DELAY = Duration.ofMillis(500);

  /** Random jitter added to fallback delays to prevent retry storms. */
  private static final Duration JITTER_DURATION = Duration.ofMillis(200);

  /**
   * Executes the given action after a delay with jitter to prevent retry storms.
   *
   * <p>If the thread is interrupted before the action has completed, the interrupt flag is
   * restored and a {@link RuntimeException} is thrown. Returning normally in that case would make
   * {@link #executeWithFallback} report that recovery ran even though the action never did.
   *
   * @param action the action to execute after the delay
   * @param channelName the channel name for logging purposes
   * @throws SFException unchanged, if the action throws it
   * @throws RuntimeException wrapping any other {@link Exception} thrown by the action, or the
   *     {@link InterruptedException} if the wait or the action was interrupted
   */
  private static void withDelay(CheckedRunnable action, String channelName) throws Throwable {
    try {
      long delayMs =
          FALLBACK_DELAY.toMillis() + (long) (Math.random() * JITTER_DURATION.toMillis());

      LOGGER.info("Delaying channel recovery by {}ms for channel: {}", delayMs, channelName);
      Thread.sleep(delayMs);

      LOGGER.info("Executing channel recovery for channel: {}", channelName);
      action.run();
    } catch (InterruptedException e) {
      // Preserve the interrupt for callers further up the stack, but do not pretend the recovery
      // happened: the action was skipped (or cut short), so the failure has to be surfaced.
      Thread.currentThread().interrupt();
      throw new RuntimeException(
          "Interrupted before channel recovery could complete for channel: " + channelName, e);
    } catch (SFException e) {
      // Re-throw SFException unchanged so Fallback can handle it properly
      throw e;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Executes the provided append row action with fallback handling.
   *
   * <p>On retryable {@link SFException} (backpressure errors), throws {@link BackpressureException}
   * to signal the batch-level insert loop that the batch should be abandoned and offsets should be
   * rewound. The channel remains valid.
   *
   * <p>On non-retryable {@link SFException}, it will execute the fallback supplier to reopen the
   * channel and reset offsets after a simple blocking delay with jitter to prevent retry storms.
   * If the fallback itself fails, or the thread is interrupted before it completes, the failure is
   * thrown to the caller instead of being reported as a completed recovery.
   *
   * @param appendRowAction the action to execute (typically channel.appendRow call)
   * @param fallbackSupplier the fallback action to execute on non-retryable failure (channel
   *     reopening logic)
   * @param channelName the channel name for logging purposes
   * @return true if the append row action succeeded normally, false if the fallback was executed
   *     (meaning the record was NOT inserted). When this returns false, callers must NOT advance
   *     processedOffset — the fallback's recovery logic has already reset offset state.
   * @throws BackpressureException if the append failed with a retryable backpressure error
   */
  static boolean executeWithFallback(
      CheckedRunnable appendRowAction,
      FallbackSupplierWithException fallbackSupplier,
      String channelName) {

    // Single-element array so the lambda below can flip the flag.
    boolean[] succeeded = {true};

    Fallback<Void> reopenChannelFallbackExecutor =
        Fallback.<Void>builder(
                executionAttemptedEvent -> {
                  Throwable lastException = executionAttemptedEvent.getLastException();

                  // Check if this is a retryable backpressure error
                  if (BackpressureException.isRetryableError(lastException)) {
                    // The channel is still valid; throw BackpressureException to signal
                    // the batch-level insert loop to abandon the batch and rewind offsets
                    throw new BackpressureException((SFException) lastException);
                  }

                  // Non-retryable error: proceed with channel reopening
                  succeeded[0] = false;
                  withDelay(() -> fallbackSupplier.execute(lastException), channelName);
                })
            .handle(SFException.class)
            .onFailedAttempt(
                event ->
                    LOGGER.warn(
                        "Failed Attempt to invoke the appendRow API for channel: {}. Exception: {}",
                        channelName,
                        event.getLastException()))
            .onFailure(
                event -> {
                  if (event.getException() instanceof BackpressureException) {
                    LOGGER.warn(
                        "Backpressure on channel {}: {}",
                        channelName,
                        event.getException().getMessage());
                  } else {
                    LOGGER.error(
                        "{} Failed to open Channel or fetching offsetToken for channel:{}."
                            + " Exception: {}",
                        "APPEND_ROW_FALLBACK",
                        channelName,
                        event.getException());
                  }
                })
            .build();

    Failsafe.with(reopenChannelFallbackExecutor).run(appendRowAction);
    return succeeded[0];
  }

  /**
   * Functional interface for fallback supplier that can throw exceptions.
   *
   * <p>This is used to encapsulate the channel reopening logic that needs to be executed when the
   * primary append row operation fails.
   */
  @FunctionalInterface
  interface FallbackSupplierWithException {
    /**
     * Executes the fallback logic.
     *
     * @param exception the original exception that caused the fallback to be triggered
     * @throws Exception if the fallback operation fails
     */
    void execute(Throwable exception) throws Exception;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/BackpressureException.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import com.google.common.base.Preconditions;
import com.snowflake.ingest.streaming.SFException;
import java.util.Set;

/**
 * Unchecked exception thrown when the Snowflake SDK signals backpressure due to memory saturation
 * or receiver overload.
 *
 * <p>This exception wraps {@link SFException} instances with specific error codes indicating
 * transient memory pressure. It signals to the batch-level insert loop that the current batch
 * should be abandoned and offsets should be rewound, but the channel remains valid and does not
 * need to be reopened.
 *
 * <p>Retryable error codes:
 *
 * <ul>
 *   <li>{@code ReceiverSaturated} - 429 Too Many Requests
 *   <li>{@code MemoryThresholdExceeded} - 429 Too Many Requests
 *   <li>{@code MemoryThresholdExceededInContainer} - 429 Too Many Requests
 *   <li>{@code HttpRetryableClientError} - 503 Service Unavailable
 * </ul>
 */
public class BackpressureException extends RuntimeException {

  /** Error code names that are treated as transient backpressure. */
  private static final Set<String> RETRYABLE_ERROR_CODE_NAMES =
      Set.of(
          // 429 Too Many Requests
          "ReceiverSaturated",
          "MemoryThresholdExceeded",
          "MemoryThresholdExceededInContainer",
          // 503 Service Unavailable
          "HttpRetryableClientError");

  /**
   * Wraps an {@link SFException} whose error code name marks it as backpressure.
   *
   * @param cause the SDK exception indicating backpressure; must not be null
   * @throws NullPointerException if {@code cause} is null
   * @throws IllegalArgumentException if {@code cause} does not carry a retryable error code name
   */
  public BackpressureException(SFException cause) {
    super(describe(cause), cause);
    Preconditions.checkArgument(
        isRetryableError(cause),
        "BackpressureException requires a retryable SFException, got: %s",
        cause.getErrorCodeName());
  }

  /** Builds the exception message, rejecting a null cause first. */
  private static String describe(SFException cause) {
    final SFException nonNullCause = Preconditions.checkNotNull(cause, "cause");
    return "SDK backpressure: " + nonNullCause.getErrorCodeName();
  }

  /**
   * Tells whether a throwable is an {@link SFException} carrying one of the retryable error code
   * names.
   *
   * @param e the exception to check (may be null)
   * @return {@code true} if {@code e} is an {@link SFException} with a retryable error code name;
   *     {@code false} otherwise
   */
  public static boolean isRetryableError(Throwable e) {
    return e instanceof SFException
        && RETRYABLE_ERROR_CODE_NAMES.contains(((SFException) e).getErrorCodeName());
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/ClientRecreationException.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import com.google.common.base.Preconditions;
import com.snowflake.ingest.streaming.SFException;
import java.util.Set;

/**
 * Unchecked exception thrown when the Snowflake SDK signals that the streaming client is in an
 * invalid state and must be recreated.
 *
 * <p>This exception wraps {@link SFException} instances with specific error codes indicating the
 * client itself is no longer usable. Unlike {@link BackpressureException} (where the channel
 * remains valid), this signals that the client and all its channels must be replaced.
 *
 * <p>Client-invalid error codes:
 *
 * <ul>
 *   <li>{@code InvalidClientError} - client marked invalid after a fatal internal error or pipe
 *       failover (409 Conflict)
 *   <li>{@code SfApiPipeFailedOverError} - HTTP 410 on any API call triggers client invalidation
 *   <li>{@code ClosedClientError} - client has been closed and cannot be reused (409 Conflict)
 * </ul>
 */
public class ClientRecreationException extends RuntimeException {

  private static final Set<String> CLIENT_INVALID_ERROR_CODE_NAMES =
      Set.of(
          // Client invalidated by SDK (pipe failover, auth refresh failure, etc.)
          "InvalidClientError",
          // HTTP 410 on open_channel, insert_rows, get_channel_status, or pipe refresh
          "SfApiPipeFailedOverError",
          // Client was closed
          "ClosedClientError");

  /**
   * Constructs a new {@code ClientRecreationException} wrapping the given {@link SFException}.
   *
   * @param cause the SDK exception indicating the client is invalid; must not be null
   * @throws NullPointerException if {@code cause} is null
   * @throws IllegalArgumentException if {@code cause} does not carry a client-invalid error code
   *     name
   */
  public ClientRecreationException(SFException cause) {
    super(
        "SDK client invalid: " + Preconditions.checkNotNull(cause, "cause").getErrorCodeName(),
        cause);
    Preconditions.checkArgument(
        isClientInvalidError(cause),
        "ClientRecreationException requires a client-invalid SFException, got: %s",
        cause.getErrorCodeName());
  }

  /**
   * Wraps the given throwable as a {@code ClientRecreationException} if it is a client-invalid
   * {@link SFException}. Avoids the need for callers to cast to {@code SFException} manually.
   *
   * @param e the exception to wrap
   * @return a new {@code ClientRecreationException} wrapping the cause
   * @throws IllegalArgumentException if {@code e} is null or is not a client-invalid {@link
   *     SFException}
   */
  public static ClientRecreationException wrap(Throwable e) {
    // The message argument is evaluated before checkArgument runs, so it must tolerate null;
    // otherwise a null input fails with NullPointerException instead of the documented
    // IllegalArgumentException.
    Preconditions.checkArgument(
        isClientInvalidError(e),
        "Cannot wrap non-client-invalid exception: %s",
        e == null ? "null" : e.getClass().getName());
    return new ClientRecreationException((SFException) e);
  }

  /**
   * Checks if the given throwable represents a client-level invalidation error that requires client
   * recreation.
   *
   * @param e the exception to check (may be null)
   * @return {@code true} if {@code e} is an {@link SFException} with a client-invalid error code
   *     name; {@code false} otherwise
   */
  public static boolean isClientInvalidError(Throwable e) {
    if (!(e instanceof SFException)) {
      return false;
    }
    return CLIENT_INVALID_ERROR_CODE_NAMES.contains(((SFException) e).getErrorCodeName());
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/ClientRecreator.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;

/**
 * Strategy for replacing an invalid {@link SnowflakeStreamingIngestClient} with a new one.
 *
 * <p>Implementations are expected to use compare-and-swap semantics: if the client has already
 * been replaced by another caller, the existing replacement should be returned without creating a
 * second one.
 *
 * <p>The interface has a single abstract method, so it can be supplied as a lambda or method
 * reference.
 */
@FunctionalInterface
public interface ClientRecreator {

  /**
   * Replaces the given invalid client with a new one.
   *
   * @param invalidClient the client instance that is no longer valid (identity-compared in the
   *     pool)
   * @return the new client, or the already-replaced client if another caller got there first
   */
  SnowflakeStreamingIngestClient recreate(SnowflakeStreamingIngestClient invalidClient);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/PipeNameProvider.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.Constants.DEFAULT_PIPE_NAME_SUFFIX;

/** Derives pipe names for Snowpipe Streaming v2. */
public final class PipeNameProvider {
  /**
   * Builds the default pipe name for a table by appending {@code DEFAULT_PIPE_NAME_SUFFIX} to the
   * table name.
   *
   * @param table the table name used as the prefix
   * @return the table name followed by the default pipe name suffix
   */
  public static String buildDefaultPipeName(String table) {
    final String defaultPipeName = table + DEFAULT_PIPE_NAME_SUFFIX;
    return defaultPipeName;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/SnowpipeStreamingPartitionChannel.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5027;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5028;
import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5030;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.OpenChannelResult;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.internal.DescribeTableRow;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.schemaevolution.SchemaEvolutionTargetItems;
import com.snowflake.kafka.connector.internal.schemaevolution.SnowflakeSchemaEvolutionService;
import com.snowflake.kafka.connector.internal.schemaevolution.ValidationResultMapper;
import com.snowflake.kafka.connector.internal.streaming.StreamingErrorHandler;
import com.snowflake.kafka.connector.internal.streaming.TopicPartitionChannelInsertionException;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetrySsv1Migration;
import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import com.snowflake.kafka.connector.internal.validation.ColumnSchema;
import com.snowflake.kafka.connector.internal.validation.RowValidator;
import com.snowflake.kafka.connector.internal.validation.ValidationResult;
import com.snowflake.kafka.connector.records.SnowflakeSinkRecord;
import java.time.Duration;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.sink.SinkRecord;

public class SnowpipeStreamingPartitionChannel implements TopicPartitionChannel {
  private static final KCLogger LOGGER =
      new KCLogger(SnowpipeStreamingPartitionChannel.class.getName());

  private volatile CompletableFuture<SnowflakeStreamingIngestChannel> channel;
  private final AtomicBoolean cancelled = new AtomicBoolean(false);

  private final PartitionOffsetTracker offsetTracker;

  // Tracks the initial error count when the channel was opened.
  // Used to detect NEW errors (current error count > initial error count) since error counts
  // are cumulative and don't reset when a channel is reopened.
  private long initialErrorCount = 0;

  /** Max consecutive channel recoveries before giving up and letting the task fail. */
  private static final int MAX_CONSECUTIVE_RECOVERIES = 5;

  /**
   * Consecutive recovery counter. Incremented each time the fallback reopens the channel, reset to
   * zero on every successful appendRow. If this reaches {@link #MAX_CONSECUTIVE_RECOVERIES} the
   * fallback re-throws to let the KC framework kill the task.
   */
  private int consecutiveRecoveryCount = 0;

  private final String channelName;

  private final SnowflakeTelemetryChannelStatus snowflakeTelemetryChannelStatus;

  private final SinkTaskConfig taskConfig;

  /**
   * Used to send telemetry to Snowflake. Currently, the TelemetryClient is created from a
   * Snowflake connection object, i.e. it is not a session-less client.
   */
  private final SnowflakeTelemetryService telemetryService;

  private final String pipeName;

  private final SnowflakeStreamingIngestClient streamingClient;
  private final ExecutorService openChannelIoExecutor;

  private final StreamingErrorHandler streamingErrorHandler;

  private final TaskMetrics taskMetrics;

  // SSv1 offset migration
  private final Optional<String> ssv1ChannelName;

  // Client-side validation fields
  private final SnowflakeConnectionService conn;
  private final String tableName;
  private volatile RowValidator rowValidator;
  private volatile SnowflakeSchemaEvolutionService schemaEvolutionService;
  private volatile Map<String, ColumnSchema> tableSchema;
  private final boolean shouldEvolveSchema;

  /**
   * Creates the partition channel and submits the channel open to {@code openChannelIoExecutor}.
   *
   * <p>The submitted task opens the channel, resolves the starting offset via {@code
   * parseOrMigrateOffsetToken} and seeds {@code offsetTracker} with it; the resulting future is
   * stored in {@code channel}. Afterwards client-side validation is initialized when the task
   * config requests it, and a channel-creation telemetry event is reported.
   *
   * @param tableName target table; used for validation, migration and telemetry
   * @param channelName name of the channel to open
   * @param pipeName pipe name; logged here and passed to the SSv1 offset migration call
   * @param streamingClient streaming ingest client this channel uses
   * @param openChannelIoExecutor executor on which the initial channel open runs
   * @param telemetryService sink for telemetry events
   * @param snowflakeTelemetryChannelStatus per-channel counters updated during ingestion
   * @param offsetTracker tracker that is initialized with the offset recovered from Snowflake
   * @param taskConfig task configuration (validation mode, metadata config, migration mode, ...)
   * @param streamingErrorHandler handler invoked for broken records
   * @param taskMetrics task-level metrics holder
   * @param shouldEvolveSchema stored schema-evolution flag
   * @param conn connection service; used for the SSv1 offset migration call
   * @param ssv1ChannelName SSv1 channel name; must be present when migration mode is not SKIP and
   *     the SSv2 channel has no offset yet
   */
  public SnowpipeStreamingPartitionChannel(
      String tableName,
      String channelName,
      String pipeName,
      SnowflakeStreamingIngestClient streamingClient,
      ExecutorService openChannelIoExecutor,
      SnowflakeTelemetryService telemetryService,
      SnowflakeTelemetryChannelStatus snowflakeTelemetryChannelStatus,
      PartitionOffsetTracker offsetTracker,
      SinkTaskConfig taskConfig,
      StreamingErrorHandler streamingErrorHandler,
      TaskMetrics taskMetrics,
      boolean shouldEvolveSchema,
      SnowflakeConnectionService conn,
      Optional<String> ssv1ChannelName) {
    this.channelName = channelName;
    this.pipeName = pipeName;
    this.streamingClient = streamingClient;
    this.openChannelIoExecutor = openChannelIoExecutor;
    this.taskConfig = taskConfig;
    this.streamingErrorHandler = streamingErrorHandler;
    this.taskMetrics = taskMetrics;
    this.telemetryService = telemetryService;
    this.snowflakeTelemetryChannelStatus = snowflakeTelemetryChannelStatus;
    this.offsetTracker = offsetTracker;
    this.shouldEvolveSchema = shouldEvolveSchema;
    this.conn = conn;
    this.tableName = tableName;
    this.ssv1ChannelName = ssv1ChannelName;

    LOGGER.info(
        "Initializing SnowpipeStreamingPartitionChannel channel: {}, pipe: {}",
        channelName,
        pipeName);

    // All fields read by the task below (taskConfig, conn, tableName, ssv1ChannelName, ...) are
    // assigned above, before the task is submitted.
    this.channel =
        CompletableFuture.supplyAsync(
            () -> {
              OpenChannelResult openChannelResult = openChannelForTable(channelName);
              long offsetRecoveredFromSnowflake = parseOrMigrateOffsetToken(openChannelResult);
              offsetTracker.initializeFromSnowflake(offsetRecoveredFromSnowflake);
              return openChannelResult.getChannel();
            },
            openChannelIoExecutor);

    if (taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE) {
      initializeValidation();
    } else {
      LOGGER.info("Client-side validation disabled for channel {}", channelName);
    }

    this.telemetryService.reportKafkaPartitionStart(
        new SnowflakeTelemetryChannelCreation(tableName, channelName, System.currentTimeMillis()));
  }

  /**
   * Hands the record to the ingestion path if the offset tracker accepts its offset; records the
   * tracker rejects are reported as handled without being sent.
   *
   * @param kafkaSinkRecord the record delivered by Kafka Connect
   * @param isFirstRowPerPartitionInBatch whether this is the first record of its partition in the
   *     current batch; forwarded to the offset tracker
   * @return {@code true} if the record was skipped, otherwise the result of the ingestion path
   */
  @Override
  public boolean insertRecord(SinkRecord kafkaSinkRecord, boolean isFirstRowPerPartitionInBatch) {
    final boolean accepted =
        offsetTracker.shouldProcess(kafkaSinkRecord.kafkaOffset(), isFirstRowPerPartitionInBatch);
    if (!accepted) {
      return true;
    }
    return transformAndSend(kafkaSinkRecord);
  }

  /**
   * Converts a Kafka record into a row and appends it to the channel.
   *
   * <p>Outcomes, in the order the code checks them:
   *
   * <ul>
   *   <li>Broken record: passed to the streaming error handler; if that returns, the tolerated
   *       error counter is incremented and the offset is recorded as processed.
   *   <li>Empty row: nothing is appended, the offset is recorded as processed.
   *   <li>Client-side validation failure: routed to the structural or plain validation handler,
   *       the offset is recorded as processed and the row is not appended.
   *   <li>Append succeeded: the offset is recorded as processed.
   *   <li>Append fell back to channel recovery: returns {@code false} without recording the
   *       offset.
   * </ul>
   *
   * @param kafkaSinkRecord the record to ingest
   * @return {@code false} only when the append fallback fired; {@code true} in every other case,
   *     including when a {@link TopicPartitionChannelInsertionException} is caught and logged
   * @throws BackpressureException rethrown after incrementing the backpressure retry counter
   */
  private boolean transformAndSend(SinkRecord kafkaSinkRecord) {
    try {
      final long kafkaOffset = kafkaSinkRecord.kafkaOffset();
      final SnowflakeSinkRecord record =
          SnowflakeSinkRecord.from(
              kafkaSinkRecord,
              taskConfig.getMetadataConfig(),
              taskConfig.isEnableSchematization(),
              taskConfig.isEnableColumnIdentifierNormalization());

      if (record.isBroken()) {
        LOGGER.debug("Broken record offset:{}, topic:{}", kafkaOffset, kafkaSinkRecord.topic());
        streamingErrorHandler.handleError(record.getBrokenReason(), kafkaSinkRecord);
        // If we reach here, the error was tolerated (errors.tolerance=all)
        snowflakeTelemetryChannelStatus.incErrorToleratedCount();
      } else {
        // If we reach here, it means we should ingest a record (possibly empty for tombstones)
        final Map<String, Object> row =
            record.getContentWithMetadata(
                taskConfig.getMetadataConfig().shouldIncludeAllMetadata());
        if (!row.isEmpty()) {
          // Validation is skipped when the validator is not available (rowValidator == null).
          if (taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE
              && rowValidator != null) {
            ValidationResult validationResult = rowValidator.validateRow(row);

            if (!validationResult.isValid()) {
              if (validationResult.hasStructuralError()) {
                handleStructuralError(validationResult, kafkaSinkRecord, record, row);
              } else {
                handleValidationError(validationResult, kafkaSinkRecord);
              }
              // The invalid row is not appended here; its offset still counts as processed.
              offsetTracker.recordProcessed(kafkaOffset);
              return true;
            }
          }

          if (!insertRowWithFallback(row, kafkaOffset)) {
            // Fallback fired: the record was NOT inserted, and the fallback's recovery
            // logic already reset processedOffset + rewound Kafka. Do NOT call
            // recordProcessed() here — that would advance processedOffset past the
            // recovery point and cause replayed offsets to be skipped. See SNOW-3344243.
            return false;
          }
        }
      }
      // Always update processedOffset after processing, even for broken records
      offsetTracker.recordProcessed(kafkaOffset);
      return true;
    } catch (BackpressureException ex) {
      // Count the backpressure event, then let the caller handle the exception.
      snowflakeTelemetryChannelStatus.incBackpressureRetryCount();
      throw ex;
    } catch (TopicPartitionChannelInsertionException ex) {
      // Suppressing the exception because other channels might still continue to ingest
      LOGGER.warn(
          "Failed to insert row for channel:{}. Will be retried by Kafka. Exception: {}",
          this.channelName,
          ex);
      return true;
    }
  }

  /**
   * Triggers a client flush and waits until the committed offset reported by Snowflake equals the
   * last offset appended through this channel.
   *
   * @return an already-completed future when no offset has been appended yet; otherwise a future
   *     running the flush-and-wait asynchronously
   */
  @Override
  public CompletableFuture<Void> waitForLastProcessedRecordCommitted() {
    final boolean nothingAppended =
        offsetTracker.getLastAppendRowsOffset() == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
    if (nothingAppended) {
      return CompletableFuture.completedFuture(null);
    }

    final Runnable flushAndAwaitCommit =
        () -> {
          LOGGER.info("Starting flush for channel: {}", this.channelName);

          streamingClient.initiateFlush();

          // Captured after the flush was initiated; the check below requires an exact match.
          final long targetOffset = offsetTracker.getLastAppendRowsOffset();
          WaitForLastOffsetCommittedPolicy.getPolicy(
              () -> {
                final long committedOffset = fetchLatestCommittedOffsetFromSnowflake();
                if (committedOffset != targetOffset) {
                  throw ERROR_5027.getException();
                }
                return true;
              });

          LOGGER.info("Completed flush for channel: {}", this.channelName);
        };

    return CompletableFuture.runAsync(flushAndAwaitCommit);
  }

  /**
   * Uses {@link AppendRowWithFallbackPolicy} to reopen the channel if insertRows throws {@link
   * SFException}.
   *
   * <p>We have deliberately not performed retries on insertRows because it might slow down overall
   * ingestion and introduce lags in committing offsets to Kafka.
   *
   * <p>Note that insertRows API does perform channel validation which might throw SFException if
   * channel is invalidated.
   *
   * <p>Recovery is bounded: each fallback invocation increments {@code consecutiveRecoveryCount},
   * every successful append resets it to zero, and once the count exceeds {@link
   * #MAX_CONSECUTIVE_RECOVERIES} the fallback throws {@link
   * TopicPartitionChannelInsertionException} instead of reopening the channel.
   *
   * @param row the row to append
   * @param offset Kafka offset of the row; passed to appendRow as a string and recorded in the
   *     offset tracker on success
   * @return true if the record was inserted successfully, false if the fallback fired (record was
   *     NOT inserted)
   */
  private boolean insertRowWithFallback(Map<String, Object> row, long offset) {
    return AppendRowWithFallbackPolicy.executeWithFallback(
        () -> {
          LOGGER.trace("Inserting transformed record: {}, offset: {}", row, offset);
          getChannel().appendRow(row, Long.toString(offset));
          offsetTracker.recordAppended(offset);
          // A successful append ends the current streak of recoveries.
          consecutiveRecoveryCount = 0;
        },
        (Throwable ex) -> {
          consecutiveRecoveryCount++;
          if (consecutiveRecoveryCount > MAX_CONSECUTIVE_RECOVERIES) {
            LOGGER.error(
                "Channel {} exceeded max consecutive recoveries ({}), giving up",
                this.channelName,
                MAX_CONSECUTIVE_RECOVERIES);
            throw new TopicPartitionChannelInsertionException(
                String.format(
                    "Channel %s failed after %d consecutive recovery attempts",
                    this.channelName, MAX_CONSECUTIVE_RECOVERIES),
                ex);
          }
          LOGGER.warn(
              "Channel {} recovery attempt {}/{}",
              this.channelName,
              consecutiveRecoveryCount,
              MAX_CONSECUTIVE_RECOVERIES);
          reopenChannel("APPEND_ROW_FALLBACK");
          snowflakeTelemetryChannelStatus.incAppendRowFallbackCount();
        },
        this.channelName);
  }

  /**
   * Closes the given channel without waiting for buffered data to flush.
   *
   * @param channel the channel to close
   * @throws RuntimeException wrapping the {@link TimeoutException} if the close call times out;
   *     the original exception is kept as the cause so its stack trace is not lost
   */
  private static void closeChannelWithoutFlushing(SnowflakeStreamingIngestChannel channel) {
    try {
      channel.close(false /* waitForFlush */, Duration.ZERO);
    } catch (TimeoutException e) {
      // This should never happen since we are not waiting for the channel to flush.
      throw new RuntimeException(
          String.format("Error closing channel %s: %s", channel.getChannelName(), e.getMessage()),
          e);
    }
  }

  /**
   * Fallback function to be executed when either of insertRows API or getOffsetToken sends
   * SFException.
   *
   * <p>Or, in other words, if streaming channel is invalidated, we will reopen the channel and
   * reset the offset state to the last committed offset in Snowflake.
   *
   * <p>The replacement future is built by chaining onto the current {@code channel} future: the
   * old channel is closed without flushing if it is still open, any failure of the previous stages
   * is logged and ignored, and then the channel is opened again. The offset resolved by {@link
   * #parseOrMigrateOffsetToken} is handed to {@code offsetTracker.resetAfterRecovery}. This method
   * does not join the new future itself; it only replaces the {@code channel} field.
   *
   * @param reason Reason for the channel recovery. Used for logging.
   */
  private void reopenChannel(final String reason) {
    LOGGER.warn("{} Channel {} recovery initiated", reason, this.channelName);

    if (this.snowflakeTelemetryChannelStatus != null) {
      this.snowflakeTelemetryChannelStatus.incRecoveryCount();
    }

    this.channel =
        this.channel
            // Close old channel before reopening a new one. We don't want to wait for the channel
            // to flush since it will be reopened right away and the in-progress data will be lost.
            .thenAccept(
                oldChannel -> {
                  if (!oldChannel.isClosed()) {
                    LOGGER.info(
                        "{} Channel {} is not closed before reopening", reason, this.channelName);
                    closeChannelWithoutFlushing(oldChannel);
                  }
                })
            // If the previous init failed, there is no old channel to close. This stage is also
            // reached when closing the old channel above throws.
            .exceptionally(
                initFailure -> {
                  LOGGER.warn(
                      "{} Channel {} had a failed initialization, skipping close: {}",
                      reason,
                      this.channelName,
                      initFailure.getMessage());
                  return null;
                })
            .thenApply(
                ignored -> {
                  OpenChannelResult openChannelResult = openChannelForTable(channelName);
                  final long offsetRecoveredFromSnowflake =
                      parseOrMigrateOffsetToken(openChannelResult);

                  if (offsetRecoveredFromSnowflake == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
                    LOGGER.info(
                        "{} Channel {} has no offset token. Will use consumer group offset,"
                            + " currently {}",
                        reason,
                        this.channelName,
                        offsetTracker.consumerGroupOffsetRef().get());
                  }

                  // The tracker decides how to resume from the recovered offset.
                  offsetTracker.resetAfterRecovery(offsetRecoveredFromSnowflake);

                  LOGGER.info(
                      "{} Channel {} recovery complete, offsetRecoveredFromSnowflake={}",
                      reason,
                      this.channelName,
                      offsetRecoveredFromSnowflake);

                  return openChannelResult.getChannel();
                });
  }

  /**
   * Resolves the offset this channel should resume from after an open-channel call.
   *
   * <p>The SSv2 offset token carried by {@code openChannelResult} is parsed first. If it is present,
   * or if the configured {@link Ssv1MigrationMode} is {@code SKIP}, that value is returned
   * unchanged. Otherwise the SSv1 channel is consulted through {@code
   * conn.migrateSsv1ChannelOffset}, the outcome is reported to telemetry, and in {@code STRICT}
   * mode a missing SSv1 channel aborts with a {@link ConnectException}.
   *
   * <p>Shared by the initial channel open (constructor) and {@link #reopenChannel} so both paths
   * apply the same migration rules.
   *
   * @param openChannelResult result of the open-channel call; its status carries the SSv2 token
   * @return the migrated SSv1 offset when one was obtained, otherwise the parsed SSv2 offset
   *     (which may be {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE})
   * @throws IllegalStateException if migration is enabled but no SSv1 channel name is available
   * @throws ConnectException if the SSv2 token is not a parsable long, or if the SSv1 channel is
   *     not found while the migration mode is {@code STRICT}
   */
  private long parseOrMigrateOffsetToken(OpenChannelResult openChannelResult) {
    final String ssv2Token = openChannelResult.getChannelStatus().getLatestCommittedOffsetToken();
    final long ssv2Offset = parseOffsetToken(ssv2Token, channelName);
    LOGGER.info("Channel {} has SSv2 offset token {}", channelName, ssv2Offset);

    // SSv1 is only consulted for a first-time migration: once SSv2 has its own committed offset it
    // is authoritative, and SKIP mode disables the lookup altogether.
    final boolean ssv2HasOffset = ssv2Offset != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
    if (ssv2HasOffset || taskConfig.getSsv1MigrationMode() == Ssv1MigrationMode.SKIP) {
      return ssv2Offset;
    }

    final String legacyChannel =
        ssv1ChannelName.orElseThrow(
            () ->
                new IllegalStateException(
                    "ssv1ChannelName must be present when migration mode is "
                        + taskConfig.getSsv1MigrationMode()));

    // migrateSsv1ChannelOffset calls SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET which:
    //   - returns ssv1ChannelFound=false if the SSv1 channel doesn't exist
    //   - returns ssv1ChannelFound=true, migratedOffset=null if found but no committed offset
    //   - returns ssv1ChannelFound=true, migratedOffset=N on success (also writes to SSv2 in FDB)
    //   - THROWS for SQL/network errors; that exception is deliberately not caught here, because
    //     falling through to the consumer group offset could cause duplicates
    final Ssv1MigrationResponse migration =
        conn.migrateSsv1ChannelOffset(tableName, legacyChannel, channelName, pipeName);
    final Long migratedOffset = migration.getMigratedOffset();

    long resumeOffset = ssv2Offset;
    if (migratedOffset == null) {
      if (migration.isSsv1ChannelFound()) {
        LOGGER.info(
            "SSv1 channel {} exists but has no committed offset for SSv2 channel {}",
            legacyChannel,
            channelName);
      } else {
        LOGGER.info("SSv1 channel {} not found for SSv2 channel {}", legacyChannel, channelName);
      }
    } else {
      resumeOffset = migratedOffset;
      LOGGER.info(
          "SSv2 channel {} has no offset yet, migrating SSv1 offset for {}: {}",
          channelName,
          legacyChannel,
          resumeOffset);
    }

    // Telemetry is emitted for every migration attempt, before the strict-mode check can throw.
    telemetryService.reportSsv1Migration(
        new SnowflakeTelemetrySsv1Migration(
            tableName, channelName, legacyChannel, taskConfig.getSsv1MigrationMode(), migration));

    final boolean missingInStrictMode =
        !migration.isSsv1ChannelFound()
            && taskConfig.getSsv1MigrationMode() == Ssv1MigrationMode.STRICT;
    if (missingInStrictMode) {
      throw new ConnectException(
          "Snowpipe Streaming Classic channel "
              + legacyChannel
              + " not found but the offset token migration mode is set to 'strict'. This can"
              + " happen if new topics are added after migrating from version 3 of the"
              + " connector or if an incorrect value is provided for "
              + SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME
              + " or the connector name. Validate your settings or set "
              + SNOWFLAKE_SSV1_OFFSET_MIGRATION
              + " to 'best_effort' or 'skip' to fall through to the Kafka consumer group"
              + " offset.");
    }

    return resumeOffset;
  }

  /**
   * Converts an offset token string into its numeric value.
   *
   * @param offsetToken the token to convert; {@code null} means no token is registered
   * @param channelNameForLogging channel name included in the error log on failure
   * @return the numeric offset, or {@link
   *     TopicPartitionChannel#NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} when {@code offsetToken} is
   *     {@code null}
   * @throws ConnectException wrapping the {@link NumberFormatException} when the token is non-null
   *     but cannot be parsed as a long
   */
  @VisibleForTesting
  static long parseOffsetToken(String offsetToken, String channelNameForLogging) {
    long parsedOffset = NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
    if (offsetToken != null) {
      try {
        parsedOffset = Long.parseLong(offsetToken);
      } catch (NumberFormatException parseFailure) {
        LOGGER.error(
            "The offsetToken string does not contain a parsable long:{} for channel:{}",
            offsetToken,
            channelNameForLogging);
        throw new ConnectException(parseFailure);
      }
    }
    return parsedOffset;
  }

  /**
   * Reads the latest committed offset token from this instance's channel and converts it to a
   * long.
   *
   * <p>Goes through {@link #getChannel()}, so it blocks until channel initialization has finished.
   *
   * @return {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} when the channel reports no token,
   *     otherwise the numeric value of the committed offset
   * @throws ConnectException if the token is present but not parsable as a long
   */
  private long fetchLatestCommittedOffsetFromSnowflake() {
    final SnowflakeStreamingIngestChannel initializedChannel = this.getChannel();
    return fetchLatestOffsetFromChannel(initializedChannel);
  }

  /**
   * Reads the latest committed offset token from {@code channel}, logs it, and parses it.
   *
   * @param channel the SDK channel to query
   * @return the parsed offset, or {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} when the token is
   *     {@code null}
   * @throws ConnectException if the token is present but not parsable as a long
   */
  private static long fetchLatestOffsetFromChannel(SnowflakeStreamingIngestChannel channel) {
    final String committedToken = channel.getLatestCommittedOffsetToken();
    LOGGER.info(
        "Fetched offsetToken for channelName:{}, offset:{}",
        channel.getChannelName(),
        committedToken);
    final long committedOffset = parseOffsetToken(committedToken, channel.getChannelName());
    return committedOffset;
  }

  /**
   * Loads the target table's column metadata and (re)builds the client-side row validator.
   *
   * <p>The table is described through {@code conn.describeTable}; each returned row becomes a
   * {@link ColumnSchema} keyed by column name. The resulting schema is checked with {@code
   * RowValidator.validateSchema} before a new {@link RowValidator} and {@link
   * SnowflakeSchemaEvolutionService} are installed.
   *
   * <p>Validation is treated as best-effort: if the table is not found, or if any exception is
   * thrown along the way, a warning is logged, the telemetry status is flagged via {@code
   * setValidationDisabled()}, and {@code rowValidator} is cleared. This method never throws.
   *
   * <p>This method is also invoked by {@link #refreshTableSchema()}, so it may run when a validator
   * from an earlier call is already installed.
   */
  private void initializeValidation() {
    try {
      Optional<List<DescribeTableRow>> describeResult = conn.describeTable(tableName);
      if (!describeResult.isPresent()) {
        LOGGER.warn(
            "Table {} not found during validation initialization. "
                + "Client-side validation will be disabled for channel {}",
            tableName,
            channelName);
        this.snowflakeTelemetryChannelStatus.setValidationDisabled();
        // Clear a validator left over from an earlier successful call so that the state matches
        // the "validation disabled" message above and the failure branch below.
        this.rowValidator = null;
        return;
      }

      this.tableSchema = new HashMap<>();
      for (DescribeTableRow row : describeResult.get()) {
        ColumnSchema colSchema =
            ColumnSchema.fromDescribeTableFields(
                row.getColumn(),
                row.getType(),
                row.getNullable(),
                row.hasDefault(),
                row.isAutoincrement());
        this.tableSchema.put(row.getColumn(), colSchema);
      }

      // May throw; the catch block below turns that into "validation disabled".
      RowValidator.validateSchema(this.tableSchema);

      this.rowValidator = new RowValidator(this.tableSchema);
      this.schemaEvolutionService = new SnowflakeSchemaEvolutionService(conn);

      LOGGER.info(
          "Client-side validation enabled for channel {}. Table {} has {} columns,"
              + " enableSchematization={}",
          channelName,
          tableName,
          this.tableSchema.size(),
          taskConfig.isEnableSchematization());
    } catch (Exception e) {
      // Deliberately broad: a validation setup failure must not fail the channel.
      LOGGER.warn(
          "Failed to initialize client-side validation for channel {}. "
              + "Validation will be disabled. Error: {}",
          channelName,
          e.getMessage());
      this.snowflakeTelemetryChannelStatus.setValidationDisabled();
      this.rowValidator = null;
    }
  }

  /**
   * Reloads the table schema by re-running {@link #initializeValidation()}, which rebuilds the
   * validator state from a fresh describe-table call.
   */
  private void refreshTableSchema() {
    this.initializeValidation();
  }

  /**
   * Reports a value-level validation failure for a single record.
   *
   * <p>Logs the failure when the error handler has error logging enabled, bumps the
   * validation-failure counter, and hands a {@link DataException} describing the failure to the
   * streaming error handler. The error-tolerated counter is incremented only after the handler
   * returns, so it is skipped if the handler throws.
   *
   * @param result the failed validation result (column name and value error are reported)
   * @param originalRecordForReporting the Kafka record passed to the error handler
   */
  private void handleValidationError(
      ValidationResult result, SinkRecord originalRecordForReporting) {
    final boolean logErrors = streamingErrorHandler.isLogErrors();
    if (logErrors) {
      LOGGER.warn(
          "Client-side validation failure [{}] channel={}, column={}, error={}, offset={}",
          result.getErrorType(),
          channelName,
          result.getColumnName(),
          result.getValueError(),
          originalRecordForReporting.kafkaOffset());
    }

    snowflakeTelemetryChannelStatus.incValidationFailureCount();

    final DataException validationFailure =
        new DataException(
            String.format(
                "Validation failed for column %s: %s",
                result.getColumnName(), result.getValueError()));
    streamingErrorHandler.handleError(validationFailure, originalRecordForReporting);
    snowflakeTelemetryChannelStatus.incErrorToleratedCount();
  }

  /**
   * Handles a record whose structure does not match the table (extra columns, missing or null
   * NOT NULL columns).
   *
   * <p>When {@code shouldEvolveSchema} is false the record is passed straight to the streaming
   * error handler. Otherwise schema evolution is attempted, the table schema is refreshed, and the
   * row is validated again: a now-valid row is inserted, anything else is reported to the error
   * handler as a post-evolution mismatch.
   *
   * @param result the structural validation result for the row
   * @param originalRecordForReporting the Kafka record used for error reporting and its offset
   * @param snowflakeRecord the converted record passed to the schema evolution service
   * @param row the row content to re-validate and insert after evolution
   * @throws SnowflakeKafkaConnectorException rethrown (after logging) when raised inside the
   *     evolution attempt
   */
  private void handleStructuralError(
      ValidationResult result,
      SinkRecord originalRecordForReporting,
      SnowflakeSinkRecord snowflakeRecord,
      Map<String, Object> row) {
    if (streamingErrorHandler.isLogErrors()) {
      LOGGER.warn(
          "Client-side structural validation failure [{}] channel={}, "
              + "hasSchemaEvolutionPermission={}, extraCols={}, missingNotNull={}, "
              + "nullNotNull={}, offset={}",
          result.getErrorType(),
          channelName,
          shouldEvolveSchema,
          result.getExtraColNames(),
          result.getMissingNotNullColNames(),
          result.getNullValueForNotNullColNames(),
          originalRecordForReporting.kafkaOffset());
    }

    // Without permission to evolve the schema the record can only be reported as an error.
    if (!shouldEvolveSchema) {
      snowflakeTelemetryChannelStatus.incValidationFailureCount();

      final String evolutionDisabledMsg =
          String.format(
              "Structural validation error (schema evolution disabled): extraCols=%s,"
                  + " missingNotNull=%s",
              result.getExtraColNames(), result.getMissingNotNullColNames());
      LOGGER.info("Routing to DLQ for channel {}: {}", channelName, evolutionDisabledMsg);
      final DataException evolutionDisabled = new DataException(evolutionDisabledMsg);
      streamingErrorHandler.handleError(evolutionDisabled, originalRecordForReporting);
      snowflakeTelemetryChannelStatus.incErrorToleratedCount();
      return;
    }

    try {
      LOGGER.info("Attempting schema evolution for channel {}, table {}", channelName, tableName);
      final SchemaEvolutionTargetItems evolutionTargets =
          ValidationResultMapper.mapToSchemaEvolutionItems(result, tableName);
      schemaEvolutionService.evolveSchemaIfNeeded(evolutionTargets, snowflakeRecord);

      refreshTableSchema();

      // The refresh may have left no validator (validation disabled); in that case the original
      // result is what gets reported below.
      final ValidationResult outcomeAfterEvolution;
      if (rowValidator == null) {
        outcomeAfterEvolution = result;
      } else {
        outcomeAfterEvolution = rowValidator.validateRow(row);
        if (outcomeAfterEvolution.isValid()) {
          insertRowWithFallback(row, originalRecordForReporting.kafkaOffset());
          return;
        }
      }

      snowflakeTelemetryChannelStatus.incValidationFailureCount();
      snowflakeTelemetryChannelStatus.incSchemaEvolutionFailureCount();

      final DataException stillMismatched =
          new DataException(
              String.format(
                  "Schema mismatch after evolution attempt: extraCols=%s, missingNotNull=%s",
                  outcomeAfterEvolution.getExtraColNames(),
                  outcomeAfterEvolution.getMissingNotNullColNames()));
      streamingErrorHandler.handleError(stillMismatched, originalRecordForReporting);
      snowflakeTelemetryChannelStatus.incErrorToleratedCount();
    } catch (SnowflakeKafkaConnectorException evolutionFailure) {
      LOGGER.error("Schema evolution failed for table {}", tableName, evolutionFailure);
      throw evolutionFailure;
    }
  }

  /**
   * Opens (or reopens) the streaming channel with the given name through {@code streamingClient}.
   *
   * <p>The call is refused when this instance has already been cancelled. The open call itself is
   * timed via {@code taskMetrics.timeChannelOpen()} and counted via {@code
   * taskMetrics.incChannelOpenCount()}; the count is incremented whether or not the returned status
   * is successful.
   *
   * <p>On success, {@code initialErrorCount} is set to the channel's current rows-error count so
   * that later status checks only react to errors that appear after this open.
   *
   * @param channelName name of the channel to open
   * @return the open-channel result (channel plus channel status) returned by the streaming client
   * @throws CancellationException if the channel was cancelled before opening
   * @throws RuntimeException the exception produced by {@code ERROR_5028} when the returned status
   *     code is anything other than {@code "SUCCESS"}, including a missing status code
   */
  private OpenChannelResult openChannelForTable(final String channelName) {
    if (cancelled.get()) {
      throw new CancellationException("Channel " + channelName + " was cancelled before opening");
    }

    final OpenChannelResult result;
    try (TaskMetrics.TimingContext ignored = taskMetrics.timeChannelOpen()) {
      result = streamingClient.openChannel(channelName, null);
    }

    taskMetrics.incChannelOpenCount();

    final ChannelStatus channelStatus = result.getChannelStatus();
    // Constant-first comparison: a null status code is treated as a failed open (ERROR_5028)
    // instead of surfacing as a NullPointerException.
    if (!"SUCCESS".equals(channelStatus.getStatusCode())) {
      LOGGER.error(
          "Failed to open channel: {}, error code: {}", channelName, channelStatus.getStatusCode());
      throw ERROR_5028.getException(
          String.format(
              "Failed to open channel %s. Error code %s",
              channelName, channelStatus.getStatusCode()));
    }

    // Capture the initial error count - errors are cumulative and don't reset on channel reopen.
    // We only want to fail on NEW errors that occur after the channel was opened.
    this.initialErrorCount = channelStatus.getRowsErrorCount();
    LOGGER.info(
        "Successfully opened streaming channel: {}, initialErrorCount: {}",
        channelName,
        this.initialErrorCount);
    return result;
  }

  /**
   * Marks this channel as cancelled and closes the underlying SDK channel once the channel
   * initialization future has completed.
   *
   * <p>If initialization succeeded, the channel is closed (unless it already reports closed),
   * partition usage is reported to telemetry, and the channel's JMX metrics are unregistered. If
   * initialization failed or was cancelled, nothing is closed and only the JMX metrics are
   * unregistered.
   *
   * <p>NOTE(review): the {@code exceptionally} stage is chained after the close stage, so it also
   * receives any exception rethrown by {@link #tryRecoverFromCloseChannelError}. Such an exception
   * is logged with the "failed during initialization" message and the returned future still
   * completes normally. Confirm whether close failures are meant to propagate to the caller.
   *
   * @return a future that completes when the close attempt has finished
   */
  @Override
  public CompletableFuture<Void> closeChannelAsync() {
    LOGGER.info("Closing streaming channel {}", this.channelName);
    // Set before waiting on the future so that an open that has not started yet is refused
    // (openChannelForTable checks this flag).
    cancelled.set(true);
    return channel
        .thenAccept(
            c -> {
              try {
                if (!c.isClosed()) {
                  closeChannelWithoutFlushing(c);
                }
                LOGGER.info("Successfully closed streaming channel {}", this.channelName);
              } catch (RuntimeException e) {
                // Swallows SFException, rethrows everything else.
                tryRecoverFromCloseChannelError(e);
              } finally {
                // Runs whether or not the close succeeded.
                this.telemetryService.reportKafkaPartitionUsage(
                    this.snowflakeTelemetryChannelStatus, true);
                this.snowflakeTelemetryChannelStatus.tryUnregisterChannelJMXMetrics();
              }
            })
        .exceptionally(
            e -> {
              // Failures usually arrive wrapped in a CompletionException; unwrap when possible.
              Throwable cause = e.getCause() != null ? e.getCause() : e;
              if (cause instanceof java.util.concurrent.CancellationException) {
                LOGGER.info(
                    "Channel {} was cancelled before opening, nothing to close", this.channelName);
              } else {
                LOGGER.warn(
                    "Channel {} failed during initialization, skipping close: {}",
                    this.channelName,
                    cause.getMessage());
              }
              this.snowflakeTelemetryChannelStatus.tryUnregisterChannelJMXMetrics();
              // Returning null completes the resulting future normally.
              return null;
            });
  }

  /**
   * Reacts to a failure raised while closing the streaming channel.
   *
   * <p>The failure is always reported to telemetry as a fatal error. An {@link SFException} is then
   * logged and swallowed; any other runtime exception is rethrown unchanged.
   *
   * @param e the exception thrown by the close attempt
   * @throws RuntimeException {@code e} itself, when it is not an {@link SFException}
   */
  private void tryRecoverFromCloseChannelError(RuntimeException e) {
    final String failureDescription =
        String.format(
            "Failure closing streaming channel %s, error: %s", this.channelName, e.getMessage());
    this.telemetryService.reportKafkaConnectFatalError(
        failureDescription, this.channelName, this.tableName, this.pipeName);

    // Only SFExceptions are swallowed: a channel-related error shouldn't fail a connector task,
    // because the channel is going to be reopened after a rebalance and the failed channel will
    // be invalidated anyway. Everything else goes back to the caller.
    if (!(e instanceof SFException)) {
      throw e;
    }

    LOGGER.warn(
        "Encountered {} when closing streaming channel {}: {}. Stack trace: {}",
        e.getClass(),
        this.channelName,
        e.getMessage(),
        Arrays.toString(e.getStackTrace()));
  }

  /**
   * Tells whether channel initialization is still in flight.
   *
   * @return {@code true} while the channel future has not completed (neither normally nor
   *     exceptionally), {@code false} afterwards
   */
  @Override
  public boolean isInitializing() {
    final boolean initializationFinished = this.channel.isDone();
    return !initializationFinished;
  }

  /**
   * Blocks the calling thread until the channel future completes.
   *
   * <p>Uses {@code join()} directly, so a failed initialization surfaces here as the unchecked
   * exception thrown by {@code join()} (no unwrapping is done, unlike {@link #getChannel()}).
   */
  @Override
  public void awaitInitialization() {
    this.channel.join();
  }

  /**
   * Reports whether the underlying SDK channel is closed, waiting for initialization if needed.
   *
   * @return the channel's own closed flag, or {@code true} when obtaining the channel or reading
   *     the flag throws a {@link RuntimeException} (for example after a failed initialization)
   */
  @Override
  public boolean isChannelClosed() {
    boolean closed;
    try {
      closed = this.getChannel().isClosed();
    } catch (RuntimeException initFailure) {
      // If the channel failed to initialize, we consider it closed.
      LOGGER.warn(
          "Channel {} failed to initialize, treating as closed: {}",
          channelName,
          initFailure.getMessage());
      closed = true;
    }
    return closed;
  }

  /**
   * Returns the fully qualified channel name as reported by the underlying SDK channel.
   *
   * <p>Blocks until channel initialization has completed, because it goes through {@link
   * #getChannel()}.
   *
   * @return the SDK channel's fully qualified name
   */
  @Override
  public String getChannelNameFormatV1() {
    final SnowflakeStreamingIngestChannel initializedChannel = this.getChannel();
    return initializedChannel.getFullyQualifiedChannelName();
  }

  /**
   * Returns the channel name held by this instance; does not wait for channel initialization.
   *
   * @return the channel name
   */
  @Override
  public String getChannelName() {
    return this.channelName;
  }

  /**
   * Waits for the channel initialization future and hands back the SDK channel it produced.
   *
   * <p>A {@link CompletionException} raised by the wait is unwrapped: a {@link RuntimeException}
   * cause is rethrown as is, any other cause is wrapped in a new {@link RuntimeException}.
   *
   * <p><b>Warning:</b> Do not call this from the channel construction future body (the lambda
   * passed to {@code CompletableFuture.supplyAsync} in the constructor). That future is what
   * populates {@code this.channel}; calling {@code join()} on it from within itself will deadlock.
   *
   * @return the initialized SDK channel
   */
  @VisibleForTesting
  public SnowflakeStreamingIngestChannel getChannel() {
    try {
      return this.channel.join();
    } catch (CompletionException wrapped) {
      final Throwable rootCause = wrapped.getCause();
      if (rootCause instanceof RuntimeException) {
        throw (RuntimeException) rootCause;
      }
      throw new RuntimeException(rootCause);
    }
  }

  /**
   * Exposes the telemetry status object that this channel updates.
   *
   * @return the telemetry channel status instance held by this channel
   */
  @Override
  @VisibleForTesting
  public SnowflakeTelemetryChannelStatus getSnowflakeTelemetryChannelStatus() {
    return snowflakeTelemetryChannelStatus;
  }

  /**
   * Forwards the given consumer group offset to this channel's offset tracker.
   *
   * @param consumerOffset the consumer group offset to record
   */
  @Override
  public void setLatestConsumerGroupOffset(long consumerOffset) {
    this.offsetTracker.setLatestConsumerGroupOffset(consumerOffset);
  }

  /**
   * Processes a channel status snapshot and derives the offset that is safe to commit to Kafka.
   *
   * <p>In order: the status is logged, newly appeared row errors are handled (which may throw),
   * telemetry is updated from the status, and the committed offset token is parsed and stored in
   * the offset tracker.
   *
   * @param status the channel status to process
   * @param tolerateErrors when {@code true}, new row errors are logged instead of thrown
   * @return {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} when the status carries no committed
   *     offset token, otherwise the committed offset plus one
   * @throws ConnectException if the committed offset token is not parsable as a long
   */
  @Override
  public long processChannelStatus(final ChannelStatus status, final boolean tolerateErrors) {
    logChannelStatus(status);

    handleChannelErrors(status, tolerateErrors);

    this.snowflakeTelemetryChannelStatus.updateFromChannelStatus(status);

    final String committedToken = status.getLatestCommittedOffsetToken();
    final long persistedOffset = parseOffsetToken(committedToken, this.channelName);
    offsetTracker.updatePersistedOffset(persistedOffset);

    if (persistedOffset != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
      // Kafka commits point at the next record to consume, hence the +1.
      final long nextOffsetToConsume = persistedOffset + 1;
      setLatestConsumerGroupOffset(nextOffsetToConsume);
      return nextOffsetToConsume;
    }
    return NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
  }

  /**
   * Returns the pipe name held by this channel.
   *
   * @return the pipe name
   */
  @Override
  public String getPipeName() {
    return this.pipeName;
  }

  /**
   * Writes every field of the given channel status to the log at INFO level, prefixed with this
   * instance's own channel name.
   *
   * @param status the channel status to log
   */
  private void logChannelStatus(final ChannelStatus status) {
    final String statusTemplate =
        "Channel status for channel=[{}]: databaseName=[{}], schemaName=[{}], pipeName=[{}],"
            + " channelName=[{}], statusCode=[{}], latestCommittedOffsetToken=[{}],"
            + " createdOn=[{}], rowsInsertedCount=[{}], rowsParsedCount=[{}],"
            + " rowsErrorCount=[{}], lastErrorOffsetTokenUpperBound=[{}],"
            + " lastErrorMessage=[{}], lastErrorTimestamp=[{}],"
            + " serverAvgProcessingLatency=[{}], lastRefreshedOn=[{}]";
    LOGGER.info(
        statusTemplate,
        this.channelName,
        status.getDatabaseName(),
        status.getSchemaName(),
        status.getPipeName(),
        status.getChannelName(),
        status.getStatusCode(),
        status.getLatestCommittedOffsetToken(),
        status.getCreatedOn(),
        status.getRowsInsertedCount(),
        status.getRowsParsedCount(),
        status.getRowsErrorCount(),
        status.getLastErrorOffsetTokenUpperBound(),
        status.getLastErrorMessage(),
        status.getLastErrorTimestamp(),
        status.getServerAvgProcessingLatency(),
        status.getLastRefreshedOn());
  }

  /**
   * Compares the status' cumulative row-error count with the baseline stored in {@code
   * initialErrorCount} and reacts to errors that appeared since that baseline.
   *
   * <p>When new errors exist, the baseline is moved up to the current count before the errors are
   * either logged ({@code tolerateErrors == true}) or reported to telemetry and thrown. When there
   * are no new errors but the cumulative count is positive, a debug line is logged.
   *
   * @param status the channel status carrying the cumulative error count and last-error details
   * @param tolerateErrors whether new errors should be logged rather than thrown
   * @throws RuntimeException the exception produced by {@code ERROR_5030} when new errors exist
   *     and {@code tolerateErrors} is {@code false}
   */
  private void handleChannelErrors(final ChannelStatus status, final boolean tolerateErrors) {
    final long totalErrors = status.getRowsErrorCount();
    // Error counts are cumulative and don't reset when a channel is reopened, so only the delta
    // against the count captured earlier matters.
    final long freshErrors = totalErrors - this.initialErrorCount;

    if (freshErrors <= 0) {
      if (totalErrors > 0) {
        LOGGER.debug(
            "Channel [{}] has {} pre-existing errors from before connector startup (no new errors)",
            this.channelName,
            totalErrors);
      }
      return;
    }

    final String errorMessage =
        String.format(
            "Channel [%s] has %d new errors (total: %d, initial: %d). Last error message: %s,"
                + " last error timestamp: %s, last error offset token upper bound: %s",
            this.channelName,
            freshErrors,
            totalErrors,
            this.initialErrorCount,
            status.getLastErrorMessage(),
            status.getLastErrorTimestamp(),
            status.getLastErrorOffsetTokenUpperBound());

    // Move the baseline first so the same errors are not reported again on the next check.
    this.initialErrorCount = totalErrors;

    if (tolerateErrors) {
      LOGGER.warn(errorMessage);
      return;
    }

    this.telemetryService.reportKafkaConnectFatalError(
        errorMessage, this.channelName, this.tableName, this.pipeName);
    throw ERROR_5030.getException(errorMessage);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/WaitForLastOffsetCommittedPolicy.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.internal.SnowflakeErrors.ERROR_5027;

import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import dev.failsafe.Failsafe;
import dev.failsafe.Fallback;
import dev.failsafe.RetryPolicy;
import dev.failsafe.function.CheckedSupplier;
import java.time.Duration;

/**
 * Runs an action under a Failsafe retry policy with exponential backoff, converting a final
 * failure into the exception produced by {@code ERROR_5027}.
 */
class WaitForLastOffsetCommittedPolicy {

  private static final KCLogger LOGGER =
      new KCLogger(WaitForLastOffsetCommittedPolicy.class.getName());

  /**
   * Executes {@code action}, retrying while it throws {@link SnowflakeKafkaConnectorException}.
   *
   * <p>Despite its name this method does not return a policy: it builds the fallback and retry
   * policies and runs the action under them immediately. The action's result is discarded.
   *
   * <p>The fallback applies to whatever failure remains after the retry policy, so it also fires
   * for exceptions the retry policy does not handle.
   *
   * @param action the operation to execute and retry
   * @throws RuntimeException the exception produced by {@code ERROR_5027} once the action has
   *     ultimately failed
   */
  static void getPolicy(CheckedSupplier<Object> action) {
    Fallback<Object> fallback =
        Fallback.ofException(
            e -> {
              // Pass the failure itself (not the Failsafe event object) as the trailing argument
              // so the cause of the final failure is attached to the log entry.
              LOGGER.error(
                  "Wait for the last offset to be commited - max retry attempts",
                  e.getLastException());
              throw ERROR_5027.getException();
            });

    RetryPolicy<Object> retryPolicy =
        RetryPolicy.builder()
            .handle(SnowflakeKafkaConnectorException.class)
            .withDelay(Duration.ofSeconds(1))
            .withBackoff(Duration.ofSeconds(1), Duration.ofSeconds(30), 1.5)
            .withJitter(Duration.ofMillis(100))
            .withMaxAttempts(10) // for some reason it has to be set as well
            .onRetry(
                event ->
                    LOGGER.info(
                        "Wait for the last offset to be commited retry no:{}, message:{}",
                        event.getAttemptCount(),
                        event.getLastException().getMessage()))
            .build();

    // The fallback is the outer policy: it only sees failures the retry policy gave up on.
    Failsafe.with(fallback).compose(retryPolicy).get(action);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/channel/PartitionOffsetTracker.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.channel;

import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;

import com.snowflake.kafka.connector.internal.KCLogger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkTaskContext;

/**
 * Tracks all offset state for a single partition channel. This is a passive state holder -- it
 * makes no network calls. Offsets are updated during channel init/recovery, record processing in
 * {@code put}, and when processing channel statuses in {@code preCommit}.
 *
 * <h3>Threading model</h3>
 *
 * Most methods are called from the Kafka Connect task thread, which is single-threaded per
 * partition ({@link #shouldProcess}, {@link #recordProcessed}, {@link #recordAppended}, {@link
 * #initializeFromSnowflake}, {@link #resetAfterRecovery}).
 *
 * <p>{@link #setLatestConsumerGroupOffset} may be called from a different thread, so its
 * set-if-greater logic uses a CAS loop for atomicity. The three AtomicLong fields use atomic types
 * for two reasons: (1) their refs are exposed for telemetry reads from other threads, and (2)
 * {@code currentConsumerGroupOffset} is written by both the task thread and {@link
 * #setLatestConsumerGroupOffset}. The remaining fields ({@code lastAppendRowsOffset}, {@code
 * needToSkipCurrentBatch}) are only accessed from the task thread and need no synchronization.
 */
public class PartitionOffsetTracker {

  private static final KCLogger LOGGER = new KCLogger(PartitionOffsetTracker.class.getName());

  private final TopicPartition topicPartition;
  private final SinkTaskContext sinkTaskContext;
  private final String channelName;

  // Offset persisted in Snowflake, determined from the insertRows API / fetchOffsetToken calls.
  private final AtomicLong offsetPersistedInSnowflake =
      new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);

  // KC-side processed offset. On creation set to Snowflake's committed offset, then updated on
  // each new row from KC. Ensures exactly-once semantics.
  private final AtomicLong processedOffset =
      new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);

  // Consumer group offset -- used for telemetry and as a fallback during recovery when Snowflake
  // has no committed offset.
  private final AtomicLong currentConsumerGroupOffset =
      new AtomicLong(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);

  // Last offset passed to appendRow -- used by flush to know when all data is committed.
  private long lastAppendRowsOffset = NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;

  // When true, leftover rows in the current batch are skipped because the channel was
  // invalidated and offsets were reset in Kafka.
  private boolean needToSkipCurrentBatch = false;

  /**
   * Creates a tracker with every offset set to {@code NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE}.
   *
   * @param topicPartition the partition whose consumer position this tracker resets
   * @param sinkTaskContext the task context used to reset the consumer position
   * @param channelName channel name used in log messages
   */
  public PartitionOffsetTracker(
      TopicPartition topicPartition, SinkTaskContext sinkTaskContext, String channelName) {
    this.topicPartition = topicPartition;
    this.sinkTaskContext = sinkTaskContext;
    this.channelName = channelName;
  }

  /**
   * Sets both persisted and processed offsets, and resets the Kafka consumer position.
   *
   * <p>The consumer position is only reset (to {@code committedOffset + 1}) when {@code
   * committedOffset} is a real offset; otherwise Kafka's own position is left untouched.
   *
   * @param committedOffset the offset committed in Snowflake, or {@code
   *     NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE} when there is none
   */
  public void initializeFromSnowflake(long committedOffset) {
    LOGGER.info(
        "Initializing offsetPersistedInSnowflake=[{}], channel=[{}]", committedOffset, channelName);
    this.offsetPersistedInSnowflake.set(committedOffset);

    LOGGER.info("Initializing processedOffset=[{}], channel=[{}]", committedOffset, channelName);
    this.processedOffset.set(committedOffset);

    resetKafkaOffset(committedOffset);
  }

  /**
   * Determines whether the given kafka offset should be processed, and manages batch-skip state.
   *
   * <p>The first offset ever seen also seeds {@code currentConsumerGroupOffset} if it is still
   * unset. The batch-skip flag set by {@link #resetAfterRecovery} is cleared on the first row of
   * the next batch.
   *
   * @param kafkaOffset offset of the record being considered
   * @param isFirstRowInBatch whether this record is the first one of the current batch
   * @return true if the record should be ingested, false if it should be skipped
   */
  public boolean shouldProcess(long kafkaOffset, boolean isFirstRowInBatch) {
    if (currentConsumerGroupOffset.compareAndSet(
        NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE, kafkaOffset)) {
      LOGGER.trace(
          "Setting currentConsumerGroupOffset=[{}], channel=[{}]", kafkaOffset, channelName);
    }

    if (isFirstRowInBatch) {
      needToSkipCurrentBatch = false;
    }

    if (needToSkipCurrentBatch) {
      LOGGER.info(
          "Ignore inserting offset:{} for channel:{} because we recently reset offset in"
              + " Kafka. currentProcessedOffset:{}",
          kafkaOffset,
          channelName,
          processedOffset.get());
      return false;
    }

    long currentProcessedOffset = this.processedOffset.get();
    // The next acceptable offset is the one right after the last processed offset.
    long nextExpectedOffset = currentProcessedOffset + 1;
    if (currentProcessedOffset == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
        || kafkaOffset >= nextExpectedOffset) {
      return true;
    }

    // Report the offset that would have been accepted (processed + 1), not the last processed
    // offset, so the message matches the acceptance rule above.
    LOGGER.warn(
        "Channel {} - skipping current record - expected offset {} but received {}. The"
            + " current offset stored in Snowflake: {}",
        channelName,
        nextExpectedOffset,
        kafkaOffset,
        offsetPersistedInSnowflake.get());
    return false;
  }

  /** Called after a record has been fully processed (inserted or reported as broken). */
  public void recordProcessed(long kafkaOffset) {
    this.processedOffset.set(kafkaOffset);
    LOGGER.trace("Setting processedOffset=[{}], channel=[{}]", kafkaOffset, channelName);
  }

  /** Called after a row has been successfully passed to appendRow. */
  public void recordAppended(long kafkaOffset) {
    this.lastAppendRowsOffset = kafkaOffset;
  }

  /**
   * Resets offset state after a channel recovery (reopen). Resets the Kafka consumer position and
   * marks the current batch for skipping so leftover rows are discarded.
   *
   * <p>If we don't get a valid offset token (because of a table recreation or channel inactivity),
   * the last known consumer group offset is used instead. If that is unknown as well, this method
   * returns without changing any state and we rely on Kafka to send us the correct offset.
   *
   * <p>The offset reset in Kafka is set to (offsetRecoveredFromSnowflake + 1) so that Kafka sends
   * offsets starting from the next unprocessed record, avoiding data loss.
   *
   * @param offsetRecoveredFromSnowflake the offset recovered from Snowflake after reopening
   */
  public void resetAfterRecovery(long offsetRecoveredFromSnowflake) {
    long consumerGroupOffset = currentConsumerGroupOffset.get();
    final long offsetToResetInKafka =
        offsetRecoveredFromSnowflake == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE
            ? consumerGroupOffset
            : offsetRecoveredFromSnowflake + 1L;

    if (offsetToResetInKafka == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
      return;
    }

    sinkTaskContext.offset(topicPartition, offsetToResetInKafka);

    this.offsetPersistedInSnowflake.set(offsetRecoveredFromSnowflake);
    LOGGER.info(
        "Reset channel metadata after recovery offsetPersistedInSnowflake=[{}], channel=[{}]",
        offsetRecoveredFromSnowflake,
        channelName);
    this.processedOffset.set(offsetRecoveredFromSnowflake);

    needToSkipCurrentBatch = true;
  }

  /**
   * Raises the tracked consumer group offset to {@code consumerOffset} if that is greater than the
   * current value; smaller or equal values are ignored. Implemented as a compare-and-set retry
   * loop so concurrent callers cannot lower the value.
   *
   * @param consumerOffset the candidate consumer group offset
   */
  public void setLatestConsumerGroupOffset(long consumerOffset) {
    long current;
    do {
      current = this.currentConsumerGroupOffset.get();
      if (consumerOffset <= current) {
        LOGGER.trace(
            "Not setting currentConsumerGroupOffset because consumerOffset=[{}] is <="
                + " currentConsumerGroupOffset=[{}] for channel=[{}]",
            consumerOffset,
            current,
            channelName);
        return;
      }
    } while (!this.currentConsumerGroupOffset.compareAndSet(current, consumerOffset));
    LOGGER.trace(
        "Setting currentConsumerGroupOffset=[{}], channel=[{}]", consumerOffset, channelName);
  }

  /**
   * Unconditionally overwrites the offset recorded as persisted in Snowflake. No other offset is
   * touched and the Kafka consumer position is not reset.
   *
   * @param offset the committed offset to record
   */
  public void updatePersistedOffset(long offset) {
    this.offsetPersistedInSnowflake.set(offset);
  }

  /** Returns the offset currently recorded as persisted in Snowflake. */
  public long getPersistedOffset() {
    return offsetPersistedInSnowflake.get();
  }

  /** Returns the offset of the last record marked as processed. */
  public long getProcessedOffset() {
    return processedOffset.get();
  }

  /** Returns the last offset recorded via {@link #recordAppended}. */
  public long getLastAppendRowsOffset() {
    return lastAppendRowsOffset;
  }

  // Expose AtomicLong refs for telemetry binding
  public AtomicLong persistedOffsetRef() {
    return offsetPersistedInSnowflake;
  }

  public AtomicLong processedOffsetRef() {
    return processedOffset;
  }

  public AtomicLong consumerGroupOffsetRef() {
    return currentConsumerGroupOffset;
  }

  /**
   * Points the Kafka consumer at {@code committedOffset + 1} when a committed offset exists;
   * otherwise only logs that Kafka's own position will be used.
   */
  private void resetKafkaOffset(long committedOffset) {
    if (committedOffset != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
      sinkTaskContext.offset(topicPartition, committedOffset + 1L);
    } else {
      LOGGER.info(
          "TopicPartitionChannel:{}, offset token is NULL, will rely on Kafka to send us the"
              + " correct offset instead",
          channelName);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientFactory.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClientFactory;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Static factory for Snowpipe Streaming ingest clients, shared by all connectors.
 *
 * <p>Client construction is delegated to a replaceable {@link StreamingClientSupplier} so that
 * tests can substitute a mock. Each created client gets a name made of the configured prefix and a
 * process-wide sequence number.
 */
public class StreamingClientFactory {

  // Sequence number appended to the client name prefix; shared by every caller of this class.
  private static final AtomicInteger createdClientId = new AtomicInteger(0);

  // Held in a mutable static so that tests can swap in a mocked supplier.
  private static volatile StreamingClientSupplier ingestClientSupplier =
      new StreamingClientSupplierImpl();

  /**
   * Replaces the supplier used to build clients. Intended for tests only.
   *
   * @param supplier the supplier to use from now on
   */
  public static void setStreamingClientSupplier(final StreamingClientSupplier supplier) {
    StreamingClientFactory.ingestClientSupplier = supplier;
  }

  /** Restores the default supplier. Intended for tests only. */
  public static void resetStreamingClientSupplier() {
    StreamingClientFactory.ingestClientSupplier = new StreamingClientSupplierImpl();
  }

  /**
   * Builds a new ingest client for the given pipe through the current supplier.
   *
   * @param pipeName name of the pipe the client will write to
   * @param config task configuration providing the database and schema names
   * @param streamingClientProperties client name prefix, properties and parameter overrides
   * @return the client returned by the supplier
   */
  static SnowflakeStreamingIngestClient createClient(
      final String pipeName,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties) {

    final String generatedClientName = clientName(streamingClientProperties);
    final String database = config.getSnowflakeDatabase();
    final String schema = config.getSnowflakeSchema();

    return ingestClientSupplier.get(
        generatedClientName, database, schema, pipeName, streamingClientProperties);
  }

  /** Produces the next client name: the configured prefix followed by the next sequence number. */
  private static String clientName(final StreamingClientProperties streamingClientProperties) {
    final String prefix = streamingClientProperties.clientNamePrefix;
    final int sequenceNumber = createdClientId.incrementAndGet();
    return prefix + sequenceNumber;
  }

  /** Default supplier: builds a real client through the SDK's client factory builder. */
  static final class StreamingClientSupplierImpl implements StreamingClientSupplier {
    @Override
    public SnowflakeStreamingIngestClient get(
        final String clientName,
        final String dbName,
        final String schemaName,
        final String pipeName,
        final StreamingClientProperties streamingClientProperties) {

      // Quote the pipe name to handle lowercase / special characters in the name.
      final String quotedPipeName = "\"" + pipeName + "\"";
      return SnowflakeStreamingIngestClientFactory.builder(
              clientName, dbName, schemaName, quotedPipeName)
          .setProperties(streamingClientProperties.clientProperties)
          .setParameterOverrides(streamingClientProperties.parameterOverrides)
          .build();
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPool.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Manages clients for a single connector. Tracks which tasks use which pipes and only closes
 * clients when no tasks are using them.
 *
 * <p>Client creation is dispatched to the connector's I/O thread pool so that multiple pipes can
 * initialize in parallel.
 *
 * <p>Thread safety is achieved via a single {@link ConcurrentHashMap} with per-key atomic {@code
 * compute()} calls — no explicit locking is needed. The actual blocking wait for client readiness
 * ({@code future.join()}) happens outside the atomic section so that other pipes can proceed in
 * parallel.
 */
public class StreamingClientPool {
  private static final KCLogger LOGGER = new KCLogger(StreamingClientPool.class.getName());

  private final String connectorName;

  /** Pipe name → shared client entry. Entries are added, replaced and removed via compute(). */
  private final ConcurrentHashMap<String, RefCountedClient> pipes = new ConcurrentHashMap<>();

  private final ExecutorService ioExecutor;

  /**
   * A client shared by one or more tasks. Holds a {@link CompletableFuture} so that client creation
   * can be kicked off asynchronously, allowing multiple pipes to initialize in parallel.
   */
  static class RefCountedClient {
    final CompletableFuture<SnowflakeStreamingIngestClient> clientFuture;
    private final Set<String> taskIds = ConcurrentHashMap.newKeySet();

    /**
     * Starts creating the client on {@code executor}; the constructor itself does not wait for the
     * client. Creation time is recorded through {@code taskMetrics.timeSdkClientCreate()}.
     */
    RefCountedClient(
        String pipeName,
        String connectorName,
        SinkTaskConfig config,
        StreamingClientProperties streamingClientProperties,
        TaskMetrics taskMetrics,
        ExecutorService executor) {
      LOGGER.info(
          "Creating new streaming client for pipe: {}, connector: {}", pipeName, connectorName);
      this.clientFuture =
          CompletableFuture.supplyAsync(
              () -> {
                try (TaskMetrics.TimingContext ignored = taskMetrics.timeSdkClientCreate()) {
                  return StreamingClientFactory.createClient(
                      pipeName, config, streamingClientProperties);
                }
              },
              executor);
    }

    void addTask(String taskId) {
      taskIds.add(taskId);
    }

    boolean hasTask(String taskId) {
      return taskIds.contains(taskId);
    }

    /** Removes the task and returns {@code true} if no tasks remain (client is unreferenced). */
    boolean removeTask(String taskId) {
      return taskIds.remove(taskId) && taskIds.isEmpty();
    }

    int taskCount() {
      return taskIds.size();
    }

    /** Copies all task registrations from another entry into this one. */
    void copyTasksFrom(RefCountedClient other) {
      taskIds.addAll(other.taskIds);
    }

    /**
     * Waits for the client future and closes the resulting client. Blocks until creation has
     * finished; if creation failed, the failure is rethrown by {@code join()}.
     */
    void close(String pipeName, String connectorName) {
      LOGGER.info(
          "Closing client for pipe {} in connector {} (last task stopped)",
          pipeName,
          connectorName);
      clientFuture.join().close();
    }
  }

  StreamingClientPool(final String connectorName) {
    this.connectorName = connectorName;
    this.ioExecutor = ThreadPools.getIoExecutor(connectorName);

    LOGGER.info("Created client manager for connector: {}", connectorName);
  }

  /**
   * Asynchronously gets or creates a client for the given task and pipe. The returned future
   * completes when the client is ready.
   *
   * <p>The task is registered on the entry immediately. If client creation fails, the entry is
   * evicted (only if it is still the one holding the failed future) so a later call starts fresh.
   */
  CompletableFuture<SnowflakeStreamingIngestClient> getClientAsync(
      final String taskId,
      final String pipeName,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {

    RefCountedClient entry =
        pipes.compute(
            pipeName,
            (key, current) -> {
              if (current == null) {
                current =
                    new RefCountedClient(
                        pipeName,
                        connectorName,
                        config,
                        streamingClientProperties,
                        taskMetrics,
                        ioExecutor);
              }
              current.addTask(taskId);
              return current;
            });

    return entry.clientFuture.whenComplete(
        (client, error) -> {
          if (error != null) {
            // Only remove if the entry still holds the same (failed) future.
            pipes.compute(pipeName, (key, current) -> current == entry ? null : current);
          } else {
            LOGGER.info(
                "Task {} now using pipe {} for connector {}, total tasks on this pipe: {}",
                taskId,
                pipeName,
                connectorName,
                entry.taskCount());
          }
        });
  }

  /** Returns the number of pipe entries on which the given task is currently registered. */
  long getClientCountForTask(final String taskId) {
    return pipes.values().stream().filter(entry -> entry.hasTask(taskId)).count();
  }

  /**
   * Unregisters the task from every pipe and closes each client that no longer has any task.
   *
   * <p>An entry is detached from the map atomically inside {@code compute()}, but the client is
   * closed <em>outside</em> of it: closing waits on the client future and performs I/O, which must
   * not block the map. It also guarantees that an entry whose close fails is still removed from the
   * pool, and that the remaining pipes are still released.
   *
   * @param taskId the task whose registrations should be released
   * @throws RuntimeException the first failure raised while closing a client (later failures are
   *     attached as suppressed exceptions); thrown only after every pipe has been processed
   */
  void closeTaskClients(final String taskId) {
    LOGGER.info("Releasing clients for task {} in connector {}", taskId, connectorName);

    RuntimeException firstFailure = null;
    for (String pipeName : pipes.keySet()) {
      // Captured inside compute() so the unreferenced client can be closed outside the lock.
      AtomicReference<RefCountedClient> unreferenced = new AtomicReference<>();
      pipes.compute(
          pipeName,
          (key, entry) -> {
            if (entry == null) {
              return null;
            }
            if (entry.removeTask(taskId)) {
              unreferenced.set(entry);
              return null;
            }
            return entry;
          });

      RefCountedClient entryToClose = unreferenced.get();
      if (entryToClose == null) {
        continue;
      }
      try {
        entryToClose.close(pipeName, connectorName);
      } catch (RuntimeException e) {
        LOGGER.warn(
            "Failed to close client for pipe {} in connector {}: {}",
            pipeName,
            connectorName,
            e.getMessage());
        if (firstFailure == null) {
          firstFailure = e;
        } else if (firstFailure != e) {
          firstFailure.addSuppressed(e);
        }
      }
    }

    if (firstFailure != null) {
      throw firstFailure;
    }
  }

  /**
   * Atomically replaces the client for a pipe if the current client matches the given invalid
   * client. Uses compare-and-swap semantics: if another caller already replaced the entry, the
   * existing new client is returned without creating a second one.
   *
   * @param taskId the ID of the task requesting recreation; registered on the replacement entry so
   *     the pool does not prematurely evict it on task-local cleanup
   * @param pipeName the pipe whose client should be replaced
   * @param invalidClient the client instance that the caller believes is invalid (identity check)
   * @param config task config for creating the replacement client
   * @param streamingClientProperties streaming client properties
   * @param taskMetrics metrics for timing the new client creation
   * @return the new (or already-replaced) client
   */
  SnowflakeStreamingIngestClient recreateClient(
      final String taskId,
      final String pipeName,
      final SnowflakeStreamingIngestClient invalidClient,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {

    // Captured inside compute() so the old client can be closed outside the lock.
    AtomicReference<SnowflakeStreamingIngestClient> clientToClose = new AtomicReference<>();

    RefCountedClient chosenEntry =
        pipes.compute(
            pipeName,
            (key, current) -> {
              if (current == null) {
                LOGGER.warn(
                    "recreateClient called for pipe {} but no entry exists in connector {}."
                        + " Creating a fresh entry.",
                    pipeName,
                    connectorName);
                return createReplacement(
                    taskId, pipeName, null, config, streamingClientProperties, taskMetrics);
              }

              // Check if the current entry still holds the invalid client (CAS guard).
              // Use timeout=0 to avoid blocking the compute() supplier on I/O: a client
              // whose future hasn't completed yet cannot possibly be the invalid client the
              // caller just observed, so we can assume it's a valid replacement already in flight.
              SnowflakeStreamingIngestClient currentClient;
              try {
                currentClient = current.clientFuture.get(0, TimeUnit.MILLISECONDS);
              } catch (TimeoutException timeout) {
                LOGGER.info(
                    "recreateClient for pipe {} in connector {}: current entry's future not"
                        + " yet complete, assuming replacement already in flight",
                    pipeName,
                    connectorName);
                current.addTask(taskId);
                return current;
              } catch (CompletionException | ExecutionException e) {
                // Current entry failed to create — replace it unconditionally.
                LOGGER.warn(
                    "recreateClient for pipe {}: current entry has a failed client future,"
                        + " replacing unconditionally",
                    pipeName);
                return createReplacement(
                    taskId, pipeName, current, config, streamingClientProperties, taskMetrics);
              } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
              }

              if (currentClient != invalidClient) {
                LOGGER.info(
                    "recreateClient for pipe {} in connector {}: client already replaced"
                        + " by another caller, reusing existing entry",
                    pipeName,
                    connectorName);
                current.addTask(taskId);
                return current;
              }

              // CAS matches — replace with a new entry, preserving task registrations.
              LOGGER.info(
                  "Recreating streaming client for pipe {} in connector {}."
                      + " Old client will be closed best-effort.",
                  pipeName,
                  connectorName);
              // Capture old client for best-effort close outside the compute() lock.
              clientToClose.set(currentClient);
              return createReplacement(
                  taskId, pipeName, current, config, streamingClientProperties, taskMetrics);
            });

    // Best-effort close of the old (invalid) client outside the compute() lock
    // to avoid blocking the ConcurrentHashMap bucket during I/O.
    SnowflakeStreamingIngestClient oldClient = clientToClose.get();
    if (oldClient != null) {
      try {
        oldClient.close();
      } catch (Exception e) {
        LOGGER.warn(
            "Best-effort close of invalid client for pipe {} failed: {}", pipeName, e.getMessage());
      }
    }

    return joinAndEvictOnFailure(pipeName, chosenEntry);
  }

  /**
   * Creates a new {@link RefCountedClient} for the given pipe, inheriting task registrations from
   * {@code previous} if non-null, and always registering {@code taskId}. Centralizing this logic
   * ensures the calling task is always registered so the pool does not prematurely evict a
   * freshly-created entry during subsequent task-local cleanup.
   */
  private RefCountedClient createReplacement(
      final String taskId,
      final String pipeName,
      final RefCountedClient previous,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {
    RefCountedClient fresh =
        new RefCountedClient(
            pipeName, connectorName, config, streamingClientProperties, taskMetrics, ioExecutor);
    if (previous != null) {
      fresh.copyTasksFrom(previous);
    }
    fresh.addTask(taskId);
    return fresh;
  }

  /**
   * Joins the entry's client future and evicts the entry from the pool if the future has failed, so
   * the next caller gets a fresh entry instead of retrying a broken one.
   *
   * <p>When the failure's cause is a {@link RuntimeException} it is rethrown unwrapped; otherwise
   * the {@link CompletionException} itself is rethrown.
   */
  private SnowflakeStreamingIngestClient joinAndEvictOnFailure(
      final String pipeName, final RefCountedClient entry) {
    try {
      return entry.clientFuture.join();
    } catch (CompletionException e) {
      // Evict only if the map still holds this exact entry; a newer replacement must survive.
      pipes.compute(pipeName, (key, current) -> current == entry ? null : current);
      if (e.getCause() instanceof RuntimeException) {
        throw (RuntimeException) e.getCause();
      }
      throw e;
    }
  }

  /** Returns true if there are no remaining clients or task registrations. */
  boolean isEmpty() {
    return pipes.isEmpty();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPools.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import static com.google.common.base.Strings.isNullOrEmpty;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.v2.ClientRecreationException;
import dev.failsafe.Failsafe;
import dev.failsafe.FailsafeException;
import dev.failsafe.RetryPolicy;
import java.time.Duration;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.kafka.connect.errors.ConnectException;

/**
 * JVM-global registry of {@link StreamingClientPool} objects, keyed by connector name.
 *
 * <p>Multiple Kafka Connect connector instances (i.e. different connector configs) can run in the
 * same JVM process. Each gets its own {@link StreamingClientPool}, but they all share this static
 * registry because Kafka Connect only passes String config values to tasks — there is no way to
 * inject a shared object directly. Tasks look up their pool by connector name at startup.
 */
public class StreamingClientPools {
  private static final KCLogger LOGGER = new KCLogger(StreamingClientPools.class.getName());

  // Map: connectorName → StreamingClientPool
  private static final Map<String, StreamingClientPool> connectors = new ConcurrentHashMap<>();

  private StreamingClientPools() {}

  /**
   * Gets or creates a client for the given connector, task, and pipe. Multiple tasks can share the
   * same client. Kafka Connect guarantees that no two tasks in the same connector can work on the
   * same partition. It means that two tasks will never work with given channel at the same time,
   * because channel names are scoped to connector_name + topic_name + partition_id
   *
   * <p>Blocks until the client is ready. A creation failure whose cause is a {@link
   * RuntimeException} is rethrown unwrapped; any other cause is wrapped in a {@link
   * ConnectException}.
   *
   * @param connectorName the name of the connector
   * @param taskId the ID of the task requesting the client
   * @param pipeName the pipe name
   * @param config parsed task config
   * @param streamingClientProperties streaming client properties
   * @param taskMetrics metrics to record client creation time (noop-safe)
   * @return the client for this pipe
   * @throws IllegalArgumentException if connectorName, taskId, or pipeName is null or empty
   */
  public static SnowflakeStreamingIngestClient getClient(
      final String connectorName,
      final String taskId,
      final String pipeName,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {
    try {
      return getClientAsync(
              connectorName, taskId, pipeName, config, streamingClientProperties, taskMetrics)
          .join();
    } catch (CompletionException e) {
      Throwable cause = e.getCause();
      if (cause instanceof RuntimeException) {
        throw (RuntimeException) cause;
      }
      throw new ConnectException(
          "Unexpected error creating streaming client for pipe: " + pipeName, cause);
    }
  }

  /**
   * Asynchronously gets or creates a client for the given connector, task, and pipe. The returned
   * future completes when the client is ready.
   *
   * @throws IllegalArgumentException if connectorName, taskId, or pipeName is null or empty
   */
  public static CompletableFuture<SnowflakeStreamingIngestClient> getClientAsync(
      final String connectorName,
      final String taskId,
      final String pipeName,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {

    if (isNullOrEmpty(connectorName)) {
      throw new IllegalArgumentException("connectorName cannot be null or empty");
    }
    if (isNullOrEmpty(taskId)) {
      throw new IllegalArgumentException("taskId cannot be null or empty");
    }
    if (isNullOrEmpty(pipeName)) {
      throw new IllegalArgumentException("pipeName cannot be null or empty");
    }

    return getPool(connectorName)
        .getClientAsync(taskId, pipeName, config, streamingClientProperties, taskMetrics);
  }

  /** Returns the pool registered for the connector, creating and registering it if absent. */
  private static StreamingClientPool getPool(final String connectorName) {
    return connectors.computeIfAbsent(connectorName, k -> new StreamingClientPool(connectorName));
  }

  /**
   * Returns how many pipe clients the given task is registered on, or 0 when the connector has no
   * pool in the registry.
   */
  public static long getClientCountForTask(final String connectorName, final String taskId) {
    StreamingClientPool pool = connectors.get(connectorName);
    if (pool == null) {
      return 0;
    }

    return pool.getClientCountForTask(taskId);
  }

  /**
   * Atomically replaces the client for a pipe if the current client matches the given invalid
   * client. Uses compare-and-swap semantics: if another caller already replaced the entry, the
   * existing new client is returned without creating a second one.
   *
   * <p>Creation is retried according to {@link #recreateClientRetryPolicy(String)}. When the
   * attempts are exhausted on a client-invalid error, the failure is wrapped via {@link
   * ClientRecreationException#wrap}; errors that are not client-invalid propagate unchanged.
   *
   * @param connectorName the connector name
   * @param taskId the ID of the task requesting recreation; registered on the replacement entry so
   *     the pool does not prematurely evict it on task-local cleanup
   * @param pipeName the pipe whose client should be replaced
   * @param invalidClient the client instance the caller believes is invalid (identity check)
   * @param config task config for creating the replacement client
   * @param streamingClientProperties streaming client properties
   * @param taskMetrics metrics for timing the new client creation
   * @return the new (or already-replaced) client
   */
  public static SnowflakeStreamingIngestClient recreateClient(
      final String connectorName,
      final String taskId,
      final String pipeName,
      final SnowflakeStreamingIngestClient invalidClient,
      final SinkTaskConfig config,
      final StreamingClientProperties streamingClientProperties,
      final TaskMetrics taskMetrics) {
    try {
      return Failsafe.with(recreateClientRetryPolicy(pipeName))
          .get(
              () ->
                  getPool(connectorName)
                      .recreateClient(
                          taskId,
                          pipeName,
                          invalidClient,
                          config,
                          streamingClientProperties,
                          taskMetrics));
    } catch (FailsafeException e) {
      // Failsafe-originated failure (e.g. a wrapped checked exception) — wrap as
      // ClientRecreationException so the batch loop can rewind offsets instead of crashing
      // the task.
      Throwable cause = e.getCause() != null ? e.getCause() : e;
      throw ClientRecreationException.wrap(cause);
    } catch (RuntimeException e) {
      // Failsafe's synchronous get() rethrows unchecked exceptions as-is rather than wrapping
      // them in FailsafeException, so exhausted client-invalid retries arrive here. Wrap those
      // for the batch loop; every other error keeps propagating unchanged.
      if (ClientRecreationException.isClientInvalidError(e)) {
        throw ClientRecreationException.wrap(e);
      }
      throw e;
    }
  }

  /**
   * Delay between client-creation retries. Pipe failover typically takes a few seconds to stabilize
   * on the server side, and back-to-back retries with no delay would all hit the same in-flight
   * failover window and fail before the server finishes.
   *
   * <p>Note: this delay is per-invocation. {@link #recreateClient} can be called concurrently by
   * multiple {@link
   * com.snowflake.kafka.connector.internal.streaming.v2.SnowpipeStreamingPartitionChannel}s on the
   * same pipe. The pool's CAS dedupes to a single fresh client, but each caller runs its own
   * Failsafe retry schedule — so from the pool's perspective, client creation can happen more than
   * once per {@code CLIENT_CREATION_RETRY_DELAY} window across concurrent callers. This is
   * acceptable: each individual channel still retries at ~5s cadence. Because {@code
   * MAX_CLIENT_CREATION_RETRIES} bounds the total number of attempts, a channel waits at most
   * {@code (MAX_CLIENT_CREATION_RETRIES - 1) * CLIENT_CREATION_RETRY_DELAY = ~10s} between
   * attempts, plus the time spent inside each creation attempt. When reading logs, expect to see
   * overlapping retry schedules across channels on the same pipe during a failover event.
   */
  private static final Duration CLIENT_CREATION_RETRY_DELAY = Duration.ofSeconds(5);

  /**
   * Maximum number of creation attempts (the initial attempt included) when a replacement client
   * also fails with a client-invalid error during {@link #recreateClient}. Three attempts provide
   * enough headroom for transient failover windows while keeping total blocking time bounded (each
   * attempt creates a fresh SDK client).
   */
  private static final int MAX_CLIENT_CREATION_RETRIES = 3;

  /**
   * Retries replacement-client creation when the SDK reports a client-invalid error (e.g., pipe
   * failover still in flight). The pool evicts the failed entry on each attempt, so the retry
   * creates a fresh client. Non-client-invalid errors fall through immediately.
   */
  private static RetryPolicy<SnowflakeStreamingIngestClient> recreateClientRetryPolicy(
      String pipeName) {
    return RetryPolicy.<SnowflakeStreamingIngestClient>builder()
        .handleIf(
            e -> e instanceof RuntimeException && ClientRecreationException.isClientInvalidError(e))
        .withMaxAttempts(MAX_CLIENT_CREATION_RETRIES)
        .withDelay(CLIENT_CREATION_RETRY_DELAY)
        .onRetry(
            event ->
                LOGGER.warn(
                    "Replacement client for pipe {} failed with client-invalid error"
                        + " (attempt {}/{}): {}. Retrying after {}.",
                    pipeName,
                    event.getAttemptCount(),
                    MAX_CLIENT_CREATION_RETRIES,
                    event.getLastException().getMessage(),
                    CLIENT_CREATION_RETRY_DELAY))
        .onRetriesExceeded(
            event ->
                LOGGER.error(
                    "Replacement client for pipe {} failed after {} attempts: {}",
                    pipeName,
                    event.getAttemptCount(),
                    event.getException().getMessage()))
        .build();
  }

  /**
   * Releases all clients used by a specific task. Clients that are still used by other tasks remain
   * open. Only closes clients when the last task using them stops. When the pool becomes empty (no
   * remaining clients or tasks), the pool is removed from the registry.
   *
   * @param connectorName the name of the connector
   * @param taskId the ID of the task
   */
  public static void closeTaskClients(final String connectorName, final String taskId) {
    connectors.compute(
        connectorName,
        (key, pool) -> {
          if (pool == null) {
            LOGGER.warn(
                "Attempted to release task {} for unknown connector: {}", taskId, connectorName);
            return null;
          }
          pool.closeTaskClients(taskId);
          if (pool.isEmpty()) {
            LOGGER.info("All tasks released for connector: {}", connectorName);
            return null;
          }
          return pool;
        });
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientSupplier.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;

/**
 * Strategy for obtaining a {@link SnowflakeStreamingIngestClient}. Abstracting client creation
 * behind this interface lets a different supplier be substituted for the default one.
 */
public interface StreamingClientSupplier {
  /**
   * Returns a streaming ingest client for the given pipe.
   *
   * @param clientName name to give the client
   * @param dbName Snowflake database name
   * @param schemaName Snowflake schema name
   * @param pipeName pipe name
   * @param streamingClientProperties client properties and parameter overrides to apply
   * @return the client
   */
  SnowflakeStreamingIngestClient get(
      String clientName,
      String dbName,
      String schemaName,
      String pipeName,
      StreamingClientProperties streamingClientProperties);
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/migration/Ssv1MigrationMode.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.migration;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT;

import java.util.Arrays;
import java.util.Locale;
import java.util.stream.Collectors;

/**
 * Controls whether the connector reads committed offsets from SSv1 channels during migration from
 * KC v3 to KC v4. Only consulted when the SSv2 channel has no committed offset yet.
 */
public enum Ssv1MigrationMode {
  /** SSv1 is never queried (default, current behavior). */
  SKIP,

  /**
   * When SSv2 has no committed offset, SSv1 is queried and its offset becomes the starting point.
   * A missing SSv1 channel is tolerated: the consumer group offset is used instead.
   */
  BEST_EFFORT,

  /**
   * When SSv2 has no committed offset, SSv1 is queried and its offset becomes the starting point.
   * A missing SSv1 channel fails the channel open so the operator can investigate.
   */
  STRICT;

  /**
   * Resolves a migration mode from its config string, ignoring case and surrounding whitespace.
   * Null or blank input resolves to {@link
   * com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams#SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT
   * SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT}.
   *
   * @param value raw config value, may be null
   * @return the matching mode
   * @throws IllegalArgumentException if the value names no mode; the message contains the config
   *     key and the list of valid options
   */
  public static Ssv1MigrationMode fromConfig(String value) {
    final boolean blank = value == null || value.trim().isEmpty();
    final String trimmed = (blank ? SNOWFLAKE_SSV1_OFFSET_MIGRATION_DEFAULT : value).trim();

    try {
      return Ssv1MigrationMode.valueOf(trimmed.toUpperCase(Locale.ROOT));
    } catch (IllegalArgumentException unknownMode) {
      // Advertise the options in lowercase, in declaration order.
      final String options =
          Arrays.stream(Ssv1MigrationMode.values())
              .map(Enum::name)
              .map(name -> name.toLowerCase(Locale.ROOT))
              .collect(Collectors.joining(", "));
      final String message =
          "Invalid value '"
              + trimmed
              + "' for config '"
              + SNOWFLAKE_SSV1_OFFSET_MIGRATION
              + "'. Valid values are: "
              + options;
      throw new IllegalArgumentException(message, unknownMode);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/migration/Ssv1MigrationResponse.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.migration;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import javax.annotation.Nullable;

/**
 * Deserialized response from SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET. The three possible outcomes are:
 *
 * <ul>
 *   <li>{@code ssv1ChannelFound == false} — the SSv1 channel does not exist
 *   <li>{@code ssv1ChannelFound == true, migratedOffset == null} — channel exists but has no
 *       committed offset
 *   <li>{@code ssv1ChannelFound == true, migratedOffset != null} — offset was migrated successfully
 * </ul>
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class Ssv1MigrationResponse {

  /** Whether the SSv1 channel exists; bound to the {@code ssv1_channel_found} property. */
  @JsonProperty("ssv1_channel_found")
  boolean ssv1ChannelFound;

  /** Migrated offset, or null when none is present; bound to {@code migrated_offset}. */
  @Nullable
  @JsonProperty("migrated_offset")
  Long migratedOffset;

  /** Builds a response for the case where the channel was not found. */
  @VisibleForTesting
  public static Ssv1MigrationResponse channelNotFound() {
    return of(false, null);
  }

  /** Builds a response for a channel that exists but carries no committed offset. */
  @VisibleForTesting
  public static Ssv1MigrationResponse channelFoundNoOffset() {
    return of(true, null);
  }

  /** Builds a response for a successful migration carrying the given offset. */
  @VisibleForTesting
  public static Ssv1MigrationResponse migrated(long offset) {
    return of(true, offset);
  }

  /**
   * Shared builder for the factory methods above. Uses the implicit no-arg constructor and assigns
   * the fields directly, so no additional constructor is introduced on the class.
   */
  private static Ssv1MigrationResponse of(boolean channelFound, @Nullable Long offset) {
    final Ssv1MigrationResponse built = new Ssv1MigrationResponse();
    built.ssv1ChannelFound = channelFound;
    built.migratedOffset = offset;
    return built;
  }

  public boolean isSsv1ChannelFound() {
    return this.ssv1ChannelFound;
  }

  @Nullable
  public Long getMigratedOffset() {
    return this.migratedOffset;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/service/BatchOffsetFetcher.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.service;

import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.ChannelStatusBatch;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.kafka.common.TopicPartition;

/**
 * Fetches committed offsets for topic partitions in batches, grouped by pipe. Makes at most one
 * network call per SDK client (i.e. per pipe), regardless of the number of partitions.
 */
public class BatchOffsetFetcher {

  private static final KCLogger LOGGER = new KCLogger(BatchOffsetFetcher.class.getName());

  private final String connectorName;
  private final String taskId;
  private final SinkTaskConfig taskConfig;
  private final StreamingClientProperties streamingClientProperties;
  private final boolean tolerateErrors;
  private final ExecutorService ioExecutor;
  private final TaskMetrics taskMetrics;

  public BatchOffsetFetcher(
      String connectorName,
      String taskId,
      SinkTaskConfig taskConfig,
      ExecutorService ioExecutor,
      TaskMetrics taskMetrics) {
    this.connectorName = connectorName;
    this.taskId = taskId;
    this.taskConfig = taskConfig;
    this.streamingClientProperties = StreamingClientProperties.from(taskConfig);
    this.tolerateErrors = taskConfig.isTolerateErrors();
    this.ioExecutor = ioExecutor;
    this.taskMetrics = taskMetrics;
  }

  /**
   * Fetches committed offsets for the given partitions using the SDK's batch channel-status API.
   * Makes at most one network call per pipe, regardless of partition count.
   *
   * @param partitions the partitions to query
   * @param channelLookup function to look up the TopicPartitionChannel for a given partition
   * @return map of TopicPartition to the offset safe to commit to Kafka (committed + 1), only
   *     containing entries where a valid offset was found
   */
  public Map<TopicPartition, Long> getCommittedOffsets(
      Collection<TopicPartition> partitions,
      Function<TopicPartition, Optional<TopicPartitionChannel>> channelLookup) {

    PartitionsByTopic grouped = PartitionsByTopic.groupByTopic(partitions, channelLookup);

    grouped.topicToPartitionsWithoutChannels.forEach(
        (topic, uninitializedPartitions) ->
            LOGGER.warn(
                "Topic: {} has partition(s) not yet initialized to get offset: {}",
                topic,
                uninitializedPartitions));

    Map<TopicPartition, Long> result = new ConcurrentHashMap<>();

    CompletableFuture<?>[] futures =
        grouped.pipeNameToChannels.entrySet().stream()
            .map(entry -> fetchOffsetsAsync(entry.getKey(), entry.getValue(), result))
            .toArray(CompletableFuture[]::new);

    try {
      CompletableFuture.allOf(futures).join();
    } catch (CompletionException e) {
      if (e.getCause() instanceof RuntimeException) {
        throw (RuntimeException) e.getCause();
      }
      throw e;
    }
    return result;
  }

  private CompletableFuture<Void> fetchOffsetsAsync(
      String pipeName,
      Map<TopicPartition, TopicPartitionChannel> channelsByPartition,
      Map<TopicPartition, Long> result) {
    return CompletableFuture.runAsync(
        () -> {
          try {
            result.putAll(getCommittedOffsetsForPipe(pipeName, channelsByPartition));
          } catch (SFException e) {
            LOGGER.error(
                "Failed to fetch committed offsets for pipe: {}, skipping {} channel(s)",
                pipeName,
                channelsByPartition.size(),
                e);
          }
        },
        ioExecutor);
  }

  /**
   * @throws SFException if {@code getChannelStatus} fails after the SDK exhausts its internal
   *     retries (exponential backoff on transient HTTP errors)
   */
  private Map<TopicPartition, Long> getCommittedOffsetsForPipe(
      String pipeName, Map<TopicPartition, TopicPartitionChannel> channelsByPartition) {
    List<String> channelNames =
        channelsByPartition.values().stream()
            .map(TopicPartitionChannel::getChannelName)
            .collect(Collectors.toList());

    SnowflakeStreamingIngestClient client =
        StreamingClientPools.getClient(
            connectorName, taskId, pipeName, taskConfig, streamingClientProperties, taskMetrics);

    final ChannelStatusBatch batch;
    try (TaskMetrics.TimingContext ignored = taskMetrics.timeOffsetFetch()) {
      batch = client.getChannelStatus(channelNames);
    }

    Map<TopicPartition, Long> result = new HashMap<>();
    channelsByPartition.forEach(
        (topicPartition, channel) -> {
          String channelName = channel.getChannelName();

          ChannelStatus status = batch.getChannelStatusBatch().get(channelName);
          if (status == null) {
            // This should never happen but we can still recover by simply skipping this channel.
            // There is no obligation to return any committed offsets in `preCommit`.
            LOGGER.warn("No status returned for channel: {}", channelName);
            return;
          }
          long offset = channel.processChannelStatus(status, tolerateErrors);
          LOGGER.info(
              "Fetched snowflake committed offset: [{}] for channel [{}]", offset, channelName);
          if (offset != NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE) {
            result.put(topicPartition, offset);
          }
        });
    return result;
  }

  public static class PartitionsByTopic {
    /** Partitions with initialized channels, grouped by pipe name */
    public final Map<String, Map<TopicPartition, TopicPartitionChannel>> pipeNameToChannels;

    /** Partitions without an initialized channel, grouped by topic */
    public final Map<String, Set<TopicPartition>> topicToPartitionsWithoutChannels;

    PartitionsByTopic(
        Map<String, Map<TopicPartition, TopicPartitionChannel>> pipeNameToChannels,
        Map<String, Set<TopicPartition>> topicToPartitionsWithoutChannels) {
      this.pipeNameToChannels = pipeNameToChannels;
      this.topicToPartitionsWithoutChannels = topicToPartitionsWithoutChannels;
    }

    public static PartitionsByTopic groupByTopic(
        Collection<TopicPartition> partitions,
        Function<TopicPartition, Optional<TopicPartitionChannel>> channelLookup) {
      Map<String, Map<TopicPartition, TopicPartitionChannel>> pipeNameToChannels = new HashMap<>();
      Map<String, Set<TopicPartition>> topicToPartitionsWithoutChannels = new HashMap<>();
      for (TopicPartition topicPartition : partitions) {
        channelLookup
            .apply(topicPartition)
            .ifPresentOrElse(
                channel ->
                    pipeNameToChannels
                        .computeIfAbsent(channel.getPipeName(), k -> new LinkedHashMap<>())
                        .put(topicPartition, channel),
                () ->
                    topicToPartitionsWithoutChannels
                        .computeIfAbsent(topicPartition.topic(), k -> new HashSet<>())
                        .add(topicPartition));
      }
      return new PartitionsByTopic(pipeNameToChannels, topicToPartitionsWithoutChannels);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/service/PartitionChannelManager.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.service;

import com.google.common.annotations.VisibleForTesting;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.StreamingErrorHandler;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.streaming.v2.SnowpipeStreamingPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Collection;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkTaskContext;

/**
 * Owns the {@link TopicPartitionChannel} instances of a single Kafka Connect task: builds and
 * registers them, looks them up by channel name or partition, and closes them.
 */
public class PartitionChannelManager {

  private static final KCLogger LOGGER = new KCLogger(PartitionChannelManager.class.getName());

  /**
   * Factory for the {@link TopicPartitionChannel} of one partition, invoked from {@link
   * #startPartitions}. Production code uses {@link #buildChannel}; tests inject a lambda.
   */
  @FunctionalInterface
  interface PartitionChannelBuilder {
    TopicPartitionChannel build(
        TopicPartition topicPartition, String tableName, String channelName, String pipeName);
  }

  private final SnowflakeTelemetryService telemetryService;
  private final KafkaRecordErrorReporter kafkaRecordErrorReporter;
  private final Optional<MetricsJmxReporter> metricsJmxReporter;
  private final TaskMetrics taskMetrics;

  private final SinkTaskContext sinkTaskContext;

  private final SinkTaskConfig taskConfig;
  private final SnowflakeConnectionService conn;

  private final PartitionChannelBuilder partitionChannelBuilder;
  /** Registered channels keyed by channel name. */
  private final Map<String, TopicPartitionChannel> partitionChannels;
  /** Per-table memo of the schema-evolution lookup, so the connection is asked once per table. */
  private final Map<String, Boolean> shouldEvolveSchemaCache = new ConcurrentHashMap<>();

  /** Production constructor; channels are created through {@link #buildChannel}. */
  public PartitionChannelManager(
      SnowflakeTelemetryService telemetryService,
      SinkTaskConfig taskConfig,
      KafkaRecordErrorReporter kafkaRecordErrorReporter,
      SinkTaskContext sinkTaskContext,
      Optional<MetricsJmxReporter> metricsJmxReporter,
      TaskMetrics taskMetrics,
      SnowflakeConnectionService conn) {
    this.telemetryService = telemetryService;
    this.taskConfig = taskConfig;
    this.kafkaRecordErrorReporter = kafkaRecordErrorReporter;
    this.sinkTaskContext = sinkTaskContext;
    this.metricsJmxReporter = metricsJmxReporter;
    this.taskMetrics = taskMetrics;
    this.conn = conn;
    this.partitionChannelBuilder = this::buildChannel;
    this.partitionChannels = new ConcurrentHashMap<>();
  }

  /**
   * Test constructor: channels come from the supplied builder and every collaborator other than
   * the task config is left unset ({@code null} or empty).
   */
  @VisibleForTesting
  PartitionChannelManager(
      SinkTaskConfig taskConfig, PartitionChannelBuilder partitionChannelBuilder) {
    this.taskConfig = taskConfig;
    this.partitionChannelBuilder = partitionChannelBuilder;
    this.partitionChannels = new ConcurrentHashMap<>();
    this.telemetryService = null;
    this.kafkaRecordErrorReporter = null;
    this.sinkTaskContext = null;
    this.metricsJmxReporter = Optional.empty();
    this.taskMetrics = null;
    this.conn = null;
  }

  /**
   * Builds the channel identifier {@code <connectorName>_<topic>_<partition>}.
   *
   * @return the three parts joined with underscores
   */
  @VisibleForTesting
  public static String makeChannelName(
      final String connectorName, final String topic, final int partition) {
    return connectorName + "_" + topic + "_" + partition;
  }

  /** Channel name of the given partition for this task's connector. */
  private String getChannelName(TopicPartition topicPartition) {
    return makeChannelName(
        taskConfig.getConnectorName(), topicPartition.topic(), topicPartition.partition());
  }

  /** Table name for the partition's topic, resolved through the topic-to-table configuration. */
  private String getTableName(TopicPartition topicPartition) {
    return Utils.getTableName(
        topicPartition.topic(), taskConfig.getTopicToTableMap(), taskConfig.isEnableSanitization());
  }

  /**
   * Creates and registers channels for the given partitions.
   *
   * @param partitions collection of topic partitions to open channels for
   * @param tableToPipeMapping pre-resolved mapping of table name to pipe name; the caller is
   *     responsible for ensuring tables exist and resolving the correct pipe for each table
   */
  public void startPartitions(
      Collection<TopicPartition> partitions, Map<String, String> tableToPipeMapping) {
    LOGGER.info(
        "Starting {} partitions for connector: {}, task: {}",
        partitions.size(),
        taskConfig.getConnectorName(),
        taskConfig.getTaskId());

    warmUpStreamingClients(tableToPipeMapping);

    for (TopicPartition topicPartition : partitions) {
      startPartition(topicPartition, tableToPipeMapping);
    }
  }

  /** Builds the channel for one partition and registers it under its channel name. */
  private void startPartition(
      TopicPartition topicPartition, Map<String, String> tableToPipeMapping) {
    final String tableName = getTableName(topicPartition);
    final String pipeName = tableToPipeMapping.get(tableName);
    final String channelName = getChannelName(topicPartition);

    LOGGER.info(
        "Creating streaming channel {} for {}, table: {}, pipe: {}",
        channelName,
        topicPartition,
        tableName,
        pipeName);

    final TopicPartitionChannel created =
        partitionChannelBuilder.build(topicPartition, tableName, channelName, pipeName);
    partitionChannels.put(channelName, created);

    LOGGER.info("Successfully created streaming channel: {}", channelName);
  }

  /** Production {@link PartitionChannelBuilder}: wires up all collaborators of one channel. */
  private TopicPartitionChannel buildChannel(
      TopicPartition topicPartition, String tableName, String channelName, String pipeName) {

    final String connectorName = taskConfig.getConnectorName();

    final StreamingErrorHandler streamingErrorHandler =
        new StreamingErrorHandler(taskConfig, kafkaRecordErrorReporter, telemetryService);
    final StreamingClientProperties streamingClientProperties =
        StreamingClientProperties.from(taskConfig);
    final SnowflakeStreamingIngestClient streamingClient =
        StreamingClientPools.getClient(
            connectorName,
            taskConfig.getTaskId(),
            pipeName,
            taskConfig,
            streamingClientProperties,
            taskMetrics);
    final PartitionOffsetTracker offsetTracker =
        new PartitionOffsetTracker(topicPartition, this.sinkTaskContext, channelName);

    final SnowflakeTelemetryChannelStatus telemetryChannelStatus =
        new SnowflakeTelemetryChannelStatus(
            tableName,
            connectorName,
            channelName,
            System.currentTimeMillis(),
            this.metricsJmxReporter,
            offsetTracker.persistedOffsetRef(),
            offsetTracker.processedOffsetRef(),
            offsetTracker.consumerGroupOffsetRef());

    final ExecutorService openChannelIoExecutor =
        ThreadPools.getOpenChannelIoExecutor(connectorName);

    // The connection is only consulted under client-side validation, and then once per table.
    final boolean clientSideValidation =
        taskConfig.getValidation() == SnowflakeValidation.CLIENT_SIDE;
    final boolean shouldEvolveSchema =
        clientSideValidation
            && shouldEvolveSchemaCache.computeIfAbsent(
                tableName, table -> conn.shouldEvolveSchema(table, taskConfig.getSnowflakeRole()));

    final Optional<String> ssv1ChannelName = resolveSsv1ChannelName(topicPartition);

    return new SnowpipeStreamingPartitionChannel(
        tableName,
        channelName,
        pipeName,
        streamingClient,
        openChannelIoExecutor,
        this.telemetryService,
        telemetryChannelStatus,
        offsetTracker,
        taskConfig,
        streamingErrorHandler,
        this.taskMetrics,
        shouldEvolveSchema,
        this.conn,
        ssv1ChannelName);
  }

  /**
   * Name of the SSv1 channel to pass to the new channel, or empty when the migration mode is
   * {@link Ssv1MigrationMode#SKIP}.
   *
   * <p>KC v3 defaulted to V1 channel naming: {topic}_{partition}. Customers who set
   * snowflake.streaming.channel.name.include.connector.name=true in KC v3 used V2 naming:
   * {connectorName}_{topic}_{partition} (same as KC v4).
   */
  private Optional<String> resolveSsv1ChannelName(TopicPartition topicPartition) {
    final Ssv1MigrationMode ssv1MigrationMode = taskConfig.getSsv1MigrationMode();
    if (ssv1MigrationMode == Ssv1MigrationMode.SKIP) {
      return Optional.empty();
    }

    final String topic = topicPartition.topic();
    final int partition = topicPartition.partition();
    if (taskConfig.isSsv1MigrationIncludeConnectorName()) {
      return Optional.of(makeChannelName(taskConfig.getConnectorName(), topic, partition));
    }
    return Optional.of(topic + "_" + partition);
  }

  /**
   * Pre-warms the {@link StreamingClientPools} cache by creating clients for all distinct pipes in
   * parallel. Subsequent per-partition calls to {@link StreamingClientPools#getClient} in {@link
   * #buildChannel} will return the cached clients immediately.
   *
   * <p>Skipped when using the test constructor (conn is null).
   */
  private void warmUpStreamingClients(Map<String, String> tableToPipeMapping) {
    if (conn == null) {
      return;
    }

    final StreamingClientProperties streamingClientProperties =
        StreamingClientProperties.from(taskConfig);

    final CompletableFuture<?>[] pendingClients =
        tableToPipeMapping.values().stream()
            .distinct()
            .map(
                pipe ->
                    StreamingClientPools.getClientAsync(
                        taskConfig.getConnectorName(),
                        taskConfig.getTaskId(),
                        pipe,
                        taskConfig,
                        streamingClientProperties,
                        taskMetrics))
            .toArray(CompletableFuture[]::new);

    try {
      CompletableFuture.allOf(pendingClients).join();
    } catch (CompletionException wrapped) {
      // Surface the original runtime failure rather than the CompletionException wrapper.
      final Throwable cause = wrapped.getCause();
      if (cause instanceof RuntimeException) {
        throw (RuntimeException) cause;
      }
      throw wrapped;
    }
  }

  /**
   * Asks every registered channel to wait for its last processed record to be committed and blocks
   * until all of them are done. Returns immediately when no channel is registered.
   */
  public void waitForAllChannelsToCommitData() {
    final int channelCount = partitionChannels.size();
    if (channelCount == 0) {
      return;
    }

    LOGGER.info("Starting parallel flush for {} channels", channelCount);

    final CompletableFuture<?>[] flushes =
        partitionChannels.values().stream()
            .map(channel -> channel.waitForLastProcessedRecordCommitted())
            .toArray(CompletableFuture[]::new);
    CompletableFuture.allOf(flushes).join();

    LOGGER.info("Completed parallel flush for {} channels", channelCount);
  }

  /** Closes every registered channel, waits for the closes to finish, then empties the registry. */
  public void closeAll() {
    LOGGER.info(
        "Closing all {} partition channels for connector: {}, task: {}",
        partitionChannels.size(),
        taskConfig.getConnectorName(),
        taskConfig.getTaskId());

    final CompletableFuture<?>[] closes =
        partitionChannels.values().stream()
            .map(channel -> channel.closeChannelAsync())
            .toArray(CompletableFuture[]::new);
    CompletableFuture.allOf(closes).join();

    partitionChannels.clear();

    LOGGER.info(
        "Completed closing all partition channels for connector: {}, task: {}",
        taskConfig.getConnectorName(),
        taskConfig.getTaskId());
  }

  /**
   * This function is called during rebalance.
   *
   * <p>The channels of the given partitions are closed; the client stays active. Upon rebalance
   * (inside {@link com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)}) the channels
   * are reopened.
   *
   * <p>Each channel is removed from the partitionChannels cache once its close completes, so that
   * {@link com.snowflake.kafka.connector.SnowflakeSinkTask#open(Collection)} re-instantiates it
   * and fetches the offsetToken again. Partitions without a registered channel are ignored.
   *
   * @param partitions a list of topic partition
   */
  public void close(Collection<TopicPartition> partitions) {
    LOGGER.info(
        "Closing {} partitions for connector: {}, task: {}",
        partitions.size(),
        taskConfig.getConnectorName(),
        taskConfig.getTaskId());

    final CompletableFuture<?>[] closes =
        partitions.stream()
            .map(topicPartition -> getChannel(topicPartition))
            .flatMap(Optional::stream)
            .map(
                channel ->
                    channel
                        .closeChannelAsync()
                        .thenAccept(ignored -> partitionChannels.remove(channel.getChannelName())))
            .toArray(CompletableFuture[]::new);
    CompletableFuture.allOf(closes).join();

    LOGGER.info(
        "Closed {} partitions, remaining {} open partitions are: {}",
        partitions.size(),
        partitionChannels.size(),
        partitionChannels.keySet().toString());
  }

  /** Returns the channel for the given name, or empty if not found. */
  public Optional<TopicPartitionChannel> getChannel(String channelName) {
    final TopicPartitionChannel registered = partitionChannels.get(channelName);
    return Optional.ofNullable(registered);
  }

  /** Returns the channel for the given TopicPartition, or empty if not found. */
  public Optional<TopicPartitionChannel> getChannel(TopicPartition topicPartition) {
    return getChannel(getChannelName(topicPartition));
  }

  /** Returns the live channel registry itself (not a copy), keyed by channel name. */
  public Map<String, TopicPartitionChannel> getPartitionChannels() {
    return partitionChannels;
  }

  /** Blocks until all partition channels have finished initialization. */
  @VisibleForTesting
  public void awaitAllPartitions() {
    for (TopicPartitionChannel channel : partitionChannels.values()) {
      channel.awaitInitialization();
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/streaming/v2/service/ThreadPools.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.service;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.KCLogger;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * JVM-global registry of per-connector thread pools.
 *
 * <ul>
 *   <li><b>ioExecutor</b> — an unbounded cached thread pool for bursty blocking I/O: SDK client
 *       creation and batch offset fetching ({@code getChannelStatus} HTTP calls). Threads are
 *       created on demand and reclaimed after 60 s of idleness.
 *   <li><b>openChannelIoExecutor</b> — a fixed-size thread pool that rate-limits channel open
 *       operations. The size is controlled by {@code snowflake.open.channel.io.threads}.
 * </ul>
 *
 * <p>Like {@link com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools},
 * this class uses a static {@link ConcurrentHashMap} keyed by connector name. Each connector gets
 * its own pools, which are reference-counted per task: {@link #registerTask(String,
 * SinkTaskConfig)} increments the count and {@link #closeForTask(String)} decrements it, shutting
 * the pools down when it reaches zero.
 */
public class ThreadPools {
  private static final KCLogger LOGGER = new KCLogger(ThreadPools.class.getName());

  private static final Map<String, ConnectorThreadPool> connectorPools = new ConcurrentHashMap<>();

  private ThreadPools() {}

  /** Holds the executors and a reference count of tasks currently using them. */
  private static class ConnectorThreadPool {
    final ExecutorService ioExecutor;
    final ExecutorService openChannelIoExecutor;
    final AtomicInteger refCount = new AtomicInteger(0);

    ConnectorThreadPool(String connectorName, int openChannelIoThreads) {
      LOGGER.info("Creating I/O thread pool for connector: {}", connectorName);
      final ThreadFactory ioThreads = new DaemonThreadFactory(connectorName + "-io");
      this.ioExecutor = Executors.newCachedThreadPool(ioThreads);

      // A fixed pool needs at least one thread, so non-positive settings are raised to 1.
      final int maxThreads = Math.max(1, openChannelIoThreads);
      LOGGER.info(
          "Creating channel thread pool for connector: {}, threads: {}", connectorName, maxThreads);
      final ThreadFactory channelThreads = new DaemonThreadFactory(connectorName + "-channel");
      this.openChannelIoExecutor = Executors.newFixedThreadPool(maxThreads, channelThreads);
    }
  }

  /**
   * Looks up the pools of a connector.
   *
   * @throws IllegalStateException if no pools are registered under {@code connectorName}
   */
  private static ConnectorThreadPool requirePool(final String connectorName) {
    final ConnectorThreadPool registered = connectorPools.get(connectorName);
    if (registered == null) {
      throw new IllegalStateException("No thread pool registered for connector: " + connectorName);
    }
    return registered;
  }

  /**
   * Returns the I/O executor (cached thread pool) for the given connector. The pool must have been
   * created by a prior call to {@link #registerTask(String, SinkTaskConfig)}.
   *
   * @throws IllegalStateException if no pools are registered for the connector
   */
  public static ExecutorService getIoExecutor(final String connectorName) {
    return requirePool(connectorName).ioExecutor;
  }

  /**
   * Returns the open-channel executor (fixed-size thread pool) for the given connector. The pool
   * must have been created by a prior call to {@link #registerTask(String, SinkTaskConfig)}.
   *
   * @throws IllegalStateException if no pools are registered for the connector
   */
  public static ExecutorService getOpenChannelIoExecutor(final String connectorName) {
    return requirePool(connectorName).openChannelIoExecutor;
  }

  /**
   * Registers a task as a user of the connector's thread pools, creating the pools if this is the
   * first task for the connector. Must be paired with a later call to {@link #closeForTask(String)}
   * to ensure the pools are shut down when no tasks remain.
   */
  public static void registerTask(final String connectorName, final SinkTaskConfig config) {
    // Creation and the ref-count bump happen inside compute() so they are atomic per key.
    connectorPools.compute(
        connectorName,
        (name, existing) -> {
          final ConnectorThreadPool pool =
              existing != null
                  ? existing
                  : new ConnectorThreadPool(connectorName, config.getOpenChannelIoThreads());
          pool.refCount.incrementAndGet();
          return pool;
        });
  }

  /**
   * Unregisters a task from the connector's thread pools. When the last task unregisters, the
   * executors are shut down and removed from the registry. Does nothing if no pools are registered
   * for the connector.
   */
  public static void closeForTask(final String connectorName) {
    connectorPools.computeIfPresent(
        connectorName,
        (name, pool) -> {
          final int remainingTasks = pool.refCount.decrementAndGet();
          if (remainingTasks != 0) {
            return pool;
          }
          LOGGER.info("Shutting down thread pools for connector: {}", connectorName);
          pool.ioExecutor.shutdownNow();
          pool.openChannelIoExecutor.shutdownNow();
          // Returning null removes the mapping.
          return null;
        });
  }

  /**
   * Thread factory producing daemon threads named {@code <prefix>-<n>}.
   *
   * <p>The context class loader is captured at factory creation time because Kafka Connect uses a
   * PluginClassLoader that must be on the thread context for the SDK's native library loading
   * (FFIBootstrap) to find resources inside plugin JARs.
   */
  private static final class DaemonThreadFactory implements ThreadFactory {
    private final AtomicInteger counter = new AtomicInteger(0);
    private final String prefix;
    private final ClassLoader contextClassLoader;

    DaemonThreadFactory(String prefix) {
      this.prefix = prefix;
      this.contextClassLoader = Thread.currentThread().getContextClassLoader();
    }

    @Override
    public Thread newThread(Runnable r) {
      final String threadName = prefix + "-" + counter.getAndIncrement();
      final Thread worker = new Thread(r, threadName);
      worker.setDaemon(true);
      worker.setContextClassLoader(contextClassLoader);
      return worker;
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryBasicInfo.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.snowflake.kafka.connector.internal.KCLogger;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;

/** Minimum information that needs to be sent to Snowflake through the Telemetry API. */
public abstract class SnowflakeTelemetryBasicInfo {
  public static final KCLogger LOGGER = new KCLogger(SnowflakeTelemetryBasicInfo.class.getName());

  public final String tableName;
  public final SnowflakeTelemetryService.TelemetryType telemetryType;

  /**
   * Base constructor.
   *
   * @param tableName table name this telemetry record refers to; must be neither null nor empty
   * @param telemetryType type of telemetry this record represents; stored without validation
   * @throws IllegalArgumentException if {@code tableName} is null or empty
   */
  public SnowflakeTelemetryBasicInfo(
      final String tableName, SnowflakeTelemetryService.TelemetryType telemetryType) {
    final boolean tableNameProvided = !Strings.isNullOrEmpty(tableName);
    Preconditions.checkArgument(tableNameProvided, "tableName cannot be null or empty");
    this.telemetryType = telemetryType;
    this.tableName = tableName;
  }

  /**
   * Adds the required fields to the given ObjectNode, which is then used as the payload of the
   * Telemetry API call.
   *
   * @param msg ObjectNode to which the extra fields are added
   */
  public abstract void dumpTo(ObjectNode msg);

  /**
   * @return true if there was no update to the corresponding implementation's member variables.
   *     In other words, the corresponding partition didn't receive any records, in which case the
   *     telemetry API would not be called.
   */
  public abstract boolean isEmpty();
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryService.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.streaming.IngestionMethodConfig;
import java.sql.Connection;
import java.util.Map;
import java.util.Set;
import net.snowflake.client.internal.jdbc.telemetry.Telemetry;
import net.snowflake.client.internal.jdbc.telemetry.TelemetryClient;
import net.snowflake.client.internal.jdbc.telemetry.TelemetryUtil;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.JsonNode;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.kafka.common.utils.AppInfoParser;

/**
 * Builds connector telemetry events as JSON and hands them to the JDBC {@link Telemetry} client.
 *
 * <p>Every event carries the common fields produced by {@link #getObjectNode()} and is wrapped in
 * the envelope described on {@code send}. Failures while sending are logged and never propagated
 * to the caller.
 */
public class SnowflakeTelemetryService {

  private final KCLogger LOGGER = new KCLogger(SnowflakeTelemetryService.class.getName());

  private static final ObjectMapper MAPPER = new ObjectMapper();

  // JSON field names used in telemetry payloads
  private static final String SOURCE = "source";
  private static final String TYPE = "type";
  private static final String KAFKA_CONNECTOR = "kafka_connector";
  static final String INGESTION_METHOD = "snowflake.ingestion.method";
  private static final String DATA = "data";
  private static final String MAX_TASKS = "max_tasks";
  private static final String START_TIME = "start_time";
  private static final String END_TIME = "end_time";
  private static final String APP_NAME = "app_name";
  private static final String TASK_ID = "task_id";
  private static final String ERROR_DETAIL = "error_detail";
  private static final String TIME = "unix_time";
  private static final String VERSION = "version";
  private static final String KAFKA_VERSION = "kafka_version";
  private static final String IS_CHANNEL_CLOSING = "is_channel_closing";
  public static final String JDK_VERSION = "jdk_version";
  public static final String JDK_DISTRIBUTION = "jdk_distribution";
  private static final String TOPICS = "topics";

  // Telemetry instance fetched from JDBC
  private final Telemetry telemetry;

  // Snowflake Kafka connector name defined in JSON
  private String name = null;
  private String taskID = null;

  /**
   * Creates a service whose telemetry client is derived from the given connection.
   *
   * @param conn connection passed to {@code TelemetryClient.createTelemetry}
   */
  public SnowflakeTelemetryService(Connection conn) {
    this.telemetry = TelemetryClient.createTelemetry(conn);
  }

  /**
   * Creates a service around an already constructed telemetry client.
   *
   * @param telemetry client used to batch and send events
   */
  public SnowflakeTelemetryService(Telemetry telemetry) {
    this.telemetry = telemetry;
  }

  /**
   * @param name connector name reported as {@code app_name}
   */
  public void setAppName(String name) {
    this.name = name;
  }

  /**
   * @param taskID task id reported as {@code task_id}
   */
  public void setTaskID(String taskID) {
    this.taskID = taskID;
  }

  /**
   * Reports connector start together with runtime versions and the non-sensitive part of the user
   * configuration.
   *
   * @param startTime start timestamp as supplied by the caller
   * @param userProvidedConfig connector configuration; may be null, in which case no config
   *     properties are attached
   */
  public void reportKafkaConnectStart(
      final long startTime, final Map<String, String> userProvidedConfig) {
    ObjectNode dataObjectNode = getObjectNode();

    String jdkVersion = System.getProperty("java.version");
    String jdkDistribution = System.getProperty("java.vendor");

    dataObjectNode.put(START_TIME, startTime);
    dataObjectNode.put(KAFKA_VERSION, AppInfoParser.getVersion());
    dataObjectNode.put(JDK_VERSION, jdkVersion);
    dataObjectNode.put(JDK_DISTRIBUTION, jdkDistribution);
    addUserConnectorPropertiesToDataNode(userProvidedConfig, dataObjectNode);

    send(TelemetryType.KAFKA_START, dataObjectNode);
  }

  /**
   * Reports connector stop; the end time is taken from {@link System#currentTimeMillis()}.
   *
   * @param startTime start timestamp as supplied by the caller
   */
  public void reportKafkaConnectStop(final long startTime) {
    ObjectNode msg = getObjectNode();

    msg.put(START_TIME, startTime);
    msg.put(END_TIME, System.currentTimeMillis());

    send(TelemetryType.KAFKA_STOP, msg);
  }

  /**
   * Reports a fatal error without channel/table/pipe context.
   *
   * @param errorDetail error description
   */
  public void reportKafkaConnectFatalError(final String errorDetail) {
    // Same payload as the contextual overload with every optional field omitted.
    reportKafkaConnectFatalError(errorDetail, null, null, null);
  }

  /**
   * Reports a fatal error, attaching whichever of the optional context fields are non-null.
   *
   * @param errorDetail error description
   * @param channelName channel name, or null to omit the field
   * @param tableName table name, or null to omit the field
   * @param pipeName pipe name, or null to omit the field
   */
  public void reportKafkaConnectFatalError(
      final String errorDetail,
      final String channelName,
      final String tableName,
      final String pipeName) {
    ObjectNode msg = getObjectNode();

    msg.put(TIME, System.currentTimeMillis());
    msg.put(ERROR_DETAIL, errorDetail);
    if (channelName != null) {
      msg.put(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME, channelName);
    }
    if (tableName != null) {
      msg.put(TelemetryConstants.TABLE_NAME, tableName);
    }
    if (pipeName != null) {
      msg.put(TelemetryConstants.PIPE_NAME, pipeName);
    }

    send(TelemetryType.KAFKA_FATAL_ERROR, msg);
  }

  /**
   * Reports connector's partition usage.
   *
   * @param partitionStatus SnowflakeTelemetryBasicInfo object; its telemetry type is used as the
   *     event type
   * @param isClosing is the underlying channel closing
   */
  public void reportKafkaPartitionUsage(
      final SnowflakeTelemetryBasicInfo partitionStatus, boolean isClosing) {
    ObjectNode msg = getObjectNode();

    partitionStatus.dumpTo(msg);
    msg.put(IS_CHANNEL_CLOSING, isClosing);

    send(partitionStatus.telemetryType, msg);
  }

  /**
   * Reports connector partition start.
   *
   * @param partitionCreation SnowflakeTelemetryBasicInfo object; its telemetry type is used as the
   *     event type
   */
  public void reportKafkaPartitionStart(final SnowflakeTelemetryBasicInfo partitionCreation) {
    ObjectNode msg = getObjectNode();

    partitionCreation.dumpTo(msg);

    send(partitionCreation.telemetryType, msg);
  }

  /** Reports a one-shot SSv1 offset migration attempt and its outcome for a single channel. */
  public void reportSsv1Migration(final SnowflakeTelemetryBasicInfo migration) {
    ObjectNode msg = getObjectNode();
    migration.dumpTo(msg);
    // The event type is fixed here; the telemetry type stored on the argument is not consulted.
    send(TelemetryType.KAFKA_SSV1_MIGRATION, msg);
  }

  /**
   * Creates the default ObjectNode which will be part of every telemetry being sent to Snowflake.
   *
   * <p>Format:
   *
   * <pre>
   * {
   *  "app_name": "<connector_app_name, or "empty_appName" when unset>",
   *  "task_id": "<task_id, or "empty_taskID" when unset>",
   *  "snowflake.ingestion.method": "<IngestionMethodConfig.SNOWPIPE_STREAMING.toString()>"
   * }
   * </pre>
   *
   * @return An ObjectNode which is by default always created with certain defined properties in it.
   */
  ObjectNode getObjectNode() {
    ObjectNode msg = MAPPER.createObjectNode();
    msg.put(APP_NAME, getAppName());
    msg.put(TASK_ID, getTaskID());
    msg.put(INGESTION_METHOD, IngestionMethodConfig.SNOWPIPE_STREAMING.toString());
    return msg;
  }

  /**
   * JsonNode data is wrapped into another ObjectNode which looks like this:
   *
   * <pre>
   *   {
   *   "data": {
   *     "app_name": "<app_name>",
   *     "task_id": "-1"
   *   },
   *   "source": "kafka_connector",
   *   "type": "kafka_start/<One of TelemetryType Enums>",
   *   "version": "snowflake_kc_version"
   * }
   *
   * </pre>
   *
   * <p>Any exception raised while batching or sending is logged and swallowed so that telemetry
   * problems do not surface to callers.
   *
   * @param type type of Data
   * @param data JsonData to wrap in a json field called data
   */
  private void send(TelemetryType type, JsonNode data) {
    ObjectNode msg = MAPPER.createObjectNode();
    msg.put(SOURCE, KAFKA_CONNECTOR);
    msg.put(TYPE, type.toString());
    msg.set(DATA, data);
    msg.put(VERSION, Utils.VERSION); // version number
    try {
      telemetry.addLogToBatch(TelemetryUtil.buildJobData(msg));
      LOGGER.debug("sending telemetry data: {} of type:{}", data.toString(), type.toString());
      telemetry.sendBatchAsync();
    } catch (Exception e) {
      LOGGER.error("Failed to send telemetry data: {}, Error: {}", data.toString(), e.getMessage());
    }
  }

  /** Returns the configured app name, or the placeholder {@code "empty_appName"} when unset. */
  private String getAppName() {
    if (name == null || name.isEmpty()) {
      LOGGER.warn("appName in telemetry service is empty");
      return "empty_appName";
    }
    return name;
  }

  /** Returns the configured task id, or the placeholder {@code "empty_taskID"} when unset. */
  private String getTaskID() {
    if (taskID == null || taskID.isEmpty()) {
      LOGGER.warn("taskID in telemetry service is empty");
      return "empty_taskID";
    }
    return taskID;
  }

  // IMPORTANT: update this set when adding new credential/secret config params.
  private static final Set<String> SENSITIVE_KEYS =
      Set.of(
          KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY,
          KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE,
          KafkaConnectorConfigParams.JVM_PROXY_USERNAME,
          KafkaConnectorConfigParams.JVM_PROXY_PASSWORD,
          KafkaConnectorConfigParams.HTTPS_PROXY_USER,
          KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD,
          KafkaConnectorConfigParams.HTTP_PROXY_USER,
          KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD);

  // Lower-case fragments that mark a key as credential-bearing even when it is not one of the
  // connector's own parameters listed in SENSITIVE_KEYS. The user-provided map can also carry
  // settings that are not declared by this connector (e.g. converter or client-override
  // properties such as "*.basic.auth.user.info" or "*.sasl.jaas.config"), and an exact-match
  // denylist alone would forward those values verbatim.
  private static final Set<String> SENSITIVE_KEY_MARKERS =
      Set.of(
          "password",
          "passphrase",
          "secret",
          "private.key",
          "sasl.jaas.config",
          "basic.auth.user.info");

  /**
   * Decides whether the value stored under {@code key} must be kept out of telemetry.
   *
   * @param key non-null configuration key
   * @return true when the key is listed in {@code SENSITIVE_KEYS} or contains one of the
   *     {@code SENSITIVE_KEY_MARKERS} fragments (case-insensitive)
   */
  private static boolean isSensitiveKey(final String key) {
    if (SENSITIVE_KEYS.contains(key)) {
      return true;
    }
    final String normalizedKey = key.toLowerCase(java.util.Locale.ROOT);
    for (String marker : SENSITIVE_KEY_MARKERS) {
      if (normalizedKey.contains(marker)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Adds user-provided connector config to the telemetry payload, excluding sensitive keys
   * (credentials, passwords). Future config additions are automatically included unless their key
   * is recognised as sensitive.
   *
   * <p>A null map is treated as empty and entries with a null key are skipped: the immutable set
   * returned by {@code Set.of} rejects {@code contains(null)} with a NullPointerException, which
   * would otherwise abort the whole start event.
   *
   * @param userProvidedConfig connector configuration, may be null
   * @param dataObjectNode payload node that receives the non-sensitive entries
   */
  private void addUserConnectorPropertiesToDataNode(
      final Map<String, String> userProvidedConfig, final ObjectNode dataObjectNode) {
    if (userProvidedConfig == null) {
      return;
    }
    for (Map.Entry<String, String> entry : userProvidedConfig.entrySet()) {
      final String key = entry.getKey();
      if (key == null || isSensitiveKey(key)) {
        continue;
      }
      dataObjectNode.put(key, entry.getValue());
    }
  }

  /** Types of telemetry events that can be sent. */
  public enum TelemetryType {
    KAFKA_START("kafka_start"),
    KAFKA_STOP("kafka_stop"),
    KAFKA_FATAL_ERROR("kafka_fatal_error"),
    KAFKA_CHANNEL_USAGE("kafka_channel_usage"),
    KAFKA_CHANNEL_START("kafka_channel_start"),
    KAFKA_SSV1_MIGRATION("kafka_ssv1_migration");

    // Wire name written into the "type" field of the envelope.
    private final String name;

    TelemetryType(String name) {
      this.name = name;
    }

    @Override
    public String toString() {
      return this.name;
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryServiceFactory.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

import java.sql.Connection;

/**
 * Factory class which produces the telemetry service which essentially has a telemetry client
 * instance.
 */
public final class SnowflakeTelemetryServiceFactory {

  // Static entry points only; never instantiated.
  private SnowflakeTelemetryServiceFactory() {}

  /**
   * Starts building a telemetry service on top of the given connection.
   *
   * @param conn snowflake connection handed to the telemetry service
   * @return a new builder
   */
  public static SnowflakeTelemetryServiceBuilder builder(Connection conn) {
    final SnowflakeTelemetryServiceBuilder freshBuilder =
        new SnowflakeTelemetryServiceBuilder(conn);
    return freshBuilder;
  }

  /**
   * Fluent builder for {@link SnowflakeTelemetryService}. The service instance is created as soon
   * as the builder is constructed; the setters configure that same instance.
   */
  public static final class SnowflakeTelemetryServiceBuilder {
    private final SnowflakeTelemetryService telemetryService;

    /**
     * @param conn snowflake connection is required for telemetry service
     */
    SnowflakeTelemetryServiceBuilder(Connection conn) {
      telemetryService = new SnowflakeTelemetryService(conn);
    }

    /**
     * Forwards the connector name to the service under construction.
     *
     * @param name connector name
     * @return this builder, for chaining
     */
    public SnowflakeTelemetryServiceBuilder setAppName(String name) {
      telemetryService.setAppName(name);
      return this;
    }

    /**
     * Forwards the task id to the service under construction.
     *
     * @param taskID taskId
     * @return this builder, for chaining
     */
    public SnowflakeTelemetryServiceBuilder setTaskID(String taskID) {
      telemetryService.setTaskID(taskID);
      return this;
    }

    /**
     * @return the service instance configured through this builder
     */
    public SnowflakeTelemetryService build() {
      return telemetryService;
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/telemetry/TelemetryConstants.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

/**
 * Placeholder for all constants used for Sending information from Connector to Snowflake through
 * Telemetry API
 */
public final class TelemetryConstants {
  // ---- Identification of the reporting connector and its target objects ----
  public static final String CONNECTOR_NAME = "connector_name";
  public static final String DATABASE_NAME = "database_name";
  public static final String SCHEMA_NAME = "schema_name";
  public static final String TABLE_NAME = "table_name";
  public static final String PIPE_NAME = "pipe_name";

  // ---- Timestamps and error description ----
  public static final String START_TIME = "start_time";
  public static final String UNIX_TIME = "unix_time";
  public static final String ERROR_DETAIL = "error_detail";

  // ---- Offsets ----
  public static final String PROCESSED_OFFSET = "processed-offset";
  public static final String OFFSET_PERSISTED_IN_SNOWFLAKE = "persisted-in-snowflake-offset";
  public static final String LATEST_CONSUMER_OFFSET = "latest-consumer-offset";

  // ---- Streaming channel lifecycle ----
  public static final String TOPIC_PARTITION_CHANNEL_NAME = "topic_partition_channel_name";
  public static final String TOPIC_PARTITION_CHANNEL_CREATION_TIME =
      "topic_partition_channel_creation_time";
  public static final String TOPIC_PARTITION_CHANNEL_CLOSE_TIME =
      "topic_partition_channel_close_time";

  // ---- Streaming counters and status fields ----
  public static final String VALIDATION_FAILURE_COUNT = "validation_failure_count";
  public static final String ERROR_TOLERATED_COUNT = "error_tolerated_count";
  public static final String CHANNEL_RECOVERY_COUNT = "channel_recovery_count";
  public static final String VALIDATION_DISABLED = "validation_disabled";
  public static final String ROWS_INSERTED_COUNT = "rows_inserted_count";
  public static final String ROWS_PARSED_COUNT = "rows_parsed_count";
  public static final String ROWS_ERROR_COUNT = "rows_error_count";
  public static final String SERVER_AVG_PROCESSING_LATENCY_MS = "server_avg_processing_latency_ms";
  public static final String STATUS_CODE = "status_code";
  public static final String LAST_ERROR_TIMESTAMP = "last_error_timestamp";
  public static final String LAST_ERROR_OFFSET_TOKEN_UPPER_BOUND =
      "last_error_offset_token_upper_bound";
  public static final String BACKPRESSURE_RETRY_COUNT = "backpressure_retry_count";
  public static final String APPEND_ROW_FALLBACK_COUNT = "append_row_fallback_count";
  public static final String SCHEMA_EVOLUTION_FAILURE_COUNT = "schema_evolution_failure_count";

  // ---- SSv1 offset migration ----
  public static final String SSV1_MIGRATION_MODE = "ssv1_migration_mode";
  public static final String SSV1_MIGRATION_OUTCOME = "ssv1_migration_outcome";
  public static final String SSV1_CHANNEL_NAME = "ssv1_channel_name";
  public static final String SSV1_MIGRATED_OFFSET = "ssv1_migrated_offset";
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/BinaryStringUtils.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/BinaryStringUtils.java
 *
 * Modifications:
 * - Only unicodeCharactersCount() method retained (only method used by validation)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2023 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

public class BinaryStringUtils {
  /**
   * Counts the Unicode code points contained in {@code s}, as opposed to its UTF-16 code units.
   *
   * @param s string to measure
   * @return number of code points over the whole string
   */
  public static int unicodeCharactersCount(String s) {
    final int endExclusive = s.length();
    final int codePoints = s.codePointCount(0, endExclusive);
    return codePoints;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ByteArraySerializer.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/ByteArraySerializer.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021-2022 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import java.io.IOException;

/**
 * Serialize Java byte arrays as JSON arrays of numbers instead of the default Jackson
 * base64-encoding.
 */
public class ByteArraySerializer extends JsonSerializer<byte[]> {
  /**
   * Emits {@code value} as a JSON array with one number per byte, in array order.
   *
   * @param value bytes to write
   * @param gen generator receiving the array
   * @param serializers unused
   * @throws IOException if the generator fails to write
   */
  @Override
  public void serialize(byte[] value, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    gen.writeStartArray();
    int index = 0;
    while (index < value.length) {
      final byte element = value[index];
      gen.writeNumber(element);
      index++;
    }
    gen.writeEndArray();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ColumnLogicalType.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java
 *
 * Modifications:
 * - Extracted ColumnLogicalType enum from AbstractRowBuffer class
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2022-2024 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/** Snowflake table column logical type */
public enum ColumnLogicalType {
  ANY,
  BOOLEAN(1),
  ROWINDEX,
  NULL(15),
  REAL(8),
  FIXED(2),
  TEXT(9),
  CHAR,
  BINARY(10),
  DATE(7),
  TIME(6),
  TIMESTAMP_LTZ(3),
  TIMESTAMP_NTZ(4),
  TIMESTAMP_TZ(5),
  INTERVAL,
  RAW,
  ARRAY(13, true),
  OBJECT(12, true),
  VARIANT(11, true),
  ROW,
  SEQUENCE,
  FUNCTION,
  USER_DEFINED_TYPE,
  ;

  // Marker stored for constants declared without an explicit server side ordinal.
  private static final int INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL = -1;

  // Must stay in sync with the server side scanner; unrelated to Enum#ordinal().
  private final int serverSideOrdinal;
  // True only for the composite types: array, object and variant.
  private final boolean composite;

  /** Constant without a valid server side ordinal; never composite. */
  ColumnLogicalType() {
    this(INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL, false);
  }

  /** Non-composite constant with the given server side ordinal. */
  ColumnLogicalType(int ordinal) {
    this(ordinal, false);
  }

  /** Constant with the given server side ordinal and composite flag. */
  ColumnLogicalType(int ordinal, boolean object) {
    serverSideOrdinal = ordinal;
    composite = object;
  }

  /**
   * Ordinal to encode the data type for the server side scanner
   *
   * <p>currently used for Parquet format
   *
   * @return the declared ordinal, or -1 when the constant declares none
   */
  public int getOrdinal() {
    return serverSideOrdinal;
  }

  /** Whether the data type is a composite type: OBJECT, VARIANT, ARRAY. */
  public boolean isObject() {
    return composite;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ColumnPhysicalType.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/AbstractRowBuffer.java
 *
 * Modifications:
 * - Extracted ColumnPhysicalType enum from AbstractRowBuffer class
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2022-2024 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/** Snowflake table column physical type */
public enum ColumnPhysicalType {
  ROWINDEX(9),
  DOUBLE(7),
  SB1(1),
  SB2(2),
  SB4(3),
  SB8(4),
  SB16(5),
  LOB(8),
  BINARY,
  ROW(10),
  ;

  // Marker stored for constants declared without an explicit server side ordinal.
  private static final int INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL = -1;

  // Must stay in sync with the server side scanner; unrelated to Enum#ordinal().
  private final int serverSideOrdinal;

  /** Constant without a valid server side ordinal. */
  ColumnPhysicalType() {
    serverSideOrdinal = INVALID_SERVER_SIDE_DATA_TYPE_ORDINAL;
  }

  /** Constant with the given server side ordinal. */
  ColumnPhysicalType(int ordinal) {
    serverSideOrdinal = ordinal;
  }

  /**
   * Ordinal to encode the data type for the server side scanner
   *
   * <p>currently used for Parquet format
   *
   * @return the declared ordinal, or -1 when the constant declares none
   */
  public int getOrdinal() {
    return serverSideOrdinal;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ColumnSchema.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 *
 * This file provides integration between SSv1 validation code and KC v4.
 */

package com.snowflake.kafka.connector.internal.validation;

import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * Represents the schema of a Snowflake table column for validation purposes. Constructed from JDBC
 * ResultSet (DESCRIBE TABLE or system function).
 */
public class ColumnSchema {
  /**
   * Maximum byte length for TEXT/VARCHAR columns, matching SSv1 SDK's BYTES_16_MB limit. SSv1 SDK
   * enforces that strings can never be larger than 16MB bytes, even if the VARCHAR character length
   * would theoretically allow more (e.g., VARCHAR(16777216) with 4-byte UTF-8 chars could be 64MB,
   * but is capped at 16MB).
   *
   * @see DataValidationUtil line 721 in SSv1 SDK
   */
  private static final int MAX_LOB_SIZE_BYTES = 16 * 1024 * 1024; // 16,777,216 bytes

  /** Character length assumed for text types whose type string carries no length. */
  private static final int DEFAULT_TEXT_LENGTH = 16777216;

  /** Byte length assumed for binary types whose type string carries no length. */
  private static final int DEFAULT_BINARY_BYTE_LENGTH = 8388608;

  /** Precision assumed for fixed-point types whose type string carries no parameters. */
  private static final int DEFAULT_NUMBER_PRECISION = 38;

  /** Scale assumed for TIME/TIMESTAMP types whose type string carries no scale. */
  private static final int DEFAULT_TEMPORAL_SCALE = 9;

  private final String name;
  private final ColumnLogicalType logicalType;
  private final ColumnPhysicalType physicalType;
  private final boolean nullable;
  private final Integer precision;
  private final Integer scale;
  private final Integer length;
  private final Integer byteLength;
  private final String collation;
  private final boolean hasDefault;
  private final boolean isAutoincrement;

  /** Full constructor with default and autoincrement metadata. */
  public ColumnSchema(
      String name,
      ColumnLogicalType logicalType,
      ColumnPhysicalType physicalType,
      boolean nullable,
      Integer precision,
      Integer scale,
      Integer length,
      Integer byteLength,
      String collation,
      boolean hasDefault,
      boolean isAutoincrement) {
    this.name = name;
    this.logicalType = logicalType;
    this.physicalType = physicalType;
    this.nullable = nullable;
    this.precision = precision;
    this.scale = scale;
    this.length = length;
    this.byteLength = byteLength;
    this.collation = collation;
    this.hasDefault = hasDefault;
    this.isAutoincrement = isAutoincrement;
  }

  /** Backward-compatible constructor (no default/autoincrement metadata). */
  public ColumnSchema(
      String name,
      ColumnLogicalType logicalType,
      ColumnPhysicalType physicalType,
      boolean nullable,
      Integer precision,
      Integer scale,
      Integer length,
      Integer byteLength,
      String collation) {
    this(
        name,
        logicalType,
        physicalType,
        nullable,
        precision,
        scale,
        length,
        byteLength,
        collation,
        false,
        false);
  }

  /**
   * Construct ColumnSchema from DESCRIBE TABLE ResultSet row.
   *
   * <p>Thread-safety: This method is NOT thread-safe. Caller must synchronize if sharing ResultSet.
   *
   * <p>Resource management: Caller is responsible for closing the ResultSet.
   *
   * <p>ResultSet state: Must be positioned at a valid row before calling.
   *
   * <p>The {@code default} and {@code autoincrement} columns are read on a best-effort basis: if
   * reading them raises a SQLException the corresponding flags keep whatever value was established
   * before the failure (initially false).
   *
   * @param rs ResultSet positioned at a DESCRIBE TABLE row (must not be closed)
   * @return ColumnSchema
   * @throws SQLException if column metadata cannot be read or ResultSet is closed/invalid
   * @throws IllegalArgumentException if ResultSet is null or closed, or the type string is
   *     null, empty or malformed
   */
  public static ColumnSchema fromDescribeTableRow(ResultSet rs) throws SQLException {
    if (rs == null || rs.isClosed()) {
      throw new IllegalArgumentException("ResultSet must be open and positioned at a row");
    }

    String name = rs.getString("name");
    String typeStr = rs.getString("type");
    String nullStr = rs.getString("null?");

    boolean hasDefault = false;
    boolean isAutoincrement = false;
    try {
      String defaultVal = rs.getString("default");
      hasDefault = defaultVal != null && !defaultVal.isEmpty();
      String autoinc = rs.getString("autoincrement");
      isAutoincrement = autoinc != null && !autoinc.isEmpty();
    } catch (SQLException e) {
      // default/autoincrement columns not available (e.g., in test mocks)
    }

    return fromDescribeTableFields(name, typeStr, nullStr, hasDefault, isAutoincrement);
  }

  /**
   * Construct ColumnSchema from individual DESCRIBE TABLE fields, without default/autoincrement
   * metadata (both flags are false).
   *
   * @param name Column name
   * @param typeStr Type string (e.g. "NUMBER(38,0)", "VARCHAR(16777216)")
   * @param nullStr Nullable flag ("Y" or "N"); anything other than "Y" means not nullable
   * @return ColumnSchema
   * @throws IllegalArgumentException if the type string is null, empty or malformed
   */
  public static ColumnSchema fromDescribeTableFields(String name, String typeStr, String nullStr) {
    return fromDescribeTableFields(name, typeStr, nullStr, false, false);
  }

  /**
   * Construct ColumnSchema from DESCRIBE TABLE fields including default/autoincrement metadata.
   * The collation is always null because it is not derived from these fields.
   *
   * @param name Column name
   * @param typeStr Type string (e.g. "NUMBER(38,0)", "VARCHAR(16777216)")
   * @param nullStr Nullable flag ("Y" or "N"); anything other than "Y" means not nullable
   * @param hasDefault whether the column declares a default value
   * @param isAutoincrement whether the column is autoincrement
   * @return ColumnSchema
   * @throws IllegalArgumentException if the type string is null, empty or malformed
   */
  public static ColumnSchema fromDescribeTableFields(
      String name, String typeStr, String nullStr, boolean hasDefault, boolean isAutoincrement) {
    boolean nullable = "Y".equals(nullStr);
    TypeInfo typeInfo = parseTypeString(typeStr);

    return new ColumnSchema(
        name,
        typeInfo.logicalType,
        typeInfo.physicalType,
        nullable,
        typeInfo.precision,
        typeInfo.scale,
        typeInfo.length,
        typeInfo.byteLength,
        null,
        hasDefault,
        isAutoincrement);
  }

  /** Mutable holder for the pieces extracted from a type string. */
  private static class TypeInfo {
    ColumnLogicalType logicalType;
    ColumnPhysicalType physicalType;
    Integer precision;
    Integer scale;
    Integer length;
    Integer byteLength;
  }

  /**
   * Parse Snowflake type string (e.g., "NUMBER(38,0)", "VARCHAR(16777216)") into TypeInfo.
   *
   * <p>Unknown base types yield a TypeInfo whose logical and physical types are null.
   *
   * @throws IllegalArgumentException if the string is null/empty, has an unbalanced parenthesis,
   *     carries a non-integer or missing parameter, or describes a structured OBJECT/ARRAY
   */
  private static TypeInfo parseTypeString(String typeStr) {
    if (typeStr == null || typeStr.trim().isEmpty()) {
      throw new IllegalArgumentException("Type string cannot be null or empty");
    }

    TypeInfo info = new TypeInfo();

    // Split "BASE(params)" into its base type and raw parameter text.
    String baseType;
    String params = null;
    String trimmedType = typeStr.trim();
    int parenIdx = trimmedType.indexOf('(');
    if (parenIdx > 0) {
      baseType = trimmedType.substring(0, parenIdx).toUpperCase();
      // Use lastIndexOf to handle nested types like OBJECT(a NUMBER(38,0), b VARCHAR)
      int closeParenIdx = trimmedType.lastIndexOf(')');
      if (closeParenIdx <= parenIdx) {
        throw new IllegalArgumentException(
            "Malformed type string (missing closing parenthesis): " + typeStr);
      }
      params = trimmedType.substring(parenIdx + 1, closeParenIdx).trim();
    } else {
      baseType = trimmedType.toUpperCase();
    }

    switch (baseType) {
      case "NUMBER":
      case "NUMERIC":
      case "DECIMAL":
      case "INT":
      case "INTEGER":
      case "BIGINT":
      case "SMALLINT":
      case "TINYINT":
      case "BYTEINT":
        info.logicalType = ColumnLogicalType.FIXED;
        info.physicalType = ColumnPhysicalType.SB16;
        applyNumberParams(info, params, typeStr);
        break;

      case "FLOAT":
      case "FLOAT4":
      case "FLOAT8":
      case "DOUBLE":
      case "DOUBLE PRECISION":
      case "REAL":
        info.logicalType = ColumnLogicalType.REAL;
        info.physicalType = ColumnPhysicalType.DOUBLE;
        break;

      case "VARCHAR":
      case "STRING":
      case "TEXT":
      case "CHAR":
      case "CHARACTER":
        info.logicalType = ColumnLogicalType.TEXT;
        info.physicalType = ColumnPhysicalType.LOB;
        // NOTE(review): Snowflake documents CHAR/CHARACTER without a length as CHAR(1); DESCRIBE
        // TABLE presumably always reports an explicit length, so this default should rarely be
        // reached - confirm before relying on it for CHAR.
        info.length =
            params != null ? parseIntParam(params, "length", typeStr) : DEFAULT_TEXT_LENGTH;
        // Cap at MAX_LOB_SIZE_BYTES (SSv1 SDK limit: strings never exceed 16MB bytes)
        // Use long to prevent integer overflow if length is corrupted/malformed
        info.byteLength = (int) Math.min(MAX_LOB_SIZE_BYTES, (long) info.length * 4);
        break;

      case "BINARY":
      case "VARBINARY":
        info.logicalType = ColumnLogicalType.BINARY;
        info.physicalType = ColumnPhysicalType.BINARY;
        info.byteLength =
            params != null ? parseIntParam(params, "length", typeStr) : DEFAULT_BINARY_BYTE_LENGTH;
        break;

      case "BOOLEAN":
        info.logicalType = ColumnLogicalType.BOOLEAN;
        info.physicalType = ColumnPhysicalType.SB1;
        break;

      case "DATE":
        info.logicalType = ColumnLogicalType.DATE;
        info.physicalType = ColumnPhysicalType.SB8;
        break;

      case "TIME":
        applyTemporalType(info, ColumnLogicalType.TIME, params, typeStr);
        break;

      case "TIMESTAMP":
      case "DATETIME":
      case "TIMESTAMP_NTZ":
        applyTemporalType(info, ColumnLogicalType.TIMESTAMP_NTZ, params, typeStr);
        break;

      case "TIMESTAMP_LTZ":
        applyTemporalType(info, ColumnLogicalType.TIMESTAMP_LTZ, params, typeStr);
        break;

      case "TIMESTAMP_TZ":
        applyTemporalType(info, ColumnLogicalType.TIMESTAMP_TZ, params, typeStr);
        break;

      case "VARIANT":
        info.logicalType = ColumnLogicalType.VARIANT;
        info.physicalType = ColumnPhysicalType.LOB;
        break;

      case "OBJECT":
        // Reject structured OBJECT types like OBJECT(a INT, b TEXT)
        // SSv1 SDK only supports unstructured OBJECT
        if (params != null && !params.trim().isEmpty()) {
          throw new IllegalArgumentException(
              "Structured OBJECT types are not supported by Snowpipe Streaming. "
                  + "Use unstructured OBJECT instead. Type: "
                  + typeStr);
        }
        info.logicalType = ColumnLogicalType.OBJECT;
        info.physicalType = ColumnPhysicalType.LOB;
        break;

      case "ARRAY":
        // Reject structured ARRAY types like ARRAY(INT)
        // SSv1 SDK only supports unstructured ARRAY
        if (params != null && !params.trim().isEmpty()) {
          throw new IllegalArgumentException(
              "Structured ARRAY types are not supported by Snowpipe Streaming. "
                  + "Use unstructured ARRAY instead. Type: "
                  + typeStr);
        }
        info.logicalType = ColumnLogicalType.ARRAY;
        info.physicalType = ColumnPhysicalType.LOB;
        break;

      default:
        // Unknown type - will be caught by validateSchema
        info.logicalType = null;
        info.physicalType = null;
    }

    return info;
  }

  /**
   * Fills precision and scale of a fixed-point type from its raw parameter text: no parameters
   * means (38, 0), a single parameter is the precision with scale 0, and "p,s" sets both.
   */
  private static void applyNumberParams(TypeInfo info, String params, String typeStr) {
    if (params == null) {
      info.precision = DEFAULT_NUMBER_PRECISION;
      info.scale = 0;
      return;
    }
    if (!params.contains(",")) {
      info.precision = parseIntParam(params, "numeric", typeStr);
      info.scale = 0;
      return;
    }
    String[] parts = params.split(",");
    // String.split drops trailing empty strings, so "38," yields one element and "," yields none;
    // report that as a malformed type instead of failing with an array index error.
    if (parts.length < 2) {
      throw new IllegalArgumentException("Invalid numeric parameter in type string: " + typeStr);
    }
    info.precision = parseIntParam(parts[0], "numeric", typeStr);
    info.scale = parseIntParam(parts[1], "numeric", typeStr);
  }

  /** Sets an SB8-backed temporal type and its scale (default 9 when no parameter is given). */
  private static void applyTemporalType(
      TypeInfo info, ColumnLogicalType logicalType, String params, String typeStr) {
    info.logicalType = logicalType;
    info.physicalType = ColumnPhysicalType.SB8;
    info.scale = params != null ? parseIntParam(params, "scale", typeStr) : DEFAULT_TEMPORAL_SCALE;
  }

  /**
   * Parses one integer type parameter, converting a parse failure into an
   * IllegalArgumentException that names the parameter kind and the offending type string.
   */
  private static int parseIntParam(String raw, String paramKind, String typeStr) {
    try {
      return Integer.parseInt(raw.trim());
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException(
          "Invalid " + paramKind + " parameter in type string: " + typeStr, e);
    }
  }

  public String getName() {
    return name;
  }

  public ColumnLogicalType getLogicalType() {
    return logicalType;
  }

  public ColumnPhysicalType getPhysicalType() {
    return physicalType;
  }

  public boolean isNullable() {
    return nullable;
  }

  public Integer getPrecision() {
    return precision;
  }

  public Integer getScale() {
    return scale;
  }

  public Integer getLength() {
    return length;
  }

  public Integer getByteLength() {
    return byteLength;
  }

  public String getCollation() {
    return collation;
  }

  public boolean hasDefault() {
    return hasDefault;
  }

  public boolean isAutoincrement() {
    return isAutoincrement;
  }

  /** True when the column value is filled by the server (has DEFAULT or is AUTOINCREMENT). */
  public boolean isServerFilled() {
    return hasDefault || isAutoincrement;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DataValidationUtil.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/DataValidationUtil.java
 *
 * Modifications:
 * - Iceberg-specific validation methods removed (not needed for Kafka Connector)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021-2024 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import static com.snowflake.kafka.connector.internal.validation.BinaryStringUtils.unicodeCharactersCount;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.ser.std.ToStringSerializer;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.OffsetTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.Stack;
import java.util.function.Supplier;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.output.StringBuilderWriter;

/** Utility class for parsing and validating inputs based on Snowflake types */
class DataValidationUtil {

  /**
   * Seconds limit used for integer-stored timestamp scale guessing. Value needs to be aligned with
   * the value from {@link SnowflakeDateTimeFormat#parse}. Numerically this is 1000 * 365 * 86400.
   */
  private static final long SECONDS_LIMIT_FOR_EPOCH = 31536000000L;

  /**
   * Milliseconds limit used for integer-stored timestamp scale guessing. Value needs to be aligned
   * with the value from {@link SnowflakeDateTimeFormat#parse}. Derived from the seconds limit.
   */
  private static final long MILLISECONDS_LIMIT_FOR_EPOCH = SECONDS_LIMIT_FOR_EPOCH * 1000L;

  /**
   * Microseconds limit used for integer-stored timestamp scale guessing. Value needs to be aligned
   * with the value from {@link SnowflakeDateTimeFormat#parse}. Derived from the seconds limit.
   */
  private static final long MICROSECONDS_LIMIT_FOR_EPOCH = SECONDS_LIMIT_FOR_EPOCH * 1000000L;

  /** 8 MiB expressed in bytes. */
  public static final int BYTES_8_MB = 8 * 1024 * 1024;

  /** 16 MiB expressed in bytes; used below as the hard upper bound for string values. */
  public static final int BYTES_16_MB = 2 * BYTES_8_MB;

  // TODO SNOW-664249: There is a few-byte mismatch between the value sent by the user and its
  // server-side representation. Validation leaves a small buffer for this difference.
  // Maximum UTF-8 byte length accepted for serialized VARIANT / ARRAY / OBJECT values.
  static final int MAX_SEMI_STRUCTURED_LENGTH = BYTES_16_MB - 64;

  // Shared mapper used for tree parsing (readTree / valueToTree / treeToValue). The custom
  // serializers are registered on it in the static initializer below.
  private static final ObjectMapper objectMapper = new ObjectMapper();

  // Streaming factory used by validateAndParseSemiStructured to copy parser events to a generator.
  private static final JsonFactory factory =
      new JsonFactory()
          // Handle duplicate fields in JSON objects by ourselves
          .configure(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION, false);

  // The version of Jackson we are using does not support serialization of date objects from the
  // java.time package. Here we define a module with custom java.time serializers. Additionally, we
  // define custom serializer for byte[] because the Jackson default is to serialize it as
  // base64-encoded string, and we would like to serialize it as JSON array of numbers.
  static {
    SimpleModule module = new SimpleModule();
    module.addSerializer(byte[].class, new ByteArraySerializer());
    module.addSerializer(ZonedDateTime.class, new ZonedDateTimeSerializer());
    module.addSerializer(LocalTime.class, new ToStringSerializer());
    module.addSerializer(OffsetTime.class, new ToStringSerializer());
    module.addSerializer(LocalDate.class, new ToStringSerializer());
    module.addSerializer(LocalDateTime.class, new ToStringSerializer());
    module.addSerializer(OffsetDateTime.class, new ToStringSerializer());
    // Wrapper type whose serializer performs duplicate-key validation while writing
    module.addSerializer(DuplicateKeyValidatedObject.class, new DuplicateKeyValidatingSerializer());
    objectMapper.registerModule(module);
  }

  // Declared after the static initializer: static initializers run in textual order, so the module
  // above has been registered on objectMapper by the time this writer is obtained from it.
  private static final ObjectWriter objectWriter = objectMapper.writer();

  // Caching the powers of 10 that are used for checking the range of numbers because computing them
  // on-demand is expensive.
  private static final BigDecimal[] POWER_10 = makePower10Table();

  /**
   * Builds the {@link BigDecimal} lookup table of powers of ten by converting every entry of
   * {@code Power10Util.sb16Table}.
   *
   * @return a new array of {@code Power10Util.sb16Size} elements
   */
  private static BigDecimal[] makePower10Table() {
    final int tableSize = Power10Util.sb16Size;
    final BigDecimal[] table = new BigDecimal[tableSize];
    int exponent = 0;
    while (exponent < tableSize) {
      table[exponent] = new BigDecimal(Power10Util.sb16Table[exponent]);
      exponent++;
    }
    return table;
  }

  /**
   * Validates the input and converts it into a Jackson tree. String inputs are checked for valid
   * UTF-8 and parsed as JSON; any other input must satisfy {@link
   * DataValidationUtil#isAllowedSemiStructuredType} and is converted with the shared object mapper.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param snowflakeType Snowflake type name, used in validation error messages
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON tree representing the input
   */
  private static JsonNode validateAndParseSemiStructuredAsJsonTree(
      String columnName, Object input, String snowflakeType, final long insertRowIndex) {
    if (!(input instanceof String)) {
      // Non-string inputs: either convert the object graph or reject the type outright
      if (!isAllowedSemiStructuredType(input)) {
        final String[] supportedTypes = {
          "String",
          "Primitive data types and their arrays",
          "java.time.*",
          "List<T>",
          "Map<String, T>",
          "T[]"
        };
        throw typeNotAllowedException(
            columnName, input.getClass(), snowflakeType, supportedTypes, insertRowIndex);
      }
      return objectMapper.valueToTree(input);
    }

    final String jsonText = (String) input;
    verifyValidUtf8(jsonText, columnName, snowflakeType, insertRowIndex);
    try {
      return objectMapper.readTree(jsonText);
    } catch (JsonProcessingException parseFailure) {
      throw valueFormatNotAllowedException(
          columnName, snowflakeType, "Not a valid JSON", insertRowIndex);
    }
  }

  /**
   * Validates the input and renders it as a minified JSON string. All types in the object tree
   * must be valid variant types, see {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * <p>String inputs are verified to be valid UTF-8 and then streamed token by token from a parser
   * into a generator. Numeric tokens are written using their original text, and field names are
   * checked for duplicates within each JSON object (after stripping trailing nulls). Non-string
   * inputs are serialized through the shared object writer, wrapped in {@code
   * DuplicateKeyValidatedObject}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param snowflakeType Snowflake type name, used in validation error messages
   * @param insertRowIndex Row index, used in validation error messages
   * @return Minified JSON string
   */
  private static String validateAndParseSemiStructured(
      String columnName, Object input, String snowflakeType, final long insertRowIndex) {
    if (input instanceof String) {
      final String stringInput = (String) input;
      verifyValidUtf8(stringInput, columnName, snowflakeType, insertRowIndex);
      final StringBuilderWriter resultWriter = new StringBuilderWriter(stringInput.length());
      // One duplicate detector per currently open JSON object; the innermost object is on top
      final Stack<DuplicateDetector<String>> fieldsByLevel = new Stack<>();
      try (final JsonParser parser = factory.createParser(stringInput);
          final JsonGenerator generator = factory.createGenerator(resultWriter)) {
        while (parser.nextToken() != null) {
          final JsonToken token = parser.currentToken();
          if (token.isNumeric()) {
            // If the current token is a number, we cannot just copy the current event because it
            // would write the token from double (or big decimal), whose scientific notation may
            // have been altered during deserialization. We want to preserve the scientific
            // notation from the user input, so we write the current number as text.
            generator.writeNumber(parser.getText());
          } else {
            // Validates duplicate JSON object fields
            if (token == JsonToken.START_OBJECT) {
              fieldsByLevel.push(new DuplicateDetector<>());
            }
            if (token == JsonToken.END_OBJECT) {
              fieldsByLevel.pop();
            }
            if (token == JsonToken.FIELD_NAME) {
              // We need to strip trailing nulls from the field name to match the behavior of the
              // server side json parser. See SNOW-1772196 for more details.
              String strippedFieldName = Utils.stripTrailingNulls(parser.currentName());
              if (fieldsByLevel.peek().isDuplicate(strippedFieldName)) {
                throw valueFormatNotAllowedException(
                    columnName,
                    snowflakeType,
                    String.format("Not a valid JSON: duplicate field %s", strippedFieldName),
                    insertRowIndex);
              }
            }
            generator.copyCurrentEvent(parser);
          }
        }
      } catch (JsonParseException e) {
        throw valueFormatNotAllowedException(
            columnName, snowflakeType, "Not a valid JSON", insertRowIndex);
      } catch (IOException e) {
        // Throwable#getMessage may return null; guard against it so that the original failure is
        // reported below instead of being masked by a NullPointerException.
        final String message = e.getMessage();
        if (message != null && message.contains("Duplicate field")) {
          throw valueFormatNotAllowedException(
              columnName, snowflakeType, "Not a valid JSON: duplicate field", insertRowIndex);
        }
        throw new SFExceptionValidation(
            e,
            ErrorCode.IO_ERROR,
            String.format(
                "Cannot create JSON Parser or JSON generator for column %s of type %s, rowIndex:%d",
                columnName, snowflakeType, insertRowIndex));
      }
      // We return the minified string from the result writer
      return resultWriter.toString();
    } else if (isAllowedSemiStructuredType(input)) {
      try {
        String result = objectWriter.writeValueAsString(new DuplicateKeyValidatedObject(input));
        verifyValidUtf8(result, columnName, snowflakeType, insertRowIndex);
        return result;
      } catch (JsonProcessingException e) {
        throw valueFormatNotAllowedException(
            columnName, snowflakeType, e.getMessage(), insertRowIndex);
      }
    }

    throw typeNotAllowedException(
        columnName,
        input.getClass(),
        snowflakeType,
        new String[] {
          "String",
          "Primitive data types and their arrays",
          "java.time.*",
          "List<T>",
          "Map<String, T>",
          "T[]"
        },
        insertRowIndex);
  }

  /**
   * Validates the input for a VARIANT column and returns its JSON text. All types in the object
   * tree must be valid variant types, see {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON string representing the input, or {@code null} when the parsed tree is a missing
   *     node
   */
  static String validateAndParseVariant(String columnName, Object input, long insertRowIndex) {
    final JsonNode tree =
        validateAndParseSemiStructuredAsJsonTree(columnName, input, "VARIANT", insertRowIndex);

    // Missing nodes are not valid json, ingest them as NULL instead
    if (tree.isMissingNode()) {
      return null;
    }

    final String json = tree.toString();
    final int utf8Size = json.getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size <= MAX_SEMI_STRUCTURED_LENGTH) {
      return json;
    }
    throw valueFormatNotAllowedException(
        columnName,
        "VARIANT",
        String.format(
            "Variant too long: length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH),
        insertRowIndex);
  }

  /**
   * Validates the input for a VARIANT column and returns it as a plain Java object converted from
   * the parsed JSON tree, rather than as a JSON string like {@link #validateAndParseVariant}. The
   * tree is still rendered to text once so that its UTF-8 size can be checked against the limit.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return Native Java object (Map, List, String, Number, Boolean, or null for missing nodes)
   */
  static Object validateAndParseVariantAsObject(
      String columnName, Object input, long insertRowIndex) {
    final JsonNode tree =
        validateAndParseSemiStructuredAsJsonTree(columnName, input, "VARIANT", insertRowIndex);
    if (tree.isMissingNode()) {
      return null;
    }

    // Size validation is performed on the serialized form of the tree
    final int utf8Size = tree.toString().getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size > MAX_SEMI_STRUCTURED_LENGTH) {
      final String reason =
          String.format(
              "Variant too long: length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH);
      throw valueFormatNotAllowedException(columnName, "VARIANT", reason, insertRowIndex);
    }

    try {
      return objectMapper.treeToValue(tree, Object.class);
    } catch (JsonProcessingException conversionFailure) {
      // Should never happen: node was already validated by validateAndParseSemiStructuredAsJsonTree
      throw new IllegalStateException(
          "Failed to convert validated JsonNode to Object", conversionFailure);
    }
  }

  /**
   * Validates the input for a VARIANT column using the streaming validator and returns the
   * resulting minified JSON text. All types in the object tree must be valid variant types, see
   * {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON string representing the input, or {@code null} when the validated JSON text is
   *     empty
   */
  static String validateAndParseVariantNew(String columnName, Object input, long insertRowIndex) {
    final String json =
        validateAndParseSemiStructured(columnName, input, "VARIANT", insertRowIndex);

    // Empty json strings are ingested as nulls
    if (json.isEmpty()) {
      return null;
    }

    final int utf8Size = json.getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size <= MAX_SEMI_STRUCTURED_LENGTH) {
      return json;
    }
    throw valueFormatNotAllowedException(
        columnName,
        "VARIANT",
        String.format(
            "Variant too long: length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH),
        insertRowIndex);
  }

  /**
   * Checks whether the given object may be ingested into a semi-structured column (VARIANT, ARRAY,
   * OBJECT). Containers (maps, lists, object arrays) are traversed recursively and are accepted
   * only if everything they contain is accepted as well. Accepted Java types:
   *
   * <ul>
   *   <li>{@code null}
   *   <li>String
   *   <li>boxed primitives (Long, Integer, Short, Byte, Float, Double, Boolean, Character)
   *   <li>BigInteger and BigDecimal
   *   <li>LocalTime, OffsetTime, LocalDate, LocalDateTime, ZonedDateTime, OffsetDateTime
   *   <li>Map whose keys are all Strings and whose values are all accepted types
   *   <li>arrays of primitives (byte[], int[], char[], ...)
   *   <li>object arrays whose elements are all accepted types
   *   <li>List whose elements are all accepted types
   * </ul>
   *
   * @param o Object to validate
   * @return {@code true} if the object is allowed for ingestion into a semi-structured column
   */
  static boolean isAllowedSemiStructuredType(Object o) {
    // Null and plain strings are always fine
    if (o == null || o instanceof String) {
      return true;
    }

    // Boxed primitives
    final boolean boxedPrimitive =
        o instanceof Long
            || o instanceof Integer
            || o instanceof Short
            || o instanceof Byte
            || o instanceof Float
            || o instanceof Double
            || o instanceof Boolean
            || o instanceof Character;
    if (boxedPrimitive) {
      return true;
    }

    // Arbitrary-precision numbers
    if (o instanceof BigInteger || o instanceof BigDecimal) {
      return true;
    }

    // Supported java.time types
    final boolean supportedTemporal =
        o instanceof LocalTime
            || o instanceof OffsetTime
            || o instanceof LocalDate
            || o instanceof LocalDateTime
            || o instanceof ZonedDateTime
            || o instanceof OffsetDateTime;
    if (supportedTemporal) {
      return true;
    }

    // Maps: every key must be a String, then every value must itself be allowed
    if (o instanceof Map) {
      final Map<?, ?> mapping = (Map<?, ?>) o;
      for (Object key : mapping.keySet()) {
        if (!(key instanceof String)) {
          return false;
        }
      }
      for (Object value : mapping.values()) {
        if (!isAllowedSemiStructuredType(value)) {
          return false;
        }
      }
      return true;
    }

    // Arrays: primitive arrays are accepted as-is, object arrays element by element
    final Class<?> runtimeType = o.getClass();
    if (runtimeType.isArray()) {
      if (runtimeType.getComponentType().isPrimitive()) {
        return true;
      }
      for (Object element : (Object[]) o) {
        if (!isAllowedSemiStructuredType(element)) {
          return false;
        }
      }
      return true;
    }

    // Lists: accepted when every element is allowed
    if (o instanceof List) {
      for (Object element : (List<?>) o) {
        if (!isAllowedSemiStructuredType(element)) {
          return false;
        }
      }
      return true;
    }

    // Anything else is rejected
    return false;
  }

  /**
   * Validates the input for an ARRAY column and returns its JSON text. Values that do not parse to
   * a JSON array are wrapped into a single-element array. All types in the array tree must be valid
   * variant types, see {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON array representing the input
   */
  static String validateAndParseArray(String columnName, Object input, long insertRowIndex) {
    final JsonNode parsed =
        validateAndParseSemiStructuredAsJsonTree(columnName, input, "ARRAY", insertRowIndex);

    // Non-array values are ingested as single-element arrays, mimicking the Worksheets behavior
    final JsonNode arrayNode =
        parsed.isArray() ? parsed : objectMapper.createArrayNode().add(parsed);

    final String json = arrayNode.toString();
    final int utf8Size = json.getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size <= MAX_SEMI_STRUCTURED_LENGTH) {
      return json;
    }
    // The serialized array exceeds the allowed size
    throw valueFormatNotAllowedException(
        columnName,
        "ARRAY",
        String.format(
            "Array too large. length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH),
        insertRowIndex);
  }

  /**
   * Validates the input for an ARRAY column and returns it as a Java {@link List} converted from
   * the parsed JSON tree, rather than as a JSON string like {@link #validateAndParseArray}. Values
   * that do not parse to a JSON array are wrapped into a single-element array. The tree is still
   * rendered to text once so that its UTF-8 size can be checked against the limit.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return Native Java List
   */
  @SuppressWarnings("unchecked")
  static List<Object> validateAndParseArrayAsList(
      String columnName, Object input, long insertRowIndex) {
    final JsonNode parsed =
        validateAndParseSemiStructuredAsJsonTree(columnName, input, "ARRAY", insertRowIndex);
    final JsonNode arrayNode =
        parsed.isArray() ? parsed : objectMapper.createArrayNode().add(parsed);

    // Size validation is performed on the serialized form of the array
    final int utf8Size = arrayNode.toString().getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size > MAX_SEMI_STRUCTURED_LENGTH) {
      final String reason =
          String.format(
              "Array too large. length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH);
      throw valueFormatNotAllowedException(columnName, "ARRAY", reason, insertRowIndex);
    }

    try {
      return objectMapper.treeToValue(arrayNode, List.class);
    } catch (JsonProcessingException conversionFailure) {
      // Should never happen: node was already validated by validateAndParseSemiStructuredAsJsonTree
      throw new IllegalStateException(
          "Failed to convert validated JsonNode to List", conversionFailure);
    }
  }

  /**
   * Validates the input for an ARRAY column using the streaming validator and returns JSON array
   * text. Empty validated text becomes an array holding a single null; text that does not start
   * with the array-open token is wrapped into a single-element array. All types in the array tree
   * must be valid variant types, see {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON array representing the input
   */
  static String validateAndParseArrayNew(String columnName, Object input, long insertRowIndex) {
    final String parsed = validateAndParseSemiStructured(columnName, input, "ARRAY", insertRowIndex);
    final String arrayOpen = JsonToken.START_ARRAY.asString();
    final String arrayClose = JsonToken.END_ARRAY.asString();

    final String arrayJson;
    if (parsed.isEmpty()) {
      // Empty input is ingested as an array of null
      arrayJson = arrayOpen + JsonToken.VALUE_NULL.asString() + arrayClose;
    } else if (parsed.startsWith(arrayOpen)) {
      arrayJson = parsed;
    } else {
      // Non-array values are ingested as single-element arrays, mimicking the Worksheets behavior
      arrayJson = arrayOpen + parsed + arrayClose;
    }

    final int utf8Size = arrayJson.getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size <= MAX_SEMI_STRUCTURED_LENGTH) {
      return arrayJson;
    }
    // The serialized array exceeds the allowed size
    throw valueFormatNotAllowedException(
        columnName,
        "ARRAY",
        String.format(
            "Array too large. length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH),
        insertRowIndex);
  }

  /**
   * Validates the input for an OBJECT column and returns its JSON text. The input is rejected
   * unless it parses to a JSON object (e.g. String '{}' or Map<String, T>). All types in the object
   * tree must be valid variant types, see {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON object representing the input
   */
  static String validateAndParseObject(String columnName, Object input, long insertRowIndex) {
    final JsonNode tree =
        validateAndParseSemiStructuredAsJsonTree(columnName, input, "OBJECT", insertRowIndex);
    if (!tree.isObject()) {
      throw valueFormatNotAllowedException(columnName, "OBJECT", "Not an object", insertRowIndex);
    }

    final String json = tree.toString();
    final int utf8Size = json.getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size <= MAX_SEMI_STRUCTURED_LENGTH) {
      return json;
    }
    // The serialized object exceeds the allowed size
    throw valueFormatNotAllowedException(
        columnName,
        "OBJECT",
        String.format(
            "Object too large. length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH),
        insertRowIndex);
  }

  /**
   * Validates the input for an OBJECT column using the streaming validator and returns its JSON
   * text. The input is rejected unless the validated text starts with the object-open token (e.g.
   * String '{}' or Map<String, T>). All types in the object tree must be valid variant types, see
   * {@link DataValidationUtil#isAllowedSemiStructuredType}.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate
   * @param insertRowIndex Row index, used in validation error messages
   * @return JSON object representing the input
   */
  static String validateAndParseObjectNew(String columnName, Object input, long insertRowIndex) {
    final String json = validateAndParseSemiStructured(columnName, input, "OBJECT", insertRowIndex);
    final boolean looksLikeObject = json.startsWith(JsonToken.START_OBJECT.asString());
    if (!looksLikeObject) {
      throw valueFormatNotAllowedException(columnName, "OBJECT", "Not an object", insertRowIndex);
    }

    final int utf8Size = json.getBytes(StandardCharsets.UTF_8).length;
    if (utf8Size <= MAX_SEMI_STRUCTURED_LENGTH) {
      return json;
    }
    // The serialized object exceeds the allowed size
    throw valueFormatNotAllowedException(
        columnName,
        "OBJECT",
        String.format(
            "Object too large. length=%d maxLength=%d", utf8Size, MAX_SEMI_STRUCTURED_LENGTH),
        insertRowIndex);
  }

  /**
   * Converts user input to {@link OffsetDateTime}, the canonical representation of dates and
   * timestamps used by this class.
   *
   * <p>java.time objects are converted directly; values without zone information are interpreted
   * in {@code defaultTimezone}, and {@link Instant} values are interpreted in UTC. Strings are
   * trimmed and then tried, in order, as ZonedDateTime, OffsetDateTime, LocalDateTime, LocalDate
   * and finally as an integer-stored timestamp (interpreted in UTC).
   *
   * @param columnName Column name, used in validation error messages
   * @param typeName Snowflake type name, used in validation error messages
   * @param input Value to convert
   * @param defaultTimezone Zone applied to inputs that carry no zone information
   * @param insertRowIndex Row index, used in validation error messages
   * @return the converted date-time
   */
  private static OffsetDateTime inputToOffsetDateTime(
      String columnName,
      String typeName,
      Object input,
      ZoneId defaultTimezone,
      final long insertRowIndex) {
    if (input instanceof OffsetDateTime) {
      return (OffsetDateTime) input;
    }
    if (input instanceof ZonedDateTime) {
      final ZonedDateTime zonedInput = (ZonedDateTime) input;
      return zonedInput.toOffsetDateTime();
    }
    if (input instanceof LocalDateTime) {
      final LocalDateTime localInput = (LocalDateTime) input;
      return localInput.atZone(defaultTimezone).toOffsetDateTime();
    }
    if (input instanceof LocalDate) {
      final LocalDate dateInput = (LocalDate) input;
      return dateInput.atStartOfDay().atZone(defaultTimezone).toOffsetDateTime();
    }
    if (input instanceof Instant) {
      // Just like integer-stored timestamps, instants are always interpreted in UTC
      final Instant instantInput = (Instant) input;
      return instantInput.atZone(ZoneOffset.UTC).toOffsetDateTime();
    }

    if (!(input instanceof String)) {
      // Type is not supported, throw an exception
      final String[] supportedTypes = {
        "String", "LocalDate", "LocalDateTime", "ZonedDateTime", "OffsetDateTime"
      };
      throw typeNotAllowedException(
          columnName, input.getClass(), typeName, supportedTypes, insertRowIndex);
    }

    final String text = ((String) input).trim();

    // First, try to parse ZonedDateTime
    final ZonedDateTime asZoned = catchParsingError(() -> ZonedDateTime.parse(text));
    if (asZoned != null) {
      return asZoned.toOffsetDateTime();
    }

    // Next, try to parse OffsetDateTime
    final OffsetDateTime asOffset = catchParsingError(() -> OffsetDateTime.parse(text));
    if (asOffset != null) {
      return asOffset;
    }

    // Alternatively, try to parse LocalDateTime
    final LocalDateTime asLocalDateTime = catchParsingError(() -> LocalDateTime.parse(text));
    if (asLocalDateTime != null) {
      return asLocalDateTime.atZone(defaultTimezone).toOffsetDateTime();
    }

    // Alternatively, try to parse LocalDate
    final LocalDate asLocalDate = catchParsingError(() -> LocalDate.parse(text));
    if (asLocalDate != null) {
      return asLocalDate.atStartOfDay().atZone(defaultTimezone).toOffsetDateTime();
    }

    // Alternatively, try to parse integer-stored timestamp
    // Just like in Snowflake, integer-stored timestamps are always in UTC
    final Instant asInstant = catchParsingError(() -> parseInstantGuessScale(text));
    if (asInstant != null) {
      return asInstant.atOffset(ZoneOffset.UTC);
    }

    // Couldn't parse anything, throw an exception
    throw valueFormatNotAllowedException(
        columnName,
        typeName,
        "Not a valid value, see"
            + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview"
            + " for the list of supported formats",
        insertRowIndex);
  }

  /**
   * Runs the given parsing operation and converts a parse failure into a {@code null} result.
   *
   * @param op Operation to run
   * @return the operation's result, or {@code null} if it threw {@link DateTimeParseException} or
   *     {@link NumberFormatException}; any other exception propagates to the caller
   */
  private static <T> T catchParsingError(Supplier<T> op) {
    T parsed = null;
    try {
      parsed = op.get();
    } catch (DateTimeParseException | NumberFormatException ignored) {
      // Parsing failed: leave the result as null so the caller can try the next format
    }
    return parsed;
  }

  /**
   * Validates and parses input for TIMESTAMP_NTZ, TIMESTAMP_LTZ and TIMESTAMP_TZ Snowflake types.
   * Allowed Java types:
   *
   * <ul>
   *   <li>String
   *   <li>Integer and Long (converted to their decimal string form and parsed like a string)
   *   <li>LocalDate
   *   <li>LocalDateTime
   *   <li>OffsetDateTime
   *   <li>ZonedDateTime
   * </ul>
   *
   * @param columnName Column name, used in validation error messages
   * @param input String date in valid format, seconds past the epoch or java.time.* object
   * @param scale decimal scale of the timestamp, passed through to the returned wrapper
   * @param defaultTimezone Input which does not carry timezone information is going to be
   *     interpreted in the default timezone.
   * @param trimTimezone Whether timezone information should be removed from the resulting date,
   *     should be true for TIMESTAMP_NTZ columns.
   * @param insertRowIndex Row index, used in validation error messages
   * @return TimestampWrapper holding the parsed date-time and the given scale
   */
  static TimestampWrapper validateAndParseTimestamp(
      String columnName,
      Object input,
      int scale,
      ZoneId defaultTimezone,
      boolean trimTimezone,
      long insertRowIndex) {
    // Whole-number epoch values (Integer, Long) are routed through the same scale-guessing logic
    // that is used for string-encoded epochs. Other numeric types are not converted here and are
    // left for the type check in inputToOffsetDateTime.
    final boolean wholeNumberEpoch = input instanceof Integer || input instanceof Long;
    final Object normalizedInput = wholeNumberEpoch ? input.toString() : input;

    OffsetDateTime resolved =
        inputToOffsetDateTime(
            columnName, "TIMESTAMP", normalizedInput, defaultTimezone, insertRowIndex);
    if (trimTimezone) {
      // Keep the local date-time fields and pin the offset to UTC
      resolved = resolved.withOffsetSameLocal(ZoneOffset.UTC);
    }

    final int year = resolved.getYear();
    if (year >= 1 && year <= 9999) {
      return new TimestampWrapper(resolved, scale);
    }
    throw new SFExceptionValidation(
        ErrorCode.INVALID_VALUE_ROW,
        String.format(
            "Timestamp out of representable inclusive range of years between 1 and 9999,"
                + " rowIndex:%d, column:%s, value:%s",
            insertRowIndex, columnName, resolved));
  }

  /**
   * Validates a timestamp value and returns it as an ISO-formatted string. Unlike {@link
   * #validateAndParseTimestamp}, which returns a {@link TimestampWrapper}, this method returns the
   * {@code toString()} form of the resolved date-time and takes no {@code scale} parameter.
   *
   * <p>Integer and Long inputs are converted to their decimal string form and parsed like a
   * string; all other inputs are handed to the shared conversion unchanged.
   *
   * @param columnName Column name, used in error messages
   * @param input Timestamp value (Integer, Long, String, or java.time.* object)
   * @param defaultTimezone Timezone for inputs without timezone info
   * @param trimTimezone true for TIMESTAMP_NTZ (strip timezone), false for LTZ/TZ
   * @param insertRowIndex Row index for error messages
   * @return ISO timestamp string (e.g., "2024-01-15T10:00" for NTZ, "2024-01-15T10:00Z" for LTZ)
   */
  static String validateAndFormatTimestamp(
      String columnName,
      Object input,
      ZoneId defaultTimezone,
      boolean trimTimezone,
      long insertRowIndex) {
    final boolean wholeNumberEpoch = input instanceof Integer || input instanceof Long;
    final Object normalizedInput = wholeNumberEpoch ? input.toString() : input;

    OffsetDateTime resolved =
        inputToOffsetDateTime(
            columnName, "TIMESTAMP", normalizedInput, defaultTimezone, insertRowIndex);
    if (trimTimezone) {
      // Keep the local date-time fields and pin the offset to UTC
      resolved = resolved.withOffsetSameLocal(ZoneOffset.UTC);
    }

    final int year = resolved.getYear();
    if (year < 1 || year > 9999) {
      throw new SFExceptionValidation(
          ErrorCode.INVALID_VALUE_ROW,
          String.format(
              "Timestamp out of representable inclusive range of years between 1 and 9999,"
                  + " rowIndex:%d, column:%s, value:%s",
              insertRowIndex, columnName, resolved));
    }

    // Without timezone the local date-time form is emitted, otherwise the offset form
    if (trimTimezone) {
      return resolved.toLocalDateTime().toString();
    }
    return resolved.toString();
  }

  /**
   * Converts input to string, validates that length is less than max allowed string size
   * https://docs.snowflake.com/en/sql-reference/data-types-text.html#varchar. Allowed data types:
   *
   * <ul>
   *   <li>String (must be valid UTF-8)
   *   <li>Number (rendered in plain, non-scientific notation with trailing zeros stripped)
   *   <li>boolean
   *   <li>char
   * </ul>
   *
   * <p>Numbers whose string form cannot be parsed as a decimal (for example a non-finite float or
   * double such as NaN or Infinity) are rejected with a value-format validation error rather than
   * letting a raw {@link NumberFormatException} escape.
   *
   * @param columnName Column name, used in validation error messages
   * @param input Object to validate and parse to String
   * @param maxLengthOptional Maximum allowed length of the output String, if empty then uses
   *     maximum allowed by Snowflake
   *     (https://docs.snowflake.com/en/sql-reference/data-types-text.html#varchar)
   * @param insertRowIndex Row index, used in validation error messages
   * @return the validated string form of the input
   */
  static String validateAndParseString(
      String columnName, Object input, Optional<Integer> maxLengthOptional, long insertRowIndex) {
    String output;
    if (input instanceof String) {
      output = (String) input;
      verifyValidUtf8(output, columnName, "STRING", insertRowIndex);
    } else if (input instanceof Number) {
      final BigDecimal numericValue;
      try {
        numericValue = new BigDecimal(input.toString());
      } catch (NumberFormatException e) {
        // BigDecimal cannot represent NaN / Infinity (or any other non-decimal text a Number
        // subclass may produce); report it as a validation failure for this row.
        throw valueFormatNotAllowedException(
            columnName, "STRING", "Not a valid number", insertRowIndex);
      }
      output = numericValue.stripTrailingZeros().toPlainString();
    } else if (input instanceof Boolean || input instanceof Character) {
      output = input.toString();
    } else {
      throw typeNotAllowedException(
          columnName,
          input.getClass(),
          "STRING",
          new String[] {"String", "Number", "boolean", "char"},
          insertRowIndex);
    }
    byte[] utf8Bytes = output.getBytes(StandardCharsets.UTF_8);

    // Strings can never be larger than 16MB
    if (utf8Bytes.length > BYTES_16_MB) {
      throw valueFormatNotAllowedException(
          columnName,
          "STRING",
          String.format(
              "String too long: length=%d bytes maxLength=%d bytes", utf8Bytes.length, BYTES_16_MB),
          insertRowIndex);
    }

    // If max allowed length is specified (e.g. VARCHAR(10)), the number of unicode characters must
    // not exceed this value
    maxLengthOptional.ifPresent(
        maxAllowedCharacters -> {
          int actualCharacters = unicodeCharactersCount(output);
          if (actualCharacters > maxAllowedCharacters) {
            throw valueFormatNotAllowedException(
                columnName,
                "STRING",
                String.format(
                    "String too long: length=%d characters maxLength=%d characters",
                    actualCharacters, maxAllowedCharacters),
                insertRowIndex);
          }
        });
    return output;
  }

  /**
   * Returns a BigDecimal representation of the input. Strings of the form "1.23E4" will be treated
   * as being written in scientific notation (e.g. 1.23 * 10^4). Does not perform any size
   * validation. Allowed Java types:
   * <li>byte, short, int, long
   * <li>float, double
   * <li>BigInteger, BigDecimal
   * <li>String
   */
  static BigDecimal validateAndParseBigDecimal(
      String columnName, Object input, long insertRowIndex) {
    // Arbitrary-precision inputs are passed through or wrapped without loss.
    if (input instanceof BigDecimal) {
      return (BigDecimal) input;
    }
    if (input instanceof BigInteger) {
      return new BigDecimal((BigInteger) input);
    }

    // All integral primitives fit into a long.
    boolean isIntegral =
        input instanceof Byte
            || input instanceof Short
            || input instanceof Integer
            || input instanceof Long;
    if (isIntegral) {
      long integralValue = ((Number) input).longValue();
      return BigDecimal.valueOf(integralValue);
    }

    if (input instanceof Float || input instanceof Double) {
      double floatingValue = ((Number) input).doubleValue();
      try {
        return BigDecimal.valueOf(floatingValue);
      } catch (NumberFormatException e) {
        /* NaN and infinity are not allowed */
        throw valueFormatNotAllowedException(
            columnName, "NUMBER", "Not a valid number", insertRowIndex);
      }
    }

    if (input instanceof String) {
      // Surrounding whitespace is tolerated; anything else must be a valid decimal literal.
      String trimmed = ((String) input).trim();
      try {
        return new BigDecimal(trimmed);
      } catch (NumberFormatException e) {
        throw valueFormatNotAllowedException(
            columnName, "NUMBER", "Not a valid number", insertRowIndex);
      }
    }

    throw typeNotAllowedException(
        columnName,
        input.getClass(),
        "NUMBER",
        new String[] {
          "int", "long", "byte", "short", "float", "double", "BigDecimal", "BigInteger", "String"
        },
        insertRowIndex);
  }

  /**
   * Returns the number of days between the epoch and the passed date. Allowed Java types:
   *
   * <ul>
   *   <li>String
   *   <li>{@link LocalDate}
   *   <li>{@link LocalDateTime}
   *   <li>{@link OffsetDateTime}
   *   <li>{@link ZonedDateTime}
   *   <li>{@link Instant}
   * </ul>
   */
  static int validateAndParseDate(String columnName, Object input, long insertRowIndex) {
    // Inputs without an explicit zone are interpreted in UTC for DATE columns.
    OffsetDateTime parsed =
        inputToOffsetDateTime(columnName, "DATE", input, ZoneOffset.UTC, insertRowIndex);

    int year = parsed.getYear();
    boolean yearIsRepresentable = year >= -9999 && year <= 9999;
    if (!yearIsRepresentable) {
      String message =
          String.format(
              "Date out of representable inclusive range of years between -9999 and 9999,"
                  + " rowIndex:%d, column:%s, value:%s",
              insertRowIndex, columnName, parsed);
      throw new SFExceptionValidation(ErrorCode.INVALID_VALUE_ROW, message);
    }

    // toIntExact guards the narrowing from long to int.
    long daysSinceEpoch = parsed.toLocalDate().toEpochDay();
    return Math.toIntExact(daysSinceEpoch);
  }

  /**
   * Validates input for data type BINARY. Allowed Java types:
   *
   * <ul>
   *   <li>byte[]
   *   <li>String (hex-encoded)
   * </ul>
   *
   * @param input Array to validate
   * @param maxLengthOptional Max array length, defaults to 8MB, which is the max allowed length for
   *     BINARY column
   * @param insertRowIndex
   * @return Validated array
   */
  static byte[] validateAndParseBinary(
      String columnName, Object input, Optional<Integer> maxLengthOptional, long insertRowIndex) {
    final byte[] bytes;
    if (input instanceof byte[]) {
      // Arrays are mutable: work on a private copy so that later changes made by the caller
      // cannot cause a mismatch between the data and the metadata derived from it.
      bytes = ((byte[]) input).clone();
    } else if (input instanceof String) {
      // Strings are interpreted as hex, ignoring surrounding whitespace.
      String hex = ((String) input).trim();
      try {
        bytes = Hex.decodeHex(hex);
      } catch (DecoderException e) {
        throw valueFormatNotAllowedException(
            columnName, "BINARY", "Not a valid hex string", insertRowIndex);
      }
    } else {
      throw typeNotAllowedException(
          columnName,
          input.getClass(),
          "BINARY",
          new String[] {"byte[]", "String"},
          insertRowIndex);
    }

    // Fall back to the 8MB limit when the caller does not specify a maximum length.
    int limit = maxLengthOptional.orElse(BYTES_8_MB);
    if (bytes.length > limit) {
      String reason = String.format("Binary too long: length=%d maxLength=%d", bytes.length, limit);
      throw valueFormatNotAllowedException(columnName, "BINARY", reason, insertRowIndex);
    }
    return bytes;
  }

  /**
   * Returns the number of units since 00:00, depending on the scale (scale=0: seconds, scale=3:
   * milliseconds, scale=9: nanoseconds). Allowed Java types:
   *
   * <ul>
   *   <li>String
   *   <li>{@link LocalTime}
   *   <li>{@link OffsetTime}
   * </ul>
   */
  static BigInteger validateAndParseTime(
      String columnName, Object input, int scale, long insertRowIndex) {
    if (input instanceof LocalTime) {
      // Base case: nanoseconds since midnight, reduced to the requested scale.
      long nanoOfDay = ((LocalTime) input).toNanoOfDay();
      return BigInteger.valueOf(nanoOfDay).divide(Power10Util.sb16Table[9 - scale]);
    }

    if (input instanceof OffsetTime) {
      // The offset is dropped; only the local time-of-day part is used.
      LocalTime withoutOffset = ((OffsetTime) input).toLocalTime();
      return validateAndParseTime(columnName, withoutOffset, scale, insertRowIndex);
    }

    if (input instanceof String) {
      String trimmed = ((String) input).trim();

      // Attempt 1: plain local time.
      LocalTime asLocalTime = catchParsingError(() -> LocalTime.parse(trimmed));
      if (asLocalTime != null) {
        return validateAndParseTime(columnName, asLocalTime, scale, insertRowIndex);
      }

      // Attempt 2: time with an offset.
      OffsetTime asOffsetTime = catchParsingError((() -> OffsetTime.parse(trimmed)));
      if (asOffsetTime != null) {
        return validateAndParseTime(
            columnName, asOffsetTime.toLocalTime(), scale, insertRowIndex);
      }

      // Attempt 3: integer-stored time, read as an instant and reduced to its UTC time of day.
      Instant asInstant = catchParsingError(() -> parseInstantGuessScale(trimmed));
      if (asInstant != null) {
        LocalTime utcTimeOfDay = LocalDateTime.ofInstant(asInstant, ZoneOffset.UTC).toLocalTime();
        return validateAndParseTime(columnName, utcTimeOfDay, scale, insertRowIndex);
      }

      throw valueFormatNotAllowedException(
          columnName,
          "TIME",
          "Not a valid time, see"
              + " https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview"
              + " for the list of supported formats",
          insertRowIndex);
    }

    throw typeNotAllowedException(
        columnName,
        input.getClass(),
        "TIME",
        new String[] {"String", "LocalTime", "OffsetTime"},
        insertRowIndex);
  }

  /**
   * Attempts to parse integer-stored date from string input. Tries to guess the scale according to
   * the rules documented at
   * https://docs.snowflake.com/en/user-guide/date-time-input-output.html#auto-detection-of-integer-stored-date-time-and-timestamp-values.
   *
   * @param input String to parse, must represent a valid long
   * @return Instant representing the input
   * @throws NumberFormatException If the input is not a valid integer
   */
  private static Instant parseInstantGuessScale(String input) {
    BigInteger nanosSinceEpoch;
    try {
      long value = Long.parseLong(input);

      // Choose the power of ten that converts the value to nanoseconds, based on its magnitude:
      // seconds -> 10^9, milliseconds -> 10^6, microseconds -> 10^3, nanoseconds -> 10^0.
      int exponent;
      if (-SECONDS_LIMIT_FOR_EPOCH < value && value < SECONDS_LIMIT_FOR_EPOCH) {
        exponent = 9;
      } else if (-MILLISECONDS_LIMIT_FOR_EPOCH < value && value < MILLISECONDS_LIMIT_FOR_EPOCH) {
        exponent = 6;
      } else if (-MICROSECONDS_LIMIT_FOR_EPOCH < value && value < MICROSECONDS_LIMIT_FOR_EPOCH) {
        exponent = 3;
      } else {
        exponent = 0;
      }
      nanosSinceEpoch = BigInteger.valueOf(value).multiply(Power10Util.sb16Table[exponent]);
    } catch (NumberFormatException e) {
      // The input is bigger than max long value, treat it as nano-seconds directly
      nanosSinceEpoch = new BigInteger(input);
    }

    // Split into whole seconds and the nanosecond remainder.
    BigInteger[] secondsAndNanos = nanosSinceEpoch.divideAndRemainder(Power10Util.sb16Table[9]);
    return Instant.ofEpochSecond(secondsAndNanos[0].longValue(), secondsAndNanos[1].longValue());
  }

  /**
   * Converts input to double value. Allowed Java types:
   *
   * <ul>
   *   <li>Number
   *   <li>String
   * </ul>
   *
   * @param input
   * @param insertRowIndex
   */
  static double validateAndParseReal(String columnName, Object input, long insertRowIndex) {
    if (input instanceof Float) {
      // Go through the decimal string form so that the float's shortest decimal representation is
      // what gets parsed, instead of widening the binary value directly.
      return Double.parseDouble(input.toString());
    } else if (input instanceof Number) {
      return ((Number) input).doubleValue();
    } else if (input instanceof String) {
      String stringInput = ((String) input).trim();
      try {
        return Double.parseDouble(stringInput);
      } catch (NumberFormatException err) {
        // Fallback for the case-insensitive special values "nan", "inf" and "-inf". Lower-casing
        // must be locale-independent: with the default locale set to e.g. Turkish, "INF" would
        // lower-case to a string with a dotless i and be rejected.
        String lowerCased = stringInput.toLowerCase(java.util.Locale.ROOT);
        switch (lowerCased) {
          case "nan":
            return Double.NaN;
          case "inf":
            return Double.POSITIVE_INFINITY;
          case "-inf":
            return Double.NEGATIVE_INFINITY;
          default:
            throw valueFormatNotAllowedException(
                columnName, "REAL", "Not a valid decimal number", insertRowIndex);
        }
      }
    }
    throw typeNotAllowedException(
        columnName, input.getClass(), "REAL", new String[] {"Number", "String"}, insertRowIndex);
  }

  /**
   * Converts input to the integer encoding of a boolean: 1 for true, 0 for false. Allowed Java
   * types:
   *
   * <ul>
   *   <li>boolean
   *   <li>Number (zero maps to 0, any other value maps to 1)
   *   <li>String (one of the accepted boolean literals, case-insensitive)
   * </ul>
   *
   * @param columnName Column name, used in error messages
   * @param input Object to validate and convert
   * @param insertRowIndex Row index, used in error messages
   * @return 1 if the input represents true, 0 if it represents false
   */
  static int validateAndParseBoolean(String columnName, Object input, long insertRowIndex) {
    if (input instanceof Boolean) {
      return (boolean) input ? 1 : 0;
    } else if (input instanceof Number) {
      try {
        // Comparing as BigDecimal treats 0, 0.0 and 0E+3 alike as zero.
        return new BigDecimal(input.toString()).compareTo(BigDecimal.ZERO) == 0 ? 0 : 1;
      } catch (NumberFormatException e) {
        // NaN and infinity have no BigDecimal representation; report them as an invalid value
        // instead of leaking a raw NumberFormatException, as the NUMBER parser does.
        throw valueFormatNotAllowedException(
            columnName, "BOOLEAN", "Not a valid number", insertRowIndex);
      }
    } else if (input instanceof String) {
      return convertStringToBoolean(columnName, (String) input, insertRowIndex) ? 1 : 0;
    }

    throw typeNotAllowedException(
        columnName,
        input.getClass(),
        "BOOLEAN",
        new String[] {"boolean", "Number", "String"},
        insertRowIndex);
  }

  /**
   * Verifies that the absolute value of a number is strictly smaller than 10^(precision - scale).
   *
   * @param columnName Column name, used in the error message
   * @param bigDecimalValue Value to check
   * @param scale Scale of the target column
   * @param precision Precision of the target column
   * @param insertRowIndex Row index, used in the error message
   * @throws SFExceptionValidation with {@link ErrorCode#INVALID_FORMAT_ROW} if the value is out of
   *     range
   */
  static void checkValueInRange(
      String columnName,
      BigDecimal bigDecimalValue,
      int scale,
      int precision,
      final long insertRowIndex) {
    int integerDigits = precision - scale;

    // Use the precomputed table when the exponent is a valid index, otherwise compute the power.
    BigDecimal exclusiveBound;
    if (precision >= scale && integerDigits < POWER_10.length) {
      exclusiveBound = POWER_10[integerDigits];
    } else {
      exclusiveBound = BigDecimal.TEN.pow(integerDigits);
    }

    boolean withinRange = bigDecimalValue.abs().compareTo(exclusiveBound) < 0;
    if (withinRange) {
      return;
    }
    String message =
        String.format(
            "Number out of representable exclusive range of (-1e%s..1e%s), rowIndex:%d,"
                + " column:%s, value:%s",
            integerDigits, integerDigits, insertRowIndex, columnName, bigDecimalValue);
    throw new SFExceptionValidation(ErrorCode.INVALID_FORMAT_ROW, message);
  }

  /**
   * Verifies that a byte array has exactly the expected length.
   *
   * @param columnName Column name, used in the error message
   * @param bytes Array to check
   * @param length Required array length
   * @param insertRowIndex Row index, used in the error message
   * @throws SFExceptionValidation with {@link ErrorCode#INVALID_VALUE_ROW} if the lengths differ
   */
  static void checkFixedLengthByteArray(
      String columnName, byte[] bytes, int length, final long insertRowIndex) {
    int actualLength = bytes.length;
    if (actualLength == length) {
      return;
    }
    String message =
        String.format(
            "Binary length mismatch: expected:%d, actual:%d, rowIndex:%d, column:%s",
            length, actualLength, insertRowIndex, columnName);
    throw new SFExceptionValidation(ErrorCode.INVALID_VALUE_ROW, message);
  }

  /** Lower-cased string literals accepted as boolean input. */
  static Set<String> allowedBooleanStringsLowerCased =
      Sets.newHashSet("1", "0", "yes", "no", "y", "n", "t", "f", "true", "false", "on", "off");

  /**
   * Converts one of the accepted boolean literals (case-insensitive, surrounding whitespace
   * ignored) to a boolean.
   *
   * @param columnName Column name, used in the error message
   * @param value String to convert
   * @param insertRowIndex Row index, used in the error message
   * @return true for "1", "yes", "y", "t", "true" and "on"; false for the remaining accepted
   *     literals
   */
  private static boolean convertStringToBoolean(
      String columnName, String value, final long insertRowIndex) {
    String candidate = value.toLowerCase().trim();

    boolean recognized = allowedBooleanStringsLowerCased.contains(candidate);
    if (!recognized) {
      throw valueFormatNotAllowedException(
          columnName,
          "BOOLEAN",
          "Not a valid boolean, see"
              + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean"
              + " for the list of supported formats",
          insertRowIndex);
    }

    // Every accepted literal that is not listed here stands for false.
    switch (candidate) {
      case "1":
      case "yes":
      case "y":
      case "t":
      case "true":
      case "on":
        return true;
      default:
        return false;
    }
  }

  /**
   * Create exception that a Java type cannot be ingested into a specific Snowflake column type
   *
   * @param javaType Java type failing the validation
   * @param snowflakeType Target Snowflake column type
   * @param allowedJavaTypes Java types supported for the Snowflake column type
   */
  private static SFExceptionValidation typeNotAllowedException(
      String columnName,
      Class<?> javaType,
      String snowflakeType,
      String[] allowedJavaTypes,
      final long insertRowIndex) {
    String message =
        String.format(
            "Object of type %s cannot be ingested into Snowflake column %s of type %s, rowIndex:%d",
            javaType.getName(), columnName, snowflakeType, insertRowIndex);

    // Plain concatenation on purpose: passing the joined type names through String.format a
    // second time would treat any '%' in them as a format specifier.
    String allowedTypesHint = "Allowed Java types: " + String.join(", ", allowedJavaTypes);

    return new SFExceptionValidation(ErrorCode.INVALID_FORMAT_ROW, message, allowedTypesHint);
  }

  /**
   * Create exception when the Java type is correct, but the value is invalid (e.g. boolean cannot
   * be parsed from a string)
   *
   * <p>Note: Do not log actual Object Value
   *
   * @param columnName Column Name
   * @param snowflakeType Snowflake column type
   * @param reason Reason why value format is not allowed.
   * @param rowIndex Index of the Input row primarily for debugging purposes.
   * @return the SFExceptionValidation for the caller to throw
   */
  // Package-private: used by RowValidator for consistent error formatting
  static SFExceptionValidation valueFormatNotAllowedException(
      String columnName, String snowflakeType, String reason, final long rowIndex) {
    // The message deliberately carries only column, type, row index and reason, never the value.
    String message =
        String.format(
            "Value cannot be ingested into Snowflake column %s of type %s, rowIndex:%d, reason: %s",
            columnName, snowflakeType, rowIndex, reason);
    return new SFExceptionValidation(ErrorCode.INVALID_VALUE_ROW, message);
  }

  /**
   * Validates that a string is valid UTF-8 string. It catches situations like unmatched high/low
   * UTF-16 surrogate, for example.
   */
  private static void verifyValidUtf8(
      String input, String columnName, String dataType, final long insertRowIndex) {
    // Encode to UTF-8 and decode again; a string that comes back changed was not valid.
    byte[] encoded = input.getBytes(StandardCharsets.UTF_8);
    String decoded = new String(encoded, StandardCharsets.UTF_8);
    if (input.equals(decoded)) {
      return;
    }
    throw valueFormatNotAllowedException(
        columnName, dataType, "Invalid Unicode string", insertRowIndex);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DuplicateDetector.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/DuplicateDetector.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import java.util.HashSet;
import java.util.Set;

/**
 * A utility class that detects duplicate objects. Optimized for Json objects with a small number of
 * keys.
 */
public class DuplicateDetector<T> {
  /** First key recorded; {@code null} while no key occupies this slot. */
  private T primary;

  /** Second key recorded; {@code null} while no key occupies this slot. */
  private T secondary;

  /** Holds every key beyond the first two; allocated only when a third key arrives. */
  private Set<T> overflow;

  /**
   * Records the key and reports whether an equal key was recorded before.
   *
   * @param key key to check and remember
   * @return true if an equal key has already been seen, false otherwise
   */
  public boolean isDuplicate(T key) {
    // The two dedicated slots avoid allocating a set for objects with at most two keys.
    if (primary == null) {
      primary = key;
      return false;
    } else if (primary.equals(key)) {
      return true;
    } else if (secondary == null) {
      secondary = key;
      return false;
    } else if (secondary.equals(key)) {
      return true;
    }

    Set<T> seen = overflow;
    if (seen == null) {
      seen = new HashSet<>();
      overflow = seen;
    }
    // Set.add returns false when the element was already present.
    boolean newlyAdded = seen.add(key);
    return !newlyAdded;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DuplicateKeyValidatedObject.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/DuplicateKeyValidatedObject.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/**
 * A wrapper for an Object that is going to be validated by {@link
 * DuplicateKeyValidatingSerializer}.
 */
public class DuplicateKeyValidatedObject {
  /** The wrapped value, handed back unchanged by {@link #getObject()}. */
  private final Object wrapped;

  /**
   * Wraps the given value.
   *
   * @param wrapped value to wrap; stored as-is
   */
  public DuplicateKeyValidatedObject(Object wrapped) {
    this.wrapped = wrapped;
  }

  /** @return the value passed to the constructor */
  public Object getObject() {
    return this.wrapped;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/DuplicateKeyValidatingSerializer.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/DuplicateKeyValidatingSerializer.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.List;
import java.util.Map;

/**
 * A custom Jackson serializer that validates Objects by removing trailing nulls in keys for
 * duplication check. See SNOW-1772196 for more details.
 */
public class DuplicateKeyValidatingSerializer extends JsonSerializer<DuplicateKeyValidatedObject> {
  @Override
  public void serialize(
      DuplicateKeyValidatedObject value, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    sanitizeAndWrite(value.getObject(), gen, serializers);
  }

  /** Writes a value, dispatching on its runtime shape (null, map, list, array, anything else). */
  private void sanitizeAndWrite(Object object, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    if (object == null) {
      gen.writeNull();
    } else if (object instanceof Map) {
      writeMap((Map<?, ?>) object, gen, serializers);
    } else if (object instanceof List) {
      writeList((List<?>) object, gen, serializers);
    } else if (object.getClass().isArray()) {
      writeArray(object, gen, serializers);
    } else {
      serializers.defaultSerializeValue(object, gen);
    }
  }

  /** Writes a map as a JSON object, rejecting keys that collide once trailing nulls are removed. */
  private void writeMap(Map<?, ?> map, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    gen.writeStartObject();
    DuplicateDetector<String> seenKeys = new DuplicateDetector<>();
    for (Map.Entry<?, ?> entry : map.entrySet()) {
      String rawKey = entry.getKey().toString();
      // Duplicates are detected on the stripped key, but the original key is what gets written.
      String comparableKey = Utils.stripTrailingNulls(rawKey);
      if (seenKeys.isDuplicate(comparableKey)) {
        throw new JsonGenerationException("Duplicate key in JSON object: " + rawKey, gen);
      }
      gen.writeFieldName(rawKey);
      sanitizeAndWrite(entry.getValue(), gen, serializers);
    }
    gen.writeEndObject();
  }

  /** Writes a list as a JSON array, validating each element recursively. */
  private void writeList(List<?> list, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    gen.writeStartArray();
    for (Object element : list) {
      sanitizeAndWrite(element, gen, serializers);
    }
    gen.writeEndArray();
  }

  /** Writes a Java array (primitive or object component type) as a JSON array. */
  private void writeArray(Object array, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    gen.writeStartArray();
    boolean primitiveComponents = array.getClass().getComponentType().isPrimitive();
    if (primitiveComponents) {
      // Primitive arrays cannot be cast to Object[]; read the elements reflectively.
      int count = Array.getLength(array);
      for (int index = 0; index < count; index++) {
        serializers.defaultSerializeValue(Array.get(array, index), gen);
      }
    } else {
      for (Object element : (Object[]) array) {
        sanitizeAndWrite(element, gen, serializers);
      }
    }
    gen.writeEndArray();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ErrorCode.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/ErrorCode.java
 *
 * Modifications:
 * - Only validation-related error codes retained (INVALID_FORMAT_ROW, INVALID_VALUE_ROW, UNKNOWN_DATA_TYPE, UNSUPPORTED_DATA_TYPE, IO_ERROR, INTERNAL_ERROR)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021-2024 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/** Ingest SDK internal error codes (validation subset) */
public enum ErrorCode {
  INTERNAL_ERROR("0001"),
  INVALID_FORMAT_ROW("0004"),
  UNKNOWN_DATA_TYPE("0005"),
  IO_ERROR("0020"),
  UNSUPPORTED_DATA_TYPE("0029"),
  INVALID_VALUE_ROW("0030");

  /** Name of the resource that holds the error messages. */
  public static final String errorMessageResource =
      "com.snowflake.kafka.connector.internal.validation.ingest_error_messages";

  /** Snowflake internal message code of this error. */
  private final String messageCode;

  /**
   * Creates an error code bound to a Snowflake internal message code.
   *
   * @param code Snowflake internal error code
   */
  ErrorCode(String code) {
    this.messageCode = code;
  }

  /** @return the Snowflake internal message code of this error */
  public String getMessageCode() {
    return this.messageCode;
  }

  /** @return a string of the form {@code ErrorCode{name=NAME, messageCode=CODE}} */
  @Override
  public String toString() {
    return String.format("ErrorCode{name=%s, messageCode=%s}", name(), messageCode);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/Power10Util.java
================================================
package com.snowflake.kafka.connector.internal.validation;

import java.math.BigInteger;

/**
 * Powers of 10 used for timestamp/time scaling and validation. Replicates the semantics of
 * Snowflake JDBC internal Power10 so the connector does not depend on JDBC internal APIs (removed
 * in JDBC 4.x).
 */
public final class Power10Util {

  private Power10Util() {}

  /** Size of the power tables (10^0 through 10^9). */
  public static final int sb16Size = 10;

  /** 10^i as int for i in [0, 9]. Used for timestamp fraction scaling. */
  public static final int[] intTable = new int[sb16Size];

  /** 10^i as BigInteger for i in [0, 9]. Used for time/timestamp validation and scaling. */
  public static final BigInteger[] sb16Table = new BigInteger[sb16Size];

  static {
    for (int i = 0; i < sb16Size; i++) {
      intTable[i] = (int) Math.pow(10, i);
      sb16Table[i] = BigInteger.TEN.pow(i);
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/RowValidator.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 *
 * This file provides integration between SSv1 validation code and KC v4.
 */

package com.snowflake.kafka.connector.internal.validation;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Utils;
import java.math.BigDecimal;
import java.time.ZoneId;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Validates rows against a table schema using SSv1 validation logic. This is the main facade that
 * integrates DataValidationUtil with KC v4.
 *
 * <p>Thread-safety: This class is thread-safe. The schema map is immutably captured at construction
 * time. Multiple threads can safely call validateRow() on the same RowValidator instance
 * concurrently.
 */
public class RowValidator {
  private static final Logger logger = LoggerFactory.getLogger(RowValidator.class);
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /** Immutable snapshot of the table schema, keyed by raw (already normalized) column name. */
  private final Map<String, ColumnSchema> columnSchemaMap;

  /**
   * Default timezone for timestamp parsing, matching SSv1 SDK behavior.
   *
   * <p>When parsing timestamps without timezone information (e.g., "2024-03-06 10:00:00"), this
   * timezone determines how the timestamp is interpreted. Must match SSv1 SDK's
   * OpenChannelRequest.DEFAULT_DEFAULT_TIMEZONE to ensure identical validation behavior.
   *
   * <p>SSv1 SDK uses America/Los_Angeles, not UTC.
   */
  private final ZoneId defaultTimezone = ZoneId.of("America/Los_Angeles");

  /**
   * Create a validator for the given table schema.
   *
   * @param columnSchemaMap map of raw column name to its schema; copied defensively
   * @throws NullPointerException if {@code columnSchemaMap} is null
   * @throws IllegalArgumentException if {@code columnSchemaMap} is empty
   */
  public RowValidator(Map<String, ColumnSchema> columnSchemaMap) {
    // Input validation
    Objects.requireNonNull(columnSchemaMap, "columnSchemaMap cannot be null");
    if (columnSchemaMap.isEmpty()) {
      throw new IllegalArgumentException("columnSchemaMap cannot be empty");
    }

    // Defensive copy for thread safety
    this.columnSchemaMap = Collections.unmodifiableMap(new HashMap<>(columnSchemaMap));
  }

  /**
   * Validate a row against the table schema. Performs both structural validation (column presence,
   * NOT NULL checks) and type/value validation.
   *
   * <p><b>Side effect:</b> whenever type validation produces a normalized value that is a different
   * object from the input, the row entry is replaced in place via {@link
   * java.util.Map.Entry#setValue}. This applies to BOOLEAN (normalized to {@link Boolean}), TEXT/CHAR
   * (Map/Collection serialized to a JSON string), BINARY (the value returned by
   * DataValidationUtil.validateAndParseBinary), TIMESTAMP_* (Integer/Long inputs replaced by the
   * value returned by DataValidationUtil.validateAndFormatTimestamp) and VARIANT/ARRAY (String
   * inputs replaced by the parsed object). The row map must therefore support entry updates. When
   * a type error is returned, entries visited before the failing column may already have been
   * replaced.
   *
   * @param row Map of column name to value (may be mutated as described above)
   * @return ValidationResult indicating success or failure with error details
   * @throws NullPointerException if {@code row} is null
   * @throws IllegalStateException if structural validation missed an unknown column or a null in a
   *     NOT NULL column (indicates an internal bug)
   */
  public ValidationResult validateRow(Map<String, Object> row) {
    // Input validation
    Objects.requireNonNull(row, "row cannot be null");

    // Column names are expected to be already normalized (raw internal names) by the caller.
    // When column identifier normalization is enabled, SnowflakeSinkRecord sanitizes keys
    // at record creation time. DESCRIBE TABLE results are already raw names.

    // Step 1: Structural validation (matching AbstractRowBuffer.verifyInputColumns)
    Set<String> colNames = row.keySet();
    Set<String> extraCols = detectExtraColumns(colNames);
    Set<String> missingNotNullCols = detectMissingNotNullColumns(colNames);
    Set<String> nullNotNullCols = detectNullValuesInNotNullColumns(row);

    if (!extraCols.isEmpty() || !missingNotNullCols.isEmpty() || !nullNotNullCols.isEmpty()) {
      return ValidationResult.structuralError(extraCols, missingNotNullCols, nullNotNullCols);
    }

    // Step 2: Type/value validation (dispatch to DataValidationUtil)
    for (Map.Entry<String, Object> entry : row.entrySet()) {
      String colName = entry.getKey();
      Object value = entry.getValue();
      ColumnSchema col = columnSchemaMap.get(colName);

      // These conditions should have been caught by structural validation above.
      // If we reach here, it indicates a bug in structural validation logic.
      if (col == null) {
        throw new IllegalStateException(
            "Column "
                + colName
                + " not found in schema but was not caught by structural validation");
      }
      if (value == null) {
        // Null values are valid for nullable columns, skip type validation
        if (col.isNullable()) {
          continue; // Valid null for nullable column
        }
        // Null value in NOT NULL column should have been caught by structural validation
        throw new IllegalStateException(
            "Null value for NOT NULL column "
                + colName
                + " but was not caught by structural validation");
      }

      // Skip type validation for the legacy RECORD_CONTENT wrapper column.
      // In non-schematized mode, this column contains the raw payload (e.g. a plain string, bytes,
      // or object) and should accept any value the connector places there.
      // Otherwise, VARIANT client-side validation requires that the payload is a complex object.
      if (Utils.TABLE_COLUMN_CONTENT.equals(colName)) {
        continue;
      }

      try {
        Object normalized = validateAndNormalizeColumnValue(col, value);
        // Reference equality: same object returned for types that don't need normalization
        if (normalized != value) {
          entry.setValue(normalized);
        }
      } catch (SFExceptionValidation e) {
        return ValidationResult.typeError(colName, e.getMessage());
      }
    }

    return ValidationResult.valid();
  }

  /**
   * Validate a single column value using DataValidationUtil, and return the canonical form to
   * ingest.
   *
   * @param col schema of the column being validated
   * @param value non-null value supplied for the column
   * @return the original {@code value} when no normalization is needed, otherwise the normalized
   *     replacement
   * @throws SFExceptionValidation if the value is not acceptable for the column type, or the
   *     column's logical type is not handled
   */
  private Object validateAndNormalizeColumnValue(ColumnSchema col, Object value)
      throws SFExceptionValidation {
    // insertRowIndex parameter is used for error messages - use 0 for now
    final long insertRowIndex = 0;

    switch (col.getLogicalType()) {
      case BOOLEAN:
        // SSv2 SDK only accepts Boolean — normalize to avoid silent drops.
        // Pre-reject non-0/1 Numbers for KC v3 parity: KC v3's StreamingRecordMapper stringified
        // all values, and SSv1's convertStringToBoolean rejects e.g. "42".
        if (value instanceof Number) {
          rejectNonBinaryNumericBoolean(col, (Number) value, insertRowIndex);
        }
        return DataValidationUtil.validateAndParseBoolean(col.getName(), value, insertRowIndex) == 1
            ? Boolean.TRUE
            : Boolean.FALSE;

      case FIXED:
        // Note: DataValidationUtil.validateAndParseBigDecimal doesn't check precision/scale
        // It just parses the value. Precision/scale checking would need to be done separately
        // if needed, but SSv1 didn't enforce it at validation time either.
        DataValidationUtil.validateAndParseBigDecimal(col.getName(), value, insertRowIndex);
        break;

      case REAL:
        DataValidationUtil.validateAndParseReal(col.getName(), value, insertRowIndex);
        break;

      case TEXT:
      case CHAR:
        // DVU.validateAndParseString only accepts String, Number, boolean, char — it rejects
        // Map/Collection.  However, KC v3's StreamingRecordMapper serialized all non-textual
        // JsonNodes to JSON strings via Jackson before the SDK saw them.  We replicate that
        // pipeline-level serialization so v4-compat handles Map/Collection inputs the same way.
        if (value instanceof Map || value instanceof Collection) {
          try {
            String json = OBJECT_MAPPER.writeValueAsString(value);
            DataValidationUtil.validateAndParseString(
                col.getName(), json, Optional.ofNullable(col.getLength()), insertRowIndex);
            return json;
          } catch (JsonProcessingException e) {
            throw DataValidationUtil.valueFormatNotAllowedException(
                col.getName(),
                "STRING",
                "Cannot serialize " + value.getClass().getSimpleName() + " to JSON",
                insertRowIndex);
          }
        }
        DataValidationUtil.validateAndParseString(
            col.getName(), value, Optional.ofNullable(col.getLength()), insertRowIndex);
        break;

      case BINARY:
        // The SSv2 interprets String values for BINARY columns as either hex or base64
        // depending on the server-side parameter ENABLE_SSV2_DEFAULT_BINARY_FORMAT_BASE64.
        // Returning byte[] sidesteps this ambiguity: byte[] is accepted uniformly regardless of
        // how that parameter is set.
        return DataValidationUtil.validateAndParseBinary(
            col.getName(), value, Optional.ofNullable(col.getByteLength()), insertRowIndex);

      case DATE:
        DataValidationUtil.validateAndParseDate(col.getName(), value, insertRowIndex);
        break;

      case TIME:
        // A column without an explicit scale is validated with the maximum scale of 9.
        DataValidationUtil.validateAndParseTime(
            col.getName(), value, col.getScale() != null ? col.getScale() : 9, insertRowIndex);
        break;

      case TIMESTAMP_NTZ:
        return validateAndNormalizeTimestamp(col, value, /* trimTimezone= */ true, insertRowIndex);

      case TIMESTAMP_LTZ:
      case TIMESTAMP_TZ:
        return validateAndNormalizeTimestamp(col, value, /* trimTimezone= */ false, insertRowIndex);

      case VARIANT:
        // When input is a String, the SSv2 SDK stores it as a JSON-quoted string (e.g.
        // '{"a":1}' → '"{\\"a\\":1}"'), whereas SSv1 stored the parsed native object.
        // validateAndParseVariantAsObject returns a native Java object (Map/List/primitive)
        // so the SDK receives the right type.
        if (value instanceof String) {
          return DataValidationUtil.validateAndParseVariantAsObject(
              col.getName(), value, insertRowIndex);
        }
        DataValidationUtil.validateAndParseVariant(col.getName(), value, insertRowIndex);
        break;

      case ARRAY:
        // SSv2 SDK wraps a String value for an ARRAY column as a single-element array (e.g.
        // "[1,2,3]" → ["[1,2,3]"]), while SSv1 parsed the string into a proper array.
        // validateAndParseArrayAsList returns a native List so the SDK gets the right type.
        if (value instanceof String) {
          return DataValidationUtil.validateAndParseArrayAsList(
              col.getName(), value, insertRowIndex);
        }
        DataValidationUtil.validateAndParseArray(col.getName(), value, insertRowIndex);
        break;

      case OBJECT:
        // No normalization needed: SSv2 SDK correctly parses JSON strings for OBJECT columns
        // (unlike VARIANT/ARRAY). Passing the original String value through is safe.
        DataValidationUtil.validateAndParseObject(col.getName(), value, insertRowIndex);
        break;

      default:
        throw new SFExceptionValidation(
            ErrorCode.UNKNOWN_DATA_TYPE, col.getName(), col.getLogicalType());
    }
    return value;
  }

  /**
   * Reject numeric input for a BOOLEAN column unless it is numerically equal to 0 or 1.
   *
   * <p>Values whose string form is not a valid decimal (for example {@code Double.NaN} or an
   * infinite {@code Float}) make {@link BigDecimal#BigDecimal(String)} throw {@link
   * NumberFormatException}; that is translated into the same validation error as any other
   * rejected number so the row is reported as a type error instead of failing with an unrelated
   * runtime exception.
   */
  private static void rejectNonBinaryNumericBoolean(
      ColumnSchema col, Number value, long insertRowIndex) throws SFExceptionValidation {
    boolean zeroOrOne;
    try {
      BigDecimal numeric = new BigDecimal(value.toString());
      zeroOrOne = numeric.compareTo(BigDecimal.ZERO) == 0 || numeric.compareTo(BigDecimal.ONE) == 0;
    } catch (NumberFormatException e) {
      // NaN / Infinity (or any Number with a non-decimal toString) is neither 0 nor 1.
      zeroOrOne = false;
    }
    if (!zeroOrOne) {
      throw DataValidationUtil.valueFormatNotAllowedException(
          col.getName(),
          "BOOLEAN",
          "Only 0 and 1 are accepted for numeric boolean values",
          insertRowIndex);
    }
  }

  /**
   * Validate and optionally normalize a timestamp value. Integer/Long epoch values are converted to
   * ISO strings so the SSv2 SDK interprets them correctly; other types are validated in place.
   *
   * @param trimTimezone forwarded to DataValidationUtil; true for TIMESTAMP_NTZ columns
   * @return the formatted replacement for Integer/Long inputs, otherwise the original value
   */
  private Object validateAndNormalizeTimestamp(
      ColumnSchema col, Object value, boolean trimTimezone, long insertRowIndex)
      throws SFExceptionValidation {
    if (value instanceof Integer || value instanceof Long) {
      return DataValidationUtil.validateAndFormatTimestamp(
          col.getName(), value, defaultTimezone, trimTimezone, insertRowIndex);
    }
    DataValidationUtil.validateAndParseTimestamp(
        col.getName(),
        value,
        // A column without an explicit scale is validated with the maximum scale of 9.
        col.getScale() != null ? col.getScale() : 9,
        defaultTimezone,
        trimTimezone,
        insertRowIndex);
    return value;
  }

  /** Detect columns in the row that don't exist in the table schema. */
  private Set<String> detectExtraColumns(Set<String> unquotedRowCols) {
    Set<String> extraCols = new HashSet<>();
    for (String unquotedName : unquotedRowCols) {
      if (!columnSchemaMap.containsKey(unquotedName)) {
        extraCols.add(unquotedName);
      }
    }
    return extraCols;
  }

  /** Detect NOT NULL columns that are missing from the row, excluding server-filled columns. */
  private Set<String> detectMissingNotNullColumns(Set<String> unquotedRowCols) {
    Set<String> missingNotNullCols = new HashSet<>();
    for (Map.Entry<String, ColumnSchema> entry : columnSchemaMap.entrySet()) {
      String colName = entry.getKey();
      ColumnSchema col = entry.getValue();

      if (!col.isNullable() && !col.isServerFilled() && !unquotedRowCols.contains(colName)) {
        missingNotNullCols.add(colName);
      }
    }
    return missingNotNullCols;
  }

  /**
   * Detect NOT NULL columns that have null values in the row. Entries whose column name is null or
   * blank are skipped with a warning; columns absent from the schema are ignored here because
   * they are reported by {@link #detectExtraColumns}.
   */
  private Set<String> detectNullValuesInNotNullColumns(Map<String, Object> normalizedRow) {
    Set<String> nullNotNullCols = new HashSet<>();
    for (Map.Entry<String, Object> entry : normalizedRow.entrySet()) {
      String colName = entry.getKey(); // Already normalized

      // Validate column name is not empty
      if (colName == null || colName.trim().isEmpty()) {
        logger.warn("Skipping validation for empty column name");
        continue;
      }

      Object value = entry.getValue();

      ColumnSchema col = columnSchemaMap.get(colName);
      if (col != null && !col.isNullable() && value == null) {
        nullNotNullCols.add(colName);
      }
    }
    return nullNotNullCols;
  }

  /**
   * Static validator for unsupported types at channel open time. Throws SFExceptionValidation if
   * the schema contains unsupported types.
   *
   * @param schema Map of column name to ColumnSchema
   * @throws SFExceptionValidation if unsupported types are found
   * @throws NullPointerException if {@code schema} is null
   */
  public static void validateSchema(Map<String, ColumnSchema> schema) throws SFExceptionValidation {
    Objects.requireNonNull(schema, "schema cannot be null");
    for (ColumnSchema col : schema.values()) {
      if (col.getLogicalType() == null) {
        throw new SFExceptionValidation(ErrorCode.UNKNOWN_DATA_TYPE, col.getName());
      }

      // Reject collated columns (not supported in SSv1 validation)
      if (col.getCollation() != null && !col.getCollation().isEmpty()) {
        throw new SFExceptionValidation(
            ErrorCode.UNSUPPORTED_DATA_TYPE, "Collated columns not supported", col.getName());
      }

      // GEOGRAPHY and GEOMETRY are not in ColumnLogicalType enum
      // They would show up as null logicalType and be caught above
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/SFExceptionValidation.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/SFException.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 * - Class renamed to SFExceptionValidation to avoid conflict with com.snowflake.ingest.streaming.SFException
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import java.text.MessageFormat;
import java.util.ResourceBundle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Snowflake exception for client-side validation */
public class SFExceptionValidation extends RuntimeException {
  static final Logger logger = LoggerFactory.getLogger(SFExceptionValidation.class);

  /** Bundle holding the message templates, looked up by each error code's message code. */
  static final ResourceBundle errorMessageBundle =
      ResourceBundle.getBundle(ErrorCode.errorMessageResource);

  private Throwable cause;
  private String vendorCode;
  private Object[] params;

  /**
   * Look up the message template registered for {@code errorCode} and fill in its placeholders.
   *
   * @param errorCode error code whose message code selects the template
   * @param params values substituted into the template by {@link MessageFormat}
   * @return the formatted message
   */
  private static String getErrorMessage(final ErrorCode errorCode, final Object... params) {
    final String bundleKey = errorCode.getMessageCode();
    return MessageFormat.format(errorMessageBundle.getString(bundleKey), params);
  }

  /**
   * Build an exception that wraps an underlying cause.
   *
   * @param cause the underlying failure, may be null
   * @param errorCode error code identifying the failure; its message code becomes the vendor code
   * @param params values substituted into the error code's message template
   */
  public SFExceptionValidation(Throwable cause, ErrorCode errorCode, Object... params) {
    super(getErrorMessage(errorCode, params), cause);

    this.cause = cause;
    this.params = params;
    this.vendorCode = errorCode.getMessageCode();
  }

  /**
   * Build an exception without an underlying cause.
   *
   * @param errorCode error code identifying the failure
   * @param params values substituted into the error code's message template
   */
  public SFExceptionValidation(ErrorCode errorCode, Object... params) {
    this((Throwable) null, errorCode, params);
  }

  /** Message code of the error code this exception was created with. */
  public String getVendorCode() {
    return this.vendorCode;
  }

  /** Message parameters passed at construction time (the same array instance, not a copy). */
  public Object[] getParams() {
    return this.params;
  }

  /** The cause passed at construction time, or null when none was given. */
  @Override
  public Throwable getCause() {
    return this.cause;
  }

  /**
   * Tell whether this exception was created with the given error code.
   *
   * @param errorCode the error code to compare against; null never matches
   * @return true when the message code of {@code errorCode} equals this exception's vendor code
   */
  public boolean isErrorCode(ErrorCode errorCode) {
    if (errorCode == null) {
      return false;
    }
    return errorCode.getMessageCode().equals(this.vendorCode);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/SqlIdentifierNormalizer.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/LiteralQuoteUtils.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 * - Renamed from LiteralQuoteUtils to SqlIdentifierNormalizer
 * - Method names updated to reflect normalization semantics
 *
 * Copyright (c) 2022 Snowflake Computing Inc. All rights reserved.
 */
package com.snowflake.kafka.connector.internal.validation;

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;

/**
 * Normalizes SQL identifiers to their raw internal column names, matching server-side storage.
 *
 * <p>Rules:
 *
 * <ul>
 *   <li>Quoted identifier {@code "MyCol"} → strips quotes, preserves case → {@code MyCol}
 *   <li>Quoted with escaped quotes {@code "col""name"} → strips quotes, unescapes → {@code
 *       col"name}
 *   <li>Unquoted identifier {@code myCol} → uppercases → {@code MYCOL}
 * </ul>
 *
 * <p>Note: The methods in this class have to be kept in sync with the respective methods on server
 * side.
 */
public class SqlIdentifierNormalizer {

  /** Maximum number of normalized identifiers to store in cache */
  static final int NORMALIZED_IDENTIFIER_CACHE_MAX_SIZE = 30000;

  /** Cache storing normalized identifiers */
  private static final LoadingCache<String, String> normalizedIdentifierCache;

  static {
    normalizedIdentifierCache =
        Caffeine.newBuilder()
            .maximumSize(NORMALIZED_IDENTIFIER_CACHE_MAX_SIZE)
            .build(SqlIdentifierNormalizer::normalizeSqlIdentifierInternal);
  }

  /**
   * Normalize a SQL identifier to its raw internal column name. Uses a cache to avoid repeated
   * computation for the same identifier.
   *
   * @param sqlIdentifier the SQL identifier (may be quoted or unquoted)
   * @return the raw internal column name
   */
  public static String normalizeSqlIdentifier(String sqlIdentifier) {
    return normalizedIdentifierCache.get(sqlIdentifier);
  }

  /**
   * Normalize a SQL identifier to its raw internal column name.
   *
   * <p>Normalises the column name to how it is stored internally. This function needs to keep in
   * sync with server side normalisation.
   *
   * <ul>
   *   <li>Empty input is returned unchanged.
   *   <li>An identifier wrapped in double quotes whose interior contains double quotes only as
   *       escaped pairs ({@code ""}) has the outer quotes removed and each pair collapsed to a
   *       single quote; case is preserved.
   *   <li>Anything else (unquoted, or quoted with an unpaired interior quote) has escaped spaces
   *       ({@code "\ "}) replaced by plain spaces and is upper-cased.
   * </ul>
   *
   * @param sqlIdentifier SQL identifier to normalize
   * @return raw internal column name
   */
  private static String normalizeSqlIdentifierInternal(String sqlIdentifier) {
    int length = sqlIdentifier.length();

    if (length == 0) {
      return sqlIdentifier;
    }

    // Quoted identifier: starts and ends with a double quote and is at least two characters long.
    if (length >= 2
        && sqlIdentifier.charAt(0) == '"'
        && sqlIdentifier.charAt(length - 1) == '"') {
      String inner = sqlIdentifier.substring(1, length - 1);
      // Well-formed only if no lone double quote remains once escaped pairs are removed. An
      // interior with no quotes at all trivially satisfies this, so a separate "no interior
      // quotes" branch would never be reached.
      if (!inner.replace("\"\"", "").contains("\"")) {
        // Remove quotes and turn escaped double-quotes to single double-quotes
        return inner.replace("\"\"", "\"");
      }
    }

    // Unquoted (or malformed quoted) identifier: replace escaped spaces and upper-case.
    // Locale.ROOT keeps the result independent of the JVM default locale; with e.g. a Turkish
    // default locale, toUpperCase() would map 'i' to a dotted capital I.
    return sqlIdentifier.replace("\\ ", " ").toUpperCase(java.util.Locale.ROOT);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/TimestampWrapper.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/TimestampWrapper.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2023 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.time.OffsetDateTime;

/**
 * This class represents the outcome of timestamp parsing and validation. It contains methods needed
 * to serialize timestamps into Parquet.
 */
public class TimestampWrapper {

  /** Epoch seconds */
  private final long epoch;

  /** Fractional part of the second */
  private final int fraction;

  /** Timezone offset in seconds */
  private final int timezoneOffsetSeconds;

  /** Scale of the timestamp column (0-9) */
  private final int scale;

  /**
   * How many bits should be reserver for the timezone part. Needs to be aligned with {@link
   * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#BITS_FOR_TIMEZONE}
   */
  private static final int BITS_FOR_TIMEZONE = 14;

  /**
   * Mask of the timezone bits. Needs to be aligned with {@link
   * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#MASK_OF_TIMEZONE}
   */
  private static final int MASK_OF_TIMEZONE = (1 << BITS_FOR_TIMEZONE) - 1;

  /** Create a new instance from {@link OffsetDateTime} and its scale. */
  public TimestampWrapper(OffsetDateTime offsetDateTime, int scale) {
    if (scale < 0 || scale > 9) {
      throw new IllegalArgumentException(
          String.format("Scale must be between 0 and 9, actual: %d", scale));
    }
    this.epoch = offsetDateTime.toEpochSecond();
    this.fraction =
        offsetDateTime.getNano()
            / Power10Util.intTable[9 - scale]
            * Power10Util.intTable[9 - scale];
    this.timezoneOffsetSeconds = offsetDateTime.getOffset().getTotalSeconds();
    this.scale = scale;
  }

  /**
   * Convert the timestamp to a binary representation. Needs to be aligned with {@link
   * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#toBinary}.
   */
  public BigInteger toBinary(boolean includeTimezone) {
    BigDecimal timeInNs =
        BigDecimal.valueOf(epoch).scaleByPowerOfTen(9).add(new BigDecimal(fraction));
    BigDecimal scaledTime = timeInNs.scaleByPowerOfTen(scale - 9);
    scaledTime = scaledTime.setScale(0, RoundingMode.DOWN);
    BigInteger fcpInt = scaledTime.unscaledValue();
    if (includeTimezone) {
      int offsetMin = timezoneOffsetSeconds / 60;
      assert offsetMin >= -1440 && offsetMin <= 1440;
      offsetMin += 1440;
      fcpInt = fcpInt.shiftLeft(14);
      fcpInt = fcpInt.add(BigInteger.valueOf(offsetMin & MASK_OF_TIMEZONE));
    }
    return fcpInt;
  }

  /** Get epoch in seconds */
  public long getEpochSecond() {
    return epoch;
  }

  /** Get fractional part of a second */
  public int getFraction() {
    return fraction;
  }

  /** Get timezone offset in seconds */
  public int getTimezoneOffsetSeconds() {
    return timezoneOffsetSeconds;
  }

  /**
   * Get timezone index, 1440 means UTC. Calculation needs to be aligned with {@link
   * net.snowflake.client.jdbc.internal.snowflake.common.core.SFTimestamp#toBinary}
   */
  public int getTimeZoneIndex() {
    return timezoneOffsetSeconds / 60 + 1440;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/Utils.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/utils/Utils.java
 *
 * Modifications:
 * - Only stripTrailingNulls() method retained (only method used by validation)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

/** Utility methods for validation */
class Utils {
  /**
   * Remove every NUL character ({@code '\u0000'}) found at the end of a string.
   *
   * @param key input string, must not be null
   * @return {@code key} itself when it has no trailing NUL characters, otherwise the prefix of
   *     {@code key} that precedes them (empty when the string consists only of NULs)
   */
  public static String stripTrailingNulls(String key) {
    final int originalLength = key.length();

    // Walk backwards until the character just before the cut-off is not NUL.
    int keep = originalLength;
    for (; keep > 0; keep--) {
      if (key.charAt(keep - 1) != '\u0000') {
        break;
      }
    }

    if (keep == originalLength) {
      return key;
    }
    return key.substring(0, keep);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ValidationResult.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 *
 * This file provides integration between SSv1 validation code and KC v4.
 */

package com.snowflake.kafka.connector.internal.validation;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/** Result of row validation containing validation status and error details. */
public class ValidationResult {
  /**
   * Shared instance returned by {@link #valid()}. The class is immutable, so a single instance can
   * be reused instead of allocating a new result (and three sets) for every valid row.
   */
  private static final ValidationResult VALID =
      new ValidationResult(
          true,
          false,
          false,
          null,
          null,
          Collections.<String>emptySet(),
          Collections.<String>emptySet(),
          Collections.<String>emptySet());

  private final boolean valid;
  private final boolean hasTypeError;
  private final boolean hasStructuralError;
  private final String valueError;
  private final String columnName;
  private final Set<String> extraColNames;
  private final Set<String> missingNotNullColNames;
  private final Set<String> nullValueForNotNullColNames;

  private ValidationResult(
      boolean valid,
      boolean hasTypeError,
      boolean hasStructuralError,
      String valueError,
      String columnName,
      Set<String> extraColNames,
      Set<String> missingNotNullColNames,
      Set<String> nullValueForNotNullColNames) {
    this.valid = valid;
    this.hasTypeError = hasTypeError;
    this.hasStructuralError = hasStructuralError;
    this.valueError = valueError;
    this.columnName = columnName;
    // Create defensive immutable copies of all sets for thread safety
    this.extraColNames = immutableCopy(extraColNames);
    this.missingNotNullColNames = immutableCopy(missingNotNullColNames);
    this.nullValueForNotNullColNames = immutableCopy(nullValueForNotNullColNames);
  }

  /**
   * Return an unmodifiable snapshot of {@code source}. A null or empty input yields the shared
   * immutable empty set, which avoids allocating a copy when there is nothing to copy.
   */
  private static Set<String> immutableCopy(Set<String> source) {
    if (source == null || source.isEmpty()) {
      return Collections.emptySet();
    }
    return Collections.unmodifiableSet(new HashSet<>(source));
  }

  /** Create a valid result */
  public static ValidationResult valid() {
    return VALID;
  }

  /**
   * Create a type/value error result
   *
   * @param columnName name of the column whose value failed validation
   * @param errorMessage description of the failure
   */
  public static ValidationResult typeError(String columnName, String errorMessage) {
    return new ValidationResult(
        false,
        true,
        false,
        errorMessage,
        columnName,
        Collections.<String>emptySet(),
        Collections.<String>emptySet(),
        Collections.<String>emptySet());
  }

  /**
   * Create a structural error result. Each set is copied, so later changes to the arguments do not
   * affect the result; a null set is treated as empty.
   *
   * @param extraColNames columns present in the row but not in the table schema
   * @param missingNotNullColNames NOT NULL columns absent from the row
   * @param nullValueForNotNullColNames NOT NULL columns that carry a null value in the row
   */
  public static ValidationResult structuralError(
      Set<String> extraColNames,
      Set<String> missingNotNullColNames,
      Set<String> nullValueForNotNullColNames) {
    return new ValidationResult(
        false,
        false,
        true,
        null,
        null,
        extraColNames,
        missingNotNullColNames,
        nullValueForNotNullColNames);
  }

  public boolean isValid() {
    return valid;
  }

  public boolean hasTypeError() {
    return hasTypeError;
  }

  public boolean hasStructuralError() {
    return hasStructuralError;
  }

  /** Error message of a type/value error; null for valid and structural-error results. */
  public String getValueError() {
    return valueError;
  }

  /** Column of a type/value error; null for valid and structural-error results. */
  public String getColumnName() {
    return columnName;
  }

  /** Unmodifiable set of row columns that are not in the schema; never null. */
  public Set<String> getExtraColNames() {
    return extraColNames;
  }

  /** Unmodifiable set of NOT NULL columns missing from the row; never null. */
  public Set<String> getMissingNotNullColNames() {
    return missingNotNullColNames;
  }

  /** Unmodifiable set of NOT NULL columns that had a null value; never null. */
  public Set<String> getNullValueForNotNullColNames() {
    return nullValueForNotNullColNames;
  }

  /**
   * Check if this structural error can be resolved with schema evolution.
   *
   * <p>Matches KC v3 behavior where ALL structural errors trigger schema evolution: - Extra
   * columns: YES - add via ALTER TABLE ADD COLUMN - Null in NOT NULL: YES - drop constraint via
   * ALTER TABLE DROP NOT NULL - Missing NOT NULL columns: YES - drop constraint via ALTER TABLE
   * DROP NOT NULL (KC v3 behavior)
   *
   * <p>KC v3's InsertErrorMapper.java joined missingNotNullColNames and nullValueForNotNullColNames
   * into a single list of columns to drop NOT NULL. We maintain this behavior.
   *
   * @return true if the error can be resolved with schema evolution
   */
  public boolean needsSchemaEvolution() {
    return hasStructuralError
        && (!extraColNames.isEmpty()
            || !nullValueForNotNullColNames.isEmpty()
            || !missingNotNullColNames.isEmpty());
  }

  /**
   * Check if this structural error cannot be resolved with schema evolution.
   *
   * <p>In KC v3, all structural errors (extra columns, missing NOT NULL, null NOT NULL) were
   * resolvable via schema evolution. We maintain the same behavior for backwards compatibility.
   *
   * @return true if the error is unresolvable (always false for structural errors)
   */
  public boolean hasUnresolvableError() {
    // All structural errors are resolvable via schema evolution (matches KC v3 behavior)
    return false;
  }

  /**
   * Short label for the kind of error held by this result.
   *
   * @return {@code "type_error"}, {@code "structural_error"}, or {@code "unknown"} when neither
   *     error flag is set (which includes valid results)
   */
  public String getErrorType() {
    if (hasTypeError) {
      return "type_error";
    } else if (hasStructuralError) {
      return "structural_error";
    } else {
      return "unknown";
    }
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/internal/validation/ZonedDateTimeSerializer.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/main/java/net/snowflake/ingest/streaming/internal/serialization/ZonedDateTimeSerializer.java
 *
 * Modifications:
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 *
 * Copyright (c) 2021-2022 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import java.io.IOException;
import java.time.ZonedDateTime;

/** Snowflake does not support parsing zones, so serialize it in offset instead */
public class ZonedDateTimeSerializer extends JsonSerializer<ZonedDateTime> {
  /**
   * Write the value as a JSON string holding the text form of its offset date-time, i.e. the result
   * of {@code value.toOffsetDateTime().toString()}.
   *
   * @param value the zoned date-time to serialize
   * @param gen generator that receives the string
   * @param serializers not used by this serializer
   * @throws IOException if the generator fails to write
   */
  @Override
  public void serialize(ZonedDateTime value, JsonGenerator gen, SerializerProvider serializers)
      throws IOException {
    final String offsetText = value.toOffsetDateTime().toString();
    gen.writeString(offsetText);
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/records/KafkaRecordConverter.java
================================================
package com.snowflake.kafka.connector.records;

import com.snowflake.kafka.connector.internal.KCLogger;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.kafka.connect.data.ConnectSchema;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.header.Header;
import org.apache.kafka.connect.header.Headers;

public final class KafkaRecordConverter {

  private static final KCLogger LOGGER = new KCLogger(KafkaRecordConverter.class.getName());

  // Decimal values whose precision exceeds this limit are emitted as strings (see convertBytes).
  private static final int MAX_SNOWFLAKE_NUMBER_PRECISION = 38;

  // Memoizes ConnectSchema.schemaType(...) per value class for schemaless conversion; an empty
  // Optional records that the class has no corresponding schema type.
  private static final ConcurrentHashMap<Class<?>, Optional<Schema.Type>> SCHEMA_TYPE_CACHE =
      new ConcurrentHashMap<>();

  // UTC formatter applied to the Date (INT32) and Timestamp (INT64) logical types.
  private static final DateTimeFormatter ISO_DATE_TIME_FORMAT =
      DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").withZone(ZoneOffset.UTC);

  // Formatter applied to the Time (INT32) logical type.
  // NOTE(review): this renders in the JVM's default zone, so the output depends on where the
  // connector runs — confirm that is intended.
  private static final DateTimeFormatter TIME_FORMAT =
      DateTimeFormatter.ofPattern("HH:mm:ss.SSSXXX").withZone(ZoneId.systemDefault());

  // Static utility class: not meant to be instantiated.
  private KafkaRecordConverter() {}

  /**
   * Turns a Kafka Connect record value into the {@code Map} shape used for Snowflake streaming
   * ingest.
   *
   * <p>A null value produces a new empty map. A {@link Struct} or {@link Map} is converted field by
   * field; any other value type is rejected with an {@code ERROR_5015} exception.
   *
   * @param schema schema describing {@code value}; may be null
   * @param value the record value
   * @return the converted, mutable map
   */
  public static Map<String, Object> convertToMap(Schema schema, Object value) {
    if (LOGGER.isDebugEnabled()) {
      final Object schemaLabel = schema == null ? "null" : schema.type();
      final String valueLabel = value == null ? "null" : value.getClass().getSimpleName();
      LOGGER.debug(
          "Converting record to map. Schema: {}, valueType: {}", schemaLabel, valueLabel);
    }

    if (value == null) {
      return new HashMap<>();
    }
    if (value instanceof Struct) {
      return convertStructToMap((Struct) value);
    }
    if (value instanceof Map) {
      return convertMapToMap((Map<?, ?>) value, schema);
    }

    throw SnowflakeErrors.ERROR_5015.getException(
        "Cannot schematize record. Record value must be a Map or Struct. Consider using kafka"
            + " HoistField transformer to wrap the value of the record.");
  }

  /**
   * Flattens Kafka Connect headers into a key-to-string map.
   *
   * <p>Each header value is converted with {@code convertValue} and then rendered via {@link
   * String#valueOf(Object)}; a header whose converted value is null maps to null.
   *
   * @param headers the record headers; may be null
   * @return a new mutable map, empty when {@code headers} is null
   */
  public static Map<String, String> convertHeaders(Headers headers) {
    final Map<String, String> converted = new HashMap<>();
    if (headers != null) {
      for (Header header : headers) {
        final Object raw = convertValue(header.schema(), header.value());
        final String rendered = raw != null ? String.valueOf(raw) : null;
        converted.put(header.key(), rendered);
      }
      return converted;
    }
    LOGGER.trace("Headers is null, returning empty map");
    return converted;
  }

  /**
   * Converts a record key with the same rules as any other value.
   *
   * @param keySchema schema of the key; may be null
   * @param key the record key; may be null
   * @return the converted key, or null when {@code key} is null (no schema default is applied)
   */
  public static Object convertKey(Schema keySchema, Object key) {
    if (LOGGER.isTraceEnabled()) {
      final Object schemaLabel = keySchema == null ? "null" : keySchema.type();
      final String keyLabel = key == null ? "null" : key.getClass().getSimpleName();
      LOGGER.trace("Converting key. Schema: {}, keyType: {}", schemaLabel, keyLabel);
    }
    if (key != null) {
      return convertValue(keySchema, key);
    }
    LOGGER.trace("Key is null, returning null");
    return null;
  }

  /**
   * Converts every field of a {@link Struct} (using the struct's own schema) into a map keyed by
   * field name.
   */
  private static Map<String, Object> convertStructToMap(Struct struct) {
    final Map<String, Object> converted = new HashMap<>();
    for (Field field : struct.schema().fields()) {
      final Schema fieldSchema = field.schema();
      if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(
            "Converting struct field: {}, schema: {}",
            field.name(),
            fieldSchema == null ? "null" : fieldSchema.type());
      }
      converted.put(field.name(), convertValue(fieldSchema, struct.get(field)));
    }
    return converted;
  }

  /**
   * Converts a top-level (or nested) map into a string-keyed map, preserving iteration order.
   *
   * <p>Keys are stringified with {@link String#valueOf(Object)}. Nested maps recurse through this
   * method, nested structs go through {@code convertStructToMap}, and everything else is converted
   * with the map's value schema (when a schema is given).
   */
  private static Map<String, Object> convertMapToMap(Map<?, ?> map, Schema schema) {
    final Map<String, Object> converted = new LinkedHashMap<>();
    final Schema valueSchema = schema == null ? null : schema.valueSchema();

    for (Map.Entry<?, ?> entry : map.entrySet()) {
      final String column = String.valueOf(entry.getKey());
      final Object raw = entry.getValue();

      final Object mapped;
      if (raw instanceof Struct) {
        mapped = convertStructToMap((Struct) raw);
      } else if (raw instanceof Map) {
        mapped = convertMapToMap((Map<?, ?>) raw, valueSchema);
      } else {
        mapped = convertValue(valueSchema, raw);
      }
      converted.put(column, mapped);
    }
    return converted;
  }

  /**
   * Converts a single Kafka Connect value into the representation handed to Snowflake.
   *
   * <p>Null handling: without a schema the result is null; otherwise a non-null schema default is
   * converted in place of the null, an optional schema yields null, and a required schema without
   * a default is rejected with {@code ERROR_5015}.
   *
   * <p>Non-null values are checked against the schema type when a schema is present, then
   * converted according to that type (or a type inferred from the value's class when there is no
   * schema).
   *
   * @param schema schema of the value, or null for schemaless data
   * @param value the value to convert; may be null
   * @return the converted value; may be null
   */
  static Object convertValue(Schema schema, Object value) {
    if (value == null) {
      if (schema == null) {
        LOGGER.trace("Value is null with no schema, returning null");
        return null;
      }
      final Object fallback = schema.defaultValue();
      if (fallback != null) {
        return convertValue(schema, fallback);
      }
      if (!schema.isOptional()) {
        throw SnowflakeErrors.ERROR_5015.getException(
            "Conversion error: null value for field that is required and has no default value");
      }
      LOGGER.trace("Value is null for optional field, returning null");
      return null;
    }

    final Schema.Type resolvedType = getSchemaType(schema, value);
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace(
          "Converting value of type {} with schemaType {}",
          value.getClass().getSimpleName(),
          resolvedType);
    }

    // With a schema, resolvedType is exactly schema.type(); verify the Java value agrees with it.
    if (schema != null) {
      validateValueType(resolvedType, value);
    }

    switch (resolvedType) {
      case BOOLEAN:
      case INT8:
      case INT16:
        LOGGER.trace("Passthrough for primitive type: {}", resolvedType);
        return value;
      case INT32:
        return convertInt32(schema, value);
      case INT64:
        return convertInt64(schema, value);
      case FLOAT32:
        return handleFloatSpecialValues((Float) value);
      case FLOAT64:
        return handleDoubleSpecialValues((Double) value);
      case STRING:
        LOGGER.trace("Converting to String");
        return value.toString();
      case BYTES:
        return convertBytes(schema, value);
      case STRUCT:
        return convertStructToMap((Struct) value);
      case MAP:
        return convertMapValue(schema, value);
      case ARRAY:
        return convertArray(schema, value);
      default:
        throw SnowflakeErrors.ERROR_5015.getException("Couldn't convert " + value + " to Object.");
    }
  }

  /**
   * Rejects a value whose Java class is not acceptable for the given schema type.
   *
   * <p>Throws an {@code ERROR_5015} exception naming the expected schema type and the actual class.
   */
  private static void validateValueType(Schema.Type schemaType, Object value) {
    if (matchesSchemaType(schemaType, value)) {
      return;
    }
    throw SnowflakeErrors.ERROR_5015.getException(
        "Type mismatch: expected " + schemaType + " but got " + value.getClass().getName());
  }

  /** Returns whether {@code value}'s Java class is one accepted for {@code schemaType}. */
  private static boolean matchesSchemaType(Schema.Type schemaType, Object value) {
    switch (schemaType) {
      case BOOLEAN:
        return value instanceof Boolean;
      case INT8:
        return value instanceof Byte;
      case INT16:
        return value instanceof Short;
      case INT32:
        // java.util.Date is accepted because the Date/Time logical types carry Date instances
        return value instanceof Integer || value instanceof java.util.Date;
      case INT64:
        // java.util.Date is accepted because the Timestamp logical type carries Date instances
        return value instanceof Long || value instanceof java.util.Date;
      case FLOAT32:
        return value instanceof Float;
      case FLOAT64:
        return value instanceof Double;
      case STRING:
        return value instanceof String;
      case BYTES:
        // BigDecimal is accepted because the Decimal logical type is carried on BYTES
        return value instanceof byte[]
            || value instanceof ByteBuffer
            || value instanceof BigDecimal;
      case ARRAY:
        return value instanceof Collection;
      case MAP:
        return value instanceof Map;
      case STRUCT:
        return value instanceof Struct;
      default:
        return false;
    }
  }

  /**
   * Determines the schema type used to convert {@code value}.
   *
   * <p>With a schema, its declared type is returned. Without one, the type is inferred from the
   * value: any {@link Map} is MAP, any {@link Collection} is ARRAY, then the (cached) result of
   * {@code ConnectSchema.schemaType} for the value's class, then INT64 for {@link java.util.Date}.
   * A class matching none of these is rejected with {@code ERROR_5015}.
   */
  private static Schema.Type getSchemaType(Schema schema, Object value) {
    if (schema != null) {
      return schema.type();
    }

    final Class<?> valueClass = value.getClass();
    LOGGER.trace("No schema provided, inferring type from value class: {}", valueClass.getName());

    // Maps and collections are matched by interface before the class-based lookup below.
    if (value instanceof Map) {
      return Schema.Type.MAP;
    }
    if (value instanceof Collection) {
      return Schema.Type.ARRAY;
    }

    final Schema.Type known =
        SCHEMA_TYPE_CACHE
            .computeIfAbsent(
                valueClass, clazz -> Optional.ofNullable(ConnectSchema.schemaType(clazz)))
            .orElse(null);
    if (known != null) {
      return known;
    }

    if (value instanceof java.util.Date) {
      return Schema.Type.INT64;
    }

    throw SnowflakeErrors.ERROR_5015.getException(
        "Java class " + valueClass + " does not have corresponding schema type.");
  }

  /**
   * Converts an INT32 value. The Date and Time logical types (identified by schema name) are
   * formatted as strings from their {@link java.util.Date} value; anything else is returned
   * unchanged.
   */
  private static Object convertInt32(Schema schema, Object value) {
    final String logicalName = schema == null ? null : schema.name();

    if (Date.LOGICAL_NAME.equals(logicalName)) {
      LOGGER.trace("Converting INT32 Date logical type to ISO format");
      final java.util.Date date = (java.util.Date) value;
      return ISO_DATE_TIME_FORMAT.format(date.toInstant());
    }
    if (Time.LOGICAL_NAME.equals(logicalName)) {
      LOGGER.trace("Converting INT32 Time logical type to time format");
      final java.util.Date time = (java.util.Date) value;
      return TIME_FORMAT.format(time.toInstant());
    }

    LOGGER.trace("Passthrough for INT32 value");
    return value;
  }

  /**
   * Converts an INT64 value. The Timestamp logical type (identified by schema name) is formatted
   * as a UTC string from its {@link java.util.Date} value; anything else is returned unchanged.
   */
  private static Object convertInt64(Schema schema, Object value) {
    final boolean isTimestamp = schema != null && Timestamp.LOGICAL_NAME.equals(schema.name());
    if (!isTimestamp) {
      LOGGER.trace("Passthrough for INT64 value");
      return value;
    }
    LOGGER.trace("Converting INT64 Timestamp logical type to string");
    final java.util.Date timestamp = (java.util.Date) value;
    return ISO_DATE_TIME_FORMAT.format(timestamp.toInstant());
  }

  /**
   * Converts a BYTES value.
   *
   * <p>For the Decimal logical type (identified by schema name) the {@link BigDecimal} is returned
   * as-is, unless its precision exceeds {@code MAX_SNOWFLAKE_NUMBER_PRECISION}, in which case its
   * string form is returned. All other values are turned into a {@code byte[]}.
   */
  private static Object convertBytes(Schema schema, Object value) {
    final boolean isDecimal = schema != null && Decimal.LOGICAL_NAME.equals(schema.name());
    if (!isDecimal) {
      LOGGER.trace("Converting bytes to byte[]");
      return toByteArray(value);
    }

    final BigDecimal decimal = (BigDecimal) value;
    if (decimal.precision() <= MAX_SNOWFLAKE_NUMBER_PRECISION) {
      return decimal;
    }

    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace(
          "Converting Decimal with precision {} (exceeds max {}) to string",
          decimal.precision(),
          MAX_SNOWFLAKE_NUMBER_PRECISION);
    }
    return decimal.toString();
  }

  /**
   * Extracts the content of a BYTES value as a {@code byte[]}.
   *
   * <p>A {@code byte[]} is returned as-is. For a {@link ByteBuffer}, the bytes from index 0 up to
   * the buffer's limit are returned. The buffer's array offset and limit are honored, so a sliced
   * or partially filled buffer yields only its own content, without neighbouring bytes from the
   * backing array or zero padding up to the capacity. The caller's buffer is never modified.
   *
   * <p>When the backing array is exactly the buffer's content it is returned directly without
   * copying, so the result may share storage with the buffer.
   *
   * <p>Any other value type is rejected with an {@code ERROR_5015} exception.
   *
   * @param value a {@code byte[]} or {@link ByteBuffer}
   * @return the bytes held by {@code value}
   */
  private static byte[] toByteArray(Object value) {
    if (value instanceof byte[]) {
      return (byte[]) value;
    }
    if (value instanceof ByteBuffer) {
      // Work on a duplicate so the caller's position/limit/mark are left untouched.
      final ByteBuffer view = ((ByteBuffer) value).duplicate();
      // Read from the start of the buffer regardless of the current position.
      view.rewind();

      // Fast path: the backing array holds exactly the buffer content, nothing more.
      if (view.hasArray() && view.arrayOffset() == 0 && view.array().length == view.limit()) {
        return view.array();
      }

      // Slices, partially filled, read-only and direct buffers: copy only [0, limit).
      final byte[] bytes = new byte[view.remaining()];
      view.get(bytes);
      return bytes;
    }
    throw SnowflakeErrors.ERROR_5015.getException(
        "Invalid type for bytes type: " + value.getClass());
  }

  /**
   * Converts each element of a collection with the array's element schema (when a schema is
   * given), keeping the collection's iteration order.
   */
  private static List<Object> convertArray(Schema schema, Object value) {
    final Collection<?> elements = (Collection<?>) value;
    final Schema elementSchema = schema == null ? null : schema.valueSchema();
    if (LOGGER.isTraceEnabled()) {
      final Object schemaLabel = elementSchema == null ? "null" : elementSchema.type();
      LOGGER.trace("Array element schema: {}", schemaLabel);
    }

    final List<Object> converted = new ArrayList<>(elements.size());
    elements.forEach(element -> converted.add(convertValue(elementSchema, element)));
    return converted;
  }

  /**
   * Converts a nested map value.
   *
   * <p>In object mode (see {@code shouldUseObjectMode}) the result is a string-keyed map in
   * iteration order. Otherwise the map is encoded as a list of two-element {@code [key, value]}
   * lists, with keys and values each converted by their own schema.
   */
  private static Object convertMapValue(Schema schema, Object value) {
    final Map<?, ?> entries = (Map<?, ?>) value;
    final boolean objectMode = shouldUseObjectMode(schema, entries);

    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace(
          "Converting nested Map with {} entries, useObjectMode: {}", entries.size(), objectMode);
    }

    if (objectMode) {
      final Schema valueSchema = schema == null ? null : schema.valueSchema();
      final Map<String, Object> asObject = new LinkedHashMap<>();
      for (Map.Entry<?, ?> entry : entries.entrySet()) {
        asObject.put(
            String.valueOf(entry.getKey()), convertValue(valueSchema, entry.getValue()));
      }
      return asObject;
    }

    // Non-string keys: use array encoding [[key, value], [key, value], ...]
    final Schema keySchema = schema == null ? null : schema.keySchema();
    final Schema valueSchema = schema == null ? null : schema.valueSchema();
    if (LOGGER.isTraceEnabled()) {
      LOGGER.trace(
          "Array mode key schema: {}, value schema: {}",
          keySchema == null ? "null" : keySchema.type(),
          valueSchema == null ? "null" : valueSchema.type());
    }

    final List<List<Object>> asPairs = new ArrayList<>();
    for (Map.Entry<?, ?> entry : entries.entrySet()) {
      final List<Object> pair = new ArrayList<>(2);
      pair.add(convertValue(keySchema, entry.getKey()));
      pair.add(convertValue(valueSchema, entry.getValue()));
      asPairs.add(pair);
    }
    return asPairs;
  }

  /**
   * Decides whether a map can be emitted as a string-keyed object.
   *
   * <p>With a schema, that is the case only when the key schema exists and is STRING. Without a
   * schema, every key must be a {@link String} (an empty map qualifies).
   */
  private static boolean shouldUseObjectMode(Schema schema, Map<?, ?> map) {
    if (schema == null) {
      // For schemaless, check if all keys are strings
      return map.keySet().stream().allMatch(key -> key instanceof String);
    }
    final Schema keySchema = schema.keySchema();
    return keySchema != null && keySchema.type() == Schema.Type.STRING;
  }

  /**
   * Maps non-finite floats to their string markers: {@code "NaN"}, {@code "Inf"} or {@code
   * "-Inf"}. Finite values are returned unchanged (the same boxed instance).
   */
  private static Object handleFloatSpecialValues(Float value) {
    final float raw = value;
    if (raw == Float.POSITIVE_INFINITY) {
      return "Inf";
    }
    if (raw == Float.NEGATIVE_INFINITY) {
      return "-Inf";
    }
    if (raw != raw) {
      // Only NaN compares unequal to itself
      return "NaN";
    }
    return value;
  }

  /**
   * Maps non-finite doubles to their string markers: {@code "NaN"}, {@code "Inf"} or {@code
   * "-Inf"}. Finite values are returned unchanged (the same boxed instance).
   */
  private static Object handleDoubleSpecialValues(Double value) {
    final double raw = value;
    if (raw == Double.POSITIVE_INFINITY) {
      return "Inf";
    }
    if (raw == Double.NEGATIVE_INFINITY) {
      return "-Inf";
    }
    if (raw != raw) {
      // Only NaN compares unequal to itself
      return "NaN";
    }
    return value;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/records/SnowflakeMetadataConfig.java
================================================
package com.snowflake.kafka.connector.records;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT;

import com.google.common.base.MoreObjects;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

/**
 * Flags, parsed from the connector configuration, that control which metadata fields are attached
 * to ingested records.
 */
public class SnowflakeMetadataConfig {
  final boolean createtimeFlag;
  final boolean connectorPushTimeFlag;
  final boolean topicFlag;
  final boolean offsetAndPartitionFlag;
  final boolean allFlag;

  /** Initializes every flag with its default by parsing an empty configuration. */
  public SnowflakeMetadataConfig() {
    this(new HashMap<>());
  }

  /**
   * Parses the metadata flags from the connector configuration.
   *
   * <p>Values are read with {@link Boolean#parseBoolean(String)}, so a flag is enabled only when
   * its property is {@code "true"} (case-insensitive). A property that is absent falls back to
   * its default: {@code SNOWFLAKE_METADATA_ALL_DEFAULT} for the create-time, topic,
   * offset-and-partition and all flags, and {@code
   * SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT} for the connector push time flag.
   *
   * @param config a String to String map of configs; {@code null} is treated as an empty map
   */
  public SnowflakeMetadataConfig(Map<String, String> config) {
    // Tolerate a missing configuration instead of failing with a NullPointerException.
    final Map<String, String> settings;
    if (config == null) {
      settings = new HashMap<>();
    } else {
      settings = config;
    }

    createtimeFlag = getMetadataProperty(settings, SNOWFLAKE_METADATA_CREATETIME);
    topicFlag = getMetadataProperty(settings, SNOWFLAKE_METADATA_TOPIC);
    offsetAndPartitionFlag =
        getMetadataProperty(settings, SNOWFLAKE_METADATA_OFFSET_AND_PARTITION);
    allFlag = getMetadataProperty(settings, SNOWFLAKE_METADATA_ALL);

    connectorPushTimeFlag =
        Optional.ofNullable(settings.get(SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME))
            .map(Boolean::parseBoolean)
            .orElse(SNOWFLAKE_STREAMING_METADATA_CONNECTOR_PUSH_TIME_DEFAULT);
  }

  /** Returns the value of the "all metadata" flag. */
  public boolean shouldIncludeAllMetadata() {
    return allFlag;
  }

  /**
   * Reads one boolean metadata property, using {@code SNOWFLAKE_METADATA_ALL_DEFAULT} when the
   * property is not present in {@code config}.
   */
  private static boolean getMetadataProperty(Map<String, String> config, String property) {
    String value =
        Optional.ofNullable(config.get(property))
            .orElse(KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL_DEFAULT);

    return Boolean.parseBoolean(value);
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("createtimeFlag", createtimeFlag)
        .add("connectorPushTimeFlag", connectorPushTimeFlag)
        .add("topicFlag", topicFlag)
        .add("offsetAndPartitionFlag", offsetAndPartitionFlag)
        .add("allFlag", allFlag)
        .toString();
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/records/SnowflakeSinkRecord.java
================================================
package com.snowflake.kafka.connector.records;

import static com.snowflake.kafka.connector.Utils.TABLE_COLUMN_METADATA;

import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.validation.SqlIdentifierNormalizer;
import java.time.Instant;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * A lightweight wrapper for Kafka SinkRecords that stores data in the format required by the
 * Snowflake Streaming Ingest SDK ({@code Map<String, Object>}).
 */
public final class SnowflakeSinkRecord {

  // Keys used inside the metadata map attached to each record.
  static final String OFFSET = "offset";

  static final String TOPIC = "topic";
  static final String PARTITION = "partition";
  static final String KEY = "key";
  static final String CONNECTOR_PUSH_TIME = "SnowflakeConnectorPushTime";
  static final String HEADERS = "headers";

  // Converted record payload; an empty map for tombstone and broken records.
  private final Map<String, Object> content;
  // Kafka metadata (topic, offset/partition, timestamps, key, headers) selected by the
  // SnowflakeMetadataConfig in effect when the record was built.
  private final Map<String, Object> metadata;
  // Schema associated with the content; see from(...) for how it is chosen.
  private final Schema schema;
  private final RecordState state;
  // Exception that made the record BROKEN; null for VALID and TOMBSTONE records.
  private final Exception brokenReason;

  /** Outcome of converting a {@link SinkRecord}. */
  public enum RecordState {
    // Key (if any) and value were converted successfully.
    VALID,
    // The record value was null.
    TOMBSTONE,
    // Key or value conversion threw; the cause is kept in brokenReason.
    BROKEN
  }

  // Instances are created only through the static from(...) factories.
  private SnowflakeSinkRecord(
      Map<String, Object> content,
      Map<String, Object> metadata,
      Schema schema,
      RecordState state,
      Exception brokenReason) {
    this.content = content;
    this.metadata = metadata;
    this.schema = schema;
    this.state = state;
    this.brokenReason = brokenReason;
  }

  /**
   * Builds a {@link SnowflakeSinkRecord} from a Kafka record, stamping it with the current time
   * as the connector push time.
   *
   * @param record the Kafka sink record to wrap
   * @param metadataConfig which metadata fields to collect
   * @param enableSchematization whether the value is spread into columns or wrapped as
   *     RECORD_CONTENT
   * @param enableColumnIdentifierNormalization whether column names are normalized (only applies
   *     when schematization is enabled)
   * @return the wrapped record, in VALID, TOMBSTONE or BROKEN state
   */
  public static SnowflakeSinkRecord from(
      SinkRecord record,
      SnowflakeMetadataConfig metadataConfig,
      boolean enableSchematization,
      boolean enableColumnIdentifierNormalization) {
    final Instant pushTime = Instant.now();
    return from(
        record,
        metadataConfig,
        pushTime,
        enableSchematization,
        enableColumnIdentifierNormalization);
  }

  /**
   * Builds a {@link SnowflakeSinkRecord} from a Kafka record with an explicit connector push time.
   *
   * <p>The key is validated first (only when both key and key schema are present); a failure
   * there, or any exception while converting the value or building metadata, yields a BROKEN
   * record carrying the exception. A null value yields a TOMBSTONE record.
   *
   * @param record the Kafka sink record to wrap
   * @param metadataConfig which metadata fields to collect
   * @param connectorPushTime push time recorded in the metadata; may be null
   * @param enableSchematization whether the value is spread into columns or wrapped as
   *     RECORD_CONTENT
   * @param enableColumnIdentifierNormalization whether column names are normalized (only applies
   *     when schematization is enabled)
   * @return the wrapped record, in VALID, TOMBSTONE or BROKEN state
   */
  public static SnowflakeSinkRecord from(
      SinkRecord record,
      SnowflakeMetadataConfig metadataConfig,
      Instant connectorPushTime,
      boolean enableSchematization,
      boolean enableColumnIdentifierNormalization) {
    // A key that cannot be converted makes the whole record broken.
    final boolean keyNeedsValidation = record.key() != null && record.keySchema() != null;
    if (keyNeedsValidation) {
      try {
        KafkaRecordConverter.convertKey(record.keySchema(), record.key());
      } catch (Exception keyFailure) {
        return createBrokenRecord(record, metadataConfig, connectorPushTime, keyFailure);
      }
    }

    if (record.value() == null) {
      return createTombstoneRecord(record, metadataConfig, connectorPushTime);
    }

    try {
      Schema effectiveSchema = record.valueSchema();
      Map<String, Object> payload;
      if (!enableSchematization) {
        payload = wrapValueAsRecordContent(effectiveSchema, record.value());
        effectiveSchema = RECORD_CONTENT_WRAPPER_SCHEMA;
      } else {
        payload = KafkaRecordConverter.convertToMap(effectiveSchema, record.value());
        if (enableColumnIdentifierNormalization) {
          payload = normalizeColumnNames(payload);
          effectiveSchema = normalizeSchemaFieldNames(effectiveSchema);
        }
      }
      final Map<String, Object> recordMetadata =
          buildMetadata(record, metadataConfig, connectorPushTime);
      return new SnowflakeSinkRecord(
          payload, recordMetadata, effectiveSchema, RecordState.VALID, null);
    } catch (Exception conversionFailure) {
      return createBrokenRecord(record, metadataConfig, connectorPushTime, conversionFailure);
    }
  }

  /**
   * Places the record value under the single {@code RECORD_CONTENT} key.
   *
   * <p>Structured values (Map/Struct) are converted to a Map so the SDK infers VARIANT.
   *
   * <p>Primitive values are converted and stored directly. The SSv2 SDK serializes the map to
   * NDJSON via Jackson, which handles native Java types (String, Number, Boolean) correctly for
   * VARIANT columns. KCv3/SSv1 needed JSON-serialized strings because SSv1 re-parsed them via
   * {@code readTree}; SSv2 sends NDJSON straight to the server, so JSON-serializing here would
   * produce double-quoted strings.
   */
  private static Map<String, Object> wrapValueAsRecordContent(Schema schema, Object value) {
    final boolean structured = value instanceof Map || value instanceof Struct;
    final Object converted =
        structured
            ? KafkaRecordConverter.convertToMap(schema, value)
            : KafkaRecordConverter.convertValue(schema, value);

    final Map<String, Object> wrapped = new HashMap<>();
    wrapped.put(Utils.TABLE_COLUMN_CONTENT, converted);
    return wrapped;
  }

  /**
   * Synthetic Struct schema declaring {@code RECORD_CONTENT} as an optional, field-less STRUCT (→
   * VARIANT). It replaces the record's own value schema whenever schematization is disabled.
   *
   * <p>Assumptions:
   *
   * <ul>
   *   <li>RECORD_CONTENT is always a VARIANT column in Snowflake, regardless of the Kafka value
   *       type. Even bare strings (from StringConverter) must land as VARIANT, not VARCHAR.
   *   <li>STRUCT is used because {@link
   *       com.snowflake.kafka.connector.internal.schemaevolution.SnowflakeColumnTypeMapper} maps
   *       STRUCT to "VARIANT". If schema evolution needs to ADD this column, it must infer VARIANT.
   *   <li>This only applies to standard Snowflake tables. Iceberg tables with typed RECORD_CONTENT
   *       columns would need a different schema strategy.
   * </ul>
   */
  private static final Schema RECORD_CONTENT_WRAPPER_SCHEMA =
      SchemaBuilder.struct()
          .field(Utils.TABLE_COLUMN_CONTENT, SchemaBuilder.struct().optional().build())
          .build();

  /**
   * Creates a TOMBSTONE record: empty content, full metadata, and the record's own value schema.
   */
  private static SnowflakeSinkRecord createTombstoneRecord(
      SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) {
    final Map<String, Object> tombstoneMetadata =
        buildMetadata(record, metadataConfig, connectorPushTime);
    final Map<String, Object> noContent = Collections.emptyMap();
    return new SnowflakeSinkRecord(
        noContent, tombstoneMetadata, record.valueSchema(), RecordState.TOMBSTONE, null);
  }

  /**
   * Creates a BROKEN record: empty content, best-effort metadata, the record's own value schema,
   * and the exception that caused the failure.
   */
  private static SnowflakeSinkRecord createBrokenRecord(
      SinkRecord record,
      SnowflakeMetadataConfig metadataConfig,
      Instant connectorPushTime,
      Exception reason) {
    final Map<String, Object> bestEffortMetadata =
        buildMetadataSafe(record, metadataConfig, connectorPushTime);
    final Map<String, Object> noContent = Collections.emptyMap();
    return new SnowflakeSinkRecord(
        noContent, bestEffortMetadata, record.valueSchema(), RecordState.BROKEN, reason);
  }

  /**
   * Builds metadata for a broken record without letting key or header conversion fail.
   *
   * <p>A key that cannot be converted is stored as its string form; headers that cannot be
   * converted are left out entirely.
   */
  private static Map<String, Object> buildMetadataSafe(
      SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) {
    final Map<String, Object> metadata =
        buildMetadataBase(record, metadataConfig, connectorPushTime);

    // Stores the converted key, or the key's string form if conversion throws.
    addKeyToMetadata(record, metadata);

    if (record.headers() == null || record.headers().isEmpty()) {
      return metadata;
    }
    try {
      metadata.put(HEADERS, KafkaRecordConverter.convertHeaders(record.headers()));
    } catch (Exception ignored) {
      // Skip headers if conversion fails
    }
    return metadata;
  }

  /**
   * Builds the full metadata map: the configured base fields, the record key (if any) and the
   * converted headers (if any). Header conversion failures propagate to the caller.
   */
  private static Map<String, Object> buildMetadata(
      SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) {
    final Map<String, Object> metadata =
        buildMetadataBase(record, metadataConfig, connectorPushTime);

    addKeyToMetadata(record, metadata);

    final boolean hasHeaders = record.headers() != null && !record.headers().isEmpty();
    if (hasHeaders) {
      metadata.put(HEADERS, KafkaRecordConverter.convertHeaders(record.headers()));
    }
    return metadata;
  }

  /**
   * Collects the metadata fields enabled in {@code metadataConfig}: topic, offset and partition,
   * the record timestamp (keyed by its timestamp type name, skipped for NO_TIMESTAMP_TYPE) and the
   * connector push time in epoch milliseconds (skipped when {@code connectorPushTime} is null).
   */
  private static Map<String, Object> buildMetadataBase(
      SinkRecord record, SnowflakeMetadataConfig metadataConfig, Instant connectorPushTime) {
    final Map<String, Object> base = new HashMap<>();

    if (metadataConfig.topicFlag) {
      base.put(TOPIC, record.topic());
    }

    if (metadataConfig.offsetAndPartitionFlag) {
      base.put(OFFSET, record.kafkaOffset());
      base.put(PARTITION, record.kafkaPartition());
    }

    final TimestampType timestampType = record.timestampType();
    final boolean hasTimestamp = timestampType != TimestampType.NO_TIMESTAMP_TYPE;
    if (hasTimestamp && metadataConfig.createtimeFlag) {
      base.put(timestampType.name, record.timestamp());
    }

    final boolean recordPushTime =
        connectorPushTime != null && metadataConfig.connectorPushTimeFlag;
    if (recordPushTime) {
      base.put(CONNECTOR_PUSH_TIME, connectorPushTime.toEpochMilli());
    }

    return base;
  }

  /**
   * Stores the record key in {@code metadata} under {@code KEY}; does nothing for a null key.
   *
   * <p>The key goes through {@code KafkaRecordConverter.convertKey} so it is type-checked against
   * its schema when one is present. If that conversion throws, the key's string form is stored
   * instead.
   */
  private static void addKeyToMetadata(SinkRecord record, Map<String, Object> metadata) {
    final Object rawKey = record.key();
    if (rawKey != null) {
      final Schema keySchema = record.keySchema();
      Object storedKey;
      try {
        storedKey = KafkaRecordConverter.convertKey(keySchema, rawKey);
      } catch (Exception conversionFailure) {
        // Fall back to a plain string representation of the key
        storedKey = String.valueOf(rawKey);
      }
      metadata.put(KEY, storedKey);
    }
  }

  /** Returns the schema stored with this record; may be null when the source had none. */
  public Schema getSchema() {
    return this.schema;
  }

  /**
   * Returns a copy of a STRUCT schema whose top-level field names are normalized SQL identifiers.
   *
   * <p>Null and non-STRUCT schemas are returned unchanged. The copy carries over only the schema
   * name, the optional flag and the fields (with their original field schemas).
   */
  private static Schema normalizeSchemaFieldNames(Schema schema) {
    final boolean isStruct = schema != null && schema.type() == Schema.Type.STRUCT;
    if (!isStruct) {
      return schema;
    }

    final SchemaBuilder renamed = SchemaBuilder.struct();
    final String schemaName = schema.name();
    if (schemaName != null) {
      renamed.name(schemaName);
    }
    if (schema.isOptional()) {
      renamed.optional();
    }
    schema
        .fields()
        .forEach(
            field ->
                renamed.field(
                    SqlIdentifierNormalizer.normalizeSqlIdentifier(field.name()),
                    field.schema()));
    return renamed.build();
  }

  /**
   * Returns a new map with every top-level key replaced by its normalized SQL identifier; values
   * are kept as they are.
   */
  private static Map<String, Object> normalizeColumnNames(Map<String, Object> content) {
    final Map<String, Object> renamed = new HashMap<>(content.size());
    content.forEach(
        (column, columnValue) ->
            renamed.put(SqlIdentifierNormalizer.normalizeSqlIdentifier(column), columnValue));
    return renamed;
  }

  /** Returns the converted record content (the same map instance held by this record). */
  public Map<String, Object> getContent() {
    return this.content;
  }

  /**
   * Returns the content, optionally with the metadata map added under the metadata column.
   *
   * @param includeMetadata whether metadata should be attached
   * @return a new map containing content plus metadata when {@code includeMetadata} is true and
   *     metadata is non-empty; otherwise the content map itself
   */
  public Map<String, Object> getContentWithMetadata(boolean includeMetadata) {
    if (includeMetadata && !metadata.isEmpty()) {
      final Map<String, Object> merged = new HashMap<>(content);
      merged.put(TABLE_COLUMN_METADATA, metadata);
      return merged;
    }
    return content;
  }

  /** Returns the metadata map collected for this record. */
  public Map<String, Object> getMetadata() {
    return this.metadata;
  }

  /** Returns the conversion outcome of this record. */
  public RecordState getState() {
    return this.state;
  }

  /** Returns true when the record was converted successfully. */
  public boolean isValid() {
    return RecordState.VALID == state;
  }

  /** Returns true when the record is a tombstone (its value was null). */
  public boolean isTombstone() {
    return RecordState.TOMBSTONE == state;
  }

  /** Returns true when key or value conversion failed. */
  public boolean isBroken() {
    return RecordState.BROKEN == state;
  }

  /** Returns the exception that broke the record, or null when the record is not broken. */
  public Exception getBrokenReason() {
    return this.brokenReason;
  }
}


================================================
FILE: src/main/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergDDLTypes.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

public class IcebergDDLTypes {

  public static String ICEBERG_METADATA_OBJECT_SCHEMA =
      "OBJECT("
          + "offset LONG,"
          + "topic STRING,"
          + "partition INTEGER,"
          + "key STRING,"
          + "CreateTime BIGINT,"
          + "SnowflakeConnectorPushTime BIGINT,"
          + "headers MAP(VARCHAR, VARCHAR)"
          + ")";
}


================================================
FILE: src/main/resources/com/snowflake/kafka/connector/ingest_error_messages.properties
================================================
#
# Copyright (c) 2024 Snowflake Computing Inc. All rights reserved.
#

#
# Exception messages.
# These error codes are associated with symbols in ErrorCode.java
#

0001=Ingest client internal error: {0}.
0002=Required value is null, Key: {0}.
0003=Required value is empty, Key: {0}.
0004=The given row cannot be converted to the internal format: {0}. {1}
0005=Unknown data type for column: {0}. logical: {1}, physical: {2}.
0006=Register blob request failed: {0}.
0007=Open channel request failed: {0}.
0008=Failed to construct HTTP request: {0}.
0009=Client configure request failed: {0}.
0010=Missing {0} in config file.
0011=Failed to upload blob.
0012=Failed to cleanup resources during {0}.
0013=Channel {0} is invalid and might contain uncommitted rows, please consider reopening the channel to restart. Channel invalidation cause: "{1}".
0014=Channel {0} is closed, please reopen the channel to restart.
0015=Invalid Snowflake URL, URL format: 'https://<account_name>.<region_name>.snowflakecomputing.com:443', 'https://' and ':443' are optional.
0016=Client is closed, please recreate to restart.
0017=Invalid private key, private key should be a valid PEM RSA private key.
0018=Invalid encrypted private key or passphrase, failed to decrypt private key.
0019=Invalid table data in chunk.
0020=Ingest client encountered IO error.
0021=Unable to connect to streaming ingest stage: {0}.
0022=Unable to create key pair from the provided private key.
0023=MD5 hashing algorithm is not available.
0024=Get channel status request failed: {0}.
0025=One or more channels {0} might contain uncommitted rows due to server side errors, please consider reopening the channels to replay the data loading by using the latest persistent offset token.
0026=Invalid collation string: {0}. {1}
0027=Failure during data encryption.
0028=Get channel status indicates Channel {0} is invalid with status code {1}, please reopen the channel.
0029=Data type not supported: {0}
0030=The given row cannot be converted to the internal format due to invalid value: {0}
0031=The given row exceeds the maximum allowed row size {0}
0032=URI builder fail to build url: {0}
0033=OAuth token refresh failure: {0}
0034=Invalid config parameter: {0}
0035=Failed to load {0}. If you use FIPS, import BouncyCastleFipsProvider in the application: {1}
0036=Failed to drop channel: {0}
0037=Deployment ID mismatch, Client was created on: {0}, Got upload location for: {1}. Please restart client: {2}.
0038=Generate presigned URLs request failed: {0}.
0039=Refresh Table Information request failed: {0}.

================================================
FILE: src/main/resources/com/snowflake/kafka/connector/internal/validation/ingest_error_messages.properties
================================================
#
# COPIED FROM SNOWFLAKE INGEST SDK V1
# Source: snowflake-ingest-java/src/main/resources/net/snowflake/ingest/ingest_error_messages.properties
#
# Modifications:
# - Only validation-related error codes retained (0001, 0004, 0005, 0020, 0029, 0030)
# - Resource path changed to match new package structure
#
# Copyright (c) 2024 Snowflake Computing Inc. All rights reserved.
#

#
# Exception messages.
# These error codes are associated to symbols in ErrorCode.java
#

0001=Ingest client internal error: {0}.
0004=The given row cannot be converted to the internal format: {0}. {1}
0005=Unknown data type for column: {0}. logical: {1}, physical: {2}.
0020=Ingest client encountered IO error.
0029=Data type not supported: {0}
0030=The given row cannot be converted to the internal format due to invalid value: {0}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/CachingConfigValidatorTest.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertThrows;

import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator;
import java.util.Map;
import java.util.stream.Stream;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Checks how the connector config validator treats the table/pipe existence cache settings: the
 * enable flags and their expiration times.
 */
class CachingConfigValidatorTest {

  private final ConnectorConfigValidator validator =
      new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator());

  /** Pairs of (table expiration, pipe expiration) in milliseconds that must be accepted. */
  private static Stream<Arguments> validCacheExpirations() {
    return Stream.of(Arguments.of("30000", "60000"), Arguments.of("3600000", "7200000"));
  }

  /** Pairs of (table cache flag, pipe cache flag) in various letter cases that must be accepted. */
  private static Stream<Arguments> validCacheEnabledDisabled() {
    return Stream.of(
        Arguments.of("true", "true"),
        Arguments.of("True", "True"),
        Arguments.of("TRUE", "TRUE"),
        Arguments.of("false", "false"),
        Arguments.of("False", "False"),
        Arguments.of("FALSE", "FALSE"));
  }

  // The display name only references {0} and {1}: this method has two arguments, so a {2}
  // placeholder would be rendered literally instead of being substituted.
  @ParameterizedTest(name = "[{index}] tableExpireMs={0}, pipeExpireMs={1}")
  @MethodSource("validCacheExpirations")
  void test_valid_expirations(String tableExpireMs, String pipeExpireMs) {
    Map<String, String> config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    config.put(CACHE_TABLE_EXISTS_EXPIRE_MS, tableExpireMs);
    config.put(CACHE_PIPE_EXISTS_EXPIRE_MS, pipeExpireMs);

    assertDoesNotThrow(() -> validator.validateConfig(config));
  }

  // Same as above: two arguments, so only {0} and {1} are valid placeholders.
  @ParameterizedTest(name = "[{index}] tableExists={0}, pipeExists={1}")
  @MethodSource("validCacheEnabledDisabled")
  void test_valid_enabled_disabled(String tableExists, String pipeExists) {
    Map<String, String> config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    config.put(CACHE_TABLE_EXISTS, tableExists);
    config.put(CACHE_PIPE_EXISTS, pipeExists);

    assertDoesNotThrow(() -> validator.validateConfig(config));
  }

  /**
   * Triples of (config key, invalid value, description). The description is the third argument and
   * is used both as the display name and as the assertion failure message.
   */
  private static Stream<Arguments> invalidConfigurationProvider() {
    return Stream.of(
        Arguments.of(CACHE_TABLE_EXISTS_EXPIRE_MS, "0", "Should reject zero table expiration"),
        Arguments.of(CACHE_TABLE_EXISTS_EXPIRE_MS, "-1", "Should reject negative table expiration"),
        Arguments.of(CACHE_PIPE_EXISTS_EXPIRE_MS, "0", "Should reject zero pipe expiration"),
        Arguments.of(
            CACHE_PIPE_EXISTS_EXPIRE_MS, "-5000", "Should reject negative pipe expiration"),
        Arguments.of(
            CACHE_TABLE_EXISTS_EXPIRE_MS, "invalid", "Should reject non-numeric table expiration"),
        Arguments.of(
            CACHE_PIPE_EXISTS_EXPIRE_MS,
            "not a number",
            "Should reject non-numeric pipe expiration"),
        Arguments.of(
            CACHE_TABLE_EXISTS, "blag blag", "Should reject invalid boolean for table exists"),
        Arguments.of(CACHE_TABLE_EXISTS, "ture", "Should reject typo in boolean for table exists"),
        Arguments.of(CACHE_TABLE_EXISTS, "1", "Should reject numeric boolean for table exists"),
        Arguments.of(
            CACHE_TABLE_EXISTS, "yes", "Should reject non-boolean string for table exists"),
        Arguments.of(CACHE_PIPE_EXISTS, "0", "Should reject numeric boolean for pipe exists"),
        Arguments.of(CACHE_PIPE_EXISTS, "no", "Should reject non-boolean string for pipe exists"));
  }

  @ParameterizedTest(name = "[{index}] {2}")
  @MethodSource("invalidConfigurationProvider")
  void testInvalidCacheConfiguration(String configKey, String configValue, String description) {
    Map<String, String> config = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    config.put(configKey, configValue);

    assertThrows(
        SnowflakeKafkaConnectorException.class,
        () -> validator.validateConfig(config),
        description);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectClusterBaseIT.java
================================================
package com.snowflake.kafka.connector;

import static org.awaitility.Awaitility.await;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.FakeIngestClientSupplier;
import com.snowflake.kafka.connector.internal.streaming.FakeSnowflakeStreamingIngestClient;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.sink.SinkConnector;
import org.apache.kafka.connect.storage.StringConverter;
import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.TestInstance;

/**
 * Base class for integration tests using an embedded Kafka Connect cluster.
 *
 * <p>A single-worker {@link EmbeddedConnectCluster} is started once per test class in {@link
 * #beforeAll()} and stopped in {@link #afterAll()}. Helper methods build a default connector
 * configuration and wait for connector state changes.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
public abstract class ConnectClusterBaseIT {

  protected EmbeddedConnectCluster connectCluster;
  protected final FakeIngestClientSupplier fakeClientSupplier = new FakeIngestClientSupplier();

  static final Integer TASK_NUMBER = 1;

  /** Starts the embedded Connect cluster with one worker. */
  @BeforeAll
  public void beforeAll() {
    Map<String, String> workerConfig = new HashMap<>();
    workerConfig.put("plugin.discovery", "hybrid_warn");
    // this parameter decides how often preCommit is called on the task
    workerConfig.put("offset.flush.interval.ms", "5000");

    connectCluster =
        new EmbeddedConnectCluster.Builder()
            .name("kafka-push-connector-connect-cluster")
            .numWorkers(1)
            .workerProps(workerConfig)
            .build();
    connectCluster.start();
  }

  /** Stops the embedded Connect cluster if it was started. */
  @AfterAll
  public void afterAll() {
    if (connectCluster != null) {
      connectCluster.stop();
      connectCluster = null;
    }
  }

  /**
   * Waits up to 60 seconds until the fake ingest client belonging to the given connector reports
   * at least one opened channel, then returns that client.
   *
   * @param connectorName connector name as passed in the connector configuration
   * @return the fake ingest client whose name contains the sanitized connector name
   */
  protected FakeSnowflakeStreamingIngestClient getOpenedFakeIngestClient(String connectorName) {
    await("channelsCreated")
        .atMost(Duration.ofSeconds(60))
        // the client may not be registered yet, in which case the lookup throws; keep polling
        .ignoreExceptions()
        .until(
            () ->
                !getFakeSnowflakeStreamingIngestClient(connectorName)
                    .getOpenedChannels()
                    .isEmpty());

    return getFakeSnowflakeStreamingIngestClient(connectorName);
  }

  /** Same wait as {@link #getOpenedFakeIngestClient(String)}, discarding the client. */
  protected void waitForOpenedFakeIngestClient(String connectorName) {
    getOpenedFakeIngestClient(connectorName);
  }

  /**
   * Builds the default connector configuration for the given topic and connector name on top of
   * the configuration derived from the test profile file.
   */
  protected final Map<String, String> defaultProperties(String topicName, String connectorName) {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);

    config.put(SinkConnector.TOPICS_CONFIG, topicName);
    config.put(
        ConnectorConfig.CONNECTOR_CLASS_CONFIG, SnowflakeStreamingSinkConnector.class.getName());
    config.put(ConnectorConfig.TASKS_MAX_CONFIG, TASK_NUMBER.toString());
    config.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    config.put(KafkaConnectorConfigParams.NAME, connectorName);
    config.put(KafkaConnectorConfigParams.VALUE_CONVERTER_SCHEMAS_ENABLE, "false");
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "true");

    return config;
  }

  /**
   * Asserts that the connector and at least one of its tasks are running.
   *
   * @throws IllegalStateException if the waiting thread is interrupted
   */
  protected final void waitForConnectorRunning(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorAndAtLeastNumTasksAreRunning(
              connectorName, 1, "The connector did not start.");
    } catch (InterruptedException e) {
      throw interrupted("waiting for connector " + connectorName + " to start", e);
    }
  }

  /**
   * Asserts that the connector no longer exists in the cluster.
   *
   * @throws IllegalStateException if the waiting thread is interrupted
   */
  protected final void waitForConnectorDoesNotExist(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorDoesNotExist(connectorName, "Failed to stop the connector");
    } catch (InterruptedException e) {
      throw interrupted("waiting for connector " + connectorName + " to be removed", e);
    }
  }

  /**
   * Asserts that the connector is in the stopped state.
   *
   * @throws IllegalStateException if the waiting thread is interrupted
   */
  protected final void waitForConnectorStopped(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorIsStopped(connectorName, "Connector should be stopped");
    } catch (InterruptedException e) {
      throw interrupted("waiting for connector " + connectorName + " to stop", e);
    }
  }

  /**
   * Restores the thread's interrupt flag and wraps the interruption, keeping the original
   * exception as the cause so the failure is diagnosable.
   */
  private static IllegalStateException interrupted(String action, InterruptedException cause) {
    Thread.currentThread().interrupt();
    return new IllegalStateException("Interrupted while " + action, cause);
  }

  /**
   * Finds the first registered fake ingest client whose name contains the sanitized connector
   * name; throws if none is registered.
   */
  private FakeSnowflakeStreamingIngestClient getFakeSnowflakeStreamingIngestClient(
      String connectorName) {
    // Connector names are sanitized/uppercased by Utils.convertAppName() in the connector
    Map<String, String> config = new HashMap<>();
    config.put(KafkaConnectorConfigParams.NAME, connectorName);
    Utils.convertAppName(config);
    String sanitizedConnectorName = config.get(KafkaConnectorConfigParams.NAME);
    return fakeClientSupplier.getFakeIngestClients().stream()
        .filter((client) -> client.getClientName().contains(sanitizedConnectorName))
        .findFirst()
        .orElseThrow();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectorConfigValidatorLogsTest.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.internal.TestUtils.generateAESKey;
import static com.snowflake.kafka.connector.internal.TestUtils.generatePrivateKey;
import static com.snowflake.kafka.connector.internal.TestUtils.getConfig;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.PrivateKeyTool;
import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.security.PrivateKey;
import java.util.Map;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/** Verifies that validating a configuration does not write the private key passphrase to the log. */
public class ConnectorConfigValidatorLogsTest {

  private final ConnectorConfigValidator connectorConfigValidator =
      new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator());

  @Test
  public void testRSAPasswordOutput() throws Exception {
    // given
    PrivateKey privateKey = generatePrivateKey();
    String testPasswd = "TestPassword1234!";
    String testKey = generateAESKey(privateKey, testPasswd.toCharArray());
    Map<String, String> testConf = getConfig();
    testConf.remove(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
    testConf.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, testKey);
    testConf.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, testPasswd);
    // when
    connectorConfigValidator.validateConfig(testConf);

    // then
    PrivateKeyTool.parsePrivateKey(testKey, testPasswd);
    Assertions.assertFalse(logFileContains(testPasswd));
  }

  // Note that sf.log accumulates logs between the consecutive test runs
  // That's why it's very hard to test many scenarios without hacks like test ordering and deleting
  // log file
  /**
   * Scans {@code sf.log} in the working directory line by line.
   *
   * @param str text to look for
   * @return {@code true} if any line of the log contains {@code str}
   * @throws IOException if the log file cannot be opened or read
   */
  private boolean logFileContains(String str) throws IOException {
    File log = new File("sf.log");
    // try-with-resources closes the readers on every path, including the early return on a match
    // and a read failure
    try (BufferedReader buffer = new BufferedReader(new FileReader(log))) {
      String line;
      while ((line = buffer.readLine()) != null) {
        if (line.contains(str)) {
          return true;
        }
      }
      return false;
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectorConfigValidatorTest.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_USER;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_NON_PROXY_HOSTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_USE_PROXY;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME;
import static com.snowflake.kafka.connector.internal.TestUtils.getConfig;
import static org.assertj.core.api.Assertions.assertThatCode;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.AuthenticatorType;
import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.streaming.DefaultStreamingConfigValidator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.kafka.connect.storage.Converter;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.CsvSource;
import org.junit.jupiter.params.provider.MethodSource;

public class ConnectorConfigValidatorTest {

  // Subset of valid community converters: Kafka's JsonConverter and Confluent's AvroConverter.
  // Fully qualified names are used because only the Converter interface is imported.
  public static final List<Converter> COMMUNITY_CONVERTER_SUBSET =
      Arrays.asList(
          new org.apache.kafka.connect.json.JsonConverter(),
          new io.confluent.connect.avro.AvroConverter());

  // Validator under test: the default connector validator constructed with the default
  // streaming validator.
  private final ConnectorConfigValidator connectorConfigValidator =
      new DefaultConnectorConfigValidator(new DefaultStreamingConfigValidator());

  /** Supplies the configurations that {@code shouldValidateCorrectConfig} expects to be valid. */
  public static Stream<Arguments> validConfigs() {
    Map<String, String> firstConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    Map<String, String> secondConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    return Stream.of(Arguments.of(firstConfig), Arguments.of(secondConfig));
  }

  /** Every configuration supplied by {@code validConfigs} must pass validation. */
  @ParameterizedTest(name = "Valid config: {0}")
  @MethodSource("validConfigs")
  public void shouldValidateCorrectConfig(Map<String, String> config) {
    // passing means validateConfig returned normally
    this.connectorConfigValidator.validateConfig(config);
  }

  /** The streaming configuration produced by the builder must pass validation as is. */
  @Test
  public void testConfig() {
    Map<String, String> streamingConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    this.connectorConfigValidator.validateConfig(streamingConfig);
  }

  /**
   * A connector name containing '.' and '-' must pass validation after {@code
   * Utils.convertAppName} has processed the configuration.
   */
  @Test
  public void testConfig_ConvertedInvalidAppName() {
    Map<String, String> namedConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withName("testConfig.snowflake-connector")
            .build();
    Utils.convertAppName(namedConfig);
    this.connectorConfigValidator.validateConfig(namedConfig);
  }

  /**
   * Removing any one of the listed properties from the base configuration must make validation
   * fail with a message that names the removed property.
   *
   * @param prop configuration key removed before validation
   */
  @ParameterizedTest
  @CsvSource({
    NAME,
    SNOWFLAKE_URL_NAME,
    SNOWFLAKE_USER_NAME,
    SNOWFLAKE_DATABASE_NAME,
    SNOWFLAKE_SCHEMA_NAME,
    // listed once: a second identical entry would only re-run the same case
    SNOWFLAKE_PRIVATE_KEY,
    SNOWFLAKE_ROLE_NAME
  })
  public void shouldThrowExForEmptyProperty(String prop) {
    Map<String, String> config = getConfig();
    config.remove(prop);
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(config))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(prop);
  }

  /** A JVM proxy host together with a proxy port must pass validation. */
  @Test
  public void testCorrectProxyHost() {
    Map<String, String> proxyConfig = getConfig();
    proxyConfig.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "127.0.0.1");
    proxyConfig.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, "3128");
    this.connectorConfigValidator.validateConfig(proxyConfig);
  }

  /**
   * Adds a JVM proxy host without adding a port and expects validation to fail with a message
   * naming the proxy host property.
   */
  @Test
  public void testEmptyPort() {
    Map<String, String> hostOnlyConfig = getConfig();
    hostOnlyConfig.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "127.0.0.1");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(hostOnlyConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.JVM_PROXY_HOST);
  }

  /**
   * Adds a JVM proxy port without adding a host and expects validation to fail with a message
   * naming the proxy port property.
   */
  @Test
  public void testEmptyHost() {
    Map<String, String> portOnlyConfig = getConfig();
    portOnlyConfig.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, "3128");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(portOnlyConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.JVM_PROXY_PORT);
  }

  /**
   * With a pre-existing non-proxy-hosts system property, {@code Utils.enableJVMProxy} must leave
   * that property holding the existing hosts followed by the hosts from the connector config,
   * separated by '|'.
   *
   * <p>System properties are global to the JVM, so the original value is restored and the proxy
   * properties are cleared in a {@code finally} block; a failing assertion therefore cannot leak
   * proxy settings into other tests.
   */
  @Test
  public void testNonProxyHosts() {
    // getProperty already returns null when the property is unset
    String oldNonProxyHosts = System.getProperty(HTTP_NON_PROXY_HOSTS);

    try {
      System.setProperty(HTTP_NON_PROXY_HOSTS, "host1.com|host2.com|localhost");
      Map<String, String> config = getConfig();
      config.put(JVM_PROXY_HOST, "127.0.0.1");
      config.put(JVM_PROXY_PORT, "3128");
      config.put(
          KafkaConnectorConfigParams.JVM_NON_PROXY_HOSTS,
          "*.snowflakecomputing.com|*.amazonaws.com");
      Utils.enableJVMProxy(config);

      // assertEquals reports both values on failure and tolerates a null actual value
      Assert.assertEquals(
          "host1.com|host2.com|localhost|*.snowflakecomputing.com|*.amazonaws.com",
          System.getProperty(HTTP_NON_PROXY_HOSTS));
    } finally {
      if (oldNonProxyHosts != null) {
        System.setProperty(HTTP_NON_PROXY_HOSTS, oldNonProxyHosts);
      } else {
        System.clearProperty(HTTP_NON_PROXY_HOSTS);
      }

      // clear properties to prevent other tests from failing
      System.clearProperty(HTTP_USE_PROXY);
      System.clearProperty(HTTP_PROXY_HOST);
      System.clearProperty(HTTP_PROXY_PORT);
      System.clearProperty(HTTPS_PROXY_HOST);
      System.clearProperty(HTTPS_PROXY_PORT);
      System.clearProperty(HTTPS_PROXY_USER);
      System.clearProperty(HTTPS_PROXY_PASSWORD);
    }
  }

  /** A topic-to-table map made of arbitrary symbols must be rejected, naming the map property. */
  @Test
  public void testIllegalTopicMap() {
    Map<String, String> badMapConfig = getConfig();
    badMapConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, "$@#$#@%^$12312");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(badMapConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP);
  }

  /** A table name with an opening quote that is never closed must be rejected. */
  @Test
  public void testIllegalTableName() {
    String expectedFragment = "Unterminated quoted token";
    Map<String, String> badTableConfig = getConfig();
    badTableConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, "topic1:\"unterminated");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(badTableConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(expectedFragment);
  }

  /** Mapping the same topic to two tables must be rejected as a duplicate topic. */
  @Test
  public void testDuplicatedTopic() {
    String expectedFragment = "Duplicate topic: topic1";
    Map<String, String> duplicateTopicConfig = getConfig();
    duplicateTopicConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, "topic1:table1,topic1:table2");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(duplicateTopicConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(expectedFragment);
  }

  /** Two different topics mapped to the same table must pass validation. */
  @Test
  public void testDuplicatedTableName() {
    Map<String, String> sharedTableConfig = getConfig();
    sharedTableConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, "topic1:table1,topic2:table1");
    this.connectorConfigValidator.validateConfig(sharedTableConfig);
  }

  /** Topics made of symbols must pass validation when the topic-to-table map names them. */
  @Test
  public void testNameMapCovered() {
    Map<String, String> mappedConfig = getConfig();
    mappedConfig.put(KafkaConnectorConfigParams.TOPICS, "!@#,$%^,test");
    mappedConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, "!@#:table1,$%^:table2");
    this.connectorConfigValidator.validateConfig(mappedConfig);
  }

  /** Both "IGNORE" and "DEFAULT" must be accepted as the behavior on null values. */
  @Test
  public void testBehaviorOnNullValuesConfig_valid_value() {
    Map<String, String> nullBehaviorConfig = getConfig();
    // the same map is reused; each accepted value overwrites the previous one in turn
    for (String behavior : new String[] {"IGNORE", "DEFAULT"}) {
      nullBehaviorConfig.put(KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES, behavior);
      this.connectorConfigValidator.validateConfig(nullBehaviorConfig);
    }
  }

  /** An unknown behavior-on-null-values setting must be rejected, naming the property. */
  @Test
  public void testBehaviorOnNullValuesConfig_invalid_value() {
    String property = KafkaConnectorConfigParams.BEHAVIOR_ON_NULL_VALUES;
    Map<String, String> nullBehaviorConfig = getConfig();
    nullBehaviorConfig.put(property, "INVALID");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(nullBehaviorConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(property);
  }

  /** The JMX option must accept "true" and "False". */
  @Test
  public void testJMX_valid_value() {
    Map<String, String> jmxConfig = getConfig();
    // the same map is reused; each accepted value overwrites the previous one in turn
    for (String jmxFlag : new String[] {"true", "False"}) {
      jmxConfig.put(KafkaConnectorConfigParams.JMX_OPT, jmxFlag);
      this.connectorConfigValidator.validateConfig(jmxConfig);
    }
  }

  /** A non-boolean JMX option must be rejected, naming the property. */
  @Test
  public void testJMX_invalid_value() {
    String property = KafkaConnectorConfigParams.JMX_OPT;
    Map<String, String> jmxConfig = getConfig();
    jmxConfig.put(property, "INVALID");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(jmxConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(property);
  }

  /** The base configuration with the role set to ACCOUNTADMIN must pass validation. */
  @Test
  public void testIngestionTypeConfig_valid_value_snowpipe() {
    Map<String, String> roleConfig = getConfig();
    roleConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    this.connectorConfigValidator.validateConfig(roleConfig);
  }

  /** The base configuration with the role set to ACCOUNTADMIN must pass validation. */
  @Test
  public void testIngestionTypeConfig_valid_value_snowpipe_streaming() {
    Map<String, String> roleConfig = getConfig();
    roleConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    this.connectorConfigValidator.validateConfig(roleConfig);
  }

  /** An empty role name must be rejected with a message naming the role property. */
  @Test
  public void testIngestionTypeConfig_invalid_snowpipe_streaming() {
    Map<String, String> emptyRoleConfig = getConfig();
    emptyRoleConfig.put(SNOWFLAKE_ROLE_NAME, "");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(emptyRoleConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(SNOWFLAKE_ROLE_NAME);
  }

  /**
   * These error tests are not going to enforce errors if they are not passed as configs. Here the
   * tolerance values ALL, NONE and lower-case "all" must each pass validation.
   */
  @Test
  public void testErrorTolerance_AllowedValues() {
    Map<String, String> toleranceConfig = getConfig();
    toleranceConfig.put(
        KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG,
        ConnectorConfigTools.ErrorTolerance.ALL.toString());
    toleranceConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    this.connectorConfigValidator.validateConfig(toleranceConfig);

    // remaining accepted values, applied one after another to the same map
    String[] otherTolerances = {ConnectorConfigTools.ErrorTolerance.NONE.toString(), "all"};
    for (String tolerance : otherTolerances) {
      toleranceConfig.put(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, tolerance);
      this.connectorConfigValidator.validateConfig(toleranceConfig);
    }
  }

  /** An unknown error-tolerance value must be rejected, naming the tolerance property. */
  @Test
  public void testErrorTolerance_DisallowedValues() {
    Map<String, String> toleranceConfig = getConfig();
    toleranceConfig.put(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, "INVALID");
    toleranceConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(toleranceConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG);
  }

  /** The error-log flag must accept "true", "FALSE" and "TRUE". */
  @Test
  public void testErrorLog_AllowedValues() {
    Map<String, String> errorLogConfig = getConfig();
    errorLogConfig.put(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG, "true");
    errorLogConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    this.connectorConfigValidator.validateConfig(errorLogConfig);

    // remaining accepted values, applied one after another to the same map
    for (String flag : new String[] {"FALSE", "TRUE"}) {
      errorLogConfig.put(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG, flag);
      this.connectorConfigValidator.validateConfig(errorLogConfig);
    }
  }

  /** A non-boolean error-log flag must be rejected, naming the error-log property. */
  @Test
  public void testErrorLog_DisallowedValues() {
    Map<String, String> errorLogConfig = getConfig();
    errorLogConfig.put(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_CONFIG, "INVALID");
    errorLogConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(errorLogConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(ERRORS_LOG_ENABLE_CONFIG);
  }

  /**
   * Each converter in {@code COMMUNITY_CONVERTER_SUBSET} must be accepted, first as the key
   * converter and then as the value converter.
   *
   * <p>The converter is configured through {@code Class.getName()}. {@code Class.toString()} would
   * prepend {@code "class "} to the name, so the validator would never see a real converter class
   * name.
   */
  @Test
  public void testValidKeyAndValueConvertersForStreamingSnowpipe() {
    Map<String, String> config = getConfig();

    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    COMMUNITY_CONVERTER_SUBSET.forEach(
        converter -> {
          config.put(KafkaConnectorConfigParams.KEY_CONVERTER, converter.getClass().getName());
          connectorConfigValidator.validateConfig(config);
        });

    COMMUNITY_CONVERTER_SUBSET.forEach(
        converter -> {
          config.put(KafkaConnectorConfigParams.VALUE_CONVERTER, converter.getClass().getName());
          connectorConfigValidator.validateConfig(config);
        });
  }

  /** Using StringConverter as the value converter must be rejected, naming the converter. */
  @Test
  public void testUnsupportedConverter() {
    String unsupportedConverter = "org.apache.kafka.connect.storage.StringConverter";
    Map<String, String> converterConfig = getConfig();
    converterConfig.put(KafkaConnectorConfigParams.VALUE_CONVERTER, unsupportedConverter);
    converterConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(converterConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(unsupportedConverter);
  }

  /** A well-formed streaming client provider override map must pass validation. */
  @Test
  public void testStreamingProviderOverrideConfig_validWithSnowpipeStreaming() {
    String overrideMap = "a:b,c:d,e:100,f:true";
    Map<String, String> overrideConfig = getConfig();
    overrideConfig.put(SNOWFLAKE_ROLE_NAME, "ACCOUNTADMIN");
    overrideConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, overrideMap);
    this.connectorConfigValidator.validateConfig(overrideConfig);
  }

  /** An empty configuration must be rejected with a message naming each listed property. */
  @Test
  public void testInvalidEmptyConfig() {
    Map<String, String> emptyConfig = new HashMap<>();

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(emptyConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME)
        .hasMessageContaining(KafkaConnectorConfigParams.NAME)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME);
  }

  // Grows the list of removed params one entry at a time and hands the accumulated list to
  // invalidConfigRunner after every addition, to test if the log/exception has all the expected
  // removed params
  @Test
  public void testMultipleInvalidConfigs() {
    List<String> requiredParams =
        Arrays.asList(
            SNOWFLAKE_DATABASE_NAME,
            SNOWFLAKE_SCHEMA_NAME,
            SNOWFLAKE_PRIVATE_KEY,
            SNOWFLAKE_USER_NAME,
            NAME,
            SNOWFLAKE_ROLE_NAME,
            SNOWFLAKE_URL_NAME);
    List<String> removedSoFar = new ArrayList<>();

    requiredParams.forEach(
        param -> {
          removedSoFar.add(param);
          this.invalidConfigRunner(removedSoFar);
        });
  }

  /** The streaming configuration produced by the builder must validate without any exception. */
  @Test
  public void shouldValidateSSv2Config() {
    Map<String, String> streamingConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();

    assertThatCode(() -> this.connectorConfigValidator.validateConfig(streamingConfig))
        .doesNotThrowAnyException();
  }

  /** A streaming configuration built without a role must be rejected, naming the role property. */
  @Test
  public void shouldThrowExceptionWhenRoleNotDefinedForSSv2() {
    Map<String, String> roleLessConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().withoutRole().build();

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(roleLessConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
  }

  // -- Compatibility validation tests --

  /**
   * With compatibility validation enabled and the builder's V3 compatibility settings applied,
   * validation must not throw.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndAllCompatSettingsCorrect() {
    Map<String, String> compatConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();

    assertThatCode(() -> this.connectorConfigValidator.validateConfig(compatConfig))
        .doesNotThrowAnyException();
  }

  /**
   * With compatibility validation enabled, explicitly setting schematization to "true" on top of
   * the V3 compatibility settings must still validate.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndSchematizationExplicitlyTrue() {
    String schematizationKey = KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION;
    Map<String, String> compatConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    compatConfig.put(schematizationKey, "true");

    assertThatCode(() -> this.connectorConfigValidator.validateConfig(compatConfig))
        .doesNotThrowAnyException();
  }

  /**
   * With compatibility validation enabled, a validation mode of "server_side" must be rejected
   * with a message naming the validation property and mentioning "client_side".
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndValidationModeWrong() {
    String validationKey = KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION;
    Map<String, String> compatConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    compatConfig.put(validationKey, "server_side");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(compatConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(validationKey)
        .hasMessageContaining("client_side");
  }

  /**
   * With compatibility validation enabled, removing the schematization setting must be rejected
   * with a message naming that property and containing "not explicitly set".
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndSchematizationNotSet() {
    String schematizationKey = KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION;
    Map<String, String> compatConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    compatConfig.remove(schematizationKey);

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(compatConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(schematizationKey)
        .hasMessageContaining("not explicitly set");
  }

  /**
   * With compatibility validation enabled, turning column identifier normalization off must be
   * rejected with a message naming that property.
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndColumnNormalizationWrong() {
    String normalizationKey =
        KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION;
    Map<String, String> compatConfig =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    compatConfig.put(normalizationKey, "false");

    assertThatThrownBy(() -> this.connectorConfigValidator.validateConfig(compatConfig))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(normalizationKey);
  }

  /**
   * Overrides the autogenerated-table-name-sanitization compatibility flag with {@code false} on a
   * config built with the V3 compatibility settings. Validation must throw an error naming that
   * flag.
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndTableSanitizationWrong() {
    final String sanitizationKey =
        KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.put(sanitizationKey, "false");

    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(settings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(sanitizationKey);
  }

  /**
   * Enables compatibility validation without applying the V3 compatibility settings and sets only
   * the schematization flag. The single validation error must mention every offending parameter:
   * validation mode, column normalization, table-name sanitization and SSv1 offset migration.
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndMultipleSettingsWrong() {
    final String validationKey = KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION;
    final String normalizationKey =
        KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION;
    final String sanitizationKey =
        KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION;
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;

    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .build();
    settings.put(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, "true");

    // The missing offset.migration setting is reported as a failure. Because it defaults to
    // skip, include.connector.name is not required and is therefore not asserted here.
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(settings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(validationKey)
        .hasMessageContaining(normalizationKey)
        .hasMessageContaining(sanitizationKey)
        .hasMessageContaining(offsetMigrationKey);
  }

  /**
   * Sets the SSv1 offset-migration parameter to {@code skip} on a config built with the V3
   * compatibility settings and expects validation to complete without throwing.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndOffsetMigrationIsSkip() {
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.put(offsetMigrationKey, "skip");

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * Drops the SSv1 offset-migration parameter from a config built with the V3 compatibility
   * settings. Validation must throw, and the error must name the parameter and contain "not
   * explicitly set".
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndOffsetMigrationNotSet() {
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.remove(offsetMigrationKey);

    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(settings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(offsetMigrationKey)
        .hasMessageContaining("not explicitly set");
  }

  /**
   * Sets the SSv1 offset-migration parameter to {@code strict} on a config built with the V3
   * compatibility settings and expects validation to complete without throwing.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndOffsetMigrationStrict() {
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.put(offsetMigrationKey, "strict");

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * Sets the SSv1 offset-migration parameter to {@code best_effort} on a config built with the V3
   * compatibility settings and expects validation to complete without throwing.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndOffsetMigrationBestEffort() {
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.put(offsetMigrationKey, "best_effort");

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * Drops the include-connector-name parameter from a config built with the V3 compatibility
   * settings, leaving the offset-migration value as the builder set it. Validation must throw,
   * and the error must name the parameter and contain "not explicitly set".
   */
  @Test
  public void shouldFailWhenCompatValidateEnabledAndIncludeConnectorNameNotSet() {
    final String includeNameKey =
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.remove(includeNameKey);

    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(settings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(includeNameKey)
        .hasMessageContaining("not explicitly set");
  }

  /**
   * Sets offset migration to {@code skip} and drops the include-connector-name parameter.
   * Validation must complete without throwing for this combination.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndSkipWithoutIncludeConnectorName() {
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;
    final String includeNameKey =
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.put(offsetMigrationKey, "skip");
    settings.remove(includeNameKey);

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * Sets the include-connector-name parameter to {@code true} on a config built with the V3
   * compatibility settings and expects validation to complete without throwing.
   */
  @Test
  public void shouldPassWhenCompatValidateEnabledAndIncludeConnectorNameTrue() {
    final String includeNameKey =
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(true)
            .withV3CompatibilitySettings()
            .build();
    settings.put(includeNameKey, "true");

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * With compatibility validation switched off and no V3 compatibility settings applied,
   * validation must complete without throwing.
   */
  @Test
  public void shouldPassWhenCompatValidateDisabled() {
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withCompatibilityValidate(false)
            .build();

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * Leaves the compatibility-validation switch unset (it is removed from the map) while applying
   * the V3 compatibility settings. Validation must complete without throwing.
   */
  @Test
  public void shouldPassWhenCompatValidateDefaultAndAllCompatSettingsCorrect() {
    final String compatSwitchKey =
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC;
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().withV3CompatibilitySettings().build();
    settings.remove(compatSwitchKey);

    assertThatCode(() -> connectorConfigValidator.validateConfig(settings))
        .doesNotThrowAnyException();
  }

  /**
   * Leaves the compatibility-validation switch unset (it is removed from the map) and applies no
   * V3 compatibility settings. Validation must throw a single error that mentions validation mode,
   * column normalization, table-name sanitization, schematization and SSv1 offset migration.
   */
  @Test
  public void shouldFailWhenCompatValidateDefaultAndNoCompatSettings() {
    final String validationKey = KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION;
    final String normalizationKey =
        KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION;
    final String sanitizationKey =
        KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION;
    final String schematizationKey = KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION;
    final String offsetMigrationKey = KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION;

    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    settings.remove(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC);

    // The missing offset.migration setting is reported as a failure. Because it defaults to
    // skip, include.connector.name is not required and is therefore not asserted here.
    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(settings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(validationKey)
        .hasMessageContaining(normalizationKey)
        .hasMessageContaining(sanitizationKey)
        .hasMessageContaining(schematizationKey)
        .hasMessageContaining(offsetMigrationKey);
  }

  /**
   * An OAuth configuration carrying client id, client secret and refresh token, with the private
   * key removed, must validate without throwing.
   */
  @Test
  public void testOAuthAuthenticator() {
    final Map<String, String> oauthSettings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withOauthRefreshToken("refresh_token")
            .withoutPrivateKey()
            .build();

    assertThatCode(() -> connectorConfigValidator.validateConfig(oauthSettings))
        .doesNotThrowAnyException();
  }

  /**
   * An OAuth configuration with only client id and client secret (no refresh token, no private
   * key) must validate without throwing.
   */
  @Test
  public void testOAuthWithoutRefreshToken_clientCredentialsGrant() {
    final Map<String, String> oauthSettings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withoutPrivateKey()
            .build();

    assertThatCode(() -> connectorConfigValidator.validateConfig(oauthSettings))
        .doesNotThrowAnyException();
  }

  /**
   * An OAuth configuration that additionally supplies an explicit token endpoint URL must
   * validate without throwing.
   */
  @Test
  public void testOAuthWithTokenEndpoint() {
    final Map<String, String> oauthSettings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withOauthRefreshToken("refresh_token")
            .withOauthTokenEndpoint("https://login.example.com/oauth2/v2.0/token")
            .withoutPrivateKey()
            .build();

    assertThatCode(() -> connectorConfigValidator.validateConfig(oauthSettings))
        .doesNotThrowAnyException();
  }

  /**
   * An unrecognised authenticator value must be rejected with an error that names the
   * authenticator parameter.
   */
  @Test
  public void testInvalidAuthenticator() {
    final Map<String, String> settings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator("invalid_authenticator")
            .build();

    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(settings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR);
  }

  /**
   * An OAuth configuration that never sets a client id must be rejected with an error that names
   * the client-id parameter.
   */
  @Test
  public void testOAuthEmptyClientId() {
    final Map<String, String> oauthSettings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientSecret("client_secret")
            .withoutPrivateKey()
            .build();

    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(oauthSettings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID);
  }

  /**
   * An OAuth configuration that never sets a client secret must be rejected with an error that
   * names the client-secret parameter.
   */
  @Test
  public void testOAuthEmptyClientSecret() {
    final Map<String, String> oauthSettings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withoutPrivateKey()
            .build();

    assertThatThrownBy(() -> connectorConfigValidator.validateConfig(oauthSettings))
        .isInstanceOf(SnowflakeKafkaConnectorException.class)
        .hasMessageContaining(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET);
  }

  /**
   * Confirms that removing the private key from a fully populated OAuth configuration does not
   * cause validation to throw.
   */
  @Test
  public void testOAuthDoesNotRequirePrivateKey() {
    final Map<String, String> oauthSettings =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig()
            .withAuthenticator(AuthenticatorType.OAUTH.toConfigValue())
            .withOauthClientId("client_id")
            .withOauthClientSecret("client_secret")
            .withOauthRefreshToken("refresh_token")
            .withoutPrivateKey()
            .build();

    assertThatCode(() -> connectorConfigValidator.validateConfig(oauthSettings))
        .doesNotThrowAnyException();
  }

  /**
   * Removes the given parameters from the baseline config returned by {@code getConfig()} and
   * verifies that validation rejects the result.
   *
   * <p>The check fails when validation does not throw a {@link SnowflakeKafkaConnectorException},
   * or when the exception message omits any of the removed parameter names. Exceptions of other
   * types propagate unchanged.
   *
   * @param paramsToRemove configuration keys to strip from the baseline config; each one is
   *     expected to be mentioned in the validation error message
   */
  private void invalidConfigRunner(List<String> paramsToRemove) {
    Map<String, String> config = getConfig();
    for (String configParam : paramsToRemove) {
      config.remove(configParam);
    }

    SnowflakeKafkaConnectorException thrown = null;
    try {
      connectorConfigValidator.validateConfig(config);
    } catch (SnowflakeKafkaConnectorException exception) {
      thrown = exception;
    }

    // A config that validated successfully used to fall through and pass silently.
    if (thrown == null) {
      throw new AssertionError(
          "Expected validation to fail after removing " + paramsToRemove + ", but it succeeded");
    }

    // Explicit AssertionError instead of the `assert` keyword, which is a no-op unless the JVM
    // runs with -ea.
    String message = thrown.getMessage();
    for (String configParam : paramsToRemove) {
      if (message == null || !message.contains(configParam)) {
        throw new AssertionError(
            "Expected validation error to mention '" + configParam + "' but was: " + message);
      }
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/ConnectorIT.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_PASSWORD;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.JVM_PROXY_USERNAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_ALL;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_CREATETIME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_OFFSET_AND_PARTITION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_METADATA_TOPIC;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME;
import static com.snowflake.kafka.connector.Utils.TASK_ID;
import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.TestUtils.transformProfileFileToConnectorConfiguration;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.SnowflakeDataSourceFactory;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.kafka.common.config.Config;
import org.apache.kafka.common.config.ConfigValue;
import org.junit.Test;

/**
 * Integration tests for {@link SnowflakeStreamingSinkConnector}: configuration validation through
 * {@code validate(...)}, proxy-setting validation through {@link Utils#validateProxySettings}, and
 * the connector start / taskConfigs / stop lifecycle.
 *
 * <p>NOTE(review): the "correct" configuration comes from {@code
 * TestUtils.transformProfileFileToConnectorConfiguration(false)}; these tests presumably need a
 * valid test profile and a reachable Snowflake account — confirm before running locally.
 */
public class ConnectorIT {
  /** Properties whose validation outcome is checked by {@link #assertPropHasError}. */
  static final String[] allPropertiesList = {
    SNOWFLAKE_URL_NAME,
    SNOWFLAKE_USER_NAME,
    SNOWFLAKE_SCHEMA_NAME,
    SNOWFLAKE_DATABASE_NAME,
    SNOWFLAKE_METADATA_ALL,
    SNOWFLAKE_METADATA_TOPIC,
    SNOWFLAKE_METADATA_OFFSET_AND_PARTITION,
    SNOWFLAKE_METADATA_CREATETIME,
    SNOWFLAKE_TOPICS2TABLE_MAP,
    SNOWFLAKE_PRIVATE_KEY,
    JVM_PROXY_PORT,
    JVM_PROXY_HOST,
    SNOWFLAKE_PRIVATE_KEY_PASSPHRASE
  };

  /** Set view of {@link #allPropertiesList}. */
  static final Set<String> allProperties = new HashSet<>(Arrays.asList(allPropertiesList));

  /**
   * Asserts that exactly the properties in {@code propArray} carry validation errors: every
   * property in {@link #allProperties} that is listed must have at least one error message, and
   * every one that is not listed must have none.
   *
   * @param validateMap validation result keyed by property name
   * @param propArray names of the properties that are expected to have errors
   */
  private static void assertPropHasError(
      final Map<String, ConfigValue> validateMap, final String[] propArray) {
    Set<String> expectedWithErrors = new HashSet<>(Arrays.asList(propArray));
    for (String prop : allProperties) {
      ConfigValue configValue = validateMap.get(prop);
      // Report a missing entry as an assertion failure instead of a bare NullPointerException.
      assert configValue != null
          : String.format("Property '%s' not found in validation results", prop);
      if (expectedWithErrors.contains(prop)) {
        assert !configValue.errorMessages().isEmpty()
            : String.format("Property '%s' should have validation errors but has none", prop);
      } else {
        assert configValue.errorMessages().isEmpty()
            : String.format(
                "Property '%s' should not have errors, but has: %s",
                prop, configValue.errorMessages());
      }
    }
  }

  /**
   * Runs {@code validate} on a fresh connector instance and converts the result to a map via
   * {@link Utils#validateConfigToMap}.
   *
   * @param config connector configuration to validate
   * @return validation results keyed by property name
   */
  private static Map<String, ConfigValue> toValidateMap(final Map<String, String> config) {
    SnowflakeStreamingSinkConnector sinkConnector = new SnowflakeStreamingSinkConnector();
    Config result = sinkConnector.validate(config);
    return Utils.validateConfigToMap(result);
  }

  /** @return a new, empty, mutable configuration map */
  static Map<String, String> getEmptyConfig() {
    Map<String, String> config = new HashMap<>();
    return config;
  }

  /**
   * Returns the profile-derived configuration with the warehouse, connector name and task id
   * entries removed.
   *
   * @return mutable configuration map used as the baseline for the validation tests
   */
  static Map<String, String> getCorrectConfig() {
    Map<String, String> config = transformProfileFileToConnectorConfiguration(false);
    config.remove(SnowflakeDataSourceFactory.SF_WAREHOUSE);
    config.remove(KafkaConnectorConfigParams.NAME);
    config.remove(TASK_ID);
    return config;
  }

  /**
   * Validates a configuration full of empty or malformed values and checks, with descriptive
   * failure messages, which properties are and are not reported as erroneous.
   */
  @Test
  public void testValidateErrorConfigImproved() {
    // Given: a configuration with intentionally invalid values
    Map<String, String> config = new HashMap<>();
    config.put(SNOWFLAKE_URL_NAME, "");
    config.put(SNOWFLAKE_USER_NAME, "");
    config.put(SNOWFLAKE_DATABASE_NAME, "");
    config.put(SNOWFLAKE_PRIVATE_KEY, "");
    config.put(SNOWFLAKE_ROLE_NAME, "");
    config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, "");
    config.put(SNOWFLAKE_SCHEMA_NAME, "");
    config.put(SNOWFLAKE_METADATA_TOPIC, "falseee");
    config.put(SNOWFLAKE_METADATA_OFFSET_AND_PARTITION, "falseee");
    config.put(SNOWFLAKE_METADATA_CREATETIME, "falseee");
    config.put(SNOWFLAKE_TOPICS2TABLE_MAP, "jfsja,,");
    Map<String, ConfigValue> validateMap = toValidateMap(config);

    // Optional properties that should NOT have errors (even when empty or missing)
    Set<String> optionalProperties =
        new HashSet<>(
            Arrays.asList(
                SNOWFLAKE_PRIVATE_KEY,
                JVM_PROXY_PORT,
                JVM_PROXY_HOST,
                SNOWFLAKE_PRIVATE_KEY_PASSPHRASE,
                SNOWFLAKE_METADATA_ALL));

    // Required properties or properties with format validation that SHOULD have errors
    Set<String> requiredOrValidatedProperties =
        new HashSet<>(
            Arrays.asList(
                SNOWFLAKE_URL_NAME, // empty string - required
                SNOWFLAKE_USER_NAME, // empty string - required
                SNOWFLAKE_DATABASE_NAME, // empty string - required
                SNOWFLAKE_SCHEMA_NAME, // empty string - required
                SNOWFLAKE_METADATA_TOPIC, // invalid boolean "falseee"
                KafkaConnectorConfigParams
                    .SNOWFLAKE_METADATA_OFFSET_AND_PARTITION, // invalid boolean "falseee"
                KafkaConnectorConfigParams
                    .SNOWFLAKE_METADATA_CREATETIME, // invalid boolean "falseee"
                SNOWFLAKE_TOPICS2TABLE_MAP // invalid format "jfsja,,"
                ));

    // Assert: optional properties should have NO errors
    for (String optionalProp : optionalProperties) {
      ConfigValue configValue = validateMap.get(optionalProp);
      assert configValue != null
          : String.format("Property '%s' not found in validation results", optionalProp);
      assert configValue.errorMessages().isEmpty()
          : String.format(
              "Optional property '%s' should not have errors, but has: %s",
              optionalProp, configValue.errorMessages());
    }

    // Assert: required/validated properties SHOULD have errors
    for (String requiredProp : requiredOrValidatedProperties) {
      ConfigValue configValue = validateMap.get(requiredProp);
      assert configValue != null
          : String.format("Property '%s' not found in validation results", requiredProp);
      assert !configValue.errorMessages().isEmpty()
          : String.format(
              "Required/validated property '%s' should have validation errors but has none. "
                  + "Current value: '%s'",
              requiredProp, configValue.value());
    }
  }

  /** An empty configuration must report errors on user, URL, schema and database only. */
  @Test
  public void testValidateEmptyConfig() {
    Map<String, ConfigValue> validateMap = toValidateMap(getEmptyConfig());
    assertPropHasError(
        validateMap,
        new String[] {
          SNOWFLAKE_USER_NAME, SNOWFLAKE_URL_NAME, SNOWFLAKE_SCHEMA_NAME, SNOWFLAKE_DATABASE_NAME,
        });
  }

  /** The baseline configuration must produce no errors on any tracked property. */
  @Test
  public void testValidateCorrectConfig() {
    Map<String, ConfigValue> validateMap = toValidateMap(getCorrectConfig());
    assertPropHasError(validateMap, new String[] {});
  }

  /** A non-Snowflake URL must be reported as an error on the URL property only. */
  @Test
  public void testValidateErrorURLFormatConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_URL_NAME, "https://google.com");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {SNOWFLAKE_URL_NAME});
  }

  /** A wrong account in the URL must flag the user, URL and private-key properties. */
  @Test
  public void testValidateErrorURLAccountConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_URL_NAME, "wronggAccountt.snowflakecomputing.com:443");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(
        validateMap, new String[] {SNOWFLAKE_USER_NAME, SNOWFLAKE_URL_NAME, SNOWFLAKE_PRIVATE_KEY});
  }

  /** A wrong user name must flag the user, URL and private-key properties. */
  @Test
  public void testValidateErrorUserConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_USER_NAME, "wrongUser");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(
        validateMap, new String[] {SNOWFLAKE_USER_NAME, SNOWFLAKE_URL_NAME, SNOWFLAKE_PRIVATE_KEY});
  }

  /** A malformed private key must be reported as an error on the private-key property. */
  @Test
  public void testValidateErrorPasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY, "wrongPassword");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY});
  }

  /** An empty private key must be reported as an error on the private-key property. */
  @Test
  public void testValidateEmptyPasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY, "");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY});
  }

  /** A missing private key must be reported as an error on the private-key property. */
  @Test
  public void testValidateNullPasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.remove(SNOWFLAKE_PRIVATE_KEY);
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY});
  }

  /** A private key given as a {@code ${file:...} placeholder must not produce any errors. */
  @Test
  public void testValidateFilePasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY, " ${file:/");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {});
  }

  /** A private key given as a {@code ${configProvider:...} placeholder must not produce errors. */
  @Test
  public void testValidateConfigProviderPasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY, " ${configProvider:/");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {});
  }

  /** A passphrase given as a {@code ${file:...} placeholder must not produce any errors. */
  @Test
  public void testValidateFilePassphraseConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, " ${file:/");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {});
  }

  /** A passphrase given as a {@code ${configProvider:...} placeholder must not produce errors. */
  @Test
  public void testValidateConfigProviderPassphraseConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, " ${configProvider:/");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {});
  }

  /** A wrong passphrase must flag both the private-key and the passphrase properties. */
  @Test
  public void testValidateErrorPassphraseConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, "wrongPassphrase");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(
        validateMap, new String[] {SNOWFLAKE_PRIVATE_KEY, SNOWFLAKE_PRIVATE_KEY_PASSPHRASE});
  }

  /** A wrong database name must be reported as an error on the database property only. */
  @Test
  public void testValidateErrorDatabaseConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_DATABASE_NAME, "wrongDatabase");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {SNOWFLAKE_DATABASE_NAME});
  }

  /** A wrong schema name must be reported as an error on the schema property only. */
  @Test
  public void testValidateErrorSchemaConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(SNOWFLAKE_SCHEMA_NAME, "wrongSchema");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {SNOWFLAKE_SCHEMA_NAME});
  }

  /** A proxy host without a port must flag both the proxy host and proxy port properties. */
  @Test
  public void testErrorProxyHostConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(JVM_PROXY_HOST, "localhost");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {JVM_PROXY_HOST, JVM_PROXY_PORT});
  }

  /** A proxy port without a host must flag both the proxy host and proxy port properties. */
  @Test
  public void testErrorProxyPortConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(JVM_PROXY_PORT, "8080");
    Map<String, ConfigValue> validateMap = toValidateMap(config);
    assertPropHasError(validateMap, new String[] {JVM_PROXY_HOST, JVM_PROXY_PORT});
  }

  /**
   * Calls proxy validation with both host and port set; the test only checks that the call
   * completes without throwing.
   */
  @Test
  public void testProxyHostPortConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(JVM_PROXY_HOST, "localhost");
    config.put(JVM_PROXY_PORT, "8080");
    // NOTE(review): the returned map of invalid settings is not inspected — consider asserting
    // it is empty once that contract is confirmed.
    Utils.validateProxySettings(config);
  }

  /** A proxy user name without a password must be reported as an invalid proxy setting. */
  @Test
  public void testErrorProxyUsernameConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(JVM_PROXY_HOST, "localhost");
    config.put(JVM_PROXY_PORT, "8080");
    config.put(JVM_PROXY_USERNAME, "user");

    Map<String, String> invalidConfigs = Utils.validateProxySettings(config);
    assert invalidConfigs.containsKey(JVM_PROXY_USERNAME);
  }

  /** A proxy password without a user name must be reported as an invalid proxy setting. */
  @Test
  public void testErrorProxyPasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(JVM_PROXY_HOST, "localhost");
    config.put(JVM_PROXY_PORT, "8080");
    config.put(JVM_PROXY_PASSWORD, "pass");

    Map<String, String> invalidConfigs = Utils.validateProxySettings(config);
    assert invalidConfigs.containsKey(JVM_PROXY_PASSWORD);
  }

  /**
   * Calls proxy validation with host, port, user name and password all set; the test only checks
   * that the call completes without throwing.
   */
  @Test
  public void testProxyUsernamePasswordConfig() {
    Map<String, String> config = getCorrectConfig();
    config.put(JVM_PROXY_HOST, "localhost");
    config.put(JVM_PROXY_PORT, "3128");
    config.put(JVM_PROXY_USERNAME, "admin");
    config.put(JVM_PROXY_PASSWORD, "test");
    // NOTE(review): the returned map of invalid settings is not inspected — consider asserting
    // it is empty once that contract is confirmed.
    Utils.validateProxySettings(config);
  }

  /**
   * Starts the connector, checks the task class and the per-task configuration (task id and
   * connector name), stops it, and finally checks the reported version.
   */
  @Test
  public void testConnectorComprehensive() {
    Map<String, String> config = transformProfileFileToConnectorConfiguration(false);
    SnowflakeStreamingSinkConnector sinkConnector = new SnowflakeStreamingSinkConnector();
    sinkConnector.start(config);
    try {
      assert sinkConnector.taskClass().equals(SnowflakeSinkTask.class);
      List<Map<String, String>> taskConfigs = sinkConnector.taskConfigs(2);
      assert taskConfigs.get(0).get(TASK_ID).equals("0");
      assert taskConfigs.get(0).get(NAME).equals(TEST_CONNECTOR_NAME);
      assert taskConfigs.get(1).get(TASK_ID).equals("1");
    } finally {
      // Stop the connector even when an assertion above fails, so a started connector is not
      // left running for the remaining tests.
      sinkConnector.stop();
    }
    assert sinkConnector.version().equals(Utils.VERSION);
  }

  /**
   * Calls {@code taskConfigs} on a connector that was never started, on a background thread, and
   * abandons the call after six seconds. The test makes no assertion on the outcome.
   *
   * @throws Exception if the sleeping test thread is interrupted
   */
  @Test
  public void testConnectorComprehensiveNegative() throws Exception {
    Map<String, String> config = transformProfileFileToConnectorConfiguration(false);
    SnowflakeStreamingSinkConnector sinkConnector = new SnowflakeStreamingSinkConnector();
    ExecutorService testThread = Executors.newSingleThreadExecutor();
    try {
      testThread.submit(
          () -> {
            // After 10 minutes this thread will throw error. 10 minutes is too long
            // for this test, so kill the thread after 6 seconds, which should have
            // covered enough lines.
            sinkConnector.taskConfigs(2);
          });
      Thread.sleep(6000);
    } finally {
      // Always interrupt the worker, including when the sleep itself is interrupted.
      testThread.shutdownNow();
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/InjectQueryRunner.java
================================================
package com.snowflake.kafka.connector;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Annotation to mark fields and parameters for QueryRunner injection. Use with {@link
 * InjectQueryRunnerExtension}.
 *
 * <p>The annotation carries no attributes. It may be placed on fields and parameters only and is
 * retained at runtime so that it can be looked up reflectively.
 */
@Target({ElementType.FIELD, ElementType.PARAMETER})
@Retention(RetentionPolicy.RUNTIME)
public @interface InjectQueryRunner {}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/InjectQueryRunnerExtension.java
================================================
package com.snowflake.kafka.connector;

import com.snowflake.kafka.connector.internal.SnowflakeDataSourceFactory;
import java.lang.reflect.Field;
import org.apache.commons.dbutils.QueryRunner;
import org.junit.jupiter.api.extension.BeforeEachCallback;
import org.junit.jupiter.api.extension.ExtensionContext;
import org.junit.jupiter.api.extension.ParameterContext;
import org.junit.jupiter.api.extension.ParameterResolver;

/**
 * JUnit Jupiter extension that supplies {@link QueryRunner} instances to tests.
 *
 * <p>Before each test, every field annotated with {@link InjectQueryRunner} on the test instance
 * (and, for non-static nested test classes, on its enclosing instances) receives a new {@link
 * QueryRunner}. Test method parameters of type {@link QueryRunner} carrying the same annotation
 * are resolved the same way. Each runner is built on {@code SnowflakeDataSourceFactory.get()}.
 */
public class InjectQueryRunnerExtension implements BeforeEachCallback, ParameterResolver {

  /** Injects query runners into the current test instance before the test runs. */
  @Override
  public void beforeEach(final ExtensionContext context) throws Exception {
    final Object target = context.getRequiredTestInstance();
    injectFields(target, target.getClass());
  }

  /**
   * Assigns a new query runner to each annotated field declared directly on {@code testClass},
   * then repeats the process for the enclosing instance when one can be located.
   *
   * @param testInstance object whose fields are populated
   * @param testClass class whose declared fields are scanned
   * @throws Exception if a reflective field access fails
   */
  private void injectFields(final Object testInstance, Class<?> testClass) throws Exception {
    for (final Field candidate : testClass.getDeclaredFields()) {
      if (!candidate.isAnnotationPresent(InjectQueryRunner.class)) {
        continue;
      }
      candidate.setAccessible(true);
      candidate.set(testInstance, getQueryRunner());
    }

    // Only nested classes have an enclosing class whose fields may also need injection.
    final Class<?> outerClass = testClass.getEnclosingClass();
    if (outerClass == null) {
      return;
    }

    final Field outerReference = getEnclosingInstanceField(testClass);
    if (outerReference == null) {
      return;
    }

    outerReference.setAccessible(true);
    final Object outerInstance = outerReference.get(testInstance);
    if (outerInstance != null) {
      injectFields(outerInstance, outerClass);
    }
  }

  /**
   * Looks for the synthetic field whose name starts with {@code this$}, which non-static inner
   * classes use to reference their enclosing instance.
   *
   * @param innerClass class to inspect
   * @return the first matching field, or {@code null} when none is found or the lookup fails
   */
  private Field getEnclosingInstanceField(final Class<?> innerClass) {
    Field outerReference = null;
    try {
      final Field[] declared = innerClass.getDeclaredFields();
      for (int i = 0; i < declared.length && outerReference == null; i++) {
        final Field current = declared[i];
        if (current.isSynthetic() && current.getName().startsWith("this$")) {
          outerReference = current;
        }
      }
    } catch (final Exception e) {
      // Best effort: a failed lookup is treated the same as "no enclosing instance field".
    }
    return outerReference;
  }

  /** Supports parameters of type {@link QueryRunner} annotated with {@link InjectQueryRunner}. */
  @Override
  public boolean supportsParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    if (!parameterContext.getParameter().isAnnotationPresent(InjectQueryRunner.class)) {
      return false;
    }
    return QueryRunner.class.equals(parameterContext.getParameter().getType());
  }

  /** Resolves a supported parameter to a new query runner. */
  @Override
  public Object resolveParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    return getQueryRunner();
  }

  /** Creates a query runner on top of the data source returned by the factory. */
  private QueryRunner getQueryRunner() {
    return new QueryRunner(SnowflakeDataSourceFactory.get());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/InjectSnowflakeDataSource.java
================================================
package com.snowflake.kafka.connector;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * Annotation to mark fields and parameters for DataSource injection. Use with {@link
 * InjectSnowflakeDataSourceExtension}.
 *
 * <p>The annotation carries no attributes. It may be placed on fields and parameters only and is
 * retained at runtime so that it can be looked up reflectively.
 */
@Target({ElementType.FIELD, ElementType.PARAMETER})
@Retention(RetentionPolicy.RUNTIME)
public @interface InjectSnowflakeDataSource {}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/InjectSnowflakeDataSourceExtension.java
================================================
package com.snowflake.kafka.connector;

import com.snowflake.kafka.connector.internal.SnowflakeDataSourceFactory;
import java.lang.reflect.Field;
import javax.sql.DataSource;
import org.junit.jupiter.api.Order;
import org.junit.jupiter.api.extension.BeforeEachCallback;
import org.junit.jupiter.api.extension.ExtensionContext;
import org.junit.jupiter.api.extension.ParameterContext;
import org.junit.jupiter.api.extension.ParameterResolver;

/**
 * JUnit 5 extension that supplies the object returned by {@code SnowflakeDataSourceFactory.get()}
 * to tests, either by assigning it to fields annotated with {@link InjectSnowflakeDataSource}
 * before each test or by resolving annotated {@link DataSource} parameters.
 *
 * <p>Field injection only looks at fields declared directly on the test class (inherited fields
 * are not scanned) and, for non-static inner test classes, repeats the process on the enclosing
 * instance(s).
 *
 * <p>NOTE(review): confirm that {@code @Order} is actually consulted for extensions registered via
 * {@code @ExtendWith}; it may only matter for {@code @RegisterExtension} fields.
 */
@Order(1)
public class InjectSnowflakeDataSourceExtension implements BeforeEachCallback, ParameterResolver {

  /**
   * Injects annotated fields on the current test instance (and its enclosing instances).
   *
   * @param context context of the test about to run; must expose a test instance
   * @throws Exception if a field cannot be read or assigned reflectively
   */
  @Override
  public void beforeEach(final ExtensionContext context) throws Exception {
    final Object testInstance = context.getRequiredTestInstance();
    injectFields(testInstance, testInstance.getClass());
  }

  /**
   * Assigns the data source to every {@link InjectSnowflakeDataSource} field declared on {@code
   * testClass}, then recurses into the enclosing class when one exists.
   *
   * @param testInstance object whose fields are written
   * @param testClass class whose declared fields are scanned
   * @throws Exception if reflective access fails
   */
  private void injectFields(final Object testInstance, Class<?> testClass) throws Exception {
    // Process fields in the current class
    for (final Field field : testClass.getDeclaredFields()) {
      if (field.isAnnotationPresent(InjectSnowflakeDataSource.class)) {
        field.setAccessible(true);
        field.set(testInstance, SnowflakeDataSourceFactory.get());
      }
    }

    // If this is a nested class, recursively process the enclosing class fields
    Class<?> enclosingClass = testClass.getEnclosingClass();
    if (enclosingClass != null) {
      // Get the enclosing instance for nested classes; static nested classes have no such field,
      // in which case the enclosing class is left untouched.
      Field thisField = getEnclosingInstanceField(testClass);
      if (thisField != null) {
        thisField.setAccessible(true);
        Object enclosingInstance = thisField.get(testInstance);
        if (enclosingInstance != null) {
          injectFields(enclosingInstance, enclosingClass);
        }
      }
    }
  }

  /**
   * Finds the synthetic field that links an inner class to its enclosing instance.
   *
   * @param innerClass class whose declared fields are scanned
   * @return the first synthetic declared field whose name starts with {@code this$}, or {@code
   *     null} if none exists or the lookup throws
   */
  private Field getEnclosingInstanceField(final Class<?> innerClass) {
    try {
      // Non-static inner classes have a synthetic field named "this$0" pointing to the enclosing
      // instance
      for (final Field field : innerClass.getDeclaredFields()) {
        if (field.isSynthetic() && field.getName().startsWith("this$")) {
          return field;
        }
      }
    } catch (final Exception e) {
      // If we can't find the field, return null
    }
    return null;
  }

  /**
   * Tells JUnit whether this extension can supply the given parameter.
   *
   * @param parameterContext context describing the parameter being resolved
   * @param extensionContext context of the current test; not used
   * @return {@code true} only for parameters annotated with {@link InjectSnowflakeDataSource}
   *     whose declared type is exactly {@link DataSource}
   */
  @Override
  public boolean supportsParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    // Use ParameterContext#isAnnotated rather than Parameter#isAnnotationPresent: the JUnit user
    // guide recommends it because the raw reflection lookup can miss annotations on constructor
    // parameters of inner (@Nested) classes, which this extension is meant to support.
    return parameterContext.isAnnotated(InjectSnowflakeDataSource.class)
        && parameterContext.getParameter().getType().equals(DataSource.class);
  }

  /**
   * Supplies the value for a parameter accepted by {@link #supportsParameter}.
   *
   * @param parameterContext context describing the parameter being resolved; not used
   * @param extensionContext context of the current test; not used
   * @return the object returned by {@code SnowflakeDataSourceFactory.get()}
   */
  @Override
  public Object resolveParameter(
      final ParameterContext parameterContext, final ExtensionContext extensionContext) {
    return SnowflakeDataSourceFactory.get();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/LegacySchemaToggleIT.java
================================================
package com.snowflake.kafka.connector;

import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.FakeSnowflakeStreamingIngestChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.time.Duration;
import java.util.Map;
import org.apache.commons.dbutils.QueryRunner;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;

/**
 * Integration tests checking how rows appended to the fake streaming ingest client are shaped
 * depending on the {@code snowflake.enable.schematization} setting: wrapped in {@code
 * RECORD_CONTENT} when it is {@code "false"}, flat columns when it is left at its default.
 */
@ExtendWith({InjectSnowflakeDataSourceExtension.class, InjectQueryRunnerExtension.class})
public class LegacySchemaToggleIT extends ConnectClusterBaseIT {

  private static final int PARTITION_COUNT = 1;
  private static final String RECORD_CONTENT = "RECORD_CONTENT";
  private static final String RECORD_METADATA = "RECORD_METADATA";
  private static final Duration AWAIT_TIMEOUT = Duration.ofSeconds(30);
  private static final Duration AWAIT_POLL_INTERVAL = Duration.ofSeconds(1);
  private String topicName;
  private String connectorName;
  private ObjectMapper objectMapper = new ObjectMapper();

  @InjectQueryRunner private QueryRunner queryRunner;

  /** Creates a uniquely named topic and table and installs the fake streaming client supplier. */
  @BeforeEach
  void before() {
    topicName = TestUtils.randomTableName();
    connectorName = String.format("%s_connector", topicName);
    connectCluster.kafka().createTopic(topicName, PARTITION_COUNT);
    TestUtils.createTableWithMetadataColumn(topicName);
    StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier);
  }

  /** Removes the topic, connector, table and pipe and restores the client supplier. */
  @AfterEach
  void after() {
    connectCluster.kafka().deleteTopic(topicName);
    connectCluster.deleteConnector(connectorName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(topicName);
    final String pipeName = topicName + "-STREAMING";
    TestUtils.dropPipe(pipeName);
  }

  /** With schematization off, a JSON object value ends up as a map under RECORD_CONTENT. */
  @Test
  void test_legacyMode_jsonConverter_wrapsInRecordContent() throws Exception {
    final Map<String, String> connectorConfig = defaultProperties(topicName, connectorName);
    connectorConfig.put("snowflake.enable.schematization", "false");
    connectorConfig.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    startConnectorAndAwaitClient(connectorConfig);

    final String payload = objectMapper.writeValueAsString(Map.of("city", "Portland", "age", 25));
    connectCluster.kafka().produce(topicName, payload);

    await()
        .timeout(AWAIT_TIMEOUT)
        .pollInterval(AWAIT_POLL_INTERVAL)
        .untilAsserted(
            () -> {
              final Map<String, Object> appended = singleAppendedRow();
              assertThat(appended).containsKeys(RECORD_METADATA, RECORD_CONTENT);
              final Object rawContent = appended.get(RECORD_CONTENT);
              assertThat(rawContent).isInstanceOf(Map.class);
              @SuppressWarnings("unchecked")
              Map<String, Object> content = (Map<String, Object>) appended.get(RECORD_CONTENT);
              assertThat(content).containsEntry("city", "Portland");
              assertThat(content).containsEntry("age", 25L);
            });
  }

  /** With schematization off, a plain string value is stored as-is under RECORD_CONTENT. */
  @Test
  void test_legacyMode_stringConverter_wrapsInRecordContent() {
    final Map<String, String> connectorConfig = defaultProperties(topicName, connectorName);
    connectorConfig.put("snowflake.enable.schematization", "false");
    connectorConfig.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    connectorConfig.put("value.converter", "org.apache.kafka.connect.storage.StringConverter");
    startConnectorAndAwaitClient(connectorConfig);

    connectCluster.kafka().produce(topicName, "raw string payload");

    await()
        .timeout(AWAIT_TIMEOUT)
        .pollInterval(AWAIT_POLL_INTERVAL)
        .untilAsserted(
            () -> {
              final Map<String, Object> appended = singleAppendedRow();
              assertThat(appended).containsKeys(RECORD_METADATA, RECORD_CONTENT);
              assertThat(appended.get(RECORD_CONTENT)).isEqualTo("raw string payload");
            });
  }

  /** With schematization left at its default, fields become columns and nothing is wrapped. */
  @Test
  void test_legacyMode_defaultSchematization_doesNotWrap() throws Exception {
    final Map<String, String> connectorConfig = defaultProperties(topicName, connectorName);
    connectorConfig.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    connectorConfig.put(
        Constants.KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "false");
    startConnectorAndAwaitClient(connectorConfig);

    final String payload = objectMapper.writeValueAsString(Map.of("city", "Portland"));
    connectCluster.kafka().produce(topicName, payload);

    await()
        .timeout(AWAIT_TIMEOUT)
        .pollInterval(AWAIT_POLL_INTERVAL)
        .untilAsserted(
            () -> {
              final Map<String, Object> appended = singleAppendedRow();
              assertThat(appended).containsKey("city");
              assertThat(appended).doesNotContainKey(RECORD_CONTENT);
            });
  }

  /** Registers the connector and blocks until it runs and has opened its fake ingest client. */
  private void startConnectorAndAwaitClient(final Map<String, String> connectorConfig) {
    connectCluster.configureConnector(connectorName, connectorConfig);
    waitForConnectorRunning(connectorName);
    waitForOpenedFakeIngestClient(connectorName);
  }

  /**
   * Asserts that exactly one row has been appended so far and returns the first row of the first
   * opened channel.
   */
  private Map<String, Object> singleAppendedRow() throws Exception {
    assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount()).isEqualTo(1);
    final FakeSnowflakeStreamingIngestChannel firstChannel =
        getOpenedFakeIngestClient(connectorName).getOpenedChannels().get(0);
    return firstChannel.getAppendedRows().get(0);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SchemaEvolutionAvroSrIT.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.internal.TestUtils.assertTableColumnCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;

import com.snowflake.kafka.connector.internal.TestUtils;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.confluent.kafka.schemaregistry.testutil.MockSchemaRegistry;
import io.confluent.kafka.serializers.KafkaAvroSerializer;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration test for schema evolution using Avro with Schema Registry. Tests that the table is
 * updated with correct column types when records with different Avro schemas are sent from multiple
 * topics.
 *
 * <p>Records are serialized with a {@link KafkaAvroSerializer} backed by a {@link
 * MockSchemaRegistry} scope, and the connector reads them through {@link AvroConverter} pointed at
 * the matching {@code mock://} URL.
 */
class SchemaEvolutionAvroSrIT extends SchemaEvolutionBase {

  private static final String MOCK_SCHEMA_REGISTRY_URL = "mock://test-schema-registry";

  private static final String PERFORMANCE_STRING = "PERFORMANCE_STRING";
  private static final String PERFORMANCE_CHAR = "PERFORMANCE_CHAR";
  private static final String RATING_INT = "RATING_INT";
  private static final String RATING_DOUBLE = "RATING_DOUBLE";
  private static final String APPROVAL = "APPROVAL";
  private static final String TIME_MILLIS = "TIME_MILLIS";
  private static final String TIMESTAMP_MILLIS = "TIMESTAMP_MILLIS";
  private static final String DATE = "DATE";
  private static final String DECIMAL = "DECIMAL";
  private static final String SOME_FLOAT_NAN = "SOME_FLOAT_NAN";
  private static final String RECORD_METADATA = "RECORD_METADATA";

  // Column name -> expected column type, passed to TestUtils.checkTableSchema.
  private static final Map<String, String> EXPECTED_SCHEMA = new HashMap<>();

  static {
    EXPECTED_SCHEMA.put(PERFORMANCE_STRING, "VARCHAR");
    EXPECTED_SCHEMA.put(PERFORMANCE_CHAR, "VARCHAR");
    EXPECTED_SCHEMA.put(RATING_INT, "NUMBER");
    EXPECTED_SCHEMA.put(RATING_DOUBLE, "FLOAT");
    EXPECTED_SCHEMA.put(APPROVAL, "BOOLEAN");
    EXPECTED_SCHEMA.put(SOME_FLOAT_NAN, "FLOAT");
    EXPECTED_SCHEMA.put(TIME_MILLIS, "TIME");
    EXPECTED_SCHEMA.put(TIMESTAMP_MILLIS, "TIMESTAMP");
    EXPECTED_SCHEMA.put(DATE, "DATE");
    EXPECTED_SCHEMA.put(DECIMAL, "VARCHAR");
    EXPECTED_SCHEMA.put(RECORD_METADATA, "VARIANT");
  }

  // Avro schema of the records produced to topic0 (includes the logical-type fields).
  private static final String VALUE_SCHEMA_0 =
      "{\"type\": \"record\",\"name\": \"value_schema_0\",\"fields\": [  {\"name\":"
          + " \"PERFORMANCE_CHAR\", \"type\": \"string\"},  {\"name\": \"PERFORMANCE_STRING\","
          + " \"type\": \"string\"},"
          + " {\"name\":\"TIME_MILLIS\",\"type\":{\"type\":\"int\",\"logicalType\":\"time-millis\"}},"
          + "{\"name\":\"DATE\",\"type\":{\"type\":\"int\",\"logicalType\":\"date\"}},{\"name\":\"DECIMAL\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\","
          + " \"precision\":4, \"scale\":2}},"
          + "{\"name\":\"TIMESTAMP_MILLIS\",\"type\":{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}},"
          + "  {\"name\": \"RATING_INT\", \"type\": \"int\"}]}";

  // Avro schema of the records produced to topic1.
  private static final String VALUE_SCHEMA_1 =
      "{"
          + "\"type\": \"record\","
          + "\"name\": \"value_schema_1\","
          + "\"fields\": ["
          + "  {\"name\": \"RATING_DOUBLE\", \"type\": \"float\"},"
          + "  {\"name\": \"PERFORMANCE_STRING\", \"type\": \"string\"},"
          + "  {\"name\": \"APPROVAL\", \"type\": \"boolean\"},"
          + "  {\"name\": \"SOME_FLOAT_NAN\", \"type\": \"float\"}"
          + "]"
          + "}";

  private static final String SCHEMA_REGISTRY_SCOPE = "test-schema-registry";
  // Expected number of table columns; equals the number of EXPECTED_SCHEMA entries.
  private static final int COL_NUM = 11;

  private KafkaProducer<String, Object> avroProducer;

  /** Creates a fresh Avro producer for every test. */
  @BeforeEach
  void beforeEach() {
    avroProducer = createAvroProducer();
  }

  /** Closes the producer (if one was created) and drops the mock schema registry scope. */
  @AfterEach
  void afterEach() {
    if (avroProducer != null) {
      avroProducer.close();
    }
    MockSchemaRegistry.dropScope(SCHEMA_REGISTRY_SCOPE);
  }

  /**
   * Sends differently shaped Avro records to two topics mapped to one table and verifies the row
   * count, the column count and the resulting column types.
   */
  @Test
  void testSchemaEvolutionWithMultipleTopicsAndAvroSr() throws Exception {
    // given
    final Map<String, String> config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, AvroConverter.class.getName());
    config.put("value.converter.schema.registry.url", MOCK_SCHEMA_REGISTRY_URL);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // when
    sendRecordsToTopic0();
    sendRecordsToTopic1();

    // then
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT;
    assertWithRetry(() -> snowflake.tableExist(tableName));
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == expectedTotalRecords);
    assertTableColumnCount(tableName, COL_NUM);
    TestUtils.checkTableSchema(tableName, EXPECTED_SCHEMA);
  }

  /**
   * Builds a producer against the embedded Kafka cluster that uses explicitly constructed key and
   * value serializer instances.
   */
  private KafkaProducer<String, Object> createAvroProducer() {
    final Properties props = new Properties();
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, connectCluster.kafka().bootstrapServers());
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class.getName());
    props.put("schema.registry.url", MOCK_SCHEMA_REGISTRY_URL);
    return new KafkaProducer<>(props, new StringSerializer(), createAvroSerializer());
  }

  /** Creates a value serializer wired to the mock schema registry client for the test scope. */
  private KafkaAvroSerializer createAvroSerializer() {
    final SchemaRegistryClient schemaRegistryClient =
        MockSchemaRegistry.getClientForScope(SCHEMA_REGISTRY_SCOPE);
    final KafkaAvroSerializer serializer = new KafkaAvroSerializer(schemaRegistryClient);
    // Second argument false: configure as a value (not key) serializer.
    serializer.configure(Map.of("schema.registry.url", MOCK_SCHEMA_REGISTRY_URL), false);
    return serializer;
  }

  /** Produces RECORD_COUNT records following VALUE_SCHEMA_0 to topic0 and flushes. */
  private void sendRecordsToTopic0() {
    final Schema schema = new Schema.Parser().parse(VALUE_SCHEMA_0);
    for (int i = 0; i < RECORD_COUNT; i++) {
      final GenericRecord record = createTopic0Record(schema);
      avroProducer.send(new ProducerRecord<>(topic0, "key-" + i, record));
    }
    avroProducer.flush();
  }

  /** Produces RECORD_COUNT records following VALUE_SCHEMA_1 to topic1 and flushes. */
  private void sendRecordsToTopic1() {
    final Schema schema = new Schema.Parser().parse(VALUE_SCHEMA_1);
    for (int i = 0; i < RECORD_COUNT; i++) {
      final GenericRecord record = createTopic1Record(schema);
      avroProducer.send(new ProducerRecord<>(topic1, "key-" + i, record));
    }
    avroProducer.flush();
  }

  /**
   * Builds one record for topic0, including a decimal encoded as bytes according to the field's
   * declared scale.
   *
   * @param schema parsed VALUE_SCHEMA_0
   */
  private GenericRecord createTopic0Record(final Schema schema) {
    Schema decimalSchema = schema.getField(DECIMAL).schema();
    LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) decimalSchema.getLogicalType();
    BigDecimal value = new BigDecimal("0.03125");
    // RoundingMode replaces the deprecated BigDecimal.ROUND_HALF_UP int constant.
    BigDecimal scaledValue = value.setScale(decimalType.getScale(), RoundingMode.HALF_UP);
    ByteBuffer byteBuffer =
        new Conversions.DecimalConversion().toBytes(scaledValue, decimalSchema, decimalType);

    final GenericRecord record = new GenericData.Record(schema);
    record.put(PERFORMANCE_STRING, "Excellent");
    record.put(PERFORMANCE_CHAR, "A");
    record.put(RATING_INT, 100);
    record.put(TIME_MILLIS, 10);
    // The field is declared as Avro "long", so supply a Long rather than an Integer.
    record.put(TIMESTAMP_MILLIS, 12L);
    record.put(DECIMAL, byteBuffer);
    record.put(DATE, 11);
    return record;
  }

  /**
   * Builds one record for topic1, including a NaN float value.
   *
   * @param schema parsed VALUE_SCHEMA_1
   */
  private GenericRecord createTopic1Record(final Schema schema) {
    final GenericRecord record = new GenericData.Record(schema);
    record.put(PERFORMANCE_STRING, "Excellent");
    record.put(RATING_DOUBLE, 0.99f);
    record.put(APPROVAL, true);
    record.put(SOME_FLOAT_NAN, Float.NaN);
    return record;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SchemaEvolutionBase.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionServiceFactory;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.util.Map;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.storage.StringConverter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;

/**
 * Shared fixture for schema evolution integration tests: two topics that are both mapped to one
 * randomly named table, plus a Snowflake connection used for assertions.
 */
abstract class SchemaEvolutionBase extends ConnectClusterBaseIT {

  static final int PARTITION_COUNT = 1;
  static final int RECORD_COUNT = 100;
  static final int TOPIC_COUNT = 2;

  final ObjectMapper objectMapper = new ObjectMapper();

  String tableName;
  String connectorName;
  String topic0;
  String topic1;

  SnowflakeConnectionService snowflake;

  /**
   * Picks fresh table/connector/topic names, creates both topics, opens a Snowflake connection
   * with table/pipe existence caching turned off, and resets the streaming client supplier.
   */
  @BeforeEach
  void before() {
    this.tableName = TestUtils.randomTableName();
    this.connectorName = String.format("%s_connector", this.tableName);
    this.topic0 = this.tableName + "0";
    this.topic1 = this.tableName + "1";
    for (final String topic : new String[] {this.topic0, this.topic1}) {
      connectCluster.kafka().createTopic(topic, PARTITION_COUNT);
    }

    final Map<String, String> connectionProperties =
        TestUtils.transformProfileFileToConnectorConfiguration(false);
    connectionProperties.put(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, "false");
    connectionProperties.put(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, "false");
    this.snowflake =
        SnowflakeConnectionServiceFactory.builder().setProperties(connectionProperties).build();

    StreamingClientFactory.resetStreamingClientSupplier();
  }

  /** Deletes both topics and the connector, resets the client supplier and drops the table. */
  @AfterEach
  void after() {
    for (final String topic : new String[] {this.topic0, this.topic1}) {
      connectCluster.kafka().deleteTopic(topic);
    }
    connectCluster.deleteConnector(this.connectorName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(this.tableName);
  }

  /**
   * Builds the connector configuration shared by the schema evolution tests.
   *
   * @return the default properties for both topics, extended with the topic-to-table mapping, a
   *     string key converter, schema-less values, error handling settings and client-side
   *     validation; subclasses add their own value converter
   */
  Map<String, String> createConnectorConfig() {
    final String topicList = String.join(",", topic0, topic1);
    final String tableMapping = String.format("%s:%s,%s:%s", topic0, tableName, topic1, tableName);

    final Map<String, String> connectorConfig = defaultProperties(topicList, connectorName);
    connectorConfig.put(SNOWFLAKE_TOPICS2TABLE_MAP, tableMapping);
    connectorConfig.put(
        ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
    connectorConfig.put("value.converter.schemas.enable", "false");
    connectorConfig.put("errors.tolerance", "none");
    connectorConfig.put("errors.log.enable", "true");
    connectorConfig.put("errors.deadletterqueue.topic.name", "DLQ_TOPIC");
    connectorConfig.put("errors.deadletterqueue.topic.replication.factor", "1");
    connectorConfig.put("jmx", "true");
    // Type inference in these tests relies on the connector validating rows on the client side.
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "client_side");
    return connectorConfig;
  }

  /**
   * Produces a single record with a {@code null} value (a tombstone) to the given topic.
   *
   * @param topic topic to write the tombstone to
   */
  void sendTombstoneRecords(final String topic) {
    final String tombstoneValue = null;
    connectCluster.kafka().produce(topic, tombstoneValue);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SchemaEvolutionJsonIT.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP;
import static com.snowflake.kafka.connector.internal.TestUtils.assertColumnNullable;
import static com.snowflake.kafka.connector.internal.TestUtils.assertTableColumnCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.util.Map;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.junit.jupiter.api.Test;

/**
 * Schema evolution integration tests that feed schema-less JSON records through the connector and
 * verify the resulting table columns, nullability and row counts.
 */
class SchemaEvolutionJsonIT extends SchemaEvolutionBase {

  /**
   * Two topics write to the same table, each with its own set of columns; afterwards every
   * expected column must be present. Tombstones are ingested as rows with default settings.
   */
  @Test
  void testSchemaEvolutionWithMultipleTopics() throws Exception {
    // two topics write to the same table. Each topic sends unique set of columns. Test that after
    // ingestion all expected columns are present in the database
    // given
    final Map<String, String> config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // when
    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    // then
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT + 2; // + 2 tombstone records
    makeCommonAssertions(expectedTotalRecords);
  }

  /** Same as the multi-topic test, but tombstones are dropped via behavior.on.null.values. */
  @Test
  void testSchemaEvolutionIgnoreTombstone() throws Exception {
    // given
    final Map<String, String> config = createConnectorConfig();
    config.put("behavior.on.null.values", "IGNORE");
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // when
    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    // then
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT;
    makeCommonAssertions(expectedTotalRecords);
  }

  /** Verifies that ingesting a row without COL1 removes the NOT NULL constraint on COL1. */
  @Test
  void removeNotNullConstraint() throws Exception {
    // test that schema evolution is able to remove NON NULL constraint from the column
    // given
    final Map<String, String> config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

    // COL1 has not null constraint
    snowflake.executeQueryWithParameters(
        "CREATE OR REPLACE TABLE "
            + tableName
            + " (RECORD_METADATA VARIANT, COL1 VARCHAR NOT NULL, COL2 VARCHAR)"
            + " ENABLE_SCHEMA_EVOLUTION = true");

    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    final ObjectNode fullRow = objectMapper.createObjectNode();
    fullRow.put("col1", "col1value");
    fullRow.put("col2", "col2value");
    // inserting normal non null columns
    connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(fullRow));

    // then
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == 1);
    assertTableColumnCount(tableName, 3);
    TestUtils.checkTableSchema(
        tableName,
        Map.of(
            "COL1", "VARCHAR",
            "COL2", "VARCHAR",
            "RECORD_METADATA", "VARIANT"));
    assertColumnNullable(tableName, "COL1", false);

    // col1 not initialized
    final ObjectNode rowWithNullValue = objectMapper.createObjectNode();
    rowWithNullValue.put("col2", "col2value");

    // now insert row with col1 == null
    connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(rowWithNullValue));

    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == 2);
    // constraint has been removed
    assertColumnNullable(tableName, "COL1", true);
  }

  /**
   * Uses an ExtractField SMT so that half of the produced records become null values after the
   * transformation; with null values ignored only the other half must reach the table.
   */
  @Test
  void testSchemaEvolutionIgnoreTombstoneAfterSmt() throws Exception {
    // given
    final Map<String, String> config = createConnectorConfig();
    config.put("behavior.on.null.values", "IGNORE");
    config.put("errors.tolerance", "all");
    config.put(
        SNOWFLAKE_TOPICS2TABLE_MAP,
        topic0 + ":" + tableName); // reading only from one topic for this test
    config.put("transforms", "extractField");
    config.put(
        "transforms.extractField.type", "org.apache.kafka.connect.transforms.ExtractField$Value");
    config.put("transforms.extractField.field", "optionalField");
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    // produce records that should result in null value after SMT transformation
    for (int i = 0; i < RECORD_COUNT; i++) {
      final ObjectNode record = objectMapper.createObjectNode();
      record.put("PERFORMANCE_STRING", "Excellent");
      record.put("APPROVAL", true);
      connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(record));
    }

    // produce records that should result in non-null value after SMT transformation
    for (int i = 0; i < RECORD_COUNT; i++) {
      final ObjectNode record = objectMapper.createObjectNode();
      final ObjectNode optionalFieldValue = objectMapper.createObjectNode();
      optionalFieldValue.put("hasSomething", true);
      record.set("optionalField", optionalFieldValue);
      connectCluster.kafka().produce(topic0, objectMapper.writeValueAsString(record));
    }

    // then
    final int expectedTotalRecords =
        RECORD_COUNT; // not 2x, just half of the records produced should get into destination table
    assertWithRetry(() -> snowflake.tableExist(tableName));
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == expectedTotalRecords);
    assertTableColumnCount(tableName, 2);
    TestUtils.checkTableSchema(
        tableName,
        Map.of(
            "HASSOMETHING", "BOOLEAN",
            "RECORD_METADATA", "VARIANT"));
  }

  /**
   * Ingests data, drops the table, restarts the connector and ingests the same data again; the
   * second round must produce the same table contents as the first.
   */
  @Test
  void testSchemaEvolutionDropTable() throws Exception {
    // given
    final Map<String, String> config = createConnectorConfig();
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());

    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    // then
    final int expectedTotalRecords = TOPIC_COUNT * RECORD_COUNT + 2; // +2 tombstone records
    makeCommonAssertions(expectedTotalRecords);
    // wait 10 secs to make sure precommit advances consumer group offset and
    // the connector does not reingest the same records after the restart
    // precommit frequency is decided by offset.flush.interval.ms parameter
    Thread.sleep(10000);

    TestUtils.dropTable(tableName);
    connectCluster.restartConnectorAndTasks(connectorName, false, true, false);
    waitForConnectorRunning(connectorName);

    sendRecordsToTopic0();
    sendRecordsToTopic1();
    sendTombstoneRecords(topic1);
    sendTombstoneRecords(topic0);

    makeCommonAssertions(expectedTotalRecords);
  }

  /** Produces RECORD_COUNT topic-0 style records to topic0. */
  private void sendRecordsToTopic0() throws JsonProcessingException {
    // Record schema for topic 0: PERFORMANCE_STRING, RATING_INT
    for (int i = 0; i < RECORD_COUNT; i++) {
      connectCluster.kafka().produce(topic0, createTopic0Record());
    }
  }

  /** Produces RECORD_COUNT topic-1 style records to topic1. */
  private void sendRecordsToTopic1() throws JsonProcessingException {
    // Record schema for topic 1: PERFORMANCE_STRING, RATING_DOUBLE, APPROVAL
    for (int i = 0; i < RECORD_COUNT; i++) {
      connectCluster.kafka().produce(topic1, createTopic1Record());
    }
  }

  /** Serializes a JSON record with PERFORMANCE_STRING and RATING_INT. */
  private String createTopic0Record() throws JsonProcessingException {
    final ObjectNode record = objectMapper.createObjectNode();
    record.put("PERFORMANCE_STRING", "Excellent");
    record.put("RATING_INT", 100);
    return objectMapper.writeValueAsString(record);
  }

  /** Serializes a JSON record with PERFORMANCE_STRING, RATING_DOUBLE and APPROVAL. */
  private String createTopic1Record() throws JsonProcessingException {
    final ObjectNode record = objectMapper.createObjectNode();
    record.put("PERFORMANCE_STRING", "Excellent");
    record.put("RATING_DOUBLE", 0.99);
    record.put("APPROVAL", true);
    return objectMapper.writeValueAsString(record);
  }

  /**
   * Assertions shared by the two-topic tests: the table exists, holds the expected number of rows
   * and has exactly the five expected columns with the expected types.
   *
   * @param expectedTotalRecords number of rows the table must eventually contain
   */
  private void makeCommonAssertions(final int expectedTotalRecords) throws Exception {
    assertWithRetry(() -> snowflake.tableExist(tableName));
    assertWithRetry(() -> TestUtils.getNumberOfRows(tableName) == expectedTotalRecords);
    assertTableColumnCount(tableName, 5);
    TestUtils.checkTableSchema(
        tableName,
        Map.of(
            "PERFORMANCE_STRING", "VARCHAR",
            "RECORD_METADATA", "VARIANT",
            "RATING_INT", "NUMBER",
            "APPROVAL", "BOOLEAN",
            "RATING_DOUBLE", "FLOAT"));
  }

  /**
   * Streams records into a pre-created interactive table, first with the initial schema and then
   * with an extra field, and verifies that the new column appears.
   *
   * <p>The dedicated multi-partition topic created by this test is deleted in a {@code finally}
   * block so it does not outlive a failed assertion.
   */
  @Test
  void testSnowpipeStreamingSchemaEvolution() throws Exception {
    // Test schema evolution with streaming ingestion using interactive table
    // Migrated from test_snowpipe_streaming_schema_evolution.py

    // given - create interactive table with schema evolution enabled
    final int partitionCount = 3;
    final int recordsPerPartition = 1000;
    final int schemaEvolutionRecordCount = 100;
    final int initialRecordCount = recordsPerPartition - schemaEvolutionRecordCount;

    final String streamingTopic = tableName + "_streaming";
    connectCluster.kafka().createTopic(streamingTopic, partitionCount);

    try {
      // Create interactive table with schema evolution enabled
      System.out.println("Creating interactive table: " + tableName);
      snowflake.executeQueryWithParameters(
          "CREATE OR REPLACE INTERACTIVE TABLE "
              + tableName
              + " (RECORD_METADATA VARIANT, FIELDNAME VARCHAR) "
              + "CLUSTER BY (FIELDNAME) "
              + "ENABLE_SCHEMA_EVOLUTION = TRUE");
      System.out.println("Interactive table created successfully");

      final Map<String, String> config = defaultProperties(streamingTopic, connectorName);
      config.put(
          ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG,
          org.apache.kafka.connect.storage.StringConverter.class.getName());
      config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
      config.put("value.converter.schemas.enable", "false");
      config.put("errors.tolerance", "none");
      config.put("errors.log.enable", "true");
      config.put("errors.deadletterqueue.topic.name", "DLQ_TOPIC");
      config.put("errors.deadletterqueue.topic.replication.factor", "1");
      config.put("jmx", "true");
      config.put(SNOWFLAKE_TOPICS2TABLE_MAP, streamingTopic + ":" + tableName);

      connectCluster.configureConnector(connectorName, config);
      waitForConnectorRunning(connectorName);

      // when - send records with initial schema, then evolved schema
      for (int partition = 0; partition < partitionCount; partition++) {
        // First, send records with initial schema (only fieldName)
        for (int i = 0; i < initialRecordCount; i++) {
          final ObjectNode record = objectMapper.createObjectNode();
          record.put("fieldName", String.valueOf(i));
          connectCluster
              .kafka()
              .produce(
                  streamingTopic, partition, "key-" + i, objectMapper.writeValueAsString(record));
        }

        // Then, send records with evolved schema (fieldName + newField)
        for (int i = 0; i < schemaEvolutionRecordCount; i++) {
          final ObjectNode record = objectMapper.createObjectNode();
          record.put("fieldName", String.valueOf(i + initialRecordCount));
          record.put("newField", "new_" + i);
          connectCluster
              .kafka()
              .produce(
                  streamingTopic,
                  partition,
                  "key-" + (i + initialRecordCount),
                  objectMapper.writeValueAsString(record));
        }
      }

      // Send tombstone records to each partition
      for (int partition = 0; partition < partitionCount; partition++) {
        connectCluster.kafka().produce(streamingTopic, partition, "tombstone-key", null);
      }

      // then - verify schema evolution occurred
      final int expectedTotalRecords =
          recordsPerPartition * partitionCount + partitionCount; // +partitionCount for tombstones

      // Verify table exists and record count matches expected
      System.out.println("Checking if table exists: " + tableName);
      System.out.println("Table exists: " + snowflake.tableExist(tableName));
      assertWithRetry(
          () -> {
            boolean exists = snowflake.tableExist(tableName);
            System.out.println("Table exists check: " + exists);
            return exists;
          });
      System.out.println("Table exists check passed, now checking row count");
      assertWithRetry(
          () -> {
            int rowCount = TestUtils.getNumberOfRows(tableName);
            System.out.println(
                "Current row count: " + rowCount + ", expected: " + expectedTotalRecords);
            return rowCount == expectedTotalRecords;
          });

      // Verify schema contains expected columns including the evolved NEWFIELD column
      TestUtils.checkTableSchema(
          tableName,
          Map.of(
              "FIELDNAME", "VARCHAR",
              "NEWFIELD", "VARCHAR",
              "RECORD_METADATA", "VARIANT"));
    } finally {
      // cleanup: runs on failure too, so the extra topic never leaks into later tests
      connectCluster.kafka().deleteTopic(streamingTopic);
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SinkTaskIT.java
================================================
package com.snowflake.kafka.connector;

import static org.assertj.core.api.Assertions.assertThat;
import static org.testcontainers.shaded.org.awaitility.Awaitility.await;

import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration tests that drive {@link SnowflakeSinkTask} directly through its lifecycle methods
 * (initialize, start, open, put, preCommit, close, stop) using an {@link InMemorySinkTaskContext}.
 *
 * <p>Every test gets a freshly named table (created in {@link #setup()}) that is dropped again,
 * together with its "-STREAMING" pipe, in {@link #after()}.
 */
class SinkTaskIT {
  private static final int PARTITION = 0;
  private static final int RECORD_COUNT = 10000;

  /** Maximum time, in seconds, to wait for a task to report the expected committed offset. */
  private static final int COMMIT_TIMEOUT_SECONDS = 60;

  private String topicName;
  private SnowflakeConnectionService snowflakeConnectionService;

  /** Creates a uniquely named table (also used as the topic name) with only a metadata column. */
  @BeforeEach
  public void setup() {
    topicName = TestUtils.randomTableName();
    snowflakeConnectionService = TestUtils.getConnectionService();
    snowflakeConnectionService.createTableWithOnlyMetadataColumn(topicName);
  }

  /** Drops the table created in {@link #setup()} and the pipe named after it. */
  @AfterEach
  public void after() {
    TestUtils.dropTable(topicName);
    TestUtils.dropPipe(topicName + "-STREAMING");
  }

  /** Calls {@code preCommit} on a task that was never started; passes if nothing is thrown. */
  @Test
  public void testPreCommit() {
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();
    Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();

    sinkTask.preCommit(offsetMap);
  }

  /**
   * Runs a single task over one partition: verifies that {@code preCommit} returns nothing before
   * any data was put, then puts {@link #RECORD_COUNT} records and waits until the committed offset
   * equals the last record's offset plus one.
   */
  @Test
  public void testSinkTask() throws Exception {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(true);
    ConnectorConfigTools.setDefaultValues(config);
    config.put(Utils.TASK_ID, "0");
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();

    final TopicPartition topicPartition = new TopicPartition(topicName, PARTITION);
    final List<TopicPartition> topicPartitions = List.of(topicPartition);
    sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));
    sinkTask.start(config);
    try {
      sinkTask.open(topicPartitions);
      sinkTask.awaitInitialization();

      // commit offset should skip when offset=0
      Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
      offsetMap.put(topicPartition, new OffsetAndMetadata(0));
      assertThat(sinkTask.preCommit(offsetMap)).isEmpty();

      // send regular data
      List<SinkRecord> records = createSinkRecords(PARTITION, RECORD_COUNT);
      sinkTask.put(records);

      // Wait for all records to be committed and verify offset
      long expectedOffset = records.get(records.size() - 1).kafkaOffset() + 1;
      awaitCommittedOffset(sinkTask, topicPartition, expectedOffset);
    } finally {
      // Always release the started task, even when an assertion or the wait above fails
      sinkTask.close(topicPartitions);
      sinkTask.stop();
    }
  }

  /**
   * Calls the lifecycle methods in an unusual order (start twice; preCommit, close and put without
   * a preceding open; close twice). Apart from the version check there are no assertions: the test
   * passes when none of these calls throws.
   */
  @Test
  public void testSinkTaskNegative() throws Exception {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(true);
    ConnectorConfigTools.setDefaultValues(config);
    config.put(Utils.TASK_ID, "0");
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();

    TopicPartition topicPartition = new TopicPartition(topicName, PARTITION);
    sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));
    // start is intentionally invoked twice
    sinkTask.start(config);
    sinkTask.start(config);
    assertThat(sinkTask.version()).isEqualTo(Utils.VERSION);
    ArrayList<TopicPartition> topicPartitions = new ArrayList<>();
    topicPartitions.add(topicPartition);
    // Test put and precommit without open

    // commit offset
    Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
    offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(0));
    offsetMap = sinkTask.preCommit(offsetMap);

    sinkTask.close(topicPartitions);

    // send regular data
    List<SinkRecord> records = createSinkRecords(PARTITION, RECORD_COUNT);
    sinkTask.put(records);

    // commit offset
    sinkTask.preCommit(offsetMap);

    sinkTask.close(topicPartitions);
    sinkTask.stop();
  }

  /**
   * Tests that multiple sink tasks can concurrently process data for different partitions of the
   * same topic. Each task handles its own partition and should correctly track offsets.
   */
  @Test
  public void testMultipleSinkTasks() throws Exception {
    final int partition0 = 0;
    final int partition1 = 1;

    SnowflakeSinkTask task0 = new SnowflakeSinkTask();
    SnowflakeSinkTask task1 = new SnowflakeSinkTask();

    TopicPartition tp0 = new TopicPartition(topicName, partition0);
    TopicPartition tp1 = new TopicPartition(topicName, partition1);
    List<TopicPartition> topicPartitions0 = List.of(tp0);
    List<TopicPartition> topicPartitions1 = List.of(tp1);

    try {
      // Start both tasks
      Map<String, String> task0Config =
          TestUtils.transformProfileFileToConnectorConfiguration(false);
      ConnectorConfigTools.setDefaultValues(task0Config);
      task0Config.put(Utils.TASK_ID, "0");
      task0.initialize(new InMemorySinkTaskContext(Collections.singleton(tp0)));
      task0.start(task0Config);

      Map<String, String> task1Config =
          TestUtils.transformProfileFileToConnectorConfiguration(false);
      ConnectorConfigTools.setDefaultValues(task1Config);
      task1Config.put(Utils.TASK_ID, "1");
      task1.initialize(new InMemorySinkTaskContext(Collections.singleton(tp1)));
      task1.start(task1Config);

      // Open partitions
      task0.open(topicPartitions0);
      task1.open(topicPartitions1);
      task0.awaitInitialization();
      task1.awaitInitialization();

      // Put records to both tasks
      task0.put(createSinkRecords(partition0, RECORD_COUNT));
      task1.put(createSinkRecords(partition1, RECORD_COUNT));

      // Wait for offsets to be committed and verify; record offsets run from 0 to
      // RECORD_COUNT - 1, so the committed offset is expected to be RECORD_COUNT
      awaitCommittedOffset(task0, tp0, RECORD_COUNT);
      awaitCommittedOffset(task1, tp1, RECORD_COUNT);
    } finally {
      // Always cleanup even if test fails
      task0.close(topicPartitions0);
      task1.close(topicPartitions1);
      task0.stop();
      task1.stop();
    }
  }

  /** Delegates to the shared topic-to-table regex scenario with this test's configuration. */
  @Test
  public void testTopicToTableRegex() {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);

    SnowflakeSinkTaskForStreamingIT.testTopicToTableRegexMain(config);
  }

  /**
   * Polls {@code task.preCommit} until it reports {@code expectedOffset} for {@code
   * topicPartition}; fails if that does not happen within {@link #COMMIT_TIMEOUT_SECONDS}.
   *
   * @param task the task whose committed offset is polled
   * @param topicPartition the partition to look up in the map returned by {@code preCommit}
   * @param expectedOffset the offset the task is expected to report eventually
   */
  private static void awaitCommittedOffset(
      SnowflakeSinkTask task, TopicPartition topicPartition, long expectedOffset) {
    await()
        .atMost(COMMIT_TIMEOUT_SECONDS, TimeUnit.SECONDS)
        .untilAsserted(
            () -> {
              Map<TopicPartition, OffsetAndMetadata> committed =
                  task.preCommit(Map.of(topicPartition, new OffsetAndMetadata(0)));
              assertThat(committed)
                  .containsKey(topicPartition)
                  .extractingByKey(topicPartition)
                  .satisfies(offset -> assertThat(offset.offset()).isEqualTo(expectedOffset));
            });
  }

  /**
   * Builds {@code count} schemaless JSON records ({@code { "f1" : "v1" }}) for this test's topic.
   *
   * @param partition the Kafka partition assigned to every record
   * @param count number of records to create; offsets run from 0 to {@code count - 1}
   * @return the records in offset order, with null key and key schema
   */
  private List<SinkRecord> createSinkRecords(int partition, int count) {
    JsonConverter jsonConverter = new JsonConverter();
    jsonConverter.configure(Map.of("schemas.enable", "false"), false);
    String json = "{ \"f1\" : \"v1\" }";
    // The payload is identical for every record, so it is converted only once
    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData(topicName, json.getBytes(StandardCharsets.UTF_8));

    List<SinkRecord> records = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
      records.add(
          new SinkRecord(
              topicName,
              partition,
              null,
              null,
              schemaAndValue.schema(),
              schemaAndValue.value(),
              i,
              System.currentTimeMillis(),
              TimestampType.CREATE_TIME));
    }
    return records;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SinkTaskProxyIT.java
================================================
package com.snowflake.kafka.connector;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.EmbeddedProxyServer;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.util.Map;
import java.util.Optional;
import org.junit.After;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;

/**
 * Integration tests for Snowflake Sink Task proxy configuration. Uses Testcontainers with a real
 * Squid proxy server running in Docker to test JVM proxy settings with authentication. The proxy
 * server uses a random available port on the host to avoid conflicts.
 *
 * <p>Each test method gets its own proxy server instance via JUnit {@code @Rule}, ensuring tests
 * can run in parallel without port conflicts.
 *
 * <p>Note: This test requires Docker to be installed and running.
 */
public class SinkTaskProxyIT {

  private static final String PROXY_USERNAME = "admin";
  private static final String PROXY_PASSWORD = "test";

  @Rule
  public final EmbeddedProxyServer proxyServer =
      new EmbeddedProxyServer(PROXY_USERNAME, PROXY_PASSWORD);

  @After
  public void testCleanup() {
    TestUtils.resetProxyParametersInJVM();
  }

  @Test(expected = SnowflakeKafkaConnectorException.class)
  @Ignore
  public void testSinkTaskProxyConfigMock() {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);

    config.put(Utils.TASK_ID, "0");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "wronghost");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, "9093"); // wrongport
    config.put(KafkaConnectorConfigParams.JVM_PROXY_USERNAME, "user");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, "password");
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();
    try {
      sinkTask.start(config);
    } catch (SnowflakeKafkaConnectorException e) {
      assert System.getProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY).equals("true");
      assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST).equals("wronghost");
      assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT).equals("9093");
      assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST).equals("wronghost");
      assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT).equals("9093");
      assert System.getProperty(Utils.JDK_HTTP_AUTH_TUNNELING).isEmpty();
      assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER).equals("user");
      assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD).equals("password");
      assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER).equals("user");
      assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD).equals("password");

      // unset the system parameters please.
      TestUtils.resetProxyParametersInJVM();
      throw e;
    }
  }

  /**
   * Tests that the Snowflake Sink Task properly configures JVM proxy settings. This test verifies
   * that the JVM system properties are correctly set when proxy configuration is provided, without
   * actually connecting through a proxy or to Snowflake.
   *
   * <p>This is a focused unit test that verifies the proxy configuration logic.
   */
  @Test
  public void testProxyJvmPropertiesConfiguration() {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);

    // Configure proxy settings
    config.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "test-proxy.example.com");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, "8080");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_USERNAME, proxyServer.getUsername());
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, proxyServer.getPassword());

    // Set proxy properties (this is what the connector does internally)
    Utils.enableJVMProxy(config);

    // Verify all JVM proxy properties are set correctly
    Assert.assertEquals("true", System.getProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY));
    Assert.assertEquals(
        "test-proxy.example.com", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST));
    Assert.assertEquals("8080", System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT));
    Assert.assertEquals(
        "test-proxy.example.com", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST));
    Assert.assertEquals("8080", System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT));
    Assert.assertEquals(
        proxyServer.getUsername(), System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER));
    Assert.assertEquals(
        proxyServer.getPassword(),
        System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD));
    Assert.assertEquals(
        proxyServer.getUsername(), System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER));
    Assert.assertEquals(
        proxyServer.getPassword(),
        System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD));
  }

  @Test
  public void testSinkTaskProxyConfig() {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    ConnectorConfigTools.setDefaultValues(config);

    config.put(Utils.TASK_ID, "0");
    int proxyPort = proxyServer.getPort();
    config.put(KafkaConnectorConfigParams.JVM_PROXY_HOST, "localhost");
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PORT, String.valueOf(proxyPort));
    config.put(KafkaConnectorConfigParams.JVM_PROXY_USERNAME, proxyServer.getUsername());
    config.put(KafkaConnectorConfigParams.JVM_PROXY_PASSWORD, proxyServer.getPassword());
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();

    sinkTask.start(config);

    assert System.getProperty(KafkaConnectorConfigParams.HTTP_USE_PROXY).equals("true");
    assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_HOST).equals("localhost");
    assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PORT)
        .equals(String.valueOf(proxyPort));
    assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_HOST).equals("localhost");
    assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PORT)
        .equals(String.valueOf(proxyPort));
    assert System.getProperty(Utils.JDK_HTTP_AUTH_TUNNELING).isEmpty();
    assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_USER)
        .equals(proxyServer.getUsername());
    assert System.getProperty(KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD)
        .equals(proxyServer.getPassword());
    assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_USER)
        .equals(proxyServer.getUsername());
    assert System.getProperty(KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD)
        .equals(proxyServer.getPassword());

    // Verify the snowflake connection service was created successfully
    Optional<SnowflakeConnectionService> optSfConnectionService = sinkTask.getSnowflakeConnection();
    Assert.assertTrue(optSfConnectionService.isPresent());

    // Cleanup
    sinkTask.stop();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SmtIT.java
================================================
package com.snowflake.kafka.connector;

import static org.apache.kafka.connect.runtime.ConnectorConfig.TRANSFORMS_CONFIG;
import static org.apache.kafka.connect.runtime.ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG;
import static org.assertj.core.api.Assertions.assertThat;
import static org.awaitility.Awaitility.await;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.FakeSnowflakeStreamingIngestChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.function.UnaryOperator;
import java.util.stream.Stream;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.kafka.connect.json.JsonConverter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

@ExtendWith({InjectSnowflakeDataSourceExtension.class, InjectQueryRunnerExtension.class})
public class SmtIT extends ConnectClusterBaseIT {

  private static final int PARTITION_COUNT = 1;
  public static final String RECORD_METADATA = "RECORD_METADATA";
  public static final String RECORD_CONTENT = "record_content";
  private String topicName;
  private String connectorName;
  private ObjectMapper objectMapper = new ObjectMapper();

  @InjectQueryRunner private QueryRunner queryRunner;

  @BeforeEach
  void before() {
    topicName = TestUtils.randomTableName();
    connectorName = String.format("%s_connector", topicName);
    connectCluster.kafka().createTopic(topicName, PARTITION_COUNT);
    TestUtils.createTableWithMetadataColumn(topicName);
    StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier);
  }

  @AfterEach
  void after() {
    connectCluster.kafka().deleteTopic(topicName);
    connectCluster.deleteConnector(connectorName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(topicName);
    TestUtils.dropPipe(topicName + "-STREAMING");
  }

  @Test
  void test_with_record_content_variant_added_by_smt() throws Exception {
    final Map<String, String> config = defaultProperties(topicName, connectorName);
    config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    config.put(
        Constants.KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "false");
    config.put("transforms", "add_record_content");
    config.put(
        "transforms.add_record_content.type",
        "org.apache.kafka.connect.transforms.HoistField$Value");
    config.put("transforms.add_record_content.field", RECORD_CONTENT);

    connectCluster.configureConnector(connectorName, config);

    waitForConnectorRunning(connectorName);
    waitForOpenedFakeIngestClient(connectorName);
    connectCluster.kafka().produce(topicName, getTestJsonContent());

    // then
    await()
        .timeout(Duration.ofSeconds(30))
        .pollInterval(Duration.ofSeconds(1))
        .untilAsserted(
            () -> {
              assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount())
                  .isEqualTo(1);
              // get first open channel, there is going to be only one because partition count is 1
              FakeSnowflakeStreamingIngestChannel openedChannels =
                  getOpenedFakeIngestClient(connectorName).getOpenedChannels().get(0);

              assertThat(openedChannels.getAppendedRows()).hasSize(1);
              final Map<String, Object> firstRow = openedChannels.getAppendedRows().get(0);
              assertThat(firstRow).containsKeys(RECORD_METADATA, RECORD_CONTENT);
              assertThat(firstRow)
                  .hasEntrySatisfying(
                      RECORD_METADATA,
                      value -> {
                        assertThat(value).isInstanceOf(Map.class);
                      });
              assertThat(firstRow)
                  .hasEntrySatisfying(
                      RECORD_CONTENT,
                      value -> {
                        assertThat(value).isInstanceOf(Map.class);
                      });
            });
  }

  @ParameterizedTest
  @CsvSource({"DEFAULT, 10, 18", "IGNORE, 0, -1"}) // -1 means No offset registered
  void testIfSmtReturningNullsIngestDataCorrectly(
      String behaviorOnNull, int expectedRecordNumber, int expectedLastOffset) {
    // given
    connectCluster.configureConnector(
        connectorName, smtProperties(topicName, connectorName, behaviorOnNull));
    waitForConnectorRunning(connectorName);
    waitForOpenedFakeIngestClient(connectorName);

    // when
    // Send 20 messages: 10x "{}" (becomes null after ExtractField SMT) alternating with
    // 10x {"message":"value"} (becomes String "value" after SMT - treated as broken record)
    Stream.iterate(0, UnaryOperator.identity())
        .limit(10)
        .flatMap(v -> Stream.of("{}", "{\"message\":\"value\"}"))
        .forEach(message -> connectCluster.kafka().produce(topicName, message));

    // then
    // For DEFAULT mode: 10 tombstones are inserted at even offsets (0,2,4,...,18), last offset=18
    // For IGNORE mode: nulls are skipped, broken records don't insert, no rows appended
    final String expectedOffsetToken =
        expectedLastOffset >= 0 ? String.valueOf(expectedLastOffset) : null;
    await()
        .timeout(Duration.ofSeconds(30))
        .pollInterval(Duration.ofSeconds(1))
        .untilAsserted(
            () -> {
              assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount())
                  .isEqualTo(expectedRecordNumber);
              List<FakeSnowflakeStreamingIngestChannel> openedChannels =
                  getOpenedFakeIngestClient(connectorName).getOpenedChannels();
              // get first open channel, there is going to be only one because partition count is 1
              String offsetToken = openedChannels.get(0).getLatestCommittedOffsetToken();

              assertThat(openedChannels).hasSize(PARTITION_COUNT);
              assertThat(offsetToken).isEqualTo(expectedOffsetToken);
            });
  }

  @Test
  void testIfSmtExtractingNestedStructuresWorksCorrectly() {
    connectCluster.configureConnector(
        connectorName, smtProperties(topicName, connectorName, "IGNORE"));
    waitForConnectorRunning(connectorName);
    waitForOpenedFakeIngestClient(connectorName);
    final String message = "{\"message\":{\"title\":\"abcd\", \"length\":5999}}";
    connectCluster.kafka().produce(topicName, message);
    await()
        .timeout(Duration.ofSeconds(30))
        .pollInterval(Duration.ofSeconds(2))
        .untilAsserted(
            () -> {
              assertThat(getOpenedFakeIngestClient(connectorName).getAppendedRowCount())
                  .isEqualTo(1);
              List<FakeSnowflakeStreamingIngestChannel> openedChannels =
                  getOpenedFakeIngestClient(connectorName).getOpenedChannels();
              // get first open channel, there is going to be only one because partition count is 1
              String offsetToken = openedChannels.get(0).getLatestCommittedOffsetToken();
              assertThat(offsetToken).isEqualTo("0");
            });
  }

  private Map<String, String> smtProperties(
      String smtTopic, String smtConnector, String behaviorOnNull) {
    Map<String, String> config = defaultProperties(smtTopic, smtConnector);
    config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");

    config.put(VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    config.put("value.converter.schemas.enable", "false");
    config.put("behavior.on.null.values", behaviorOnNull);

    config.put(TRANSFORMS_CONFIG, "extractField");
    config.put(
        "transforms.extractField.type", "org.apache.kafka.connect.transforms.ExtractField$Value");
    config.put("transforms.extractField.field", "message");

    // Allow broken records (plain Strings after SMT) to be sent to DLQ instead of failing
    config.put("errors.tolerance", "all");
    config.put("errors.deadletterqueue.topic.name", "DLQ_TOPIC");
    config.put("errors.deadletterqueue.topic.replication.factor", "1");

    return config;
  }

  private String getTestJsonContent() throws JsonProcessingException {
    return objectMapper.writeValueAsString(
        Map.of(
            "city",
            "Pcim Górny",
            "age",
            30,
            "married",
            true,
            "has cat",
            true,
            "! @&$#* has Łułósżź",
            true,
            "skills",
            List.of("sitting", "standing", "eating"),
            "family",
            Map.of("son", "Jack", "daughter", "Anna")));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SnowflakeSinkTaskAuthorizationExceptionTrackerTest.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS;

import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.util.Map;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Unit tests for {@link SnowflakeSinkTaskAuthorizationExceptionTracker}: the tracker is expected
 * to throw only when failing on authorization errors is enabled and a matching exception was
 * reported from precommit.
 */
class SnowflakeSinkTaskAuthorizationExceptionTrackerTest {

  @Test
  public void shouldThrowExceptionOnAuthorizationError() {
    // given
    SnowflakeSinkTaskAuthorizationExceptionTracker underTest = startedTracker("true");

    // when
    underTest.reportPrecommitException(new Exception("Authorization failed after retry"));

    // then
    Assertions.assertThrows(
        SnowflakeKafkaConnectorException.class,
        () -> underTest.throwExceptionIfAuthorizationFailed());
  }

  @Test
  public void shouldNotThrowExceptionWhenNoExceptionReported() {
    // given
    SnowflakeSinkTaskAuthorizationExceptionTracker underTest = startedTracker("true");

    // expect
    Assertions.assertDoesNotThrow(() -> underTest.throwExceptionIfAuthorizationFailed());
  }

  @ParameterizedTest
  @MethodSource("noExceptionConditions")
  public void shouldNotThrowException(boolean enabled, String exceptionMessage) {
    // given
    SnowflakeSinkTaskAuthorizationExceptionTracker underTest =
        startedTracker(Boolean.toString(enabled));

    // when
    underTest.reportPrecommitException(new Exception(exceptionMessage));

    // then
    Assertions.assertDoesNotThrow(() -> underTest.throwExceptionIfAuthorizationFailed());
  }

  /** Combinations of (feature enabled, reported message) for which nothing must be thrown. */
  public static Stream<Arguments> noExceptionConditions() {
    Arguments disabledWithAuthorizationFailure =
        Arguments.of(false, "Authorization failed after retry");
    Arguments enabledWithUnrelatedFailure = Arguments.of(true, "NullPointerException");
    return Stream.of(disabledWithAuthorizationFailure, enabledWithUnrelatedFailure);
  }

  /**
   * Creates a tracker and feeds it the test configuration with the fail-on-authorization-errors
   * switch set to {@code failOnAuthorizationErrors}.
   */
  private static SnowflakeSinkTaskAuthorizationExceptionTracker startedTracker(
      String failOnAuthorizationErrors) {
    SnowflakeSinkTaskAuthorizationExceptionTracker created =
        new SnowflakeSinkTaskAuthorizationExceptionTracker();
    Map<String, String> taskConfig = TestUtils.getConfig();
    taskConfig.put(ENABLE_TASK_FAIL_ON_AUTHORIZATION_ERRORS, failOnAuthorizationErrors);
    created.updateStateOnTaskStart(taskConfig);
    return created;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/SnowflakeSinkTaskForStreamingIT.java
================================================
package com.snowflake.kafka.connector;

import static com.snowflake.kafka.connector.internal.TestUtils.getConnectionServiceWithEncryptedKey;
import static java.lang.String.format;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

/**
 * Sink Task IT test which uses {@link
 * com.snowflake.kafka.connector.internal.streaming.SnowflakeSinkServiceV2}
 */
public class SnowflakeSinkTaskForStreamingIT {

  private String topicName;
  private static final int partition = 0;
  private TopicPartition topicPartition;

  /**
   * Generates a fresh random name that is used as both topic and table name, then (re)creates
   * that table with the {@code record_metadata} and {@code f1} columns.
   */
  @BeforeEach
  public void beforeEach() {
    topicName = TestUtils.randomTableName();
    topicPartition = new TopicPartition(topicName, partition);

    final String createTableSql =
        format("create or replace table %s (record_metadata variant, f1 varchar)", topicName);
    final SnowflakeConnectionService connection = getConnectionServiceWithEncryptedKey();
    connection.executeQueryWithParameters(createTableSql);
  }

  /** Removes the per-test table and the streaming pipe named after it. */
  @AfterEach
  public void afterEach() {
    final String streamingPipeName = topicName + "-STREAMING";

    TestUtils.dropTable(topicName);
    // The pipe is dropped as well so the account does not run into its pipe limit.
    TestUtils.dropPipe(streamingPipeName);
  }

  /**
   * Drives one sink task through initialize/start/open, puts a batch created with
   * {@code createJsonStringSinkRecords(0, 1, ...)} and waits until preCommit reports offset 1 for
   * the partition.
   */
  @Test
  public void testSinkTask() throws Exception {
    final Map<String, String> connectorConfig = getConfig();
    ConnectorConfigTools.setDefaultValues(connectorConfig);

    final SnowflakeSinkTask task = new SnowflakeSinkTask();

    // The task needs its context before it can be started.
    task.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));
    task.start(connectorConfig);

    final TopicPartition assigned = new TopicPartition(topicName, partition);
    final ArrayList<TopicPartition> assignment = new ArrayList<>();
    assignment.add(assigned);
    task.open(assignment);
    task.awaitInitialization();

    // Before anything is put, preCommit is expected to return an empty map.
    final Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
    offsets.put(assigned, new OffsetAndMetadata(0));
    TestUtils.assertWithRetry(() -> task.preCommit(offsets).isEmpty(), 5, 20);

    // Put the regular data.
    final List<SinkRecord> batch =
        TestUtils.createJsonStringSinkRecords(0, 1, topicName, partition);
    task.put(batch);

    // Ask for a far-ahead offset; the task should answer with what was actually ingested.
    offsets.clear();
    offsets.put(assigned, new OffsetAndMetadata(10000));

    TestUtils.assertWithRetry(() -> task.preCommit(offsets).size() == 1, 5, 20);
    TestUtils.assertWithRetry(() -> task.preCommit(offsets).get(assigned).offset() == 1, 5, 20);

    task.close(assignment);
    task.stop();
  }

  /**
   * Opens a task on one partition, ingests a record, closes, then re-opens with a second partition
   * and puts the same record again plus one record for the new partition.
   *
   * <p>The table is expected to end up with exactly two rows, one per partition, each at offset 0.
   * All checks use JUnit assertions so they run regardless of the JVM's {@code -ea} setting.
   */
  @Test
  public void testSinkTaskWithMultipleOpenClose() throws Exception {
    Map<String, String> config = getConfig();
    ConnectorConfigTools.setDefaultValues(config);

    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask();
    // Inits the sinktaskcontext
    sinkTask.initialize(new InMemorySinkTaskContext(Collections.singleton(topicPartition)));

    sinkTask.start(config);
    ArrayList<TopicPartition> topicPartitions = new ArrayList<>();
    topicPartitions.add(new TopicPartition(topicName, partition));
    sinkTask.open(topicPartitions);
    sinkTask.awaitInitialization();

    final long noOfRecords = 1L;
    final long lastOffsetNo = noOfRecords - 1;

    // send regular data
    List<SinkRecord> records =
        TestUtils.createJsonStringSinkRecords(0, noOfRecords, topicName, partition);
    sinkTask.put(records);

    // commit offset
    final Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
    offsetMap.put(topicPartitions.get(0), new OffsetAndMetadata(lastOffsetNo));

    TestUtils.assertWithRetry(() -> sinkTask.preCommit(offsetMap).size() == 1, 5, 20);

    // precommit is one more than offset last inserted
    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(0)).offset() == noOfRecords,
        20,
        5);

    sinkTask.close(topicPartitions);

    // Add one more partition
    topicPartitions.add(new TopicPartition(topicName, partition + 1));

    sinkTask.open(topicPartitions);
    sinkTask.awaitInitialization();

    // trying to put same records
    sinkTask.put(records);

    List<SinkRecord> recordsWithAnotherPartition =
        TestUtils.createJsonStringSinkRecords(0, noOfRecords, topicName, partition + 1);
    sinkTask.put(recordsWithAnotherPartition);

    // Adding to offsetMap so that this gets into precommit
    offsetMap.put(topicPartitions.get(1), new OffsetAndMetadata(lastOffsetNo));

    TestUtils.assertWithRetry(() -> sinkTask.preCommit(offsetMap).size() == 2, 5, 20);

    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(0)).offset() == 1, 5, 20);

    TestUtils.assertWithRetry(
        () -> sinkTask.preCommit(offsetMap).get(topicPartitions.get(1)).offset() == 1, 5, 20);

    sinkTask.close(topicPartitions);

    sinkTask.stop();

    LinkedList<String> contentResult = new LinkedList<>();
    LinkedList<String> metadataResult = new LinkedList<>();

    // try-with-resources closes the result set even when reading a row throws
    try (ResultSet resultSet = TestUtils.showTable(topicName)) {
      while (resultSet.next()) {
        contentResult.add(resultSet.getString("F1"));
        metadataResult.add(resultSet.getString("RECORD_METADATA"));
      }
    }
    Assertions.assertEquals(2, metadataResult.size(), "Unexpected number of metadata rows");
    Assertions.assertEquals(2, contentResult.size(), "Unexpected number of content rows");
    ObjectMapper mapper = new ObjectMapper();

    Set<Long> partitionsInTable = new HashSet<>();
    metadataResult.forEach(
        s -> {
          try {
            JsonNode metadata = mapper.readTree(s);
            // previously this comparison was computed and thrown away
            Assertions.assertEquals(
                "0", metadata.get("offset").asText(), "Every stored row should be at offset 0");
            partitionsInTable.add(metadata.get("partition").asLong());
          } catch (JsonProcessingException e) {
            Assertions.fail("RECORD_METADATA is not valid JSON: " + s, e);
          }
        });

    Assertions.assertEquals(2, partitionsInTable.size(), "Rows should come from two partitions");
  }

  /** Runs the shared topic-to-table regex scenarios with the streaming connector config. */
  @Test
  public void testTopicToTableRegex() {
    testTopicToTableRegexMain(getConfig());
  }

  /**
   * Runs two topic-to-table regex scenarios against the given base config: a pair of regexes where
   * an unmatched topic is expected to keep its own name, and a single catch-all regex.
   *
   * <p>The supplied map is modified: the runner overwrites its topic2table entry per scenario.
   *
   * @param config base connector configuration
   */
  public static void testTopicToTableRegexMain(Map<String, String> config) {
    // constants
    String catTopicStr1 = "calico_cat";
    String catTopicStr2 = "orange_cat";

    String bigCatTable = "big_cat_table";
    String bigCatTopicRegex = "big.*_.*_cat";
    String bigCatTopicStr1 = "big_calico_cat";
    String bigCatTopicStr2 = "biggest_orange_cat";

    String dogTable = "dog_table";
    String dogTopicRegex = ".*_dog";
    String dogTopicStr1 = "corgi_dog";

    String catchallTable = "animal_table";
    String catchAllRegex = ".*";
    String birdTopicStr1 = "bird";

    // test two regexes. bird should create its own table
    String twoRegexConfig =
        Utils.formatString("{}:{}, {}:{}", bigCatTopicRegex, bigCatTable, dogTopicRegex, dogTable);
    List<String> twoRegexPartitionStrs =
        Arrays.asList(bigCatTopicStr1, bigCatTopicStr2, dogTopicStr1, birdTopicStr1);
    Map<String, String> twoRegexExpected = new HashMap<>();
    twoRegexExpected.put(bigCatTopicStr1, bigCatTable);
    twoRegexExpected.put(bigCatTopicStr2, bigCatTable);
    twoRegexExpected.put(dogTopicStr1, dogTable);
    twoRegexExpected.put(birdTopicStr1, birdTopicStr1);
    testTopicToTableRegexRunner(config, twoRegexConfig, twoRegexPartitionStrs, twoRegexExpected);

    // test catchall regex
    String catchAllConfig = Utils.formatString("{}:{}", catchAllRegex, catchallTable);
    List<String> catchAllPartitionStrs =
        Arrays.asList(catTopicStr1, catTopicStr2, dogTopicStr1, birdTopicStr1);
    Map<String, String> catchAllExpected = new HashMap<>();
    catchAllExpected.put(catTopicStr1, catchallTable);
    catchAllExpected.put(catTopicStr2, catchallTable);
    catchAllExpected.put(dogTopicStr1, catchallTable);
    catchAllExpected.put(birdTopicStr1, catchallTable);
    testTopicToTableRegexRunner(config, catchAllConfig, catchAllPartitionStrs, catchAllExpected);
  }

  /**
   * Opens a sink task built from spied services on one partition per topic and verifies that
   * {@code startPartitions} was called exactly once.
   *
   * <p>NOTE(review): only the keys of {@code expectedTopic2TableConfig} are checked, and only
   * against the partitions this method created itself; the expected table names are never
   * compared with anything. The task is also never started with a config, so the
   * topic2table entry written into {@code connectorBaseConfig} is not visibly consumed here.
   * Confirm whether a stronger assertion was intended.
   *
   * @param connectorBaseConfig base config; its topic2table entry is overwritten
   * @param topic2tableRegex topic2table mapping string for this scenario
   * @param partitionStrList topics to open; the list index is used as the partition number
   * @param expectedTopic2TableConfig expected topic to table mapping (keys only are used)
   */
  private static void testTopicToTableRegexRunner(
      Map<String, String> connectorBaseConfig,
      String topic2tableRegex,
      List<String> partitionStrList,
      Map<String, String> expectedTopic2TableConfig) {
    // setup
    connectorBaseConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP, topic2tableRegex);

    // setup partitions
    List<TopicPartition> testPartitions = new ArrayList<>();
    for (int i = 0; i < partitionStrList.size(); i++) {
      testPartitions.add(new TopicPartition(partitionStrList.get(i), i));
    }

    // mocks
    SnowflakeSinkService serviceSpy = Mockito.spy(SnowflakeSinkService.class);
    SnowflakeConnectionService connSpy = Mockito.spy(SnowflakeConnectionService.class);
    SnowflakeSinkTask sinkTask = new SnowflakeSinkTask(serviceSpy, connSpy);

    // test topics were mapped correctly
    sinkTask.open(testPartitions);

    // verify expected num tasks opened
    Mockito.verify(serviceSpy, Mockito.times(1)).startPartitions(Mockito.anyCollection());

    for (String topicStr : expectedTopic2TableConfig.keySet()) {
      boolean opened = false;
      for (TopicPartition currTp : testPartitions) {
        if (currTp.topic().equals(topicStr)) {
          opened = true;
          break;
        }
      }
      Assertions.assertTrue(opened, "Expected topic partition was not opened by the task");
    }
  }

  /** Returns the streaming connector configuration obtained from {@link TestUtils}. */
  private Map<String, String> getConfig() {
    final Map<String, String> streamingConfig =
        TestUtils.getConnectorConfigurationForStreaming(false);
    return streamingConfig;
  }

  /**
   * With auto-generated table name sanitization enabled, a mixed-case topic that is already a
   * valid identifier should end up in a table named after the upper-cased topic.
   *
   * <p>The table and pipe created here are not the ones handled by {@code afterEach}, so they are
   * dropped in a {@code finally} block to avoid leaking them when an assertion fails.
   */
  @Test
  public void testSanitizationEnabledAutoGenerated() throws Exception {
    // Topic with valid identifier that needs uppercasing
    // Use uppercase letters to avoid hash generation
    // (local names deliberately differ from the fields used by beforeEach/afterEach)
    String mixedCaseTopic = "TestTopic" + System.currentTimeMillis();
    TopicPartition mixedCasePartition = new TopicPartition(mixedCaseTopic, 0);

    // When sanitization is enabled, valid identifiers are uppercased
    String expectedTableName = mixedCaseTopic.toUpperCase();
    String pipeName = expectedTableName + "-STREAMING";

    try {
      Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(false);
      ConnectorConfigTools.setDefaultValues(config);
      config.put(
          KafkaConnectorConfigParams
              .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
          "true");
      config.put(KafkaConnectorConfigParams.TOPICS, mixedCaseTopic);

      SnowflakeSinkTask task = new SnowflakeSinkTask();
      task.initialize(new InMemorySinkTaskContext(Collections.singleton(mixedCasePartition)));
      task.start(config);
      task.open(Collections.singletonList(mixedCasePartition));
      task.awaitInitialization();

      // Create and send records
      List<SinkRecord> records = TestUtils.createJsonStringSinkRecords(0, 5, mixedCaseTopic, 0);
      task.put(records);

      // Wait for preCommit to confirm data is flushed
      final Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
      offsetMap.put(mixedCasePartition, new OffsetAndMetadata(10000));
      TestUtils.assertWithRetry(() -> task.preCommit(offsetMap).size() == 1, 5, 20);

      task.close(Collections.singletonList(mixedCasePartition));
      task.stop();

      SnowflakeConnectionService conn = getConnectionServiceWithEncryptedKey();

      // Verify the table exists and is uppercased
      boolean tableExists = conn.tableExist(expectedTableName);
      Assertions.assertTrue(tableExists, "Should find uppercased table: " + expectedTableName);

      Assertions.assertTrue(
          expectedTableName.matches("^[A-Z_0-9]+$"),
          "Table name should be fully uppercased with only alphanumeric and underscore characters");

      // Verify data; the result set is closed even if iteration fails
      int count = 0;
      try (ResultSet data = TestUtils.showTable(expectedTableName)) {
        while (data.next()) {
          count++;
        }
      }
      Assertions.assertEquals(5, count, "Should have 5 rows");
    } finally {
      // Cleanup table and pipe
      TestUtils.dropTable(expectedTableName);
      TestUtils.dropPipe(pipeName);
    }
  }

  /**
   * With sanitization disabled and a quoted table name in the topic2table map, data for the topic
   * should land in the mixed-case table named in the map.
   *
   * <p>The table and pipe created here are not the ones handled by {@code afterEach}, so they are
   * dropped in a {@code finally} block to avoid leaking them when an assertion fails.
   */
  @Test
  public void testSanitizationDisabledQuotedMap() throws Exception {
    // Quoting in topic2table.map preserves case through the parser (no uppercasing).
    // (local names deliberately differ from the fields used by beforeEach/afterEach)
    String mappedTopic = "myTopic_" + System.currentTimeMillis();
    String mixedCaseTable = "My_Test_Table_" + System.currentTimeMillis();
    String pipeName = mixedCaseTable + "-STREAMING";
    TopicPartition mappedPartition = new TopicPartition(mappedTopic, 0);

    try {
      Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(false);
      ConnectorConfigTools.setDefaultValues(config);
      config.put(
          KafkaConnectorConfigParams
              .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
          "false");
      config.put(
          KafkaConnectorConfigParams.SNOWFLAKE_TOPICS2TABLE_MAP,
          mappedTopic + ":\"" + mixedCaseTable + "\"");
      config.put(KafkaConnectorConfigParams.TOPICS, mappedTopic);

      SnowflakeSinkTask task = new SnowflakeSinkTask();
      task.initialize(new InMemorySinkTaskContext(Collections.singleton(mappedPartition)));
      task.start(config);
      task.open(Collections.singletonList(mappedPartition));
      task.awaitInitialization();

      // Create and send records
      List<SinkRecord> records = TestUtils.createJsonStringSinkRecords(0, 5, mappedTopic, 0);
      task.put(records);

      // Wait for preCommit to confirm data is flushed
      final Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
      offsetMap.put(mappedPartition, new OffsetAndMetadata(10000));
      TestUtils.assertWithRetry(() -> task.preCommit(offsetMap).size() == 1, 5, 20);

      task.close(Collections.singletonList(mappedPartition));
      task.stop();

      // Verify data in the auto-created table; the result set is closed even on failure
      int count = 0;
      try (ResultSet data = TestUtils.showTable(mixedCaseTable)) {
        while (data.next()) {
          count++;
        }
      }
      Assertions.assertEquals(5, count, "Should have 5 rows in table " + mixedCaseTable);
    } finally {
      // Cleanup table and pipe
      TestUtils.dropTable(mixedCaseTable);
      TestUtils.dropPipe(pipeName);
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/TopicToTableParserTest.java
================================================
package com.snowflake.kafka.connector;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.junit.Test;

/**
 * Unit tests for {@link TopicToTableParser}: accepted syntax, casing of table tokens and the
 * error messages produced for malformed input.
 */
public class TopicToTableParserTest {

  /** Empty and whitespace-only input both parse to an empty map. */
  @Test
  public void testParseEmptyInput() {
    for (String blankInput : new String[] {"", "   "}) {
      assertTrue(TopicToTableParser.parse(blankInput).isEmpty());
    }
  }

  /** Comma-separated pairs are all parsed; unquoted table names come back upper-cased. */
  @Test
  public void testParseMultipleEntries() {
    Map<String, String> parsed = TopicToTableParser.parse("topic_a:table_a, topic_b:table_b");

    Map<String, String> wanted = new LinkedHashMap<>();
    wanted.put("topic_a", "TABLE_A");
    wanted.put("topic_b", "TABLE_B");
    assertEquals(wanted, parsed);
  }

  /** Quoted tokens may contain the separators ':' and ',' as well as spaces. */
  @Test
  public void testParseQuotedEntries() {
    String quotedInput = "\"topic:one\":\"table,one\", \"topic two\":\"table two\"";

    Map<String, String> wanted = new LinkedHashMap<>();
    wanted.put("topic:one", "table,one");
    wanted.put("topic two", "table two");
    assertEquals(wanted, TopicToTableParser.parse(quotedInput));
  }

  /** {@code parseEntries} returns the entries in the order they appear in the input. */
  @Test
  public void testParseEntriesPreservesOrder() {
    TopicToTableParser parser = new TopicToTableParser("first:one, second:two");
    List<TopicToTableParser.Entry> parsedEntries = parser.parseEntries();

    assertEquals(2, parsedEntries.size());

    TopicToTableParser.Entry head = parsedEntries.get(0);
    assertEquals("first", head.getTopic());
    assertEquals("ONE", head.getTable());

    TopicToTableParser.Entry tail = parsedEntries.get(1);
    assertEquals("second", tail.getTopic());
    assertEquals("TWO", tail.getTable());
  }

  /** Only unquoted table tokens are upper-cased; a quoted token keeps its case. */
  @Test
  public void testParseUppercasesOnlyUnquotedTableTokens() {
    Map<String, String> parsed = TopicToTableParser.parse("topic:e, other_topic:\"e\"");

    Map<String, String> wanted = new LinkedHashMap<>();
    wanted.put("topic", "E");
    wanted.put("other_topic", "e");
    assertEquals(wanted, parsed);
  }

  @Test
  public void testParseRejectsDuplicateTopics() {
    String message = assertParseError("topic:one, topic:two").getMessage();
    assertEquals("Duplicate topic: topic", message);
  }

  @Test
  public void testParseRejectsOverlappingRegexes() {
    String message = assertParseError(".*:table_a, .*foo:table_b").getMessage();
    assertTrue(message.contains("Topic regexes cannot overlap"));
    assertTrue(message.contains(".*"));
    assertTrue(message.contains(".*foo"));
  }

  @Test
  public void testParseRejectsUnterminatedQuotedToken() {
    String message = assertParseError("\"topic:table").getMessage();
    assertTrue(message.contains("Unterminated quoted token"));
  }

  @Test
  public void testParseRejectsEmptyQuotedToken() {
    String message = assertParseError("\"\":table").getMessage();
    assertTrue(message.contains("Empty quoted token"));
  }

  @Test
  public void testParseRejectsMissingColon() {
    String message = assertParseError("topic table").getMessage();
    assertTrue(message.contains("Expected ':'"));
  }

  /**
   * Parses {@code input} and returns the {@link IllegalArgumentException} it raises; fails the
   * test when parsing succeeds.
   */
  private static IllegalArgumentException assertParseError(String input) {
    IllegalArgumentException raised = null;
    try {
      TopicToTableParser.parse(input);
    } catch (IllegalArgumentException error) {
      raised = error;
    }
    if (raised == null) {
      fail("Expected IllegalArgumentException");
    }
    return raised;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/UtilsTest.java
================================================
package com.snowflake.kafka.connector;

import static java.util.Arrays.*;
import static java.util.Collections.*;
import static org.assertj.core.api.Fail.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Rule;
import org.junit.Test;
import org.junit.contrib.java.lang.system.EnvironmentVariables;

public class UtilsTest {
  @Rule public final EnvironmentVariables environmentVariables = new EnvironmentVariables();

  /**
   * A dotted, fully qualified name and a name with special characters are both rejected as object
   * identifiers. JUnit assertions are used so the checks do not depend on the JVM's -ea flag.
   */
  @Test
  public void testObjectIdentifier() {
    String name = "DATABASE.SCHEMA.TABLE";
    assertFalse(Utils.isValidSnowflakeObjectIdentifier(name));
    String name1 = "table!@#$%^;()";
    assertFalse(Utils.isValidSnowflakeObjectIdentifier(name1));
  }

  /** The connector version check is expected to succeed; asserted independently of -ea. */
  @Test
  public void testVersionChecker() {
    assertTrue(Utils.checkConnectorVersion());
  }

  /**
   * Table-name resolution with sanitization enabled: mapped topics use the mapped table, empty or
   * null topics raise ERROR_0020, and unmapped invalid topics get a sanitized name with a hash
   * suffix. JUnit assertions are used so the checks do not depend on the JVM's -ea flag.
   */
  @Test
  public void testGetTableName() {
    Map<String, String> topic2table = TopicToTableParser.parse("ab@cd:abcd, 1234:_1234");

    assertEquals("ABCD", Utils.getTableName("ab@cd", topic2table, true));
    assertEquals("_1234", Utils.getTableName("1234", topic2table, true));

    TestUtils.assertError(
        SnowflakeErrors.ERROR_0020, () -> Utils.getTableName("", topic2table, true));
    TestUtils.assertError(
        SnowflakeErrors.ERROR_0020, () -> Utils.getTableName(null, topic2table, true));

    String topic = "bc*def";
    assertEquals(
        "BC_DEF_" + Math.abs(topic.hashCode()), Utils.getTableName(topic, topic2table, true));

    topic = "12345";
    assertEquals(
        "_12345_" + Math.abs(topic.hashCode()), Utils.getTableName(topic, topic2table, true));
  }

  /**
   * Topics matching a regex key of the topic2table map resolve to that key's table; a topic that
   * matches no regex gets a sanitized, hash-suffixed name. JUnit assertions are used so the checks
   * do not depend on the JVM's -ea flag.
   */
  @Test
  public void testGetTableNameRegex() {
    String catTable = "cat_table";
    String dogTable = "dog_table";
    String catTopicRegex = ".*_cat";
    String dogTopicRegex = ".*_dog";

    // test two different regexs
    Map<String, String> topic2table =
        TopicToTableParser.parse(
            Utils.formatString("{}:{},{}:{}", catTopicRegex, catTable, dogTopicRegex, dogTable));

    assertEquals("CAT_TABLE", Utils.getTableName("calico_cat", topic2table, true));
    assertEquals("CAT_TABLE", Utils.getTableName("orange_cat", topic2table, true));
    assertEquals("CAT_TABLE", Utils.getTableName("_cat", topic2table, true));
    assertEquals("DOG_TABLE", Utils.getTableName("corgi_dog", topic2table, true));

    // test new topic should not have wildcard
    String topic = "bird.*";
    assertEquals(
        "BIRD_" + Math.abs(topic.hashCode()), Utils.getTableName(topic, topic2table, true));
  }

  /**
   * {@code Utils.convertAppName} rewrites the connector name in place. The expected values below
   * pin the upper-casing and the hash-suffixed form produced for names containing '-' or '.'.
   * JUnit assertions are used so the checks do not depend on the JVM's -ea flag.
   */
  @Test
  public void testConvertAppName() {
    HashMap<String, String> config = new HashMap<String, String>();

    config.put(KafkaConnectorConfigParams.NAME, "_aA1");
    Utils.convertAppName(config);
    assertEquals("_AA1", config.get(KafkaConnectorConfigParams.NAME));

    config.put(KafkaConnectorConfigParams.NAME, "-_aA1");
    Utils.convertAppName(config);
    assertEquals("___AA1_44483871", config.get(KafkaConnectorConfigParams.NAME));

    config.put(KafkaConnectorConfigParams.NAME, "_aA1-");
    Utils.convertAppName(config);
    assertEquals("_AA1__90688251", config.get(KafkaConnectorConfigParams.NAME));

    config.put(KafkaConnectorConfigParams.NAME, "testApp.snowflake-connector");
    Utils.convertAppName(config);
    assertEquals(
        "TESTAPP_SNOWFLAKE_CONNECTOR_36242259", config.get(KafkaConnectorConfigParams.NAME));
  }

  /**
   * Pins which sample names are accepted and rejected as application names. JUnit assertions are
   * used so the checks do not depend on the JVM's -ea flag.
   */
  @Test
  public void testIsValidSnowflakeApplicationName() {
    assertTrue(Utils.isValidSnowflakeApplicationName("-_aA1"));
    assertTrue(Utils.isValidSnowflakeApplicationName("aA_1-"));
    assertFalse(Utils.isValidSnowflakeApplicationName("1aA_-"));
    assertFalse(Utils.isValidSnowflakeApplicationName("_1.a$"));
    assertFalse(Utils.isValidSnowflakeApplicationName("(1.f$-_"));
  }

  /**
   * {@code formatLogMessage} prefixes the log tag and substitutes {@code {}} placeholders. JUnit
   * assertions are used so the checks do not depend on the JVM's -ea flag.
   */
  @Test
  public void testLogMessageBasic() {
    // no variable
    String expected = Utils.SF_LOG_TAG + " test message";

    assertEquals(expected, Utils.formatLogMessage("test message"));

    // 1 variable
    expected = Utils.SF_LOG_TAG + " 1 test message";

    assertEquals(expected, Utils.formatLogMessage("{} test message", 1));
  }

  /**
   * Null arguments are rendered as the text "null". JUnit assertions are used so the checks do
   * not depend on the JVM's -ea flag.
   */
  @Test
  public void testLogMessageNulls() {
    // nulls
    String expected = Utils.SF_LOG_TAG + " null test message";
    assertEquals(expected, Utils.formatLogMessage("{} test message", (String) null));

    expected = Utils.SF_LOG_TAG + " some string test null message null";
    assertEquals(
        expected, Utils.formatLogMessage("{} test {} message {}", "some string", null, null));
  }

  /**
   * Multi-line formats: the log tag appears once at the start and every placeholder on every line
   * is substituted. JUnit assertions are used so the checks do not depend on the JVM's -ea flag;
   * the former debugging print to stdout has been dropped.
   */
  @Test
  public void testLogMessageMultiLines() {
    // 2 variables
    String expected = Utils.SF_LOG_TAG + " 1 test message\n" + "2 test message";

    assertEquals(expected, Utils.formatLogMessage("{} test message\n{} test message", 1, 2));

    // 3 variables
    expected = Utils.SF_LOG_TAG + " 1 test message\n" + "2 test message\n" + "3 test message";

    assertEquals(
        expected,
        Utils.formatLogMessage("{} test message\n{} test message\n{} test " + "message", 1, 2, 3));

    // 4 variables
    expected =
        Utils.SF_LOG_TAG
            + " 1 test message\n"
            + "2 test message\n"
            + "3 test message\n"
            + "4 test message";

    assertEquals(
        expected,
        Utils.formatLogMessage(
            "{} test message\n{} test message\n{} test " + "message\n{} test message",
            1,
            2,
            3,
            4));
  }

  /**
   * Parsing of plain and release-candidate version strings: each component, the RC flag and the
   * original text are checked for "3.1.1", "4.0.0-rc" and "4.0.1-RC1".
   */
  @Test
  public void testSemanticVersionParsing() {
    // plain major.minor.patch
    final String plainText = "3.1.1";
    SemanticVersion plain = new SemanticVersion(plainText);
    assertEquals(3, plain.major());
    assertEquals(1, plain.minor());
    assertEquals(1, plain.patch());
    assertFalse(plain.isReleaseCandidate());
    assertEquals(plainText, plain.originalVersion());

    // lower-case "-rc" suffix
    final String lowerRcText = "4.0.0-rc";
    SemanticVersion lowerRc = new SemanticVersion(lowerRcText);
    assertEquals(4, lowerRc.major());
    assertEquals(0, lowerRc.minor());
    assertEquals(0, lowerRc.patch());
    assertTrue(lowerRc.isReleaseCandidate());
    assertEquals(lowerRcText, lowerRc.originalVersion());

    // upper-case, numbered "-RC1" suffix
    final String numberedRcText = "4.0.1-RC1";
    SemanticVersion numberedRc = new SemanticVersion(numberedRcText);
    assertEquals(4, numberedRc.major());
    assertEquals(0, numberedRc.minor());
    assertEquals(1, numberedRc.patch());
    assertTrue(numberedRc.isReleaseCandidate());
    assertEquals(numberedRcText, numberedRc.originalVersion());
  }

  /**
   * Ordering and equality of {@link SemanticVersion}: selected pairs are compared in both
   * directions, equal versions compare as 0 and are {@code equals}, and an RC suffix does not
   * affect {@code compareTo}.
   */
  @Test
  public void testSemanticVersionComparison() {
    SemanticVersion v310 = new SemanticVersion("3.1.0");
    SemanticVersion v311 = new SemanticVersion("3.1.1");
    SemanticVersion v320 = new SemanticVersion("3.2.0");
    SemanticVersion v400 = new SemanticVersion("4.0.0");
    SemanticVersion v401 = new SemanticVersion("4.0.1");
    SemanticVersion v501 = new SemanticVersion("5.0.1");

    // each pair is {smaller, larger}
    SemanticVersion[][] smallerFirst = {
      {v310, v311}, {v311, v320}, {v320, v400}, {v400, v401}, {v310, v501}
    };
    for (SemanticVersion[] pair : smallerFirst) {
      assertTrue(pair[0].compareTo(pair[1]) < 0);
    }

    // each pair is {larger, smaller}
    SemanticVersion[][] largerFirst = {
      {v311, v310}, {v320, v311}, {v400, v320}, {v401, v400}, {v501, v401}
    };
    for (SemanticVersion[] pair : largerFirst) {
      assertTrue(pair[0].compareTo(pair[1]) > 0);
    }

    // two instances built from the same text
    SemanticVersion sameAs311 = new SemanticVersion("3.1.1");
    assertEquals(0, v311.compareTo(sameAs311));
    assertEquals(v311, sameAs311);

    // comparison looks at major.minor.patch only, so the RC build ties with the release
    SemanticVersion releaseCandidate400 = new SemanticVersion("4.0.0-rc");
    assertEquals(0, v400.compareTo(releaseCandidate400));
  }

  /**
   * Malformed version strings ("invalid" and the two-component "1.2") must be rejected with an
   * {@link IllegalArgumentException} whose message mentions "Invalid version format".
   */
  @Test
  public void testSemanticVersionInvalidFormat() {
    final String[] malformedVersions = {"invalid", "1.2"};

    for (String malformed : malformedVersions) {
      try {
        new SemanticVersion(malformed);
        fail("Should have thrown IllegalArgumentException");
      } catch (IllegalArgumentException rejected) {
        assertTrue(rejected.getMessage().contains("Invalid version format"));
      }
    }
  }

  /** From 4.0.0, the highest available version (5.0.0) is the recommendation. */
  @Test
  public void testFindRecommendedVersion() {
    SemanticVersion running = new SemanticVersion("4.0.0");
    List<String> published = asList("3.3.1", "4.0.0", "4.0.1", "4.1.0", "5.0.0");

    assertEquals("5.0.0", Utils.findRecommendedVersion(running, published));
  }

  /**
   * Release candidates must never be recommended.
   *
   * <p>The current version is 3.1.1 so that every newer entry in the list is a release candidate.
   * With the previous value (4.1.1) nothing in the list was newer, which made the test pass
   * without exercising RC filtering at all.
   */
  @Test
  public void testFindRecommendedVersionFiltersRCVersions() {
    // Should not recommend RC versions
    List<String> availableVersions = asList("3.1.1", "3.2.0-rc", "3.2.0-RC1", "4.0.0-rc");

    SemanticVersion current = new SemanticVersion("3.1.1");
    String recommended = Utils.findRecommendedVersion(current, availableVersions);

    assertNull(recommended); // No stable version available newer than 3.1.1
  }

  /** When the running version is already the newest one listed, nothing is recommended. */
  @Test
  public void testFindRecommendedVersionNoUpgradeAvailable() {
    SemanticVersion running = new SemanticVersion("4.3.1");
    List<String> published = asList("4.1.0", "4.2.0", "4.3.1");

    assertNull(Utils.findRecommendedVersion(running, published));
  }

  /** An empty list of available versions yields no recommendation. */
  @Test
  public void testFindRecommendedVersionWithEmptyList() {
    SemanticVersion running = new SemanticVersion("3.1.1");
    List<String> nothingPublished = emptyList();

    assertNull(Utils.findRecommendedVersion(running, nothingPublished));
  }

  /** Unparseable entries in the list are ignored; the newest valid version still wins. */
  @Test
  public void testFindRecommendedVersionWithInvalidVersions() {
    SemanticVersion running = new SemanticVersion("3.1.1");
    List<String> published = asList("3.1.1", "invalid", "3.2.0", "bad.version", "3.3.0");

    assertEquals("3.3.0", Utils.findRecommendedVersion(running, published));
  }

  /** If everything newer than the running version is a release candidate, expect null. */
  @Test
  public void testFindRecommendedVersionOnlyRCVersionsAvailable() {
    SemanticVersion running = new SemanticVersion("3.1.1");
    List<String> published = asList("3.1.0", "3.1.1", "3.2.0-RC", "3.3.0-rc1");

    assertNull(Utils.findRecommendedVersion(running, published));
  }

  /**
   * The third argument of {@code Utils.getTableName} toggles sanitization for topics without a
   * map entry: {@code true} upper-cases and sanitizes, {@code false} returns the topic unchanged.
   */
  @Test
  public void testSanitizationToggle() {
    final Map<String, String> noMappings = new HashMap<>();
    final String validTopic = "MyTopic";
    final String invalidTopic = "my-topic";

    // Sanitization enabled (v3 compatible)
    String enabledValid = Utils.getTableName(validTopic, noMappings, true);
    assertEquals("MYTOPIC", enabledValid, "Valid identifier should be uppercased");

    String enabledInvalid = Utils.getTableName(invalidTopic, noMappings, true);
    assertTrue(
        enabledInvalid.startsWith("MY_TOPIC_"),
        "Invalid identifier should be sanitized+uppercased");
    assertTrue(enabledInvalid.matches("^[A-Z_0-9]+$"), "Should be fully uppercased");

    // Sanitization disabled (pass through)
    String disabledValid = Utils.getTableName(validTopic, noMappings, false);
    assertEquals("MyTopic", disabledValid, "Should pass through unchanged");

    String disabledInvalid = Utils.getTableName(invalidTopic, noMappings, false);
    assertEquals("my-topic", disabledInvalid, "Invalid identifier should pass through");
  }

  /**
   * Topics that have a map entry resolve to the mapped table whatever the sanitization flag is:
   * the quoted name keeps its case, the unquoted one was already upper-cased by the parser.
   */
  @Test
  public void testMapEntriesBypassSanitization() {
    Map<String, String> mappings =
        TopicToTableParser.parse("myTopic:\"My-Table\",otherTopic:MixedCase");

    // quoted table name, sanitization on and off
    assertEquals("My-Table", Utils.getTableName("myTopic", mappings, true));
    assertEquals("My-Table", Utils.getTableName("myTopic", mappings, false));

    // unquoted table name, sanitization on and off
    assertEquals("MIXEDCASE", Utils.getTableName("otherTopic", mappings, true));
    assertEquals("MIXEDCASE", Utils.getTableName("otherTopic", mappings, false));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/builder/SinkRecordBuilder.java
================================================
package com.snowflake.kafka.connector.builder;

import com.google.common.base.Preconditions;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * Fluent test builder for {@link SinkRecord}.
 *
 * <p>Topic and partition are fixed at creation. Everything else starts from a default and can be
 * overridden: string key schema with key {@code "key"}, string value schema with value
 * <code>{"name":123}</code>, offset 0 and no timestamp.
 */
public class SinkRecordBuilder {

  // fixed for the lifetime of the builder
  private final String topic;
  private final int partition;

  // key side
  private Schema keySchema = Schema.STRING_SCHEMA;
  private Object key = "key";

  // value side
  private Schema valueSchema = Schema.STRING_SCHEMA;
  private Object value = "{\"name\":123}";

  // position and time
  private long offset = 0;
  private Long timestamp = null;
  private TimestampType timestampType = TimestampType.NO_TIMESTAMP_TYPE;

  private SinkRecordBuilder(String topic, int partition) {
    this.topic = topic;
    this.partition = partition;
  }

  /** Entry point: creates a builder bound to the given topic and partition. */
  public static SinkRecordBuilder forTopicPartition(String topic, int partition) {
    return new SinkRecordBuilder(topic, partition);
  }

  /** Overrides the key. */
  public SinkRecordBuilder withKey(Object key) {
    this.key = key;
    return this;
  }

  /** Overrides the key schema. */
  public SinkRecordBuilder withKeySchema(Schema keySchema) {
    this.keySchema = keySchema;
    return this;
  }

  /** Overrides the value. */
  public SinkRecordBuilder withValue(Object value) {
    this.value = value;
    return this;
  }

  /** Overrides the value schema. */
  public SinkRecordBuilder withValueSchema(Schema valueSchema) {
    this.valueSchema = valueSchema;
    return this;
  }

  /** Overrides value schema and value together from a {@link SchemaAndValue}. */
  public SinkRecordBuilder withSchemaAndValue(SchemaAndValue schemaAndValue) {
    this.valueSchema = schemaAndValue.schema();
    this.value = schemaAndValue.value();
    return this;
  }

  /** Overrides the record offset. */
  public SinkRecordBuilder withOffset(long offset) {
    this.offset = offset;
    return this;
  }

  /**
   * Sets the timestamp and its type.
   *
   * @throws IllegalArgumentException if {@code timestampType} is {@code NO_TIMESTAMP_TYPE}
   */
  public SinkRecordBuilder withTimestamp(long timestamp, TimestampType timestampType) {
    final boolean carriesTimestamp = timestampType != TimestampType.NO_TIMESTAMP_TYPE;
    Preconditions.checkArgument(
        carriesTimestamp, "NO_TIMESTAMP_TYPE is the default timestampType");

    this.timestamp = timestamp;
    this.timestampType = timestampType;
    return this;
  }

  /** Creates the {@link SinkRecord} from the current state of the builder. */
  public SinkRecord build() {
    return new SinkRecord(
        topic, partition, keySchema, key, valueSchema, value, offset, timestamp, timestampType);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/ClientValidationConfigTest.java
================================================
package com.snowflake.kafka.connector.config;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.config.ConfigDef;
import org.junit.jupiter.api.Test;

/**
 * Checks how the {@code SNOWFLAKE_VALIDATION} setting is declared in the connector's {@link
 * ConfigDef}: that it exists, what its default is, and which values it parses.
 */
public class ClientValidationConfigTest {

  /** Obtains the connector config definition that every test inspects. */
  private static ConfigDef connectorConfigDef() {
    return ConnectorConfigDefinition.getConfig();
  }

  /**
   * Parses the given raw properties with the connector config definition and returns whatever was
   * resolved for {@code SNOWFLAKE_VALIDATION}.
   */
  private static Object parsedValidation(Map<String, String> rawProps) {
    Map<String, Object> resolved = connectorConfigDef().parse(rawProps);
    return resolved.get(SNOWFLAKE_VALIDATION);
  }

  /** Builds a property map that contains only the validation setting. */
  private static Map<String, String> propsWithValidation(String mode) {
    Map<String, String> rawProps = new HashMap<>();
    rawProps.put(SNOWFLAKE_VALIDATION, mode);
    return rawProps;
  }

  @Test
  public void testValidationConfigExists() {
    Object validationKey = connectorConfigDef().configKeys().get(SNOWFLAKE_VALIDATION);

    assertNotNull(validationKey, "snowflake.validation should be defined in config");
  }

  @Test
  public void testValidationDefaultValue() {
    Object declaredDefault = connectorConfigDef().configKeys().get(SNOWFLAKE_VALIDATION).defaultValue;

    assertEquals(
        SNOWFLAKE_VALIDATION_DEFAULT, declaredDefault, "Default value should be server_side");
  }

  @Test
  public void testValidationCanBeSetToServerSide() {
    Object actual = parsedValidation(propsWithValidation("server_side"));

    assertEquals("server_side", actual, "Should be able to set validation to server_side");
  }

  @Test
  public void testValidationCanBeSetToClientSide() {
    Object actual = parsedValidation(propsWithValidation("client_side"));

    assertEquals("client_side", actual, "Should be able to set validation to client_side");
  }

  @Test
  public void testValidationDefaultsToServerSide() {
    // Nothing is configured, so the parsed value must come from the declared default.
    Object actual = parsedValidation(new HashMap<>());

    assertEquals("server_side", actual, "Should default to server_side when not specified");
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/SinkTaskConfigTest.java
================================================
package com.snowflake.kafka.connector.config;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.*;
import static org.junit.jupiter.api.Assertions.*;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for the {@code SinkTaskConfig.from(...)} factories: required fields, defaults,
 * overrides, SSv1 migration settings, authentication fields and the task-specific-config switch.
 */
public class SinkTaskConfigTest {

  /** Smallest raw property set that the tests below expect {@code from} to accept. */
  private static Map<String, String> minimalConfig() {
    Map<String, String> props = new HashMap<>();
    props.put(NAME, "test_connector");
    props.put(Utils.TASK_ID, "0");
    props.put(SNOWFLAKE_URL_NAME, "https://account.snowflakecomputing.com");
    props.put(SNOWFLAKE_USER_NAME, "user");
    props.put(SNOWFLAKE_ROLE_NAME, "role");
    props.put(SNOWFLAKE_DATABASE_NAME, "db");
    props.put(SNOWFLAKE_SCHEMA_NAME, "schema");
    return props;
  }

  /** Minimal config plus (or overriding) a single entry. */
  private static Map<String, String> minimalConfigWith(String key, String value) {
    Map<String, String> props = minimalConfig();
    props.put(key, value);
    return props;
  }

  /** Minimal config with the listed keys removed. */
  private static Map<String, String> minimalConfigWithout(String... keys) {
    Map<String, String> props = minimalConfig();
    for (String key : keys) {
      props.remove(key);
    }
    return props;
  }

  @Test
  public void from_minimalConfig_succeeds() {
    SinkTaskConfig parsed = SinkTaskConfig.from(minimalConfig());

    assertEquals("test_connector", parsed.getConnectorName());
    assertEquals("0", parsed.getTaskId());
    assertTrue(parsed.getTopicToTableMap().isEmpty());
    assertEquals(
        ConnectorConfigTools.BehaviorOnNullValues.DEFAULT, parsed.getBehaviorOnNullValues());
    assertTrue(parsed.isJmxEnabled());
    assertFalse(parsed.isTolerateErrors());
    assertNull(parsed.getDlqTopicName());
    assertFalse(parsed.isEnableSanitization());
    assertTrue(parsed.isEnableSchematization());
    assertEquals(SnowflakeValidation.SERVER_SIDE, parsed.getValidation());
    assertEquals(50, parsed.getOpenChannelIoThreads());
    assertNotNull(parsed.getCachingConfig());
    assertNotNull(parsed.getMetadataConfig());
  }

  @Test
  public void from_missingConnectorName_throws() {
    Map<String, String> props = minimalConfigWithout(NAME);

    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(props));
    assertTrue(thrown.getMessage().contains("Connector name"));
  }

  @Test
  public void from_missingTaskId_throws() {
    Map<String, String> props = minimalConfigWithout(Utils.TASK_ID);

    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(props));
    assertTrue(thrown.getMessage().contains("Task ID"));
  }

  @Test
  public void from_emptyConnectorName_throws() {
    // A whitespace-only name must be rejected just like a missing one.
    Map<String, String> props = minimalConfigWith(NAME, "  ");

    assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(props));
  }

  @Test
  public void from_overridesDefaults() {
    Map<String, String> props = minimalConfig();
    props.put(BEHAVIOR_ON_NULL_VALUES, "ignore");
    props.put(JMX_OPT, "false");
    props.put(ERRORS_TOLERANCE_CONFIG, "all");
    props.put(ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_CONFIG, "dlq-topic");
    props.put(SNOWFLAKE_OPEN_CHANNEL_IO_THREADS, "10");
    props.put(SNOWFLAKE_ENABLE_SCHEMATIZATION, "false");

    SinkTaskConfig parsed = SinkTaskConfig.from(props);

    assertEquals(
        ConnectorConfigTools.BehaviorOnNullValues.IGNORE, parsed.getBehaviorOnNullValues());
    assertFalse(parsed.isJmxEnabled());
    assertTrue(parsed.isTolerateErrors());
    assertEquals("dlq-topic", parsed.getDlqTopicName());
    assertEquals(10, parsed.getOpenChannelIoThreads());
    assertFalse(parsed.isEnableSchematization());
  }

  @Test
  public void from_topic2tableMap_parsed() {
    Map<String, String> props = minimalConfigWith(SNOWFLAKE_TOPICS2TABLE_MAP, "t1:table1,t2:table2");

    Map<String, String> topicToTable = SinkTaskConfig.from(props).getTopicToTableMap();

    assertEquals(2, topicToTable.size());
    assertEquals("TABLE1", topicToTable.get("t1"));
    assertEquals("TABLE2", topicToTable.get("t2"));
  }

  @Test
  public void from_nullMap_treatedAsEmptyAndThrowsForMissingRequired() {
    // from(null) replaces null with empty map, then validation fails for missing connector name
    assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(null));
  }

  @Test
  public void from_defaultMigrationMode_isSkip() {
    SinkTaskConfig parsed = SinkTaskConfig.from(minimalConfig());

    assertEquals(Ssv1MigrationMode.SKIP, parsed.getSsv1MigrationMode());
  }

  @Test
  public void from_migrationMode_bestEffort() {
    Map<String, String> props = minimalConfigWith(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "best_effort");

    assertEquals(Ssv1MigrationMode.BEST_EFFORT, SinkTaskConfig.from(props).getSsv1MigrationMode());
  }

  @Test
  public void from_migrationMode_strict() {
    Map<String, String> props = minimalConfigWith(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "strict");

    assertEquals(Ssv1MigrationMode.STRICT, SinkTaskConfig.from(props).getSsv1MigrationMode());
  }

  @Test
  public void from_migrationMode_caseInsensitive() {
    Map<String, String> props = minimalConfigWith(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "BEST_EFFORT");

    assertEquals(Ssv1MigrationMode.BEST_EFFORT, SinkTaskConfig.from(props).getSsv1MigrationMode());
  }

  @Test
  public void from_migrationMode_invalidValue_throws() {
    Map<String, String> props = minimalConfigWith(SNOWFLAKE_SSV1_OFFSET_MIGRATION, "invalid_value");

    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(props));

    // The message must name both the offending setting and the rejected value.
    String message = thrown.getMessage();
    assertTrue(message.contains(SNOWFLAKE_SSV1_OFFSET_MIGRATION));
    assertTrue(message.contains("invalid_value"));
  }

  @Test
  public void from_defaultIncludeConnectorName_isFalse() {
    SinkTaskConfig parsed = SinkTaskConfig.from(minimalConfig());

    assertFalse(parsed.isSsv1MigrationIncludeConnectorName());
  }

  @Test
  public void from_includeConnectorNameTrue_isParsed() {
    Map<String, String> props =
        minimalConfigWith(SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME, "true");

    assertTrue(SinkTaskConfig.from(props).isSsv1MigrationIncludeConnectorName());
  }

  @Test
  public void from_oauthFields_areParsed() {
    Map<String, String> props = minimalConfig();
    props.put(SNOWFLAKE_AUTHENTICATOR, AuthenticatorType.OAUTH.toConfigValue());
    props.put(SNOWFLAKE_OAUTH_CLIENT_ID, "my_client_id");
    props.put(SNOWFLAKE_OAUTH_CLIENT_SECRET, "my_client_secret");
    props.put(SNOWFLAKE_OAUTH_REFRESH_TOKEN, "my_refresh_token");
    props.put(SNOWFLAKE_OAUTH_TOKEN_ENDPOINT, "https://oauth.example.com/token");

    SinkTaskConfig parsed = SinkTaskConfig.from(props);

    assertEquals(AuthenticatorType.OAUTH, parsed.getAuthenticator());
    assertEquals("my_client_id", parsed.getOauthClientId());
    assertEquals("my_client_secret", parsed.getOauthClientSecret().value());
    assertEquals("my_refresh_token", parsed.getOauthRefreshToken().value());
    assertEquals("https://oauth.example.com/token", parsed.getOauthTokenEndpoint());
  }

  @Test
  public void from_privateKeyFields_wrappedAsPassword() {
    Map<String, String> props = minimalConfig();
    props.put(SNOWFLAKE_PRIVATE_KEY, "my_private_key");
    props.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, "my_passphrase");

    SinkTaskConfig parsed = SinkTaskConfig.from(props);

    assertEquals("my_private_key", parsed.getSnowflakePrivateKey().value());
    assertEquals("my_passphrase", parsed.getSnowflakePrivateKeyPassphrase().value());
  }

  @Test
  public void from_missingPrivateKey_returnsNull() {
    SinkTaskConfig parsed = SinkTaskConfig.from(minimalConfig());

    assertNull(parsed.getSnowflakePrivateKey());
    assertNull(parsed.getSnowflakePrivateKeyPassphrase());
  }

  @Test
  public void from_defaultAuthenticator_isSnowflakeJwt() {
    SinkTaskConfig parsed = SinkTaskConfig.from(minimalConfig());

    assertEquals(AuthenticatorType.SNOWFLAKE_JWT, parsed.getAuthenticator());
  }

  @Test
  public void from_oauthWithoutOptionalFields_succeeds() {
    Map<String, String> props = minimalConfig();
    props.put(SNOWFLAKE_AUTHENTICATOR, AuthenticatorType.OAUTH.toConfigValue());
    props.put(SNOWFLAKE_OAUTH_CLIENT_ID, "client_id");
    props.put(SNOWFLAKE_OAUTH_CLIENT_SECRET, "client_secret");

    SinkTaskConfig parsed = SinkTaskConfig.from(props);

    assertEquals(AuthenticatorType.OAUTH, parsed.getAuthenticator());
    assertNull(parsed.getOauthRefreshToken());
    assertNull(parsed.getOauthTokenEndpoint());
  }

  @Test
  public void from_skipTaskSpecificConfig_succeedsWithoutTaskId() {
    Map<String, String> props = minimalConfigWithout(Utils.TASK_ID);

    SinkTaskConfig parsed = SinkTaskConfig.from(props, true);

    assertEquals("", parsed.getTaskId());
    assertEquals("test_connector", parsed.getConnectorName());
  }

  @Test
  public void from_skipTaskSpecificConfig_succeedsWithoutConnectorName() {
    Map<String, String> props = minimalConfigWithout(NAME, Utils.TASK_ID);

    SinkTaskConfig parsed = SinkTaskConfig.from(props, true);

    assertEquals("", parsed.getConnectorName());
    assertEquals("", parsed.getTaskId());
  }

  @Test
  public void from_skipTaskSpecificConfig_false_throwsWithoutTaskId() {
    Map<String, String> props = minimalConfigWithout(Utils.TASK_ID);

    assertThrows(IllegalArgumentException.class, () -> SinkTaskConfig.from(props));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/SinkTaskConfigTestBuilder.java
================================================
package com.snowflake.kafka.connector.config;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.CachingConfig;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import java.util.Collections;
import java.util.HashMap;

/**
 * Test-only builder for {@link SinkTaskConfig}. Provides a builder with default values for all
 * optional fields. Caller must set connectorName and taskId before build().
 *
 * <p>Production code uses {@link SinkTaskConfig#from(java.util.Map)}; this class is for tests that
 * need to construct a config without parsing a Map.
 */
public final class SinkTaskConfigTestBuilder {

  /** Static utility holder; never instantiated. */
  private SinkTaskConfigTestBuilder() {}

  /**
   * Creates a {@link SinkTaskConfig.Builder} pre-populated with a default for every field set
   * below. connectorName and taskId are intentionally left unset; the caller supplies them before
   * calling build().
   *
   * @return a pre-populated builder ready for further customization
   */
  public static SinkTaskConfig.Builder builder() {
    // Defaults that need to be computed rather than read from a constant.
    CachingConfig defaultCaching = CachingConfig.fromConfig(Collections.emptyMap());
    SnowflakeMetadataConfig defaultMetadata = new SnowflakeMetadataConfig();

    return SinkTaskConfig.builder()
        .topicToTableMap(new HashMap<>())
        .behaviorOnNullValues(ConnectorConfigTools.BehaviorOnNullValues.DEFAULT)
        .jmxEnabled(KafkaConnectorConfigParams.JMX_OPT_DEFAULT)
        .tolerateErrors(false)
        .errorsLogEnable(KafkaConnectorConfigParams.ERRORS_LOG_ENABLE_DEFAULT)
        .dlqTopicName(KafkaConnectorConfigParams.ERRORS_DEAD_LETTER_QUEUE_TOPIC_NAME_DEFAULT)
        .enableSanitization(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION_DEFAULT)
        .enableColumnIdentifierNormalization(
            KafkaConnectorConfigParams
                .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION_DEFAULT)
        .enableSchematization(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION_DEFAULT)
        .validation(
            SnowflakeValidation.fromConfig(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION_DEFAULT))
        .openChannelIoThreads(KafkaConnectorConfigParams.SNOWFLAKE_OPEN_CHANNEL_IO_THREADS_DEFAULT)
        .streamingClientProviderOverrideMap("")
        .cachingConfig(defaultCaching)
        .metadataConfig(defaultMetadata)
        // Connection settings: blank strings / absent key material, JWT authenticator.
        .snowflakeUrl("")
        .snowflakeUser("")
        .snowflakeRole("")
        .snowflakePrivateKey(null)
        .snowflakePrivateKeyPassphrase(null)
        .authenticator(AuthenticatorType.SNOWFLAKE_JWT)
        .snowflakeDatabase("")
        .snowflakeSchema("")
        // SSv1 offset migration is off unless a test opts in.
        .ssv1MigrationMode(Ssv1MigrationMode.SKIP)
        .ssv1MigrationIncludeConnectorName(false);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/config/SnowflakeSinkConnectorConfigBuilder.java
================================================
package com.snowflake.kafka.connector.config;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import java.util.HashMap;
import java.util.Map;

/**
 * This is a builder class for the connector config. For now it returns map. Let's change it to a
 * more convenient abstraction when we have it.
 */
public class SnowflakeSinkConnectorConfigBuilder {

  /** Settings accumulated so far; never handed out directly (see {@link #build()}). */
  private final Map<String, String> config = new HashMap<>();

  private SnowflakeSinkConnectorConfigBuilder() {}

  /**
   * Starts a builder pre-filled with the common required fields and with the "validate
   * compatibility with classic" flag set to {@code false}.
   */
  public static SnowflakeSinkConnectorConfigBuilder streamingConfig() {
    return commonRequiredFields().withCompatibilityValidate(false);
  }

  /** Pre-fills name, topics, URL, schema, database, user, private key and role with test values. */
  private static SnowflakeSinkConnectorConfigBuilder commonRequiredFields() {
    return new SnowflakeSinkConnectorConfigBuilder()
        .withName("test")
        .withTopics("topic1,topic2")
        .withUrl("https://testaccount.snowflake.com:443")
        .withSchema("testSchema")
        .withDatabase("testDatabase")
        .withUser("userName")
        .withPrivateKey("fdsfsdfsdfdsfdsrqwrwewrwrew42314424")
        .withRole("role");
  }

  /** Sets the connector name. */
  public SnowflakeSinkConnectorConfigBuilder withName(String name) {
    config.put(KafkaConnectorConfigParams.NAME, name);
    return this;
  }

  /** Sets the topics value (the pre-filled default uses a comma-separated list). */
  public SnowflakeSinkConnectorConfigBuilder withTopics(String topics) {
    config.put(KafkaConnectorConfigParams.TOPICS, topics);
    return this;
  }

  /** Sets the Snowflake URL. */
  public SnowflakeSinkConnectorConfigBuilder withUrl(String url) {
    config.put(SNOWFLAKE_URL_NAME, url);
    return this;
  }

  /** Sets the Snowflake database name. */
  public SnowflakeSinkConnectorConfigBuilder withDatabase(String database) {
    config.put(SNOWFLAKE_DATABASE_NAME, database);
    return this;
  }

  /** Sets the Snowflake schema name. */
  public SnowflakeSinkConnectorConfigBuilder withSchema(String schema) {
    config.put(SNOWFLAKE_SCHEMA_NAME, schema);
    return this;
  }

  /** Sets the Snowflake user name. */
  public SnowflakeSinkConnectorConfigBuilder withUser(String user) {
    config.put(SNOWFLAKE_USER_NAME, user);
    return this;
  }

  /** Sets the Snowflake private key. */
  public SnowflakeSinkConnectorConfigBuilder withPrivateKey(String privateKey) {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, privateKey);
    return this;
  }

  /** Sets the Snowflake role name. */
  public SnowflakeSinkConnectorConfigBuilder withRole(String role) {
    config.put(SNOWFLAKE_ROLE_NAME, role);
    return this;
  }

  /** Removes the role entry, e.g. to exercise a configuration without a role. */
  public SnowflakeSinkConnectorConfigBuilder withoutRole() {
    config.remove(SNOWFLAKE_ROLE_NAME);
    return this;
  }

  /** Sets the authenticator config value. */
  public SnowflakeSinkConnectorConfigBuilder withAuthenticator(String authenticator) {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_AUTHENTICATOR, authenticator);
    return this;
  }

  /** Sets the OAuth client id. */
  public SnowflakeSinkConnectorConfigBuilder withOauthClientId(String clientId) {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_ID, clientId);
    return this;
  }

  /** Sets the OAuth client secret. */
  public SnowflakeSinkConnectorConfigBuilder withOauthClientSecret(String clientSecret) {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_CLIENT_SECRET, clientSecret);
    return this;
  }

  /** Sets the OAuth refresh token. */
  public SnowflakeSinkConnectorConfigBuilder withOauthRefreshToken(String refreshToken) {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_REFRESH_TOKEN, refreshToken);
    return this;
  }

  /** Sets the OAuth token endpoint. */
  public SnowflakeSinkConnectorConfigBuilder withOauthTokenEndpoint(String tokenEndpoint) {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_OAUTH_TOKEN_ENDPOINT, tokenEndpoint);
    return this;
  }

  /** Removes the private key entry, e.g. to exercise a configuration without key material. */
  public SnowflakeSinkConnectorConfigBuilder withoutPrivateKey() {
    config.remove(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
    return this;
  }

  /** Sets the "validate compatibility with classic" flag to the given boolean. */
  public SnowflakeSinkConnectorConfigBuilder withCompatibilityValidate(boolean validate) {
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC,
        String.valueOf(validate));
    return this;
  }

  /**
   * Applies the v3 compatibility profile in one call:
   *
   * <ul>
   *   <li>validation = {@code client_side}
   *   <li>column identifier normalization = {@code true}
   *   <li>autogenerated table name sanitization = {@code true}
   *   <li>schematization = {@code false} (set explicitly)
   *   <li>SSv1 offset migration = {@code best_effort}
   *   <li>SSv1 offset migration "include connector name" = {@code false}
   * </ul>
   */
  public SnowflakeSinkConnectorConfigBuilder withV3CompatibilitySettings() {
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "client_side");
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "true");
    config.put(
        KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_AUTOGENERATED_TABLE_NAME_SANITIZATION,
        "true");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ENABLE_SCHEMATIZATION, "false");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION, "best_effort");
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_SSV1_OFFSET_MIGRATION_INCLUDE_CONNECTOR_NAME, "false");
    return this;
  }

  /**
   * Returns the accumulated settings as a new, mutable map.
   *
   * <p>A copy is returned on purpose: tests commonly tweak the built map afterwards, and handing
   * out the internal map would let those tweaks leak back into this builder and into every other
   * map it has produced.
   *
   * @return an independent {@link HashMap} snapshot of the current settings
   */
  public Map<String, String> build() {
    return new HashMap<>(config);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/dlq/InMemoryKafkaRecordErrorReporter.java
================================================
package com.snowflake.kafka.connector.dlq;

import static java.util.Collections.unmodifiableList;

import java.util.ArrayList;
import java.util.List;
import org.apache.kafka.connect.sink.SinkRecord;

/**
 * In memory implementation of KafkaRecordErrorReporter which mimics sending records to DLQ. Here we
 * simply insert records into an ArrayList
 *
 * <p>Used for testing.
 */
public final class InMemoryKafkaRecordErrorReporter implements KafkaRecordErrorReporter {
  /** Every reported record paired with its error, in the order {@link #reportError} was called. */
  private final List<ReportedRecord> reportedRecords = new ArrayList<>();

  /**
   * Remembers the record and the error instead of forwarding them anywhere.
   *
   * @param record record that failed
   * @param e error associated with the record
   */
  @Override
  public void reportError(final SinkRecord record, final Exception e) {
    reportedRecords.add(new ReportedRecord(record, e));
  }

  /**
   * Returns a read-only view of everything reported so far. The view is backed by the live list,
   * so records reported later become visible through it as well.
   */
  public List<ReportedRecord> getReportedRecords() {
    return unmodifiableList(reportedRecords);
  }

  /** Immutable pair of a reported record and the error it was reported with. */
  public static final class ReportedRecord {
    private final SinkRecord record;
    private final Throwable e;

    private ReportedRecord(final SinkRecord record, final Throwable e) {
      this.record = record;
      this.e = e;
    }

    /** Returns the record that was reported. */
    public SinkRecord getRecord() {
      return record;
    }

    /** Returns the error the record was reported with. */
    public Throwable getException() {
      return e;
    }

    @Override
    public String toString() {
      // Label matches the class name (it previously read "ReportedData").
      return "ReportedRecord{" + "record=" + record + ", e=" + e + '}';
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/CachingSnowflakeConnectionServiceStatsTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Tests for cache statistics logging in CachedSnowflakeConnectionService. Verifies that cache stats
 * can be logged without exceptions.
 */
/**
 * Smoke tests for {@code CachingSnowflakeConnectionService#logCacheStatistics()}: the call must
 * complete without throwing, whether or not the caches are enabled.
 */
class CachingSnowflakeConnectionServiceStatsTest {

  @Test
  void testCacheStatisticsLogging() {
    // Given: both the table and the pipe cache are switched on
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.tableExist("TABLE1")).thenReturn(true);
    when(delegate.pipeExist("PIPE1")).thenReturn(true);
    CachingSnowflakeConnectionService service =
        new CachingSnowflakeConnectionService(
            delegate, createCacheConfig(true, 30000L, true, 30000L));

    // When: each lookup is issued twice (the repeated call is the cache hit)
    service.tableExist("TABLE1");
    service.tableExist("TABLE1");
    service.pipeExist("PIPE1");
    service.pipeExist("PIPE1");

    // Then: logging the statistics completes without an exception
    service.logCacheStatistics();
  }

  @Test
  void testCacheStatisticsLoggingWithNoCacheEnabled() {
    // Given: neither cache is switched on
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    CachingSnowflakeConnectionService service =
        new CachingSnowflakeConnectionService(
            delegate, createCacheConfig(false, 30000L, false, 30000L));

    // When / Then: logging the statistics still completes without an exception
    service.logCacheStatistics();
  }

  /** Builds a {@link CachingConfig} from the four cache settings, passed through as strings. */
  private CachingConfig createCacheConfig(
      boolean cacheTableExists,
      long tableExpirationMs,
      boolean cachePipeExists,
      long pipeExpirationMs) {
    Map<String, String> rawSettings = new HashMap<>();
    rawSettings.put(CACHE_TABLE_EXISTS, String.valueOf(cacheTableExists));
    rawSettings.put(CACHE_TABLE_EXISTS_EXPIRE_MS, String.valueOf(tableExpirationMs));
    rawSettings.put(CACHE_PIPE_EXISTS, String.valueOf(cachePipeExists));
    rawSettings.put(CACHE_PIPE_EXISTS_EXPIRE_MS, String.valueOf(pipeExpirationMs));
    return CachingConfig.fromConfig(rawSettings);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/CachingSnowflakeConnectionServiceTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Verifies how often {@code CachingSnowflakeConnectionService} consults its delegate for table and
 * pipe existence checks, depending on whether the respective cache is enabled and has expired.
 */
class CachingSnowflakeConnectionServiceTest {

  private static final String TEST_TABLE = "TEST_TABLE";
  private static final String TEST_PIPE = "TEST_PIPE";

  @Test
  void testTableExistCacheEnabled_MultipleCalls_DelegateCalledOnce() {
    // Given: table-existence caching is on
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.tableExist(TEST_TABLE)).thenReturn(true);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, true, 30000L, false, 30000L);

    // When / Then: three lookups all succeed ...
    assertTrue(service.tableExist(TEST_TABLE));
    assertTrue(service.tableExist(TEST_TABLE));
    assertTrue(service.tableExist(TEST_TABLE));

    // ... but only the first one reached the delegate
    verify(delegate, times(1)).tableExist(TEST_TABLE);
  }

  @Test
  void testTableExistCacheDisabled_MultipleCalls_DelegateCalledEveryTime() {
    // Given: table-existence caching is off
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.tableExist(TEST_TABLE)).thenReturn(true);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, false, 30000L, false, 30000L);

    // When / Then: three lookups all succeed ...
    assertTrue(service.tableExist(TEST_TABLE));
    assertTrue(service.tableExist(TEST_TABLE));
    assertTrue(service.tableExist(TEST_TABLE));

    // ... and each of them reached the delegate
    verify(delegate, times(3)).tableExist(TEST_TABLE);
  }

  @Test
  void testTableExistCacheEnabled_DifferentTables_DelegateCalledForEach() {
    // Given: table-existence caching is on and the two tables answer differently
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.tableExist("TABLE1")).thenReturn(true);
    when(delegate.tableExist("TABLE2")).thenReturn(false);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, true, 30000L, false, 30000L);

    // When / Then: each table is looked up twice and keeps its own answer
    assertTrue(service.tableExist("TABLE1"));
    assertTrue(service.tableExist("TABLE1"));
    assertFalse(service.tableExist("TABLE2"));
    assertFalse(service.tableExist("TABLE2"));

    // The delegate saw exactly one call per distinct table
    verify(delegate, times(1)).tableExist("TABLE1");
    verify(delegate, times(1)).tableExist("TABLE2");
  }

  @Test
  void testPipeExistCacheEnabled_MultipleCalls_DelegateCalledOnce() {
    // Given: pipe-existence caching is on
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.pipeExist(TEST_PIPE)).thenReturn(true);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, false, 30000L, true, 30000L);

    // When / Then: three lookups all succeed ...
    assertTrue(service.pipeExist(TEST_PIPE));
    assertTrue(service.pipeExist(TEST_PIPE));
    assertTrue(service.pipeExist(TEST_PIPE));

    // ... but only the first one reached the delegate
    verify(delegate, times(1)).pipeExist(TEST_PIPE);
  }

  @Test
  void testPipeExistCacheDisabled_MultipleCalls_DelegateCalledEveryTime() {
    // Given: pipe-existence caching is off
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.pipeExist(TEST_PIPE)).thenReturn(true);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, false, 30000L, false, 30000L);

    // When / Then: three lookups all succeed ...
    assertTrue(service.pipeExist(TEST_PIPE));
    assertTrue(service.pipeExist(TEST_PIPE));
    assertTrue(service.pipeExist(TEST_PIPE));

    // ... and each of them reached the delegate
    verify(delegate, times(3)).pipeExist(TEST_PIPE);
  }

  @Test
  void testPipeExistCacheEnabled_DifferentPipes_DelegateCalledForEach() {
    // Given: pipe-existence caching is on and the two pipes answer differently
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.pipeExist("PIPE1")).thenReturn(true);
    when(delegate.pipeExist("PIPE2")).thenReturn(false);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, false, 30000L, true, 30000L);

    // When / Then: each pipe is looked up twice and keeps its own answer
    assertTrue(service.pipeExist("PIPE1"));
    assertTrue(service.pipeExist("PIPE1"));
    assertFalse(service.pipeExist("PIPE2"));
    assertFalse(service.pipeExist("PIPE2"));

    // The delegate saw exactly one call per distinct pipe
    verify(delegate, times(1)).pipeExist("PIPE1");
    verify(delegate, times(1)).pipeExist("PIPE2");
  }

  @Test
  void testCacheExpiration_TableExists() throws InterruptedException {
    // Given: a table cache whose entries expire after 100 ms
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.tableExist(TEST_TABLE)).thenReturn(true);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, true, 100L, false, 30000L);

    // When: the second lookup happens only after the entry had time to expire
    service.tableExist(TEST_TABLE);
    Thread.sleep(150);
    service.tableExist(TEST_TABLE);

    // Then: both lookups reached the delegate
    verify(delegate, times(2)).tableExist(TEST_TABLE);
  }

  @Test
  void testCacheExpiration_PipeExists() throws InterruptedException {
    // Given: a pipe cache whose entries expire after 100 ms
    SnowflakeConnectionService delegate = mock(SnowflakeConnectionService.class);
    when(delegate.pipeExist(TEST_PIPE)).thenReturn(true);
    CachingSnowflakeConnectionService service =
        newCachingService(delegate, false, 30000L, true, 100L);

    // When: the second lookup happens only after the entry had time to expire
    service.pipeExist(TEST_PIPE);
    Thread.sleep(150);
    service.pipeExist(TEST_PIPE);

    // Then: both lookups reached the delegate
    verify(delegate, times(2)).pipeExist(TEST_PIPE);
  }

  /** Wraps the delegate in a caching service configured with the given cache settings. */
  private CachingSnowflakeConnectionService newCachingService(
      SnowflakeConnectionService delegate,
      boolean cacheTableExists,
      long tableExpirationMs,
      boolean cachePipeExists,
      long pipeExpirationMs) {
    CachingConfig cachingConfig =
        createCacheConfig(cacheTableExists, tableExpirationMs, cachePipeExists, pipeExpirationMs);
    return new CachingSnowflakeConnectionService(delegate, cachingConfig);
  }

  /** Builds a {@link CachingConfig} from the four cache settings, passed through as strings. */
  private CachingConfig createCacheConfig(
      boolean cacheTableExists,
      long tableExpirationMs,
      boolean cachePipeExists,
      long pipeExpirationMs) {
    Map<String, String> rawSettings = new HashMap<>();
    rawSettings.put(CACHE_TABLE_EXISTS, String.valueOf(cacheTableExists));
    rawSettings.put(CACHE_TABLE_EXISTS_EXPIRE_MS, String.valueOf(tableExpirationMs));
    rawSettings.put(CACHE_PIPE_EXISTS, String.valueOf(cachePipeExists));
    rawSettings.put(CACHE_PIPE_EXISTS_EXPIRE_MS, String.valueOf(pipeExpirationMs));
    return CachingConfig.fromConfig(rawSettings);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/ConnectionServiceIT.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME;
import static org.assertj.core.api.AssertionsForClassTypes.assertThat;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;

/**
 * Integration tests for {@link SnowflakeConnectionService} instances built through {@link
 * SnowflakeConnectionServiceFactory} from the test profile configuration.
 *
 * <p>Checks use AssertJ rather than the Java {@code assert} keyword so that they are always
 * evaluated (the keyword is skipped when the JVM runs without {@code -ea}) and report the actual
 * value on failure.
 */
class ConnectionServiceIT {
  /** Connection with table/pipe existence caching disabled so every check hits the delegate. */
  private final SnowflakeConnectionService conn = buildNoCachingConnection();

  /** Builds a connection service from the test profile with both existence caches turned off. */
  private static SnowflakeConnectionService buildNoCachingConnection() {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    config.put(KafkaConnectorConfigParams.CACHE_TABLE_EXISTS, "false");
    config.put(KafkaConnectorConfigParams.CACHE_PIPE_EXISTS, "false");
    return SnowflakeConnectionServiceFactory.builder().setProperties(config).build();
  }

  private final String tableName = TestUtils.randomTableName();
  private final String tableName1 = TestUtils.randomTableName();

  /** Building a service from the encrypted-key profile must not throw. */
  @Test
  void testEncryptedKey() {
    SnowflakeConnectionService service =
        SnowflakeConnectionServiceFactory.builder()
            .setProperties(TestUtils.transformProfileFileToConnectorConfiguration(true))
            .build();
    // Release the connection that was opened only to prove construction succeeds.
    service.close();
  }

  /** The JDBC SSL property must follow the scheme of the configured Snowflake URL. */
  @Test
  void testSetSSLProperties() {
    Map<String, String> testConfig = TestUtils.transformProfileFileToConnectorConfiguration(false);

    // Explicit https scheme -> SSL on
    testConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, "https://sfctest0.snowflakecomputing.com");
    assertThat(
            SnowflakeConnectionServiceFactory.builder()
                .setProperties(testConfig)
                .getProperties()
                .getProperty(InternalUtils.JDBC_SSL))
        .isEqualTo("on");

    // No scheme -> SSL on
    testConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, "sfctest0.snowflakecomputing.com");
    assertThat(
            SnowflakeConnectionServiceFactory.builder()
                .setProperties(testConfig)
                .getProperties()
                .getProperty(InternalUtils.JDBC_SSL))
        .isEqualTo("on");

    // Explicit http scheme -> SSL off
    testConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
        "http://sfctest0.snowflakecomputing.com:400");
    assertThat(
            SnowflakeConnectionServiceFactory.builder()
                .setProperties(testConfig)
                .getProperties()
                .getProperty(InternalUtils.JDBC_SSL))
        .isEqualTo("off");
  }

  /** A service built from the streaming configuration exposes its name and telemetry client. */
  @Test
  void createConnectionService_SnowpipeStreaming() {
    Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(false);
    ConnectorConfigTools.setDefaultValues(config);
    SnowflakeConnectionService service =
        SnowflakeConnectionServiceFactory.builder().setProperties(config).build();

    try {
      assertThat(service.getConnectorName()).isEqualTo(TEST_CONNECTOR_NAME);
      assertThat(service.getTelemetryClient()).isInstanceOf(SnowflakeTelemetryService.class);
    } finally {
      // Close even when an assertion fails so the connection is not leaked.
      service.close();
    }
  }

  /** Drops the per-test tables and releases the per-test connection. */
  @AfterEach
  void afterEach() {
    try {
      TestUtils.dropTable(tableName);
      TestUtils.dropTable(tableName1);
    } finally {
      // JUnit creates a new test instance (and therefore a new connection) per test method.
      conn.close();
    }
  }

  /** Walks a table through create / insert / re-create / overwrite / drop and checks each step. */
  @Test
  void testTableFunctions() throws SQLException {
    // table doesn't exist
    assertThat(conn.tableExist(tableName)).isFalse();
    // create table
    TestUtils.createTableWithMetadataColumn(tableName);
    // table exists
    assertThat(conn.tableExist(tableName)).isTrue();
    // insert some value
    TestUtils.executeQuery("insert into \"" + tableName + "\" values(123)");
    ResultSet resultSet = TestUtils.showTable(tableName);
    // value inserted
    assertThat(InternalUtils.resultSize(resultSet)).isEqualTo(1);
    // create table if not exists
    TestUtils.createTableWithMetadataColumn(tableName);
    resultSet = TestUtils.showTable(tableName);
    // table hasn't been overwritten
    assertThat(InternalUtils.resultSize(resultSet)).isEqualTo(1);
    // overwrite table
    TestUtils.createTableWithMetadataColumn(tableName, true);
    resultSet = TestUtils.showTable(tableName);
    // new table
    assertThat(InternalUtils.resultSize(resultSet)).isEqualTo(0);
    // table is compatible
    assertThat(conn.isTableCompatible(tableName)).isTrue();
    TestUtils.dropTable(tableName);
    // dropped table
    assertThat(conn.tableExist(tableName)).isFalse();
    // create incompatible table
    TestUtils.executeQuery("create table \"" + tableName + "\" (num int)");
    assertThat(conn.isTableCompatible(tableName)).isFalse();
    TestUtils.dropTable(tableName);
  }

  /** A service reports open until {@code close()} is called and closed afterwards. */
  @Test
  void testConnectionFunction() {
    SnowflakeConnectionService service = TestUtils.getConnectionService();
    assertThat(service.isClosed()).isFalse();
    service.close();
    assertThat(service.isClosed()).isTrue();
  }

  /**
   * Integration test for SNOW-3029864: Verifies that the configured snowflake.role.name is actually
   * used when establishing JDBC connections for DDL operations (table creation, schema checks,
   * etc.).
   */
  @Test
  void testRoleIsUsedInJdbcConnection() throws SQLException {
    // given - connection service with role from config
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(true);
    String expectedRole = config.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
    SnowflakeConnectionService service =
        SnowflakeConnectionServiceFactory.builder().setProperties(config).build();
    String testTable = TestUtils.randomTableName();

    try {
      String actualRole;
      // when - get JDBC connection and query current role
      try (Statement stmt = service.getConnection().createStatement();
          ResultSet resultSet = stmt.executeQuery("SELECT CURRENT_ROLE()")) {
        // Fail with a clear message instead of an opaque driver error if no row came back.
        assertThat(resultSet.next()).as("SELECT CURRENT_ROLE() should return a row").isTrue();
        actualRole = resultSet.getString(1);
      }

      // then - the active role should match the configured role (case-insensitive, Snowflake
      // uppercases)
      assertThat(actualRole)
          .as("JDBC connection should use the configured snowflake.role.name")
          .isEqualToIgnoringCase(expectedRole);

      // and - DDL operations (table creation) should work with this role
      TestUtils.createTableWithMetadataColumn(testTable);
      assertThat(service.tableExist(testTable))
          .as("Table creation should succeed with the configured role")
          .isTrue();
    } finally {
      // cleanup - runs on assertion failure too, so neither the table nor the connection leaks
      try {
        TestUtils.dropTable(testTable);
      } finally {
        service.close();
      }
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/EmbeddedProxyServer.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector.internal;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ServerSocket;
import java.net.Socket;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.junit.rules.ExternalResource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Lightweight in-process HTTP CONNECT proxy for testing proxy configurations. Supports basic
 * authentication and HTTPS tunneling via the CONNECT method.
 *
 * <p>Can be used as a JUnit Rule to automatically manage the proxy lifecycle per test method.
 */
public class EmbeddedProxyServer extends ExternalResource {
  private static final Logger LOGGER = LoggerFactory.getLogger(EmbeddedProxyServer.class);

  /** Header name prefix (matched case-insensitively) carrying the proxy credentials. */
  private static final String PROXY_AUTH_HEADER = "proxy-authorization:";

  /** Authentication scheme prefix (matched case-insensitively) of a Basic credential. */
  private static final String BASIC_SCHEME = "Basic ";

  private final String username;
  private final String password;
  private ServerSocket serverSocket;
  private ExecutorService executor;
  private volatile boolean running;

  /**
   * @param username user name a client must present via Basic proxy authentication
   * @param password password a client must present via Basic proxy authentication
   */
  public EmbeddedProxyServer(final String username, final String password) {
    this.username = username;
    this.password = password;
  }

  /**
   * Binds a server socket to a random free port and starts accepting connections on a daemon
   * thread.
   *
   * @throws IllegalStateException if the server has already been started
   * @throws RuntimeException if the server socket cannot be opened
   */
  public final void start() {
    if (serverSocket != null) {
      throw new IllegalStateException("Proxy server is already running");
    }

    try {
      final ServerSocket socket = new ServerSocket(0); // random available port
      final ExecutorService workers =
          Executors.newCachedThreadPool(
              r -> {
                Thread t = new Thread(r, "proxy-worker");
                t.setDaemon(true);
                return t;
              });
      serverSocket = socket;
      executor = workers;
      running = true;

      // Hand the accept thread its own references: stop() nulls the fields from another thread.
      Thread acceptThread = new Thread(() -> acceptLoop(socket, workers), "proxy-accept");
      acceptThread.setDaemon(true);
      acceptThread.start();

      LOGGER.info("Proxy server started on localhost:{}", socket.getLocalPort());
    } catch (IOException e) {
      throw new RuntimeException("Failed to start proxy server: " + e.getMessage(), e);
    }
  }

  /** Accepts clients until the server is stopped, dispatching each one to a worker thread. */
  private void acceptLoop(ServerSocket socket, ExecutorService workers) {
    while (running) {
      try {
        dispatch(socket.accept(), workers);
      } catch (IOException e) {
        if (running) {
          LOGGER.warn("Accept failed: {}", e.getMessage());
        }
        if (socket.isClosed()) {
          // Nothing more can be accepted; avoid spinning on a dead socket.
          return;
        }
      }
    }
  }

  /** Submits the client to the worker pool, closing it if the pool no longer accepts work. */
  private void dispatch(Socket client, ExecutorService workers) {
    try {
      workers.submit(() -> handleClient(client));
    } catch (java.util.concurrent.RejectedExecutionException e) {
      // The pool was shut down between accept() and submit(); do not leak the socket.
      closeQuietly(client);
    }
  }

  /**
   * Serves one client connection: reads the request line and headers, enforces Basic proxy
   * authentication (407 otherwise), tunnels CONNECT requests and answers every other method with
   * 405. The client socket is always closed before this method returns.
   */
  private void handleClient(Socket client) {
    try {
      client.setSoTimeout(300_000);
      InputStream in = client.getInputStream();
      OutputStream out = client.getOutputStream();

      // Read the request line and headers
      String requestLine = readLine(in);
      if (requestLine == null) {
        return;
      }
      LOGGER.debug("Proxy request: {}", requestLine);

      String proxyAuth = null;
      String line;
      while ((line = readLine(in)) != null && !line.isEmpty()) {
        // regionMatches(ignoreCase) is locale-independent, unlike toLowerCase()
        if (line.regionMatches(true, 0, PROXY_AUTH_HEADER, 0, PROXY_AUTH_HEADER.length())) {
          proxyAuth = line.substring(PROXY_AUTH_HEADER.length()).trim();
        }
      }

      // Check authentication
      if (!checkAuth(proxyAuth)) {
        writeResponse(
            out,
            "HTTP/1.1 407 Proxy Authentication Required\r\n"
                + "Proxy-Authenticate: Basic realm=\"proxy\"\r\n"
                + "Content-Length: 0\r\n\r\n");
        return;
      }

      // Handle CONNECT (HTTPS tunneling)
      if (requestLine.startsWith("CONNECT ")) {
        handleConnect(requestLine, client, out);
      } else {
        // For non-CONNECT, just close — tests only need CONNECT for Snowflake HTTPS
        writeResponse(out, "HTTP/1.1 405 Method Not Allowed\r\nContent-Length: 0\r\n\r\n");
      }
    } catch (Exception e) {
      LOGGER.debug("Client handler error: {}", e.getMessage());
    } finally {
      closeQuietly(client);
    }
  }

  /**
   * Handles a {@code CONNECT host:port HTTP/1.1} request: answers 400 for an unparsable target, 502
   * when the target cannot be reached, otherwise 200 followed by a bidirectional byte relay. Both
   * sockets are closed when the tunnel ends, however it ends.
   *
   * @throws IOException if writing a response to the client fails
   */
  private void handleConnect(String requestLine, Socket client, OutputStream clientOut)
      throws IOException {
    // Parse "CONNECT host:port HTTP/1.1"
    String[] parts = requestLine.split(" ");
    if (parts.length < 2) {
      writeResponse(clientOut, "HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\n\r\n");
      client.close();
      return;
    }
    String[] hostPort = parts[1].split(":");
    String host = hostPort[0];
    int port;
    try {
      port = hostPort.length > 1 ? Integer.parseInt(hostPort[1]) : 443;
    } catch (NumberFormatException e) {
      writeResponse(clientOut, "HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\n\r\n");
      client.close();
      return;
    }

    final Socket remote = connectUpstream(host, port);
    if (remote == null) {
      // 502 is only meaningful before the tunnel has been acknowledged with a 200.
      try {
        writeResponse(clientOut, "HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\n\r\n");
      } finally {
        client.close();
      }
      return;
    }

    try {
      // Send 200 to client
      writeResponse(clientOut, "HTTP/1.1 200 Connection Established\r\n\r\n");

      // Bidirectional relay: helper thread pumps client -> remote, this thread remote -> client
      Thread toRemote = new Thread(() -> relay(client, remote), "proxy-to-remote");
      toRemote.setDaemon(true);
      toRemote.start();
      relay(remote, client);

      // Give the opposite direction a short grace period to drain before tearing down.
      try {
        toRemote.join(5000);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    } finally {
      // Closing the sockets is what unblocks a relay thread still parked in read().
      closeQuietly(remote);
      closeQuietly(client);
    }
  }

  /** Opens the upstream connection, returning {@code null} when the target cannot be reached. */
  private static Socket connectUpstream(String host, int port) {
    try {
      return new Socket(host, port);
    } catch (Exception e) {
      // IOException for unreachable targets, IllegalArgumentException for an out-of-range port
      LOGGER.debug("Upstream connect to {}:{} failed: {}", host, port, e.getMessage());
      return null;
    }
  }

  /** Copies bytes from one socket to the other until EOF or an I/O error on either side. */
  private static void relay(Socket from, Socket to) {
    try {
      InputStream in = from.getInputStream();
      OutputStream out = to.getOutputStream();
      byte[] buf = new byte[8192];
      int n;
      while ((n = in.read(buf)) != -1) {
        out.write(buf, 0, n);
        out.flush();
      }
    } catch (IOException ignored) {
      // Connection closed
    }
  }

  /**
   * Validates a {@code Proxy-Authorization} header value against the configured credentials.
   *
   * @param proxyAuth header value, or {@code null} when the header was absent
   * @return {@code true} only for a well-formed Basic credential matching {@code username:password}
   */
  private boolean checkAuth(String proxyAuth) {
    if (proxyAuth == null) return false;
    // The authentication scheme name is matched case-insensitively.
    if (!proxyAuth.regionMatches(true, 0, BASIC_SCHEME, 0, BASIC_SCHEME.length())) return false;
    final byte[] raw;
    try {
      raw = Base64.getDecoder().decode(proxyAuth.substring(BASIC_SCHEME.length()).trim());
    } catch (IllegalArgumentException e) {
      // Malformed Base64 is a failed authentication (407), not a handler crash.
      return false;
    }
    String decoded = new String(raw, StandardCharsets.UTF_8);
    return decoded.equals(username + ":" + password);
  }

  /**
   * Reads one line terminated by CRLF, a bare LF, or end of stream, without the terminator.
   *
   * @return the line, or {@code null} if the stream ended before any character was read
   */
  private static String readLine(InputStream in) throws IOException {
    StringBuilder sb = new StringBuilder();
    int c;
    while ((c = in.read()) != -1) {
      if (c == '\r') {
        int next = in.read(); // consume \n
        if (next != '\n' && next != -1) {
          // A lone CR is ordinary content: keep it and the character that followed it.
          sb.append((char) c);
          sb.append((char) next);
          continue;
        }
        break;
      }
      if (c == '\n') break;
      sb.append((char) c);
    }
    return c == -1 && sb.length() == 0 ? null : sb.toString();
  }

  /** Writes an ASCII response to the client and flushes it. */
  private static void writeResponse(OutputStream out, String response) throws IOException {
    out.write(response.getBytes(StandardCharsets.US_ASCII));
    out.flush();
  }

  /** Closes a socket, ignoring any error raised while doing so. */
  private static void closeQuietly(Socket socket) {
    try {
      socket.close();
    } catch (IOException ignored) {
      // best-effort cleanup
    }
  }

  /**
   * Stops accepting connections, closes the server socket and shuts the worker pool down, waiting
   * up to two seconds for workers to finish.
   *
   * @throws IllegalStateException if the server is not running
   */
  public final void stop() {
    if (serverSocket == null) {
      throw new IllegalStateException("Proxy server is not running");
    }

    LOGGER.info("Stopping proxy server on port {}", serverSocket.getLocalPort());
    running = false;
    try {
      serverSocket.close();
    } catch (IOException e) {
      LOGGER.warn("Error closing server socket", e);
    }
    serverSocket = null;

    if (executor != null) {
      executor.shutdownNow();
      try {
        executor.awaitTermination(2, TimeUnit.SECONDS);
      } catch (InterruptedException ignored) {
        Thread.currentThread().interrupt();
      }
      executor = null;
    }
    LOGGER.info("Proxy server stopped");
  }

  /** @return {@code true} while the server socket exists and has not been closed */
  public final boolean isRunning() {
    return serverSocket != null && !serverSocket.isClosed();
  }

  /**
   * @return the local port the proxy is listening on
   * @throws IllegalStateException if the server is not running
   */
  public final int getPort() {
    if (serverSocket == null) {
      throw new IllegalStateException("Proxy server is not running");
    }
    return serverSocket.getLocalPort();
  }

  public final String getUsername() {
    return username;
  }

  public final String getPassword() {
    return password;
  }

  /** JUnit rule hook: starts the proxy before each test. */
  @Override
  protected final void before() {
    start();
  }

  /** JUnit rule hook: stops the proxy after each test if it is still running. */
  @Override
  protected final void after() {
    if (isRunning()) {
      stop();
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/InternalUtilsTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.mock.MockResultSetForSizeTest;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.common.config.types.Password;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link InternalUtils} and {@link PrivateKeyTool}.
 *
 * <p>Checks use JUnit assertions rather than the Java {@code assert} keyword so that they are
 * always evaluated (the keyword is skipped when the JVM runs without {@code -ea}) and report
 * expected/actual values on failure.
 */
public class InternalUtilsTest {
  /** URL used by the tests that build their task configuration by hand. */
  private static final String TEST_ACCOUNT_URL = "https://testaccount.snowflakecomputing.com:443";

  /** Returns a freshly generated private key, Base64-encoded from its encoded form. */
  private static String newEncodedPrivateKey() {
    return Base64.getEncoder().encodeToString(TestUtils.generatePrivateKey().getEncoded());
  }

  @Test
  public void testPrivateKey() {
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0002, () -> PrivateKeyTool.parsePrivateKey("adfsfsaff", null)));

    Map<String, String> connectorConfiguration =
        TestUtils.transformProfileFileToConnectorConfiguration(true);
    String privateKey =
        connectorConfiguration.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
    String pass =
        connectorConfiguration.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE);
    // no exception
    PrivateKeyTool.parsePrivateKey(privateKey, pass);

    // Re-wrap the bare key as a PEM document: header, 64-character lines, footer.
    StringBuilder builder = new StringBuilder();
    builder.append("-----BEGIN RSA PRIVATE KEY-----\n");
    for (int i = 0; i < privateKey.length(); i++) {
      builder.append(privateKey.charAt(i));
      if ((i + 1) % 64 == 0) {
        builder.append("\n");
      }
    }
    builder.append("\n-----END RSA PRIVATE KEY-----");
    String originalKey = builder.toString();
    // no exception
    PrivateKeyTool.parsePrivateKey(originalKey, pass);
  }

  @Test
  public void testTimestampToDateConversion() {
    long t = 1563492758649L;
    assertEquals("2019-07-18T23:32:38Z", InternalUtils.timestampToDate(t));
  }

  @Test
  public void testAssertNotEmpty() {
    InternalUtils.assertNotEmpty("tableName", "name");
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0005, () -> InternalUtils.assertNotEmpty("TABLENAME", null)));
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0005, () -> InternalUtils.assertNotEmpty("tableName", "")));
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0006, () -> InternalUtils.assertNotEmpty("pipeName", null)));
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0006, () -> InternalUtils.assertNotEmpty("pipeName", "")));
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0001, () -> InternalUtils.assertNotEmpty("conf", null)));
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0003, () -> InternalUtils.assertNotEmpty("sfdsfdsfd", null)));
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0003, () -> InternalUtils.assertNotEmpty("zxcxzcx", "")));
  }

  @Test
  public void testMakeJdbcDriverProperties() {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(true);
    SnowflakeURL url = TestUtils.getUrl();
    SinkTaskConfig parsedConfig = SinkTaskConfig.from(config, true);
    Properties prop = InternalUtils.makeJdbcDriverProperties(parsedConfig, url);
    assertTrue(prop.containsKey(InternalUtils.JDBC_DATABASE));
    assertTrue(prop.containsKey(InternalUtils.JDBC_PRIVATE_KEY));
    assertTrue(prop.containsKey(InternalUtils.JDBC_SCHEMA));
    assertTrue(prop.containsKey(InternalUtils.JDBC_USER));
    assertTrue(prop.containsKey(InternalUtils.JDBC_SESSION_KEEP_ALIVE));
    assertTrue(prop.containsKey(InternalUtils.JDBC_SSL));

    assertEquals("true", prop.getProperty(InternalUtils.JDBC_SESSION_KEEP_ALIVE));
    // The SSL flag must mirror what the parsed URL reports.
    assertEquals(url.sslEnabled() ? "on" : "off", prop.getProperty(InternalUtils.JDBC_SSL));

    // Each mandatory setting, when removed, must surface its dedicated error code.
    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0013,
            () -> {
              Map<String, String> t = new HashMap<>(config);
              t.remove(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
              InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url);
            }));

    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0014,
            () -> {
              Map<String, String> t = new HashMap<>(config);
              t.remove(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME);
              InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url);
            }));

    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0015,
            () -> {
              Map<String, String> t = new HashMap<>(config);
              t.remove(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME);
              InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url);
            }));

    assertTrue(
        TestUtils.assertError(
            SnowflakeErrors.ERROR_0016,
            () -> {
              Map<String, String> t = new HashMap<>(config);
              t.remove(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME);
              InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(t, true), url);
            }));
  }

  /**
   * Regression test for SNOW-3029864: snowflake.role.name must be propagated to the JDBC connection
   * properties so that DDL operations (table creation, schema checks) run under the configured role
   * rather than the user's default role.
   */
  @Test
  public void testMakeJdbcDriverProperties_shouldIncludeRoleName() {
    // given
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(true);
    String expectedRole = config.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
    SnowflakeURL url = TestUtils.getUrl();

    // when
    Properties props =
        InternalUtils.makeJdbcDriverProperties(SinkTaskConfig.from(config, true), url);

    // then — the role from connector config must appear in the JDBC properties
    String rolePropertyKey = JdbcPropertyKeys.ROLE;
    assertTrue(
        props.containsKey(rolePropertyKey),
        "JDBC properties must contain the role property (key='"
            + rolePropertyKey
            + "'), but found keys: "
            + props.keySet());
    assertEquals(
        expectedRole,
        props.getProperty(rolePropertyKey),
        "JDBC role property must match the configured snowflake.role.name");
  }

  @Test
  public void testResultSize() throws SQLException {
    ResultSet resultSet = new MockResultSetForSizeTest(0);
    assertEquals(0, InternalUtils.resultSize(resultSet));
    resultSet = new MockResultSetForSizeTest(100);
    assertEquals(100, InternalUtils.resultSize(resultSet));
  }

  @Test
  public void parseJdbcPropertiesMapTest() {
    String input =
        "isInsecureMode:true,  disableSamlURLCheck:false, passcodeInPassword:on, foo:bar,"
            + " networkTimeout:100";
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test")
            .taskId("0")
            .jdbcMap(input)
            .build();
    // when
    Properties jdbcPropertiesMap = InternalUtils.parseJdbcPropertiesMap(config);
    // then — expected value first, so a failure reports "expected 5 but was N"
    assertEquals(5, jdbcPropertiesMap.size());
  }

  @Test
  public void makeJdbcDriverProperties_setsAllFields() {
    String pemKey = newEncodedPrivateKey();
    SnowflakeURL url = new SnowflakeURL(TEST_ACCOUNT_URL);

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .snowflakeDatabase("MY_DB")
            .snowflakeSchema("MY_SCHEMA")
            .snowflakeUser("MY_USER")
            .snowflakePrivateKey(new Password(pemKey))
            .snowflakeRole("MY_ROLE")
            .snowflakeUrl(url.getFullUrl())
            .build();

    Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url);

    assertEquals("MY_DB", props.getProperty(InternalUtils.JDBC_DATABASE));
    assertEquals("MY_SCHEMA", props.getProperty(InternalUtils.JDBC_SCHEMA));
    assertEquals("MY_USER", props.getProperty(InternalUtils.JDBC_USER));
    assertEquals("MY_ROLE", props.getProperty(JdbcPropertyKeys.ROLE));
    assertTrue(props.containsKey(InternalUtils.JDBC_PRIVATE_KEY));
    assertEquals("on", props.getProperty(InternalUtils.JDBC_SSL));
    assertEquals("true", props.getProperty(InternalUtils.JDBC_SESSION_KEEP_ALIVE));
    assertEquals("json", props.getProperty(InternalUtils.JDBC_QUERY_RESULT_FORMAT));
  }

  @Test
  public void makeJdbcDriverProperties_missingPrivateKey_throws() {
    SnowflakeURL url = new SnowflakeURL(TEST_ACCOUNT_URL);

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .snowflakeDatabase("MY_DB")
            .snowflakeSchema("MY_SCHEMA")
            .snowflakeUser("MY_USER")
            .snowflakeRole("MY_ROLE")
            .snowflakeUrl(url.getFullUrl())
            .build();

    SnowflakeKafkaConnectorException exception =
        assertThrows(
            SnowflakeKafkaConnectorException.class,
            () -> InternalUtils.makeJdbcDriverProperties(taskConfig, url));
    assertEquals("0013", exception.getCode());
  }

  @Test
  public void makeJdbcDriverProperties_noRole_omitsRoleProperty() {
    String pemKey = newEncodedPrivateKey();
    SnowflakeURL url = new SnowflakeURL(TEST_ACCOUNT_URL);

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .snowflakeDatabase("MY_DB")
            .snowflakeSchema("MY_SCHEMA")
            .snowflakeUser("MY_USER")
            .snowflakePrivateKey(new Password(pemKey))
            .snowflakeUrl(url.getFullUrl())
            .build();

    Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url);

    assertFalse(
        props.containsKey(JdbcPropertyKeys.ROLE),
        "JDBC properties should not contain role when role is blank");
  }

  @Test
  public void makeJdbcDriverProperties_emptyStringRole_omitsRoleProperty() {
    String pemKey = newEncodedPrivateKey();
    SnowflakeURL url = new SnowflakeURL(TEST_ACCOUNT_URL);

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .snowflakeDatabase("MY_DB")
            .snowflakeSchema("MY_SCHEMA")
            .snowflakeUser("MY_USER")
            .snowflakePrivateKey(new Password(pemKey))
            .snowflakeUrl(url.getFullUrl())
            .snowflakeRole("")
            .build();

    Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url);

    assertFalse(
        props.containsKey(JdbcPropertyKeys.ROLE),
        "JDBC properties should not contain role when role is empty string");
  }

  @Test
  public void makeJdbcDriverProperties_whitespaceRole_omitsRoleProperty() {
    String pemKey = newEncodedPrivateKey();
    SnowflakeURL url = new SnowflakeURL(TEST_ACCOUNT_URL);

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .snowflakeDatabase("MY_DB")
            .snowflakeSchema("MY_SCHEMA")
            .snowflakeUser("MY_USER")
            .snowflakePrivateKey(new Password(pemKey))
            .snowflakeUrl(url.getFullUrl())
            .snowflakeRole("   ")
            .build();

    Properties props = InternalUtils.makeJdbcDriverProperties(taskConfig, url);

    assertFalse(
        props.containsKey(JdbcPropertyKeys.ROLE),
        "JDBC properties should not contain role when role is whitespace");
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/JdbcPropertiesTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import java.util.Properties;
import org.junit.jupiter.api.Test;

/** Tests for merging connection, proxy and user-supplied JDBC properties. */
public class JdbcPropertiesTest {

  /** With no overlapping keys, the merged set contains every property of the three inputs. */
  @Test
  public void shouldCombineProperties() {
    // given
    SnowflakeURL snowflakeUrl = TestUtils.getUrl();
    SinkTaskConfig taskConfig =
        SinkTaskConfig.from(TestUtils.transformProfileFileToConnectorConfiguration(false), true);
    Properties connectionProps = InternalUtils.makeJdbcDriverProperties(taskConfig, snowflakeUrl);

    Properties proxyProps = new Properties();
    proxyProps.setProperty("useProxy", "true");

    Properties userProps = new Properties();
    userProps.setProperty("insecureMode", "true");

    // when
    Properties merged =
        JdbcProperties.create(connectionProps, proxyProps, userProps).getProperties();

    // then
    int expectedSize = connectionProps.size() + proxyProps.size() + userProps.size();
    assertEquals(expectedSize, merged.size());
  }

  /** A user-supplied key that collides with a connection property is rejected with code 0031. */
  @Test
  public void shouldThrowWhen_jdbcMap_overridesConnection() {
    Properties connectionProps = new Properties();
    connectionProps.setProperty("user", "test_user1");

    Properties proxyProps = new Properties();

    Properties userProps = new Properties();
    userProps.setProperty("user", "test_user2");
    userProps.setProperty("insecureMode", "true");

    // expect
    assertOverrideRejected(connectionProps, proxyProps, userProps, "user");
  }

  /** A user-supplied key that collides with a proxy property is rejected with code 0031. */
  @Test
  public void shouldThrowWhen_jdbcMap_overridesProxy() {
    Properties connectionProps = new Properties();
    connectionProps.setProperty("user", "test_user1");

    Properties proxyProps = new Properties();
    proxyProps.setProperty("useProxy", "true");

    Properties userProps = new Properties();
    userProps.setProperty("useProxy", "true");
    userProps.setProperty("insecureMode", "false");

    // expect
    assertOverrideRejected(connectionProps, proxyProps, userProps, "useProxy");
  }

  /**
   * Asserts that merging the three property sets fails with a connector exception whose code is
   * 0031 and whose message names the conflicting key.
   */
  private static void assertOverrideRejected(
      Properties connectionProps,
      Properties proxyProps,
      Properties userProps,
      String conflictingKey) {
    assertThatThrownBy(() -> JdbcProperties.create(connectionProps, proxyProps, userProps))
        .isInstanceOfSatisfying(
            SnowflakeKafkaConnectorException.class,
            thrown -> {
              // property key is printed not value
              assertTrue(thrown.getMessage().contains(conflictingKey));
              assertEquals("0031", thrown.getCode());
            });
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/KCLoggerTest.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.Utils;
import org.junit.Before;
import org.junit.Test;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
import org.slf4j.Logger;
import org.slf4j.MDC;

public class KCLoggerTest {
  // test constants
  private final String name = "test.logger.name";

  // mock and test setup, inject logger into KCLogger
  @Mock(name = "logger")
  private Logger logger = Mockito.mock(Logger.class);

  @InjectMocks private KCLogger kcLogger = new KCLogger(this.name);

  @Before
  public void before() {
    this.kcLogger = new KCLogger(this.name);
    MockitoAnnotations.initMocks(this);
  }

  @Test
  public void testAllLogMessages() {
    String msg = "super useful logging msg";
    String expectedMsg = Utils.formatLogMessage(msg);
    String formatMsg = "super {} useful {} logging {} msg {}";
    String expectedFormattedMsg = Utils.formatLogMessage("super wow useful wow! logging 1 msg yay");

    KCLogger.toggleGlobalMdcLoggingContext(false);

    this.testLogMessagesRunner(msg, expectedMsg);
    this.testLogMessagesWithFormattingRunner(
        formatMsg, expectedFormattedMsg, "wow", "wow!", 1, "yay");
  }

  @Test
  public void testAllLogMessagesWithMDCContext() {
    String mdcContext = "[mdc context] ";
    KCLogger.toggleGlobalMdcLoggingContext(true);
    MDC.put(KCLogger.MDC_CONN_CTX_KEY, mdcContext);

    String msg = "super useful logging msg";
    String expectedMsg = Utils.formatLogMessage(mdcContext + msg);
    String formatMsg = "super {} useful {} logging {} msg {}";
    String expectedFormattedMsg =
        Utils.formatLogMessage(mdcContext + "super wow useful wow! logging 1 msg yay");

    this.testLogMessagesRunner(msg, expectedMsg);
    this.testLogMessagesWithFormattingRunner(
        formatMsg, expectedFormattedMsg, "wow", "wow!", 1, "yay");
  }

  private void testLogMessagesRunner(String msg, String expectedMsg) {
    // info
    Mockito.when(logger.isInfoEnabled()).thenReturn(true);
    kcLogger.info(msg);

    Mockito.verify(logger, Mockito.times(1)).info(expectedMsg);

    // trace
    Mockito.when(logger.isTraceEnabled()).thenReturn(true);
    kcLogger.trace(msg);

    Mockito.verify(logger, Mockito.times(1)).trace(expectedMsg);

    // debug
    Mockito.when(logger.isDebugEnabled()).thenReturn(true);
    kcLogger.debug(msg);

    Mockito.verify(logger, Mockito.times(1)).debug(expectedMsg);

    // warn
    Mockito.when(logger.isWarnEnabled()).thenReturn(true);
    kcLogger.warn(msg);

    Mockito.verify(logger, Mockito.times(1)).warn(expectedMsg);

    // error
    Mockito.when(logger.isErrorEnabled()).thenReturn(true);
    kcLogger.error(msg);

    Mockito.verify(logger, Mockito.times(1)).error(expectedMsg);
  }

  private void testLogMessagesWithFormattingRunner(
      String formatMsg, String expectedFormattedMsg, Object... vars) {
    // info
    Mockito.when(logger.isInfoEnabled()).thenReturn(true);
    kcLogger.info(formatMsg, vars);

    Mockito.verify(logger, Mockito.times(1)).info(expectedFormattedMsg);

    // trace
    Mockito.when(logger.isTraceEnabled()).thenReturn(true);
    kcLogger.trace(formatMsg, vars);

    Mockito.verify(logger, Mockito.times(1)).trace(expectedFormattedMsg);

    // debug
    Mockito.when(logger.isDebugEnabled()).thenReturn(true);
    kcLogger.debug(formatMsg, vars);

    Mockito.verify(logger, Mockito.times(1)).debug(expectedFormattedMsg);

    // warn
    Mockito.when(logger.isWarnEnabled()).thenReturn(true);
    kcLogger.warn(formatMsg, vars);

    Mockito.verify(logger, Mockito.times(1)).warn(expectedFormattedMsg);

    // error
    Mockito.when(logger.isErrorEnabled()).thenReturn(true);
    kcLogger.error(formatMsg, vars);

    Mockito.verify(logger, Mockito.times(1)).error(expectedFormattedMsg);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/NonEncryptedKeyTestSnowflakeConnection.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.internal.TestUtils.transformProfileFileToConnectorConfiguration;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import java.sql.Connection;
import java.util.Map;
import java.util.Properties;
import net.snowflake.client.api.driver.SnowflakeDriver;

/**
 * Test helper that opens a JDBC connection to the test environment, using the connector
 * configuration generated from the locally stored profile file.
 */
public class NonEncryptedKeyTestSnowflakeConnection {

  /**
   * Obtains the connector configuration through {@code
   * transformProfileFileToConnectorConfiguration(false)}, derives the JDBC driver properties from
   * it and connects with a new {@link SnowflakeDriver}.
   *
   * @return the connection handed back by the driver
   * @throws Exception if building the configuration or connecting fails
   */
  public static Connection getConnection() throws Exception {
    final Map<String, String> config = transformProfileFileToConnectorConfiguration(false);

    final String configuredUrl = config.get(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME);
    final SnowflakeURL snowflakeUrl = new SnowflakeURL(configuredUrl);

    final SinkTaskConfig taskConfig = SinkTaskConfig.from(config, true);
    final Properties jdbcProperties =
        InternalUtils.makeJdbcDriverProperties(taskConfig, snowflakeUrl);

    final SnowflakeDriver driver = new SnowflakeDriver();
    return driver.connect(snowflakeUrl.getJdbcUrl(), jdbcProperties);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/ResetProxyConfigExec.java
================================================
package com.snowflake.kafka.connector.internal;

import net.snowflake.client.api.exception.SnowflakeSQLException;

/** Stand-alone entry point that resets the proxy parameters through {@link TestUtils}. */
public class ResetProxyConfigExec {
  /**
   * Prints a start marker, calls {@code TestUtils.resetProxyParametersInJVM()} and prints a
   * completion marker.
   *
   * @param args ignored
   * @throws SnowflakeSQLException declared for the reset call
   */
  public static void main(String[] args) throws SnowflakeSQLException {
    final String startMarker = "ResetProxyConfigExec::Start wiping Proxy config";
    final String doneMarker = "ResetProxyConfigExec::Proxy Parameters reset in JVM in JDBC";

    System.out.println(startMarker);
    TestUtils.resetProxyParametersInJVM();
    System.out.println(doneMarker);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/SchematizationTestUtils.java
================================================
package com.snowflake.kafka.connector.internal;

import java.util.HashMap;
import java.util.Map;

/** Shared fixtures for schematization tests: a name-to-type map and a matching content map. */
public class SchematizationTestUtils {

  /** Type name keyed by column name; one key carries literal double quotes. */
  public static final Map<String, String> SF_JSON_SCHEMA_FOR_TABLE_CREATION = schemaFixture();

  /** Sample value keyed by column name; {@code ID_INT8_OPTIONAL} is deliberately {@code null}. */
  public static final Map<String, Object> CONTENT_FOR_JSON_TABLE_CREATION = contentFixture();

  /** Builds the name-to-type fixture as a plain mutable {@link HashMap}. */
  private static Map<String, String> schemaFixture() {
    final Map<String, String> types = new HashMap<>();
    types.put("ID_INT8", "NUMBER");
    types.put("ID_INT8_OPTIONAL", "VARCHAR");
    types.put("ID_INT16", "NUMBER");
    types.put("\"id_int32_double_quotes\"", "NUMBER");
    types.put("ID_INT64", "NUMBER");
    types.put("FIRST_NAME", "VARCHAR");
    types.put("RATING_FLOAT32", "FLOAT");
    types.put("RATING_FLOAT64", "FLOAT");
    types.put("APPROVAL", "BOOLEAN");
    types.put("INFO_ARRAY", "ARRAY");
    types.put("INFO_MAP", "VARIANT");
    types.put("RECORD_METADATA", "VARIANT");
    return types;
  }

  /**
   * Builds the name-to-value fixture. A {@link HashMap} is required here because one entry maps
   * to {@code null}.
   */
  private static Map<String, Object> contentFixture() {
    final Map<String, Object> values = new HashMap<>();
    values.put("ID_INT8", 0L);
    values.put("ID_INT8_OPTIONAL", null);
    values.put("ID_INT16", 42L);
    values.put("id_int32_double_quotes", 42L);
    values.put("ID_INT64", 42L);
    values.put("FIRST_NAME", "zekai");
    values.put("RATING_FLOAT32", 0.99);
    values.put("RATING_FLOAT64", 0.99);
    values.put("APPROVAL", true);
    values.put("INFO_ARRAY", "[\"a\",\"b\"]");
    values.put("INFO_MAP", "{\"field\":3}");
    values.put("RECORD_METADATA", "RECORD_METADATA_PLACE_HOLDER");
    return values;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/SnowflakeConnectionServiceCacheTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@code CachingConfig}: the defaults used for an empty configuration and the rejection
 * of invalid expiration values.
 */
class SnowflakeConnectionServiceCacheTest {

  /** An empty configuration enables both caches and uses the default expiration constants. */
  @Test
  void testCacheConfigDefaults() {
    final Map<String, String> emptySettings = new HashMap<>();

    final CachingConfig parsed = CachingConfig.fromConfig(emptySettings);

    assertTrue(parsed.isTableExistsCacheEnabled());
    assertEquals(CACHE_TABLE_EXISTS_EXPIRE_MS_DEFAULT, parsed.getTableExistsCacheExpireMs());
    assertTrue(parsed.isPipeExistsCacheEnabled());
    assertEquals(CACHE_PIPE_EXISTS_EXPIRE_MS_DEFAULT, parsed.getPipeExistsCacheExpireMs());
  }

  /** A table-cache expiration of zero is rejected. */
  @Test
  void testCacheConfigInvalidTableExpiration() {
    final Map<String, String> settings = createConfigWithCache(true, 0L, true, 30000L);

    assertThrows(
        IllegalArgumentException.class,
        () -> CachingConfig.fromConfig(settings),
        "Should throw exception for non-positive table expiration");
  }

  /** A negative pipe-cache expiration is rejected. */
  @Test
  void testCacheConfigInvalidPipeExpiration() {
    final Map<String, String> settings = createConfigWithCache(true, 30000L, true, -100L);

    assertThrows(
        IllegalArgumentException.class,
        () -> CachingConfig.fromConfig(settings),
        "Should throw exception for negative pipe expiration");
  }

  /** A non-numeric table-cache expiration is rejected. */
  @Test
  void testCacheConfigInvalidNumberFormat() {
    final Map<String, String> settings = new HashMap<>();
    settings.put(CACHE_TABLE_EXISTS, "true");
    settings.put(CACHE_TABLE_EXISTS_EXPIRE_MS, "invalid");
    settings.put(CACHE_PIPE_EXISTS, "true");
    settings.put(CACHE_PIPE_EXISTS_EXPIRE_MS, "30000");

    assertThrows(
        IllegalArgumentException.class,
        () -> CachingConfig.fromConfig(settings),
        "Should throw exception for invalid number format");
  }

  /** Builds a configuration map holding the four cache settings as strings. */
  private Map<String, String> createConfigWithCache(
      boolean tableCacheOn, long tableTtlMs, boolean pipeCacheOn, long pipeTtlMs) {
    final Map<String, String> settings = new HashMap<>();
    settings.put(CACHE_TABLE_EXISTS, Boolean.toString(tableCacheOn));
    settings.put(CACHE_TABLE_EXISTS_EXPIRE_MS, Long.toString(tableTtlMs));
    settings.put(CACHE_PIPE_EXISTS, Boolean.toString(pipeCacheOn));
    settings.put(CACHE_PIPE_EXISTS_EXPIRE_MS, Long.toString(pipeTtlMs));
    return settings;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/SnowflakeDataSourceFactory.java
================================================
package com.snowflake.kafka.connector.internal;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import java.security.PrivateKey;
import java.util.Map;
import java.util.Properties;
import javax.sql.DataSource;
import net.snowflake.client.api.driver.SnowflakeDriver;
import org.apache.commons.dbcp2.ConnectionFactory;
import org.apache.commons.dbcp2.DriverConnectionFactory;
import org.apache.commons.dbcp2.PoolableConnection;
import org.apache.commons.dbcp2.PoolableConnectionFactory;
import org.apache.commons.dbcp2.PoolingDataSource;
import org.apache.commons.pool2.impl.GenericObjectPool;

/** Factory class for creating DataSource instances using Apache Commons DBCP2 for testing. */
public final class SnowflakeDataSourceFactory {

  public static final String SF_WAREHOUSE = "sfwarehouse"; // for test only
  // Created on the first call to get() and returned by every later call.
  private static DataSource dataSource;

  private SnowflakeDataSourceFactory() {}

  /**
   * Returns the shared pooled {@link DataSource}, building it on first use from the streaming
   * connector configuration supplied by {@code TestUtils}.
   *
   * <p>NOTE(review): the lazy initialization is not synchronized; presumably this is only called
   * from single-threaded test setup - confirm before using it concurrently.
   *
   * @return the shared data source
   * @throws RuntimeException wrapping any exception raised while building the data source
   */
  public static DataSource get() {
    if (dataSource != null) {
      return dataSource;
    } else {
      try {
        final Map<String, String> conf = TestUtils.getConnectorConfigurationForStreaming(false);
        final SnowflakeURL url =
            new SnowflakeURL(conf.get(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME));

        // Extract properties from conf Map
        final String user = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME);
        final String role = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME);
        final String privateKeyStr = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
        final String privateKeyPassphrase =
            conf.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE);
        final String database = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME);
        final String schema = conf.get(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME);
        final String warehouse = conf.get(SF_WAREHOUSE);

        // Assert all required properties are present
        assert user != null : "User must not be null";
        assert privateKeyStr != null : "Private key must not be null";
        assert database != null : "Database must not be null";
        assert schema != null : "Schema must not be null";
        assert warehouse != null : "Warehouse must not be null";

        // Build connection properties
        final Properties connectionProperties = new Properties();
        connectionProperties.setProperty("authenticator", "snowflake_jwt");
        connectionProperties.setProperty("user", user);
        connectionProperties.setProperty("db", database);
        connectionProperties.setProperty("schema", schema);
        // The role is not among the asserted values above, and Properties rejects null values
        // with a NullPointerException, so it is only set when the configuration provides one.
        if (role != null) {
          connectionProperties.setProperty("role", role);
        }
        connectionProperties.setProperty("warehouse", warehouse);

        // JWT key pair auth - set private key
        final PrivateKey privateKey =
            PrivateKeyTool.parsePrivateKey(privateKeyStr, privateKeyPassphrase);
        connectionProperties.put("privateKey", privateKey);

        // Create connection factory with Snowflake driver
        final SnowflakeDriver driver = new SnowflakeDriver();
        final ConnectionFactory connectionFactory =
            new DriverConnectionFactory(driver, url.getJdbcUrl(), connectionProperties);

        // Create poolable connection factory
        final PoolableConnectionFactory poolableConnectionFactory =
            new PoolableConnectionFactory(connectionFactory, null);

        // Pool limits: at most 10 connections in total, min/max idle both set to 1
        final GenericObjectPool<PoolableConnection> connectionPool =
            new GenericObjectPool<>(poolableConnectionFactory);
        connectionPool.setMaxTotal(10);
        connectionPool.setMaxIdle(1);
        connectionPool.setMinIdle(1);

        poolableConnectionFactory.setPool(connectionPool);
        dataSource = new PoolingDataSource<>(connectionPool);
        return dataSource;

      } catch (final Exception e) {
        throw new RuntimeException("Failed to create DataSource", e);
      }
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/SnowflakeURLTest.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector.internal;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import org.junit.Rule;
import org.junit.Test;
import org.junit.contrib.java.lang.system.EnvironmentVariables;

/**
 * Tests for {@link SnowflakeURL}: which scheme, account, host/port and JDBC URL are reported for
 * a range of accepted URL strings, and that an unknown scheme is rejected.
 *
 * <p>Checks use JUnit assertions rather than the Java {@code assert} keyword, so they still run
 * when the JVM is started without {@code -ea} and report expected/actual values on failure.
 */
public class SnowflakeURLTest {
  @Rule public final EnvironmentVariables environmentVariables = new EnvironmentVariables();

  @Test
  public void createFromValidURL() {
    // explicit http scheme and port
    assertUrlParts(
        new SnowflakeURL("http://account.snowflake.com:80"),
        false,
        "account",
        "account.snowflake.com:80",
        80,
        "http");

    // explicit https scheme and port
    SnowflakeURL sfurl = new SnowflakeURL("https://account.snowflake.com:443");
    assertTrue(sfurl.sslEnabled());
    assertEquals("https", sfurl.getScheme());
    assertEquals("account", sfurl.getAccount());

    // no scheme and a leading blank: expected to be reported as https
    assertUrlParts(
        new SnowflakeURL(" account.snowflake.com:80"),
        true,
        "account",
        "account.snowflake.com:80",
        80,
        "https");

    // neither scheme nor port: only has to be accepted
    new SnowflakeURL("account.snowflake.com");

    // http without a port and with a trailing blank: port 80 is expected
    assertUrlParts(
        new SnowflakeURL("http://account.snowflake.com "),
        false,
        "account",
        "account.snowflake.com:80",
        80,
        "http");

    // https without a port: only has to be accepted
    new SnowflakeURL("https://account.snowflake.com");

    sfurl = new SnowflakeURL("https://account.region.aws.privatelink.snowflake.com:443");
    assertEquals("account.region.aws.privatelink.snowflake.com", sfurl.getUrlWithoutPort());
  }

  @Test(expected = SnowflakeKafkaConnectorException.class)
  public void createFromInvalidURL() {
    String url = "htt://account.snowflake.com:80";

    new SnowflakeURL(url);
  }

  @Test
  public void testRegionlessURLString() {
    assertUrlParts(
        new SnowflakeURL("http://org-account.snowflake.com:80"),
        false,
        "org-account",
        "org-account.snowflake.com:80",
        80,
        "http");
  }

  @Test
  public void testRegionlessWithPrivateLinkURL() {
    // test with privatelink too
    assertUrlParts(
        new SnowflakeURL("https://org-account.privatelink.snowflake.com:80"),
        true,
        "org-account",
        "org-account.privatelink.snowflake.com:80",
        80,
        "https");
  }

  /**
   * Asserts the SSL flag, account, full URL, port and scheme of {@code sfurl}, and that its JDBC
   * URL is {@code "jdbc:snowflake://"} followed by the full URL.
   */
  private static void assertUrlParts(
      SnowflakeURL sfurl,
      boolean expectedSsl,
      String expectedAccount,
      String expectedFullUrl,
      long expectedPort,
      String expectedScheme) {
    assertEquals(expectedSsl, sfurl.sslEnabled());
    assertEquals(expectedAccount, sfurl.getAccount());
    assertEquals(expectedFullUrl, sfurl.getFullUrl());
    // Widened to long first so the call always binds to assertEquals(long, long).
    final long actualPort = sfurl.getPort();
    assertEquals(expectedPort, actualPort);
    assertEquals(expectedScheme, sfurl.getScheme());
    assertEquals("jdbc:snowflake://" + sfurl.getFullUrl(), sfurl.getJdbcUrl());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/StandardSnowflakeConnectionServiceDdlTest.java
================================================
package com.snowflake.kafka.connector.internal;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.*;

import com.snowflake.kafka.connector.internal.schemaevolution.ColumnInfos;
import java.lang.reflect.Field;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;

/**
 * Tests for DDL methods in StandardSnowflakeConnectionService: appendColumnsToTable and
 * alterNonNullableColumns.
 */
public class StandardSnowflakeConnectionServiceDdlTest {

  private Connection mockJdbcConn;
  // Separate stubs for the isIcebergTable SHOW query vs the ALTER DDL query.
  private PreparedStatement mockShowStmt;
  private PreparedStatement mockAlterStmt;
  private ResultSet mockEmptyRs;
  private StandardSnowflakeConnectionService service;

  /**
   * Creates a mock JDBC connection that returns one statement for SQL starting with "show" and a
   * different one for any other SQL, then builds the service under test around it.
   */
  @BeforeEach
  public void setUp() throws Exception {
    // isIcebergTable uses SHOW ICEBERG TABLES LIKE → returns empty ResultSet (non-iceberg)
    mockEmptyRs = mock(ResultSet.class);
    when(mockEmptyRs.next()).thenReturn(false);
    mockShowStmt = mock(PreparedStatement.class);
    when(mockShowStmt.executeQuery()).thenReturn(mockEmptyRs);

    // ALTER DDL statement
    mockAlterStmt = mock(PreparedStatement.class);

    mockJdbcConn = mock(Connection.class);
    when(mockJdbcConn.isClosed()).thenReturn(false);
    // The two matchers are mutually exclusive: SQL starting with "show" vs. everything else.
    when(mockJdbcConn.prepareStatement(argThat(sql -> sql != null && sql.startsWith("show"))))
        .thenReturn(mockShowStmt);
    when(mockJdbcConn.prepareStatement(argThat(sql -> sql != null && !sql.startsWith("show"))))
        .thenReturn(mockAlterStmt);

    service = createServiceWithMockConnection(mockJdbcConn);
  }

  /**
   * Instantiates the service through Objenesis and reflectively assigns its {@code conn} and
   * {@code LOGGER} fields.
   *
   * @param mockConn connection stored in the {@code conn} field
   * @return the service instance backed by {@code mockConn}
   * @throws Exception if one of the fields cannot be found or written
   */
  private static StandardSnowflakeConnectionService createServiceWithMockConnection(
      Connection mockConn) throws Exception {
    final Class<StandardSnowflakeConnectionService> serviceType =
        StandardSnowflakeConnectionService.class;

    final StandardSnowflakeConnectionService instance =
        new org.objenesis.ObjenesisStd().newInstance(serviceType);

    final Field connectionField = serviceType.getDeclaredField("conn");
    connectionField.setAccessible(true);
    connectionField.set(instance, mockConn);

    final Field loggerField = serviceType.getDeclaredField("LOGGER");
    loggerField.setAccessible(true);
    loggerField.set(instance, new KCLogger(serviceType.getName()));

    return instance;
  }

  /**
   * Verifies that exactly two statements were prepared on the mock connection and returns the SQL
   * of the second one (the first is the SHOW ICEBERG check, the second the ALTER).
   */
  private String captureAlterSql() throws SQLException {
    final ArgumentCaptor<String> preparedSql = ArgumentCaptor.forClass(String.class);
    verify(mockJdbcConn, times(2)).prepareStatement(preparedSql.capture());
    final List<String> inCallOrder = preparedSql.getAllValues();
    return inCallOrder.get(1);
  }

  /** One new column: checks the generated ALTER text and the single table-name binding. */
  @Test
  public void testAppendColumnsToTable_singleColumn_generatesCorrectSql() throws SQLException {
    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("new_col", new ColumnInfos("VARCHAR", null));

    service.appendColumnsToTable("test_table", toAdd);

    final String alterSql = captureAlterSql();

    // Table name uses identifier(?), column name is quoted inline
    assertTrue(alterSql.startsWith("alter table identifier(?) add column if not exists "));
    assertTrue(alterSql.contains("\"new_col\" VARCHAR"));
    assertTrue(alterSql.contains("comment 'column created by schema evolution"));

    // Only the table name is a binding
    verify(mockAlterStmt).setString(1, "\"test_table\"");
    verify(mockAlterStmt).execute();
  }

  /** Two new columns: the second one must be introduced by its own "if not exists". */
  @Test
  public void testAppendColumnsToTable_multipleColumns_repeatsIfNotExists() throws SQLException {
    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("col_a", new ColumnInfos("VARCHAR", null));
    toAdd.put("col_b", new ColumnInfos("NUMBER", null));

    service.appendColumnsToTable("test_table", toAdd);

    final String alterSql = captureAlterSql();

    assertTrue(alterSql.contains("\"col_a\" VARCHAR"));
    assertTrue(alterSql.contains(", if not exists \"col_b\" NUMBER"));

    verify(mockAlterStmt).setString(1, "\"test_table\"");
    verify(mockAlterStmt).execute();
  }

  /** A column created with a comment string must carry that string in the DDL comment clause. */
  @Test
  public void testAppendColumnsToTable_withComment_includesDdlComment() throws SQLException {
    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("col1", new ColumnInfos("INT", "source field doc"));

    service.appendColumnsToTable("test_table", toAdd);

    final String alterSql = captureAlterSql();

    assertTrue(alterSql.contains("INT comment 'source field doc'"));
  }

  /** A null column map must not prepare any statement. */
  @Test
  public void testAppendColumnsToTable_nullMap_doesNothing() throws SQLException {
    final Map<String, ColumnInfos> noColumns = null;

    service.appendColumnsToTable("test_table", noColumns);

    // No SQL calls at all — not even the isIcebergTable check
    verify(mockJdbcConn, never()).prepareStatement(anyString());
  }

  /** An empty column map must not prepare any statement. */
  @Test
  public void testAppendColumnsToTable_emptyMap_doesNothing() throws SQLException {
    final Map<String, ColumnInfos> noColumns = Collections.emptyMap();

    service.appendColumnsToTable("test_table", noColumns);

    verify(mockJdbcConn, never()).prepareStatement(anyString());
  }

  /** A SQLException while preparing the ALTER must surface as a connector error with 2015. */
  @Test
  public void testAppendColumnsToTable_sqlException_throwsError2015() throws SQLException {
    // isIcebergTable SHOW succeeds (returns empty); only the ALTER fails
    final SQLException failure = new SQLException("test error");
    when(mockJdbcConn.prepareStatement(argThat(sql -> sql != null && !sql.startsWith("show"))))
        .thenThrow(failure);

    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("col1", new ColumnInfos("VARCHAR", null));

    final SnowflakeKafkaConnectorException thrown =
        assertThrows(
            SnowflakeKafkaConnectorException.class,
            () -> service.appendColumnsToTable("test_table", toAdd));

    assertTrue(thrown.getMessage().contains("2015"));
  }

  /** One column: checks the "drop not null" and comment clauses plus the table-name binding. */
  @Test
  public void testAlterNonNullableColumns_singleColumn_generatesCorrectSql() throws SQLException {
    final List<String> toRelax = Arrays.asList("COL1");

    service.alterNonNullableColumns("test_table", toRelax);

    final String alterSql = captureAlterSql();

    // Table name uses identifier(?), column names are quoted inline
    assertTrue(alterSql.startsWith("alter table identifier(?) alter "));
    assertTrue(alterSql.contains("\"COL1\" drop not null"));
    final String expectedCommentClause =
        "\"COL1\" comment 'column altered to be nullable by schema evolution"
            + " from Snowflake Kafka Connector'";
    assertTrue(alterSql.contains(expectedCommentClause));

    verify(mockAlterStmt).setString(1, "\"test_table\"");
    verify(mockAlterStmt).execute();
  }

  /** Two columns: each one must get its own "drop not null" clause. */
  @Test
  public void testAlterNonNullableColumns_multipleColumns_generatesCorrectSql()
      throws SQLException {
    final List<String> toRelax = Arrays.asList("COL_A", "COL_B");

    service.alterNonNullableColumns("test_table", toRelax);

    final String alterSql = captureAlterSql();

    assertTrue(alterSql.contains("\"COL_A\" drop not null"));
    assertTrue(alterSql.contains("\"COL_B\" drop not null"));

    verify(mockAlterStmt).setString(1, "\"test_table\"");
    verify(mockAlterStmt).execute();
  }

  /** A lowercase column name must appear double-quoted in the generated ALTER. */
  @Test
  public void testAppendColumnsToTable_caseSensitiveColumnsQuotedInline() throws SQLException {
    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("city", new ColumnInfos("VARCHAR", null));

    service.appendColumnsToTable("test_table", toAdd);

    final String alterSql = captureAlterSql();

    // Lowercase "city" is quoted inline to preserve case
    assertTrue(alterSql.contains("\"city\" VARCHAR"));
  }

  /** A lowercase column name must appear double-quoted in both generated clauses. */
  @Test
  public void testAlterNonNullableColumns_caseSensitiveColumnsQuotedInline() throws SQLException {
    final List<String> toRelax = Arrays.asList("city");

    service.alterNonNullableColumns("test_table", toRelax);

    final String alterSql = captureAlterSql();

    assertTrue(alterSql.contains("\"city\" drop not null"));
    assertTrue(alterSql.contains("\"city\" comment"));
  }

  /** A double quote inside a column name must be doubled in the quoted identifier. */
  @Test
  public void testAppendColumnsToTable_embeddedQuotesEscaped() throws SQLException {
    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("col\"name", new ColumnInfos("VARCHAR", null));

    service.appendColumnsToTable("test_table", toAdd);

    final String alterSql = captureAlterSql();

    // Embedded double quotes are escaped per SQL standard
    assertTrue(alterSql.contains("\"col\"\"name\" VARCHAR"));
  }

  /** A null column list must not prepare any statement. */
  @Test
  public void testAlterNonNullableColumns_nullList_doesNothing() throws SQLException {
    final List<String> noColumns = null;

    service.alterNonNullableColumns("test_table", noColumns);

    verify(mockJdbcConn, never()).prepareStatement(anyString());
  }

  /** An empty column list must not prepare any statement. */
  @Test
  public void testAlterNonNullableColumns_emptyList_doesNothing() throws SQLException {
    final List<String> noColumns = Collections.emptyList();

    service.alterNonNullableColumns("test_table", noColumns);

    verify(mockJdbcConn, never()).prepareStatement(anyString());
  }

  /** A SQLException while preparing the ALTER must surface as a connector error with 2016. */
  @Test
  public void testAlterNonNullableColumns_sqlException_throwsError2016() throws SQLException {
    // isIcebergTable SHOW succeeds (returns empty); only the ALTER fails
    final SQLException failure = new SQLException("test error");
    when(mockJdbcConn.prepareStatement(argThat(sql -> sql != null && !sql.startsWith("show"))))
        .thenThrow(failure);

    final SnowflakeKafkaConnectorException thrown =
        assertThrows(
            SnowflakeKafkaConnectorException.class,
            () -> service.alterNonNullableColumns("test_table", Arrays.asList("COL1")));

    assertTrue(thrown.getMessage().contains("2016"));
  }

  /** When the SHOW query yields a row, the DDL must start with "alter iceberg table". */
  @Test
  public void testAppendColumnsToTable_icebergTable_usesAlterIcebergTable() throws SQLException {
    // Simulate isIcebergTable returning true
    when(mockEmptyRs.next()).thenReturn(true);

    final Map<String, ColumnInfos> toAdd = new LinkedHashMap<>();
    toAdd.put("new_col", new ColumnInfos("VARCHAR", null));

    service.appendColumnsToTable("iceberg_table", toAdd);

    final String alterSql = captureAlterSql();
    assertTrue(alterSql.startsWith("alter iceberg table identifier(?) add column if not exists "));
  }

  // ---------------------------------------------------------------------------
  // shouldEvolveSchema tests
  // ---------------------------------------------------------------------------

  /**
   * Grants report OWNERSHIP for TEST_ROLE, "show tables" is empty and "show iceberg tables"
   * returns a row with enable_schema_evolution = Y: shouldEvolveSchema must return true.
   */
  @Test
  public void testShouldEvolveSchema_icebergTable_seEnabled_returnsTrue() throws Exception {
    // Grant row: grantee_name = role, privilege = OWNERSHIP
    final ResultSet grantRows = mock(ResultSet.class);
    when(grantRows.next()).thenReturn(true, false);
    when(grantRows.getString("grantee_name")).thenReturn("TEST_ROLE");
    when(grantRows.getString("privilege")).thenReturn("OWNERSHIP");
    final PreparedStatement grantQuery = mock(PreparedStatement.class);
    when(grantQuery.executeQuery()).thenReturn(grantRows);

    // SHOW TABLES returns nothing (iceberg table)
    final ResultSet noRows = mock(ResultSet.class);
    when(noRows.next()).thenReturn(false);
    final PreparedStatement showTablesQuery = mock(PreparedStatement.class);
    when(showTablesQuery.executeQuery()).thenReturn(noRows);

    // SHOW ICEBERG TABLES returns a row with enable_schema_evolution = Y
    final ResultSet icebergRows = mock(ResultSet.class);
    when(icebergRows.next()).thenReturn(true, false);
    when(icebergRows.getString("enable_schema_evolution")).thenReturn("Y");
    final PreparedStatement showIcebergQuery = mock(PreparedStatement.class);
    when(showIcebergQuery.executeQuery()).thenReturn(icebergRows);

    final Connection jdbc = mock(Connection.class);
    when(jdbc.isClosed()).thenReturn(false);
    when(jdbc.prepareStatement(argThat(sql -> sql != null && sql.startsWith("show grants"))))
        .thenReturn(grantQuery);
    when(jdbc.prepareStatement(
            argThat(sql -> sql != null && sql.equals("show tables like ? limit 1"))))
        .thenReturn(showTablesQuery);
    when(jdbc.prepareStatement(
            argThat(sql -> sql != null && sql.equals("show iceberg tables like ? limit 1"))))
        .thenReturn(showIcebergQuery);

    final StandardSnowflakeConnectionService underTest = createServiceWithMockConnection(jdbc);
    assertTrue(underTest.shouldEvolveSchema("iceberg_table", "TEST_ROLE"));
  }

  /**
   * Grants report OWNERSHIP for TEST_ROLE and "show tables" returns a row with
   * enable_schema_evolution = Y: shouldEvolveSchema must return true.
   */
  @Test
  public void testShouldEvolveSchema_regularTable_seEnabled_returnsTrue() throws Exception {
    final ResultSet grantRows = mock(ResultSet.class);
    when(grantRows.next()).thenReturn(true, false);
    when(grantRows.getString("grantee_name")).thenReturn("TEST_ROLE");
    when(grantRows.getString("privilege")).thenReturn("OWNERSHIP");
    final PreparedStatement grantQuery = mock(PreparedStatement.class);
    when(grantQuery.executeQuery()).thenReturn(grantRows);

    final ResultSet tableRows = mock(ResultSet.class);
    when(tableRows.next()).thenReturn(true, false);
    when(tableRows.getString("enable_schema_evolution")).thenReturn("Y");
    final PreparedStatement showTablesQuery = mock(PreparedStatement.class);
    when(showTablesQuery.executeQuery()).thenReturn(tableRows);

    final Connection jdbc = mock(Connection.class);
    when(jdbc.isClosed()).thenReturn(false);
    when(jdbc.prepareStatement(argThat(sql -> sql != null && sql.startsWith("show grants"))))
        .thenReturn(grantQuery);
    when(jdbc.prepareStatement(
            argThat(sql -> sql != null && sql.equals("show tables like ? limit 1"))))
        .thenReturn(showTablesQuery);

    final StandardSnowflakeConnectionService underTest = createServiceWithMockConnection(jdbc);
    assertTrue(underTest.shouldEvolveSchema("regular_table", "TEST_ROLE"));
  }

  /**
   * Sets up a mocked connection where the grants lookup returns an OWNERSHIP row for TEST_ROLE but
   * both the {@code show tables} and {@code show iceberg tables} lookups return no rows, then
   * expects {@code shouldEvolveSchema} to answer {@code false}.
   */
  @Test
  public void testShouldEvolveSchema_tableNotFound_returnsFalse() throws Exception {
    // Shared by both table lookups: never yields a row.
    ResultSet noRows = mock(ResultSet.class);
    when(noRows.next()).thenReturn(false);

    // Grants lookup: exactly one row, OWNERSHIP granted to TEST_ROLE.
    ResultSet ownershipGrant = mock(ResultSet.class);
    when(ownershipGrant.next()).thenReturn(true, false);
    when(ownershipGrant.getString("grantee_name")).thenReturn("TEST_ROLE");
    when(ownershipGrant.getString("privilege")).thenReturn("OWNERSHIP");

    PreparedStatement grantsQuery = mock(PreparedStatement.class);
    when(grantsQuery.executeQuery()).thenReturn(ownershipGrant);
    PreparedStatement tablesQuery = mock(PreparedStatement.class);
    when(tablesQuery.executeQuery()).thenReturn(noRows);
    PreparedStatement icebergTablesQuery = mock(PreparedStatement.class);
    when(icebergTablesQuery.executeQuery()).thenReturn(noRows);

    // Route each SQL text to its prepared statement.
    Connection jdbc = mock(Connection.class);
    when(jdbc.isClosed()).thenReturn(false);
    when(jdbc.prepareStatement(argThat(sql -> sql != null && sql.startsWith("show grants"))))
        .thenReturn(grantsQuery);
    when(jdbc.prepareStatement(argThat(sql -> "show tables like ? limit 1".equals(sql))))
        .thenReturn(tablesQuery);
    when(jdbc.prepareStatement(argThat(sql -> "show iceberg tables like ? limit 1".equals(sql))))
        .thenReturn(icebergTablesQuery);

    StandardSnowflakeConnectionService underTest = createServiceWithMockConnection(jdbc);
    boolean evolvable = underTest.shouldEvolveSchema("missing_table", "TEST_ROLE");
    assertFalse(evolvable);
  }

  /**
   * When the shared result-set mock reports a row (the original author's way of simulating
   * {@code isIcebergTable} returning true), the captured ALTER statement must use the
   * {@code alter iceberg table} form.
   */
  @Test
  public void testAlterNonNullableColumns_icebergTable_usesAlterIcebergTable() throws SQLException {
    // Simulate isIcebergTable returning true
    when(mockEmptyRs.next()).thenReturn(true);

    service.alterNonNullableColumns("iceberg_table", Arrays.asList("COL1"));

    final String expectedPrefix = "alter iceberg table identifier(?) alter ";
    assertTrue(captureAlterSql().startsWith(expectedPrefix));
  }

  /**
   * An "already exists as ICEBERG_TABLE" failure from the mocked statement must not escape
   * {@code createTableWithOnlyMetadataColumn}.
   */
  @Test
  public void testCreateTableWithOnlyMetadataColumn_icebergTableAlreadyExists_doesNotThrow()
      throws SQLException {
    // Snowflake rejects CREATE TABLE IF NOT EXISTS when the name belongs to an ICEBERG TABLE.
    // The method should swallow the error and return normally rather than propagating it.
    final String conflictMessage =
        "SQL compilation error:\nObject 'MY_TABLE' already exists as ICEBERG_TABLE";
    when(mockAlterStmt.execute()).thenThrow(new SQLException(conflictMessage));

    assertDoesNotThrow(() -> service.createTableWithOnlyMetadataColumn("MY_TABLE"));
  }

  /**
   * Any other SQL failure from the mocked statement must surface from
   * {@code createTableWithOnlyMetadataColumn} as a {@code SnowflakeKafkaConnectorException}.
   */
  @Test
  public void testCreateTableWithOnlyMetadataColumn_otherSqlError_throws() throws SQLException {
    when(mockAlterStmt.execute()).thenThrow(new SQLException("Some other SQL error"));

    assertThrows(
        com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException.class,
        () -> service.createTableWithOnlyMetadataColumn("MY_TABLE"));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/TestUtils.java
================================================
/*
 * Copyright (c) 2019 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PASSWORD;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTPS_PROXY_USER;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_HOST;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_PASSWORD;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_PORT;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_PROXY_USER;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.HTTP_USE_PROXY;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE;
import static com.snowflake.kafka.connector.Utils.JDK_HTTP_AUTH_TUNNELING;
import static org.assertj.core.api.Assertions.assertThat;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategies;
import com.fasterxml.jackson.databind.annotation.JsonNaming;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.security.KeyPair;
import java.security.KeyPairGenerator;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.Security;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.function.Function;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.bouncycastle.asn1.nist.NISTObjectIdentifiers;
import org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider;
import org.bouncycastle.openssl.jcajce.JcaPEMWriter;
import org.bouncycastle.operator.OperatorCreationException;
import org.bouncycastle.pkcs.PKCS8EncryptedPrivateKeyInfoBuilder;
import org.bouncycastle.pkcs.jcajce.JcaPKCS8EncryptedPrivateKeyInfoBuilder;
import org.bouncycastle.pkcs.jcajce.JcePKCSPBEOutputEncryptorBuilder;

public class TestUtils {
  // Logger used by the static helpers in this class.
  private static final KCLogger log = new KCLogger(TestUtils.class.getName());

  // Randomness source for generated object names and the ephemeral-schema salt.
  private static final Random random = new Random();
  // Connector name that transformProfileFileToConnectorConfiguration puts into the configuration.
  public static final String TEST_CONNECTOR_NAME = "TEST_CONNECTOR";

  // Environment variable holding the path of the JSON credential profile read by getProfile().
  private static final String SNOWFLAKE_CREDENTIAL_FILE_ENV = "SNOWFLAKE_CREDENTIAL_FILE";

  // Jackson mapper used to deserialize the credential profile.
  private static final ObjectMapper mapper = new ObjectMapper();

  // Created on first use by getUrl() from the profile's host.
  private static SnowflakeURL url = null;

  // Loaded on first use by getProfile().
  private static volatile Profile profile = null;

  // Ephemeral schema: each test run creates its own schema to avoid collisions.
  private static volatile String ephemeralSchema = null;
  // Set to true only while getOrCreateEphemeralSchema() is creating the schema; it lets the
  // nested call made during that bootstrap return the profile's original schema instead.
  private static volatile boolean creatingEphemeralSchema = false;

  /**
   * Credential profile deserialized from the JSON file named by the {@code
   * SNOWFLAKE_CREDENTIAL_FILE} environment variable.
   *
   * <p>JSON keys are expected in snake_case (for example {@code private_key} populates {@link
   * #privateKey}); keys that have no matching field are ignored.
   */
  @JsonNaming(PropertyNamingStrategies.SnakeCaseStrategy.class)
  @JsonIgnoreProperties(ignoreUnknown = true)
  public static class Profile {
    public String user;
    public String role;
    public String host;
    public String database;
    // Original schema from the profile; test configurations may use a salted ephemeral schema.
    public String schema;
    public String warehouse;
    // Used when the caller does not request the encrypted key.
    public String privateKey;
    // Used together with privateKeyPassphrase when the caller requests the encrypted key.
    public String encryptedPrivateKey;
    public String privateKeyPassphrase;
    // Optional; only copied into the configuration when non-null.
    public String password;
    public String oauthClientId;
    public String oauthClientSecret;
    public String oauthRefreshToken;
    public String oauthTokenEndpoint;
    public String desRsaKey;
  }

  /**
   * JSON document in the {@code schema} + {@code payload} envelope form: a struct schema named
   * {@code sf.kc.test} with two non-optional string fields ({@code regionid}, {@code gender}) and
   * a matching payload. Used by {@link #createNativeJsonSinkRecords} with {@code
   * schemas.enable=true}.
   */
  public static final String JSON_WITH_SCHEMA =
      "{\n"
          + "  \"schema\": {\n"
          + "    \"type\": \"struct\",\n"
          + "    \"fields\": [\n"
          + "      {\n"
          + "        \"type\": \"string\",\n"
          + "        \"doc\": \"doc\", \n"
          + "        \"optional\": false,\n"
          + "        \"field\": \"regionid\"\n"
          + "      },\n"
          + "      {\n"
          + "        \"type\": \"string\",\n"
          + "        \"optional\": false,\n"
          + "        \"field\": \"gender\"\n"
          + "      }\n"
          + "    ],\n"
          + "    \"optional\": false,\n"
          + "    \"name\": \"sf.kc.test\"\n"
          + "  },\n"
          + "  \"payload\": {\n"
          + "    \"regionid\": \"Region_5\",\n"
          + "    \"gender\": \"FEMALE\"\n"
          + "  }\n"
          + "}";
  /** Plain JSON object without a schema envelope. */
  public static final String JSON_WITHOUT_SCHEMA = "{\"userid\": \"User_1\"}";

  /**
   * Returns the credential profile, reading it from disk on first use.
   *
   * @return the cached profile
   * @throws IllegalStateException if the credential-file environment variable is unset or empty
   * @throws RuntimeException if the file cannot be read or parsed
   */
  private static Profile getProfile() {
    if (profile != null) {
      return profile;
    }

    final String credentialPath = System.getenv(SNOWFLAKE_CREDENTIAL_FILE_ENV);
    final boolean pathMissing = credentialPath == null || credentialPath.isEmpty();
    if (pathMissing) {
      throw new IllegalStateException(
          SNOWFLAKE_CREDENTIAL_FILE_ENV + " environment variable is not set");
    }

    try {
      profile = mapper.readValue(new File(credentialPath), Profile.class);
    } catch (IOException readFailure) {
      throw new RuntimeException(readFailure);
    }
    return profile;
  }

  /**
   * Generates a fresh 2048-bit RSA key pair and returns its private half.
   *
   * @return the newly generated private key
   * @throws RuntimeException if the JVM has no RSA key pair generator
   */
  public static PrivateKey generatePrivateKey() {
    final KeyPairGenerator rsaGenerator;
    try {
      rsaGenerator = KeyPairGenerator.getInstance("RSA");
    } catch (final NoSuchAlgorithmException unsupported) {
      throw new RuntimeException(unsupported);
    }
    rsaGenerator.initialize(2048);
    return rsaGenerator.generateKeyPair().getPrivate();
  }

  /**
   * Returns the ephemeral schema name for this test run, creating it on first access.
   *
   * <p>The name is {@code <original_schema>_<7-char random salt>}. A JVM shutdown hook drops the
   * schema with CASCADE so all tables/pipes/channels are cleaned up automatically.
   *
   * <p>A re-entrancy guard ({@code creatingEphemeralSchema}) handles the circular call path: {@code
   * getOrCreateEphemeralSchema → getConnection → transformProfileFileToConnectorConfiguration →
   * getOrCreateEphemeralSchema}. During bootstrap the original schema is returned so the JDBC
   * connection can be established.
   *
   * <p>If anything in the creation path throws, the profile's original schema is cached and
   * returned instead; creation is not attempted again for the rest of the JVM's lifetime.
   *
   * @return the salted ephemeral schema name, or the profile's original schema on fallback or
   *     while the schema is still being created
   */
  private static String getOrCreateEphemeralSchema() {
    // Already resolved: return without taking the class lock.
    if (ephemeralSchema != null) {
      return ephemeralSchema;
    }
    synchronized (TestUtils.class) {
      // Re-check under the lock: another thread may have resolved it while we waited.
      if (ephemeralSchema != null) {
        return ephemeralSchema;
      }
      // Re-entrancy guard: while we are creating the schema, the JDBC connection we open will
      // call back into transformProfileFileToConnectorConfiguration → here. Return the original
      // schema so that bootstrap connection can be established.
      if (creatingEphemeralSchema) {
        return getProfile().schema;
      }
      creatingEphemeralSchema = true;
      try {
        String originalSchema = getProfile().schema;
        String database = getProfile().database;

        String salt = randomAlphanumeric(7);
        String salted = originalSchema + "_" + salt;
        String fqn = database + "." + salted;

        log.info("Creating ephemeral test schema: {}", fqn);
        try (Connection conn = NonEncryptedKeyTestSnowflakeConnection.getConnection();
            Statement stmt = conn.createStatement()) {
          stmt.execute("CREATE SCHEMA IF NOT EXISTS " + fqn);
        }

        // Register cleanup only after the schema was created successfully.
        Runtime.getRuntime()
            .addShutdownHook(
                new Thread(
                    () -> {
                      try (Connection c = NonEncryptedKeyTestSnowflakeConnection.getConnection();
                          Statement s = c.createStatement()) {
                        log.info("Dropping ephemeral test schema: {}", fqn);
                        s.execute("DROP SCHEMA IF EXISTS " + fqn + " CASCADE");
                      } catch (Exception e) {
                        // Best effort: a failed drop is only logged.
                        log.error(
                            "Failed to drop ephemeral test schema {}: {}", fqn, e.getMessage());
                      }
                    }));

        // Publish the name last so callers on the lock-free path never see a schema that has
        // not been created yet.
        ephemeralSchema = salted;
        return salted;
      } catch (Exception e) {
        // Snowflake is unreachable (e.g. unit tests without a live connection).
        // Fall back to the original schema so unit tests behave exactly as before.
        // NOTE(review): this catches every Exception from the try block, including a failure of
        // getProfile() itself; in that case the getProfile() call below throws again.
        log.warn("Could not create ephemeral test schema, using original: {}", e.getMessage());
        ephemeralSchema = getProfile().schema;
        return ephemeralSchema;
      } finally {
        creatingEphemeralSchema = false;
      }
    }
  }

  /**
   * Builds a random string drawn from the upper-case letters A-Z and the digits 0-9.
   *
   * @param length number of characters to generate
   * @return a string of exactly {@code length} characters
   */
  private static String randomAlphanumeric(int length) {
    final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    final char[] picked = new char[length];
    for (int pos = 0; pos < picked.length; pos++) {
      picked[pos] = alphabet.charAt(random.nextInt(alphabet.length()));
    }
    return new String(picked);
  }

  /**
   * Builds a connector configuration map from the credential profile.
   *
   * <p>The schema entry comes from {@code getOrCreateEphemeralSchema()}, not directly from the
   * profile.
   *
   * @param takeEncryptedKeyAndPassword when true, the encrypted private key and its passphrase are
   *     used; otherwise the plain private key is used and no passphrase entry is added
   * @return a new mutable configuration map
   */
  public static Map<String, String> transformProfileFileToConnectorConfiguration(
      boolean takeEncryptedKeyAndPassword) {
    final Profile credentials = getProfile();
    final Map<String, String> config = new HashMap<>();

    // Account coordinates.
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, credentials.user);
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, credentials.role);
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME, credentials.database);
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME, getOrCreateEphemeralSchema());
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, credentials.host);
    config.put(SnowflakeDataSourceFactory.SF_WAREHOUSE, credentials.warehouse);

    // Key material: either the encrypted key plus passphrase, or the plain key alone.
    final String keyMaterial =
        takeEncryptedKeyAndPassword ? credentials.encryptedPrivateKey : credentials.privateKey;
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, keyMaterial);
    if (takeEncryptedKeyAndPassword) {
      config.put(SNOWFLAKE_PRIVATE_KEY_PASSPHRASE, credentials.privateKeyPassphrase);
    }

    // password only appears in test profile
    final String password = credentials.password;
    if (password != null) {
      config.put("password", password);
    }

    config.put(KafkaConnectorConfigParams.NAME, TEST_CONNECTOR_NAME);

    // enable test query mark
    config.put(Utils.TASK_ID, "");

    // ITs test features other than compatibility validation; opt out by default
    config.put(
        KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_VALIDATE_COMPATIBILITY_WITH_CLASSIC,
        "false");

    return config;
  }

  /**
   * Builds the profile-based configuration and adjusts it for streaming tests: the task id is set
   * to {@code "0"} and column identifier normalization is switched on.
   *
   * @param takeEncryptedKey forwarded to {@link #transformProfileFileToConnectorConfiguration}
   * @return a new mutable configuration map
   */
  public static Map<String, String> getConnectorConfigurationForStreaming(
      boolean takeEncryptedKey) {
    final Map<String, String> streamingConfig =
        transformProfileFileToConnectorConfiguration(takeEncryptedKey);

    // Overrides the empty task id set by the base configuration.
    streamingConfig.put(Utils.TASK_ID, "0");

    // Existing tests assume column identifier normalization is enabled (uppercasing JSON keys
    // to match Snowflake's default uppercase column names).
    final String normalizationKey =
        com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION;
    streamingConfig.put(normalizationKey, "true");

    return streamingConfig;
  }

  /**
   * Serializes a private key as a passphrase-encrypted PKCS#8 PEM document, encrypted with
   * AES-256-CBC through the {@code BCFIPS} provider.
   *
   * <p>The BouncyCastle FIPS provider is registered with {@link Security} on every call.
   *
   * @param key private key to encrypt
   * @param passwd passphrase protecting the key
   * @return the PEM text of the encrypted private key
   * @throws IOException if writing the PEM object fails
   * @throws OperatorCreationException if the output encryptor cannot be built
   */
  public static String generateAESKey(PrivateKey key, char[] passwd)
      throws IOException, OperatorCreationException {
    Security.addProvider(new BouncyCastleFipsProvider());
    StringWriter writer = new StringWriter();
    PKCS8EncryptedPrivateKeyInfoBuilder pkcs8EncryptedPrivateKeyInfoBuilder =
        new JcaPKCS8EncryptedPrivateKeyInfoBuilder(key);
    // try-with-resources closes (and flushes) the PEM writer even when encryption or
    // serialization throws.
    try (JcaPEMWriter pemWriter = new JcaPEMWriter(writer)) {
      pemWriter.writeObject(
          pkcs8EncryptedPrivateKeyInfoBuilder.build(
              new JcePKCSPBEOutputEncryptorBuilder(NISTObjectIdentifiers.id_aes256_CBC)
                  .setProvider("BCFIPS")
                  .build(passwd)));
    }
    return writer.toString();
  }

  /**
   * Runs a SQL query on a connection obtained from {@code
   * NonEncryptedKeyTestSnowflakeConnection} and hands back its result set.
   *
   * <p>The statement is not closed here; callers read the returned result set after this method
   * returns.
   *
   * @param query sql query string
   * @return result set
   * @throws IllegalStateException wrapping any exception raised while connecting or executing
   */
  static ResultSet executeQuery(String query) {
    try {
      final Connection connection = NonEncryptedKeyTestSnowflakeConnection.getConnection();
      final Statement sqlStatement = connection.createStatement();
      log.debug("Executing query: {}", query);
      return sqlStatement.executeQuery(query);
    } catch (Exception failure) {
      // if ANY exceptions occur, an illegal state has been reached
      throw new IllegalStateException(failure);
    }
  }

  /**
   * Runs a single-parameter SQL statement on a connection obtained from {@code
   * NonEncryptedKeyTestSnowflakeConnection}.
   *
   * @param query sql query string
   * @param parameter parameter to be inserted at index 1
   * @throws RuntimeException wrapping any exception raised while connecting or executing
   */
  public static void executeQueryWithParameter(String query, String parameter) {
    try {
      final Connection connection = NonEncryptedKeyTestSnowflakeConnection.getConnection();
      executeQueryWithParameter(connection, query, parameter);
    } catch (Exception failure) {
      throw new RuntimeException("Error executing query: " + query, failure);
    }
  }

  /**
   * Runs a single-parameter SQL statement on the given connection.
   *
   * <p>The prepared statement is always closed, including when binding or execution fails. The
   * connection itself is left open for the caller.
   *
   * @param conn jdbc connection
   * @param query sql query string
   * @param parameter parameter to be inserted at index 1
   * @throws RuntimeException wrapping any exception raised while preparing, binding, executing or
   *     closing the statement
   */
  public static void executeQueryWithParameter(Connection conn, String query, String parameter) {
    log.debug("Executing query: {}", query);
    // try-with-resources releases the statement on both the success and the failure path; the
    // catch below also covers exceptions thrown by close().
    try (PreparedStatement stmt = conn.prepareStatement(query)) {
      stmt.setString(1, parameter);
      stmt.execute();
    } catch (Exception e) {
      throw new RuntimeException("Error executing query: " + query, e);
    }
  }

  /**
   * Runs a single-parameter SQL query on a connection obtained from {@code
   * NonEncryptedKeyTestSnowflakeConnection} and converts its result set with the given collector.
   *
   * @param query sql query string
   * @param parameter parameter to be inserted at index 1
   * @param resultCollector function to collect result
   * @return result
   * @param <T> result type
   * @throws RuntimeException wrapping any exception raised while connecting, executing or
   *     collecting
   */
  public static <T> T executeQueryAndCollectResult(
      String query, String parameter, Function<ResultSet, T> resultCollector) {
    try {
      final Connection connection = NonEncryptedKeyTestSnowflakeConnection.getConnection();
      return executeQueryAndCollectResult(connection, query, parameter, resultCollector);
    } catch (Exception failure) {
      throw new RuntimeException("Error executing query: " + query, failure);
    }
  }

  /**
   * Runs a single-parameter SQL query on the given connection and converts its result set with
   * the given collector.
   *
   * <p>The result set and the prepared statement are always closed, including when execution or
   * the collector fails. The collector must therefore finish reading the result set before it
   * returns. The connection itself is left open for the caller.
   *
   * <p>If the statement produces no result set, the collector receives {@code null}.
   *
   * @param conn jdbc connection
   * @param query sql query string
   * @param parameter parameter to be inserted at index 1
   * @param resultCollector function to collect result
   * @return result
   * @param <T> result type
   * @throws RuntimeException wrapping any exception raised while preparing, executing, collecting
   *     or closing
   */
  public static <T> T executeQueryAndCollectResult(
      Connection conn, String query, String parameter, Function<ResultSet, T> resultCollector) {
    try (PreparedStatement stmt = conn.prepareStatement(query)) {
      stmt.setString(1, parameter);
      stmt.execute();
      // Nested try-with-resources: the result set is closed first, then the statement, the same
      // order the hand-written close() calls used. A null resource is skipped on close.
      try (ResultSet resultSet = stmt.getResultSet()) {
        return resultCollector.apply(resultSet);
      }
    } catch (Exception e) {
      throw new RuntimeException("Error executing query: " + query, e);
    }
  }

  /**
   * Creates a table whose only column is {@code record_metadata variant}. The table name is
   * wrapped in double quotes in the generated DDL.
   *
   * @param tableName table name
   * @param overwrite if true, execute "create or replace table"; otherwise "create table if not
   *     exists"
   */
  public static void createTableWithMetadataColumn(String tableName, boolean overwrite) {
    final String createClause;
    if (overwrite) {
      createClause = "create or replace table";
    } else {
      createClause = "create table if not exists";
    }
    executeQuery(createClause + " \"" + tableName + "\" (record_metadata variant)");
  }

  /**
   * Creates the single-column metadata table only if it does not exist yet; equivalent to {@link
   * #createTableWithMetadataColumn(String, boolean)} with overwrite=false.
   */
  public static void createTableWithMetadataColumn(String tableName) {
    final boolean replaceExisting = false;
    createTableWithMetadataColumn(tableName, replaceExisting);
  }

  /**
   * Drops a table if it exists. The table name is wrapped in double quotes in the generated
   * statement.
   *
   * @param tableName table name
   */
  public static void dropTable(String tableName) {
    final String quotedName = "\"" + tableName + "\"";
    executeQuery("drop table if exists " + quotedName);
  }

  /**
   * Drops a pipe if it exists. The name is wrapped in double quotes only when it contains a dash
   * or a space.
   *
   * @param pipeName pipe name
   */
  public static void dropPipe(String pipeName) {
    final boolean needsQuoting = pipeName.contains("-") || pipeName.contains(" ");
    String pipeReference = pipeName;
    if (needsQuoting) {
      pipeReference = "\"" + pipeName + "\"";
    }
    executeQuery("drop pipe if exists " + pipeReference);
  }

  /**
   * Selects every row and column of a table. The table name is wrapped in double quotes in the
   * generated query.
   *
   * @param tableName table name
   * @return result set of {@code select *}
   */
  public static ResultSet showTable(String tableName) {
    final String quotedName = "\"" + tableName + "\"";
    return executeQuery("select * from " + quotedName);
  }

  /**
   * Builds a random object name for tests.
   *
   * @param objectName e.g. table, stage, pipe
   * @return {@code kafka_connector_test_<objectName>_<non-negative random long>}
   */
  private static String randomName(String objectName) {
    final long raw = random.nextLong();
    // Fold negative values onto the non-negative range; Long.MIN_VALUE becomes Long.MAX_VALUE.
    final long suffix = raw < 0 ? -(raw + 1) : raw;
    return "kafka_connector_test_" + objectName + "_" + suffix;
  }

  /**
   * @return a random table name, upper-cased with the root locale
   */
  public static String randomTableName() {
    final String mixedCaseName = randomName("table");
    return mixedCaseName.toUpperCase(java.util.Locale.ROOT);
  }

  /**
   * @return a random topic name; unlike table names it is not upper-cased
   */
  public static String randomTopicName() {
    final String topicName = randomName("topic");
    return topicName;
  }

  /**
   * @return the Snowflake URL built from the profile's host, created on first use and cached
   */
  static SnowflakeURL getUrl() {
    if (url != null) {
      return url;
    }
    url = new SnowflakeURL(getProfile().host);
    return url;
  }

  /**
   * Runs {@code func} and reports whether it failed with the expected Snowflake error.
   *
   * <p>Exceptions other than {@code SnowflakeKafkaConnectorException} are not caught here.
   *
   * @param error Snowflake error
   * @param func function throwing exception
   * @return true if a {@code SnowflakeKafkaConnectorException} was thrown and its error code
   *     matches; false if the code does not match or nothing was thrown
   */
  public static boolean assertError(SnowflakeErrors error, Runnable func) {
    boolean matched = false;
    try {
      func.run();
    } catch (SnowflakeKafkaConnectorException thrown) {
      matched = thrown.checkErrorCode(error);
    }
    return matched;
  }

  /**
   * @return snowflake connection service for tests, built from the profile configuration with the
   *     non-encrypted private key
   */
  public static SnowflakeConnectionService getConnectionService() {
    final Map<String, String> properties = transformProfileFileToConnectorConfiguration(false);
    return SnowflakeConnectionServiceFactory.builder().setProperties(properties).build();
  }

  /**
   * @return snowflake connection service for tests, built from the streaming configuration with
   *     the encrypted private key and its passphrase
   */
  public static SnowflakeConnectionService getConnectionServiceWithEncryptedKey() {
    final Map<String, String> properties = getConnectorConfigurationForStreaming(true);
    return SnowflakeConnectionServiceFactory.builder().setProperties(properties).build();
  }

  /**
   * Blanks the JVM proxy system properties that are enabled when a sink task starts. Call this if
   * your test/code executes the Utils.enableJVMProxy function.
   *
   * <p>Each property is set to the empty string rather than removed.
   */
  public static void resetProxyParametersInJVM() {
    final String[] proxyProperties = {
      HTTP_USE_PROXY,
      HTTP_PROXY_HOST,
      HTTP_PROXY_PORT,
      HTTPS_PROXY_HOST,
      HTTPS_PROXY_PORT,
      // No harm in unsetting user password as well
      JDK_HTTP_AUTH_TUNNELING,
      HTTP_PROXY_USER,
      HTTP_PROXY_PASSWORD,
      HTTPS_PROXY_USER,
      HTTPS_PROXY_PASSWORD
    };
    for (String propertyName : proxyProperties) {
      System.setProperty(propertyName, "");
    }
  }

  /**
   * Retrieves a table's row count from Snowflake by reading the {@code rows} column of the first
   * row returned by {@code show tables like '<tableName>'}.
   *
   * <p>NOTE(review): {@code tableName} is embedded as a LIKE pattern, so pattern wildcards in the
   * name presumably match more than the exact table — confirm against the Snowflake SHOW TABLES
   * documentation if exact matching matters.
   *
   * @param tableName table name
   * @return number of rows reported for the table, or 0 if no table matched
   * @throws SQLException if meet connection issue
   */
  public static int tableSize(String tableName) throws SQLException {
    String query = "show tables like '" + tableName + "'";

    // Close the result set once the count has been read instead of leaking it.
    try (ResultSet result = executeQuery(query)) {
      if (result.next()) {
        final int rows = result.getInt("rows");
        log.debug("{} table size is: {}", tableName, rows);
        return rows;
      }
      return 0;
    }
  }

  /**
   * Interface to define the lambda function to be used by assertWithRetry.
   *
   * <p>{@code assertWithRetry} keeps calling {@link #operate()} until it returns {@code true}.
   */
  public interface AssertFunction {
    /**
     * @return true when the condition being waited for holds
     * @throws Exception any failure; assertWithRetry does not catch it
     */
    boolean operate() throws Exception;
  }

  /**
   * Polls a condition until it holds, sleeping between attempts.
   *
   * <p>The condition is evaluated at most {@code maxRetry + 1} times, with a pause of {@code
   * intervalSec} seconds after each failed attempt except the last.
   *
   * @param func the lambda function to be asserted defined by interface AssertFunction
   * @param intervalSec retry time interval in seconds
   * @param maxRetry max retry times
   * @throws InterruptedException with message "Max retry exceeded" when the condition still fails
   *     after the last retry, or if the sleep is interrupted
   * @throws Exception anything thrown by {@code func}
   */
  public static void assertWithRetry(AssertFunction func, int intervalSec, int maxRetry)
      throws Exception {
    for (int attempt = 1; !func.operate(); attempt++) {
      if (attempt > maxRetry) {
        throw new InterruptedException("Max retry exceeded");
      }
      Thread.sleep(intervalSec * 1000L);
    }
  }

  /**
   * Polls a condition with the default schedule: a 5-second interval and at most 20 retries.
   *
   * @param func the condition to poll
   */
  public static void assertWithRetry(AssertFunction func) throws Exception {
    final int defaultIntervalSec = 5;
    final int defaultMaxRetry = 20;
    assertWithRetry(func, defaultIntervalSec, defaultMaxRetry);
  }

  /*
   * Generate noOfRecords sink records for a given topic and partition, with offsets starting at
   * startOffset. The JSON converter runs with schemas.enable=false and is given a null payload.
   */
  public static List<SinkRecord> createJsonStringSinkRecords(
      final long startOffset, final long noOfRecords, final String topicName, final int partitionNo)
      throws Exception {
    final Map<String, String> schemalessConverter =
        Collections.singletonMap("schemas.enable", Boolean.toString(false));
    final byte[] noPayload = null;
    return createJsonRecords(
        startOffset, noOfRecords, topicName, partitionNo, noPayload, schemalessConverter);
  }

  /*
   * Generate noOfRecords blank sink records for a given topic and partition, with offsets starting
   * at startOffset. The JSON converter runs with schemas.enable=false and is given a null payload.
   */
  public static List<SinkRecord> createBlankJsonSinkRecords(
      final long startOffset,
      final long noOfRecords,
      final String topicName,
      final int partitionNo) {
    final Map<String, String> schemalessConverter =
        Collections.singletonMap("schemas.enable", Boolean.toString(false));
    final byte[] noPayload = null;
    return createJsonRecords(
        startOffset, noOfRecords, topicName, partitionNo, noPayload, schemalessConverter);
  }

  /*
   * Generate noOfRecords sink records for a given topic and partition, with offsets starting at
   * startOffset. Every record carries JSON_WITH_SCHEMA converted with schemas.enable=true.
   */
  public static List<SinkRecord> createNativeJsonSinkRecords(
      final long startOffset,
      final long noOfRecords,
      final String topicName,
      final int partitionNo) {
    final Map<String, String> schemaAwareConverter =
        Collections.singletonMap("schemas.enable", Boolean.toString(true));
    final byte[] envelope = TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8);
    return createJsonRecords(
        startOffset, noOfRecords, topicName, partitionNo, envelope, schemaAwareConverter);
  }

  /**
   * Converts one JSON payload with a freshly configured value converter and wraps the outcome in
   * {@code noOfRecords} sink records.
   *
   * <p>Every record shares the same value schema and value, uses the string key {@code "test"},
   * and differs only in its offset, which runs from {@code startOffset} (inclusive) to {@code
   * startOffset + noOfRecords} (exclusive).
   *
   * @param value raw JSON bytes handed to the converter; may be null
   * @param converterConfig configuration passed to the JSON converter
   * @return the generated records in offset order
   */
  private static List<SinkRecord> createJsonRecords(
      final long startOffset,
      final long noOfRecords,
      final String topicName,
      final int partitionNo,
      byte[] value,
      Map<String, String> converterConfig) {
    final JsonConverter jsonConverter = new JsonConverter();
    jsonConverter.configure(converterConfig, false);
    final SchemaAndValue converted = jsonConverter.toConnectData("test", value);

    final List<SinkRecord> generated = new ArrayList<>();
    final long endOffsetExclusive = startOffset + noOfRecords;
    long offset = startOffset;
    while (offset < endOffsetExclusive) {
      final SinkRecord record =
          new SinkRecord(
              topicName,
              partitionNo,
              Schema.STRING_SCHEMA,
              "test",
              converted.schema(),
              converted.value(),
              offset);
      generated.add(record);
      offset++;
    }
    return generated;
  }

  /*
   * Generate noOfRecords sink records for a given topic and partition, with offsets starting at
   * startOffset, whose value is a wide nested struct round-tripped through the Avro converter.
   *
   * The value is a struct named "biggestAvro" holding 10 nested structs ("segment0".."segment9"),
   * each with 10 string fields "segment_<outer>_<inner>" whose value equals the field name.
   * Record keys are "key<offset>".
   */
  public static List<SinkRecord> createBigAvroRecords(
      final long startOffset,
      final long noOfRecords,
      final String topicName,
      final int partitionNo) {
    final int outerSegmentCount = 10;
    final int innerSegmentCount = 10;

    // Build each nested schema together with the struct instance that populates it.
    final List<Schema> segmentSchemas = new ArrayList<>(outerSegmentCount);
    final List<Struct> segmentValues = new ArrayList<>(outerSegmentCount);
    for (int outer = 0; outer < outerSegmentCount; outer++) {
      final SchemaBuilder segmentBuilder = SchemaBuilder.struct().name("segment" + outer);
      for (int inner = 0; inner < innerSegmentCount; inner++) {
        segmentBuilder.field("segment_" + outer + "_" + inner, Schema.STRING_SCHEMA);
      }
      final Schema segmentSchema = segmentBuilder.build();
      segmentSchemas.add(segmentSchema);

      final Struct segmentValue = new Struct(segmentSchema);
      for (int inner = 0; inner < innerSegmentCount; inner++) {
        final String fieldName = "segment_" + outer + "_" + inner;
        segmentValue.put(fieldName, fieldName);
      }
      segmentValues.add(segmentValue);
    }

    // Assemble the root struct: one field per nested segment, named after the segment schema.
    final SchemaBuilder rootBuilder = SchemaBuilder.struct().name("biggestAvro");
    for (Schema segmentSchema : segmentSchemas) {
      rootBuilder.field(segmentSchema.name(), segmentSchema);
    }
    final Struct rootValue = new Struct(rootBuilder.build());
    for (int idx = 0; idx < segmentSchemas.size(); idx++) {
      rootValue.put(segmentSchemas.get(idx).name(), segmentValues.get(idx));
    }

    // Round-trip through the Avro converter backed by an in-memory mock schema registry.
    final SchemaRegistryClient registryClient = new MockSchemaRegistryClient();
    final AvroConverter converter = new AvroConverter(registryClient);
    converter.configure(Collections.singletonMap("schema.registry.url", "http://fake-url"), false);
    final byte[] serialized = converter.fromConnectData(topicName, rootBuilder.schema(), rootValue);
    final SchemaAndValue roundTripped = converter.toConnectData(topicName, serialized);

    final List<SinkRecord> generated = new ArrayList<>();
    final long endOffsetExclusive = startOffset + noOfRecords;
    for (long offset = startOffset; offset < endOffsetExclusive; ++offset) {
      generated.add(
          new SinkRecord(
              topicName,
              partitionNo,
              Schema.STRING_SCHEMA,
              "key" + offset,
              roundTripped.schema(),
              roundTripped.value(),
              offset));
    }
    return generated;
  }

  /**
   * Returns the configuration produced by the streaming config builder with no overrides.
   *
   * @deprecated use SnowflakeSinkConnectorConfigBuilder instead
   */
  @Deprecated
  public static Map<String, String> getConfig() {
    final Map<String, String> streamingDefaults =
        SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    return streamingDefaults;
  }

  /**
   * Check if the schema of the table matches the provided schema.
   *
   * <p>Runs {@code desc table <tableName>} and, for every column returned, verifies that the
   * reported type starts with the expected type from {@code schemaMap} (so precision/scale
   * suffixes are ignored). Column names that are not entirely upper case are looked up wrapped in
   * double quotes. Finally the number of columns must equal the number of entries in {@code
   * schemaMap}.
   *
   * <p>All checks fail with an {@link AssertionError} regardless of whether JVM assertions
   * ({@code -ea}) are enabled.
   *
   * @param tableName the name of the table
   * @param schemaMap the provided schema, mapping column name to expected type prefix
   * @throws SQLException if reading the describe result fails
   */
  public static void checkTableSchema(String tableName, Map<String, String> schemaMap)
      throws SQLException {
    // the table should be checked to exist beforehand
    InternalUtils.assertNotEmpty("tableName", tableName);
    String describeTableQuery = "desc table " + tableName;
    int numberOfColumnExpected = schemaMap.size();
    int numberOfColumnInTable = 0;
    // Close the result set when done instead of leaking it.
    try (ResultSet result = executeQuery(describeTableQuery)) {
      while (result.next()) {
        String colName = result.getString("name");
        // Root locale keeps the case check independent of the JVM's default locale.
        if (!colName.equals(colName.toUpperCase(java.util.Locale.ROOT))) {
          colName = "\"" + colName + "\"";
        }
        final String type = result.getString("type");
        log.info("Checking column name: [{}] should have type: [{}]", colName, type);
        final String expectedType = schemaMap.get(colName);
        if (expectedType == null) {
          // Fail with a readable message instead of a NullPointerException from the matcher.
          throw new AssertionError(
              "Table "
                  + tableName
                  + " has column "
                  + colName
                  + " of type "
                  + type
                  + " that has no expected type in the provided schema");
        }
        // see if the type of the column in sf is the same as expected (ignoring scale)
        assertThat(type).startsWith(expectedType);
        numberOfColumnInTable++;
      }
    }
    // Explicit check: the `assert` keyword is skipped when the JVM runs without -ea.
    if (numberOfColumnExpected != numberOfColumnInTable) {
      throw new AssertionError(
          "Table "
              + tableName
              + " expected "
              + numberOfColumnExpected
              + " columns but found "
              + numberOfColumnInTable);
    }
  }

  /**
   * Check if one row retrieved from the table matches the provided content
   *
   * <p>The assumption is that the rows in the table are the same.
   *
   * <p>String values that start with <code>{</code> or {@code [} have spaces and newlines removed
   * before comparison. A non-null column whose expected value is the literal {@code
   * "RECORD_METADATA_PLACE_HOLDER"} is not compared.
   *
   * <p>All checks fail with an {@link AssertionError} regardless of whether JVM assertions
   * ({@code -ea}) are enabled, including when the table has no rows.
   *
   * @param tableName the name of the table
   * @param contentMap the provided content map from columnName to their value
   * @throws SQLException if reading the row fails
   */
  public static void checkTableContentOneRow(String tableName, Map<String, Object> contentMap)
      throws SQLException {
    InternalUtils.assertNotEmpty("tableName", tableName);
    String getRowQuery = "select * from " + tableName + " limit 1";
    // Close the result set when done instead of leaking it.
    try (ResultSet result = executeQuery(getRowQuery)) {
      if (!result.next()) {
        throw new AssertionError("Table " + tableName + " has no rows to compare");
      }
      // Metadata does not change between columns; fetch it once.
      final ResultSetMetaData metaData = result.getMetaData();
      final int columnCount = metaData.getColumnCount();
      if (columnCount != contentMap.size()) {
        throw new AssertionError(
            "Table "
                + tableName
                + " expected "
                + contentMap.size()
                + " columns but found "
                + columnCount);
      }
      for (int column = 1; column <= columnCount; ++column) {
        String columnName = metaData.getColumnName(column);
        Object expected = contentMap.get(columnName);
        Object value = result.getObject(column);
        if (value == null) {
          if (expected != null) {
            throw new AssertionError("value should be null");
          }
          continue;
        }
        // For map or array
        if (value instanceof String
            && (((String) value).startsWith("{") || ((String) value).startsWith("["))) {
          // Get rid of the formatting added by snowflake
          value = ((String) value).replace(" ", "").replace("\n", "");
        }
        if ("RECORD_METADATA_PLACE_HOLDER".equals(expected)) {
          continue;
        }
        if (!value.equals(expected)) {
          throw new AssertionError("expected: " + expected + " actual: " + value);
        }
      }
    }
  }

  /**
   * Runs {@code select * ... limit 1} against the table and returns the fetched row as a map from
   * column name (as reported by the result set metadata) to column value.
   *
   * @param tableName the name of the table to read from
   * @return the column-name to value map for the current row of the result
   * @throws SQLException if the query or reading the result fails
   */
  public static Map<String, Object> getTableContentOneRow(String tableName) throws SQLException {
    final ResultSet rows = executeQuery("select * from " + tableName + " limit 1");
    rows.next();

    final Map<String, Object> valuesByColumn = new HashMap<>();
    // JDBC columns are 1-based
    int column = 1;
    while (column <= rows.getMetaData().getColumnCount()) {
      valuesByColumn.put(rows.getMetaData().getColumnName(column), rows.getObject(column));
      column++;
    }
    return valuesByColumn;
  }

  /**
   * Counts the rows of a table with {@code select count(*)} and logs the result.
   *
   * @param tableName the name of the table
   * @return the value of the first column of the count query
   * @throws SQLException if the query or reading the result fails
   */
  public static int getNumberOfRows(String tableName) throws SQLException {
    String getRowQuery = "select count(*) from " + tableName;
    ResultSet result = executeQuery(getRowQuery);
    result.next();
    final int rowsNo = result.getInt(1);
    // Log message previously read "Number or rows" (typo).
    log.info("Number of rows: [{}]", rowsNo);
    return rowsNo;
  }

  /**
   * Returns the number of columns reported by the metadata of a {@code select * ... limit 1}
   * query against the table.
   *
   * @param tableName the name of the table
   * @return the column count from the result set metadata
   * @throws SQLException if the query or reading the metadata fails
   */
  public static int getNumberOfColumns(String tableName) throws SQLException {
    final ResultSet probe = executeQuery("select * from " + tableName + " limit 1");
    final int columnCount = probe.getMetaData().getColumnCount();
    return columnCount;
  }

  /**
   * Fails with an {@link AssertionError} unless the table holds exactly the expected number of
   * rows.
   *
   * @param tableName the name of the table
   * @param expectedRowCount the row count the table must have
   * @throws SQLException if counting the rows fails
   */
  public static void assertTableRowCount(String tableName, int expectedRowCount)
      throws SQLException {
    final int rowsFound = getNumberOfRows(tableName);
    if (rowsFound == expectedRowCount) {
      return;
    }
    final String failure =
        String.format(
            "Expected table %s to have %d rows, but it has %d rows",
            tableName, expectedRowCount, rowsFound);
    throw new AssertionError(failure);
  }

  /**
   * Fails with an {@link AssertionError} unless the table has exactly the expected number of
   * columns.
   *
   * @param tableName the name of the table
   * @param expectedColumnCount the column count the table must have
   * @throws SQLException if reading the column count fails
   */
  public static void assertTableColumnCount(String tableName, int expectedColumnCount)
      throws SQLException {
    final int columnsFound = getNumberOfColumns(tableName);
    if (columnsFound == expectedColumnCount) {
      return;
    }
    final String failure =
        String.format(
            "Expected table %s to have %d columns, but it has %d columns",
            tableName, expectedColumnCount, columnsFound);
    throw new AssertionError(failure);
  }

  /**
   * Fails with an {@link AssertionError} unless the metadata of a {@code select * ... limit 1}
   * query lists a column whose name matches {@code columnName}, ignoring case.
   *
   * @param tableName the name of the table
   * @param columnName the column to look for (case-insensitive)
   * @throws SQLException if the query or reading the metadata fails
   */
  public static void assertTableHasColumn(String tableName, String columnName) throws SQLException {
    final ResultSet probe = executeQuery("select * from " + tableName + " limit 1");
    final ResultSetMetaData columns = probe.getMetaData();
    // JDBC columns are 1-based; leave as soon as a match is seen
    for (int index = 1; index <= columns.getColumnCount(); index++) {
      final String candidate = columns.getColumnName(index);
      if (candidate.equalsIgnoreCase(columnName)) {
        return;
      }
    }
    throw new AssertionError(
        String.format(
            "Expected table %s to have column %s, but it was not found", tableName, columnName));
  }

  /**
   * Reads every row of the table ({@code select *}) into a list of maps, each mapping the column
   * name reported by the result set metadata to the value of that column.
   *
   * @param tableName the name of the table; must not be empty
   * @return one map per row, in the order the result set returned them
   * @throws SQLException if the query or reading the result fails
   */
  public static List<Map<String, Object>> getTableRows(String tableName) throws SQLException {
    InternalUtils.assertNotEmpty("tableName", tableName);
    final ResultSet cursor = executeQuery("select * from " + tableName);
    final ResultSetMetaData meta = cursor.getMetaData();
    final int width = meta.getColumnCount();

    final List<Map<String, Object>> collected = new ArrayList<>();
    for (boolean hasRow = cursor.next(); hasRow; hasRow = cursor.next()) {
      final Map<String, Object> current = new HashMap<>();
      // JDBC columns are 1-based
      for (int col = 1; col <= width; col++) {
        current.put(meta.getColumnName(col), cursor.getObject(col));
      }
      collected.add(current);
    }
    return collected;
  }

  /**
   * Asserts the nullability that {@code desc table} reports for a column.
   *
   * <p>Every described column whose name equals {@code columnName} exactly (case-sensitive) must
   * have a {@code null?} value of {@code "Y"} when {@code isNullable} is true, or {@code "N"}
   * otherwise.
   *
   * <p>NOTE(review): if no described column matches {@code columnName}, nothing is asserted and
   * the method returns normally — confirm callers always pass the name exactly as {@code desc
   * table} reports it.
   *
   * @param tableName the name of the table; must not be empty
   * @param columnName the column name to check; must not be empty
   * @param isNullable the expected nullability of the column
   * @throws SQLException if describing the table or reading the result fails
   */
  public static void assertColumnNullable(String tableName, String columnName, boolean isNullable)
      throws SQLException {
    InternalUtils.assertNotEmpty("tableName", tableName);
    InternalUtils.assertNotEmpty("columnName", columnName);
    String describeTableQuery = "desc table " + tableName;
    final String isNullableVal = isNullable ? "Y" : "N";
    // Describe the expectation actually being checked; the description used to claim "should be
    // nullable" even when the caller expected a NOT NULL column.
    final String expectation = isNullable ? "should be nullable" : "should not be nullable";
    ResultSet result = executeQuery(describeTableQuery);
    while (result.next()) {
      String colName = result.getString("name");
      String nullable = result.getString("null?");
      if (columnName.equals(colName)) {
        assertThat(nullable).as("Column %s %s", colName, expectation).isEqualTo(isNullableVal);
      }
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/TombstoneRecordIngestionIT.java
================================================
package com.snowflake.kafka.connector.internal;

import static com.snowflake.kafka.connector.ConnectorConfigValidatorTest.COMMUNITY_CONVERTER_SUBSET;
import static com.snowflake.kafka.connector.internal.TestUtils.getConnectionService;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThat;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext;
import com.snowflake.kafka.connector.internal.streaming.SnowflakeSinkServiceV2;
import com.snowflake.kafka.connector.internal.streaming.StreamingSinkServiceBuilder;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.storage.Converter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

/**
 * Integration test that ingests tombstone (null-valued) records through the streaming sink service
 * for every {@link ConnectorConfigTools.BehaviorOnNullValues} setting and checks the resulting
 * table size and committed offset.
 *
 * <p>Each test creates a fresh table in {@link #beforeEach()} and drops it in {@link
 * #afterEach()}. The sink service is always closed in a {@code finally} block so a failing
 * assertion does not leave the service open.
 */
class TombstoneRecordIngestionIT {
  private final int partition = 0;
  private final String topic = "test";
  private String table;
  private Converter jsonConverter;
  private Map<String, String> converterConfig;

  /** Creates the target table and a schemaless JSON converter used to build null records. */
  @BeforeEach
  void beforeEach() {
    this.table = TestUtils.randomTableName();
    getConnectionService()
        .executeQueryWithParameters(
            format(
                "create or replace table %s (record_metadata variant, gender varchar, regionid"
                    + " varchar)",
                table));

    this.jsonConverter = new JsonConverter();
    this.converterConfig = new HashMap<>();
    this.converterConfig.put("schemas.enable", "false");
    this.jsonConverter.configure(this.converterConfig, false);
  }

  /** Drops the table created for the test. */
  @AfterEach
  void afterEach() {
    TestUtils.dropTable(table);
  }

  @ParameterizedTest(name = "behavior: {0}")
  @EnumSource(ConnectorConfigTools.BehaviorOnNullValues.class)
  void testStreamingTombstoneBehavior(ConnectorConfigTools.BehaviorOnNullValues behavior)
      throws Exception {
    // setup
    TopicPartition topicPartition = new TopicPartition(topic, partition);
    SnowflakeSinkServiceV2 service = buildService(topicPartition, behavior);
    try {
      service.startPartitions(Collections.singleton(topicPartition));
      service.awaitInitialization();

      // create one normal record
      SinkRecord normalRecord =
          TestUtils.createNativeJsonSinkRecords(0, 1, topic, partition).get(0);

      // test
      this.testIngestTombstoneRunner(normalRecord, COMMUNITY_CONVERTER_SUBSET, service, behavior);
    } finally {
      // cleanup, also when the test body fails
      service.closeAll();
    }
  }

  @ParameterizedTest(name = "behavior: {0}")
  @EnumSource(ConnectorConfigTools.BehaviorOnNullValues.class)
  void testStreamingTombstoneBehaviorWithSchematization(
      ConnectorConfigTools.BehaviorOnNullValues behavior) throws Exception {
    // setup
    TopicPartition topicPartition = new TopicPartition(topic, partition);
    SnowflakeSinkServiceV2 service = buildService(topicPartition, behavior);
    try {
      service.startPartitions(Collections.singleton(topicPartition));
      service.awaitInitialization();

      // create one normal record
      SinkRecord normalRecord =
          TestUtils.createNativeJsonSinkRecords(0, 1, topic, partition).get(0);
      service.insert(normalRecord); // schematization needs first insert for evolution

      // test
      this.testIngestTombstoneRunner(normalRecord, COMMUNITY_CONVERTER_SUBSET, service, behavior);
    } finally {
      // cleanup, also when the test body fails
      service.closeAll();
    }
  }

  /**
   * Builds (but does not start) a streaming sink service that maps {@link #topic} to {@link
   * #table} and uses the given null-value behavior.
   */
  private SnowflakeSinkServiceV2 buildService(
      TopicPartition topicPartition, ConnectorConfigTools.BehaviorOnNullValues behavior)
      throws Exception {
    SinkTaskConfig taskConfig =
        SinkTaskConfig.builderFrom(TestUtils.getConnectorConfigurationForStreaming(false))
            .topicToTableMap(Collections.singletonMap(topic, table))
            .behaviorOnNullValues(behavior)
            .build();
    return StreamingSinkServiceBuilder.builder(getConnectionService(), taskConfig)
        .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
        .build();
  }

  /**
   * Inserts {@code normalRecord} followed by several tombstone records (hand-built ones plus one
   * per converter) and verifies the table size, the committed offset and that exactly one row
   * carries the normal record's values.
   *
   * <p>With {@code BehaviorOnNullValues.DEFAULT} all records are expected to land in the table;
   * with any other behavior only the normal record is expected.
   */
  // all ingestion methods should have the same behavior for tombstone records
  private void testIngestTombstoneRunner(
      SinkRecord normalRecord,
      List<Converter> converters,
      SnowflakeSinkService service,
      ConnectorConfigTools.BehaviorOnNullValues behavior)
      throws Exception {
    int offset = 1; // normalRecord should be offset 0
    List<SinkRecord> sinkRecords = new ArrayList<>();
    sinkRecords.add(normalRecord);

    // create tombstone records
    SchemaAndValue nullRecordInput = this.jsonConverter.toConnectData(topic, null);
    SinkRecord allNullRecord1 = new SinkRecord(topic, partition, null, null, null, null, offset++);
    SinkRecord allNullRecord2 =
        new SinkRecord(
            topic,
            partition,
            null,
            null,
            nullRecordInput.schema(),
            nullRecordInput.value(),
            offset++);
    SinkRecord allNullRecord3 =
        new SinkRecord(
            topic,
            partition,
            nullRecordInput.schema(),
            nullRecordInput.value(),
            nullRecordInput.schema(),
            nullRecordInput.value(),
            offset++);

    // add tombstone records
    sinkRecords.addAll(Arrays.asList(allNullRecord1, allNullRecord2, allNullRecord3));

    // create and add tombstone records from each converter
    // (separate map so the field used for jsonConverter is neither shadowed nor mutated)
    Map<String, String> tombstoneConverterConfig = new HashMap<>();
    tombstoneConverterConfig.put("schemas.enable", "false");
    for (Converter converter : converters) {
      // handle avro converter
      if (converter.toString().contains("io.confluent.connect.avro.AvroConverter")) {
        SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient();
        converter = new AvroConverter(schemaRegistry);
        tombstoneConverterConfig.put("schema.registry.url", "http://fake-url");
      }

      converter.configure(tombstoneConverterConfig, false);
      SchemaAndValue input = converter.toConnectData(topic, null);
      sinkRecords.add(
          new SinkRecord(
              topic,
              partition,
              Schema.STRING_SCHEMA,
              converter.toString(),
              input.schema(),
              input.value(),
              offset));

      offset++;
    }

    // insert all records
    service.insert(sinkRecords);

    // verify inserted (offset updates happen automatically in streaming)
    int expectedOffset =
        behavior == ConnectorConfigTools.BehaviorOnNullValues.DEFAULT ? sinkRecords.size() : 1;
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == expectedOffset, 10, 20);
    TestUtils.assertWithRetry(
        () -> service.getOffset(new TopicPartition(topic, partition)) == expectedOffset, 10, 20);

    // assert that exactly one row has values in those columns
    assertThat(
            TestUtils.getTableRows(table).stream()
                .filter(
                    row ->
                        "FEMALE".equals(row.get("GENDER"))
                            && "Region_5".equals(row.get("REGIONID")))
                .count())
        .isEqualTo(1);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/metrics/MetricsJmxReporterTest.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

import static org.junit.Assert.*;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import org.junit.Before;
import org.junit.Test;

/** Unit tests for metric removal through {@link MetricsJmxReporter}. */
public class MetricsJmxReporterTest {
  private MetricRegistry metricRegistry;
  private MetricsJmxReporter reporter;

  @Before
  public void setUp() {
    metricRegistry = new MetricRegistry();
    reporter = new MetricsJmxReporter(metricRegistry, "testConnector");
  }

  /** Registers a constant-valued gauge under {@code name}. */
  private void registerGauge(String name, long value) {
    metricRegistry.register(name, (Gauge<Long>) () -> value);
  }

  /** Looks up a gauge by name in the registry; {@code null} when it is not registered. */
  private Gauge<?> gauge(String name) {
    return metricRegistry.getGauges().get(name);
  }

  /** Number of metrics currently held by the registry. */
  private int metricCount() {
    return metricRegistry.getMetrics().size();
  }

  @Test
  public void testRemoveMetricByExactName() {
    final String ch1Processed = "channel:ch1/offsets/processed-offset";
    final String ch1Persisted = "channel:ch1/offsets/persisted-offset";
    final String ch2Processed = "channel:ch2/offsets/processed-offset";
    registerGauge(ch1Processed, 42L);
    registerGauge(ch1Persisted, 10L);
    registerGauge(ch2Processed, 99L);

    assertEquals(3, metricCount());

    reporter.removeMetric(ch1Processed);

    // only the exactly-named metric is gone
    assertEquals(2, metricCount());
    assertNull(gauge(ch1Processed));
    assertNotNull(gauge(ch1Persisted));
    assertNotNull(gauge(ch2Processed));
  }

  @Test
  public void testRemoveMetricNonexistentIsNoOp() {
    registerGauge("channel:ch1/offsets/processed-offset", 42L);

    reporter.removeMetric("channel:nonexistent/offsets/foo");

    assertEquals(1, metricCount());
  }

  @Test
  public void testRemoveMetricsFromRegistryStillWorks() {
    registerGauge("channel:ch1/offsets/a", 1L);
    registerGauge("channel:ch1/offsets/b", 2L);
    registerGauge("channel:ch2/offsets/a", 3L);

    reporter.removeMetricsFromRegistry("channel:ch1");

    // both ch1 metrics are removed, the ch2 metric survives
    assertEquals(1, metricCount());
    assertNotNull(gauge("channel:ch2/offsets/a"));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/metrics/SnowflakeSinkTaskMetricsTest.java
================================================
package com.snowflake.kafka.connector.internal.metrics;

import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.taskMetricName;
import static com.snowflake.kafka.connector.internal.metrics.SnowflakeSinkTaskMetrics.*;
import static org.junit.Assert.*;

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class SnowflakeSinkTaskMetricsTest {

  private static final String TASK_ID = "3";
  private static final String PREFIX = "task-" + TASK_ID;

  private MetricRegistry metricRegistry;
  private MetricsJmxReporter metricsJmxReporter;
  private SnowflakeSinkTaskMetrics metrics;

  @Before
  public void setUp() {
    metricRegistry = new MetricRegistry();
    metricsJmxReporter = new MetricsJmxReporter(metricRegistry, TEST_CONNECTOR_NAME);
  }

  @After
  public void tearDown() {
    if (metrics != null) {
      metrics.unregister();
    }
  }

  private void createMetrics() {
    metrics = new SnowflakeSinkTaskMetrics(TEST_CONNECTOR_NAME, TASK_ID, metricsJmxReporter);
  }

  private void createMetricsWithSdkClientCount(int initialCount) {
    AtomicInteger sdkCount = new AtomicInteger(initialCount);
    metrics =
        new SnowflakeSinkTaskMetrics(
            TEST_CONNECTOR_NAME, TASK_ID, metricsJmxReporter, sdkCount::get);
  }

  @Test
  public void testAllMetricsRegistered() {
    createMetrics();

    // Method duration timers
    assertNotNull(
        metricRegistry.getTimers().get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PUT_DURATION)));
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PRECOMMIT_DURATION)));
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PRECOMMIT_OFFSET_FETCH_DURATION)));

    // Lifecycle duration timers
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, OPEN_DURATION)));
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CLOSE_DURATION)));
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, START_DURATION)));

    // Channel/SDK timers
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_DURATION)));
    assertNotNull(
        metricRegistry
            .getTimers()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_CREATE_DURATION)));

    // Meter
    assertNotNull(
        metricRegistry.getMeters().get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PUT_RECORDS)));

    // Counters
    assertNotNull(
        metricRegistry
            .getCounters()
            .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, PRECOMMIT_PARTITIONS_SKIPPED)));
    assertNotNull(
        metricRegistry.getCounters().get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, OPEN_COUNT)));
    assertNotNull(
        metricRegistry
            .getCounters()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CLOSE_COUNT)));
    assertNotNull(
        metricRegistry
            .getCounters()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, CHANNEL_OPEN_COUNT)));

    // Gauges
    assertNotNull(
        metricRegistry
            .getGauges()
            .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, ASSIGNED_PARTITIONS)));
  }

  @Test
  public void testPutDurationTimer() {
    createMetrics();
    Timer.Context ctx = metrics.putDuration().time();
    ctx.stop();
    assertEquals(1, metrics.putDuration().getCount());
  }

  @Test
  public void testPreCommitDurationTimer() {
    createMetrics();
    Timer.Context ctx = metrics.preCommitDuration().time();
    ctx.stop();
    assertEquals(1, metrics.preCommitDuration().getCount());
  }

  @Test
  public void testLifecycleTimers() {
    createMetrics();

    Timer.Context openCtx = metrics.openDuration().time();
    openCtx.stop();
    assertEquals(1, metrics.openDuration().getCount());

    Timer.Context closeCtx = metrics.closeDuration().time();
    closeCtx.stop();
    assertEquals(1, metrics.closeDuration().getCount());

    Timer.Context startCtx = metrics.startDuration().time();
    startCtx.stop();
    assertEquals(1, metrics.startDuration().getCount());
  }

  @Test
  public void testChannelAndSdkTimers() {
    createMetrics();

    Timer.Context channelCtx = metrics.channelOpenDuration().time();
    channelCtx.stop();
    Timer.Context channelCtx2 = metrics.channelOpenDuration().time();
    channelCtx2.stop();
    assertEquals(2, metrics.channelOpenDuration().getCount());

    Timer.Context sdkCtx = metrics.sdkClientCreateDuration().time();
    sdkCtx.stop();
    assertEquals(1, metrics.sdkClientCreateDuration().getCount());

    Timer.Context fetchCtx = metrics.preCommitOffsetFetchDuration().time();
    fetchCtx.stop();
    assertEquals(1, metrics.preCommitOffsetFetchDuration().getCount());
  }

  @Test
  public void testPutRecordsMeter() {
    createMetrics();
    metrics.putRecords().mark(100);
    metrics.putRecords().mark(50);
    assertEquals(150, metrics.putRecords().getCount());
  }

  @Test
  public void testPreCommitPartitionsSkipped() {
    createMetrics();
    metrics.preCommitPartitionsSkipped().inc(3);
    assertEquals(3, metrics.preCommitPartitionsSkipped().getCount());
  }

  @Test
  public void testAssignedPartitionsGauge() {
    createMetrics();
    assertEquals(0, metrics.getAssignedPartitions());
    metrics.setAssignedPartitions(12);
    assertEquals(12, metrics.getAssignedPartitions());

    @SuppressWarnings("unchecked")
    Gauge<Integer> gauge =
        metricRegistry
            .getGauges()
            .get(taskMetricName(PREFIX, TASK_SUB_DOMAIN, ASSIGNED_PARTITIONS));
    assertEquals(Integer.valueOf(12), gauge.getValue());
  }

  @Test
  public void testLifecycleCounters() {
    createMetrics();
    metrics.openCount().inc();
    metrics.openCount().inc();
    metrics.closeCount().inc();
    assertEquals(2, metrics.openCount().getCount());
    assertEquals(1, metrics.closeCount().getCount());
  }

  @Test
  public void testChannelOpenCount() {
    createMetrics();
    metrics.channelOpenCount().inc();
    metrics.channelOpenCount().inc();
    metrics.channelOpenCount().inc();
    assertEquals(3, metrics.channelOpenCount().getCount());
  }

  @Test
  public void testSdkClientCountGauge() {
    createMetricsWithSdkClientCount(5);

    @SuppressWarnings("unchecked")
    Gauge<Integer> gauge =
        metricRegistry
            .getGauges()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_COUNT));
    assertNotNull(gauge);
    assertEquals(Integer.valueOf(5), gauge.getValue());
  }

  @Test
  public void testSdkClientCountGaugeNotRegisteredWithoutSupplier() {
    createMetrics();
    assertNull(
        metricRegistry
            .getGauges()
            .get(taskMetricName(PREFIX, LIFECYCLE_SUB_DOMAIN, SDK_CLIENT_COUNT)));
  }

  @Test
  public void testUnregisterRemovesAllMetrics() {
    createMetrics();
    assertFalse(metricRegistry.getMetrics().isEmpty());
    metrics.unregister();
    assertTrue(metricRegistry.getMetrics().isEmpty());
    metrics = null;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/ColumnInfosTest.java
================================================
package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;

import org.junit.jupiter.api.Test;

/** Unit tests for {@link ColumnInfos}: DDL comment rendering, null checks and equality. */
class ColumnInfosTest {

  /** DDL fragment expected when no explicit comment is supplied. */
  private static final String FALLBACK_COMMENT_DDL =
      " comment 'column created by schema evolution from Snowflake Kafka Connector' ";

  @Test
  void getDdlComments_withComment() {
    String ddl = new ColumnInfos("VARCHAR", "user name field").getDdlComments();
    assertThat(ddl).isEqualTo(" comment 'user name field' ");
  }

  @Test
  void getDdlComments_withoutComment() {
    String ddl = new ColumnInfos("INT").getDdlComments();
    assertThat(ddl).isEqualTo(FALLBACK_COMMENT_DDL);
  }

  @Test
  void getDdlComments_withNullComment() {
    String ddl = new ColumnInfos("INT", null).getDdlComments();
    assertThat(ddl).isEqualTo(FALLBACK_COMMENT_DDL);
  }

  @Test
  void getDdlComments_escapeSingleQuotes() {
    // a single quote in the comment is doubled in the rendered DDL
    String ddl = new ColumnInfos("VARCHAR", "it's a test").getDdlComments();
    assertThat(ddl).isEqualTo(" comment 'it''s a test' ");
  }

  @Test
  void constructorRejectsNullColumnType() {
    assertThrows(NullPointerException.class, () -> new ColumnInfos(null));
    assertThrows(NullPointerException.class, () -> new ColumnInfos(null, "comment"));
  }

  @Test
  void equalityAndHashCode() {
    ColumnInfos reference = new ColumnInfos("VARCHAR", "comment");
    ColumnInfos sameValues = new ColumnInfos("VARCHAR", "comment");
    ColumnInfos otherType = new ColumnInfos("INT", "comment");
    ColumnInfos noComment = new ColumnInfos("VARCHAR", null);

    assertEquals(reference, sameValues);
    assertEquals(reference.hashCode(), sameValues.hashCode());
    assertNotEquals(reference, otherType);
    assertNotEquals(reference, noComment);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/SchemaEvolutionTargetItemsTest.java
================================================
package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@link SchemaEvolutionTargetItems}: constructor handling of null and mutable
 * input sets, immutability of the getters, and equality.
 */
class SchemaEvolutionTargetItemsTest {

  /** Builds a fresh mutable set holding the given column names. */
  private static Set<String> columns(String... names) {
    return new HashSet<>(Arrays.asList(names));
  }

  @Test
  void hasDataForSchemaEvolution_withColumnsToAdd() {
    SchemaEvolutionTargetItems target =
        new SchemaEvolutionTargetItems("table", Collections.emptySet(), columns("COL1"));

    assertTrue(target.hasDataForSchemaEvolution());
  }

  @Test
  void hasDataForSchemaEvolution_withColumnsToDropNonNull() {
    SchemaEvolutionTargetItems target =
        new SchemaEvolutionTargetItems("table", columns("COL1"), Collections.emptySet());

    assertTrue(target.hasDataForSchemaEvolution());
  }

  @Test
  void hasDataForSchemaEvolution_empty() {
    SchemaEvolutionTargetItems target =
        new SchemaEvolutionTargetItems("table", Collections.emptySet(), Collections.emptySet());

    assertFalse(target.hasDataForSchemaEvolution());
  }

  @Test
  void constructorHandlesNullSets() {
    SchemaEvolutionTargetItems target = new SchemaEvolutionTargetItems("table", null, null);

    assertThat(target.getColumnsToAdd()).isEmpty();
    assertThat(target.getColumnsToDropNonNullability()).isEmpty();
    assertFalse(target.hasDataForSchemaEvolution());
  }

  @Test
  void twoArgConstructorSetsColumnsToAdd() {
    Set<String> toAdd = columns("A", "B");

    SchemaEvolutionTargetItems target = new SchemaEvolutionTargetItems("table", toAdd);

    assertThat(target.getColumnsToAdd()).containsExactlyInAnyOrder("A", "B");
    assertThat(target.getColumnsToDropNonNullability()).isEmpty();
  }

  @Test
  void gettersReturnUnmodifiableSets() {
    SchemaEvolutionTargetItems target =
        new SchemaEvolutionTargetItems("table", columns("DROP1"), columns("ADD1"));

    assertThrows(UnsupportedOperationException.class, () -> target.getColumnsToAdd().add("X"));
    assertThrows(
        UnsupportedOperationException.class,
        () -> target.getColumnsToDropNonNullability().add("X"));
  }

  @Test
  void defensiveCopyPreventsExternalMutation() {
    Set<String> callerOwned = columns("COL1");
    SchemaEvolutionTargetItems target =
        new SchemaEvolutionTargetItems("table", callerOwned, Collections.emptySet());

    // mutate the caller's set after construction; the target must not see the change
    callerOwned.add("COL2");

    assertThat(target.getColumnsToDropNonNullability()).containsExactly("COL1");
  }

  @Test
  void equalityAndHashCode() {
    SchemaEvolutionTargetItems reference =
        new SchemaEvolutionTargetItems("t", columns("C1"), columns("C2"));
    SchemaEvolutionTargetItems sameValues =
        new SchemaEvolutionTargetItems("t", columns("C1"), columns("C2"));
    SchemaEvolutionTargetItems emptySets =
        new SchemaEvolutionTargetItems("t", Collections.emptySet(), Collections.emptySet());

    assertEquals(reference, sameValues);
    assertEquals(reference.hashCode(), sameValues.hashCode());
    assertNotEquals(reference, emptySets);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeColumnTypeMapperTest.java
================================================
package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.assertj.core.api.Assertions.assertThat;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import java.util.stream.Stream;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Parameterized tests for {@link SnowflakeColumnTypeMapper}: Kafka Connect schema types (with an
 * optional logical schema name) to Snowflake column types, and JSON node types to Kafka Connect
 * schema types.
 */
class SnowflakeColumnTypeMapperTest {

  private final SnowflakeColumnTypeMapper mapper = new SnowflakeColumnTypeMapper();

  @ParameterizedTest(name = "should map Kafka type {0} to Snowflake column type {2}")
  @MethodSource("kafkaTypesToMap")
  void shouldMapKafkaTypeToSnowflakeColumnType(
      Schema.Type kafkaType, String schemaName, String expectedSnowflakeType) {
    String actualSnowflakeType = mapper.mapToColumnType(kafkaType, schemaName);

    assertThat(actualSnowflakeType).isEqualTo(expectedSnowflakeType);
  }

  @ParameterizedTest()
  @MethodSource("jsonNodeTypesToMap")
  void shouldMapJsonNodeTypeToKafkaType(JsonNode value, Schema.Type expectedKafkaType) {
    Schema.Type actualKafkaType = mapper.mapJsonNodeTypeToKafkaType(value);

    assertThat(actualKafkaType).isEqualTo(expectedKafkaType);
  }

  /** Cases as (Kafka schema type, logical schema name or null, expected Snowflake type). */
  private static Stream<Arguments> kafkaTypesToMap() {
    final String noLogicalName = null;
    return Stream.of(
        Arguments.of(Schema.Type.INT8, noLogicalName, "BYTEINT"),
        Arguments.of(Schema.Type.INT16, noLogicalName, "SMALLINT"),
        Arguments.of(Schema.Type.INT32, Date.LOGICAL_NAME, "DATE"),
        Arguments.of(Schema.Type.INT32, Time.LOGICAL_NAME, "TIME(6)"),
        Arguments.of(Schema.Type.INT32, noLogicalName, "INT"),
        Arguments.of(Schema.Type.INT64, Timestamp.LOGICAL_NAME, "TIMESTAMP(6)"),
        Arguments.of(Schema.Type.INT64, noLogicalName, "BIGINT"),
        Arguments.of(Schema.Type.FLOAT32, noLogicalName, "FLOAT"),
        Arguments.of(Schema.Type.FLOAT64, noLogicalName, "DOUBLE"),
        Arguments.of(Schema.Type.BOOLEAN, noLogicalName, "BOOLEAN"),
        Arguments.of(Schema.Type.STRING, noLogicalName, "VARCHAR"),
        Arguments.of(Schema.Type.BYTES, Decimal.LOGICAL_NAME, "VARCHAR"),
        Arguments.of(Schema.Type.BYTES, noLogicalName, "BINARY"),
        Arguments.of(Schema.Type.ARRAY, noLogicalName, "ARRAY"),
        Arguments.of(Schema.Type.STRUCT, noLogicalName, "VARIANT"),
        Arguments.of(Schema.Type.MAP, noLogicalName, "VARIANT"));
  }

  /** Cases as (JSON node, expected Kafka schema type or null). */
  private static Stream<Arguments> jsonNodeTypesToMap() {
    final JsonNodeFactory nodes = JsonNodeFactory.instance;
    return Stream.of(
        Arguments.of(nodes.nullNode(), Schema.Type.STRING),
        Arguments.of(nodes.numberNode((short) 1), Schema.Type.INT16),
        Arguments.of(nodes.numberNode(1), Schema.Type.INT32),
        Arguments.of(nodes.numberNode(1L), Schema.Type.INT64),
        Arguments.of(nodes.numberNode(1.0), Schema.Type.FLOAT64),
        Arguments.of(nodes.numberNode(1.0f), Schema.Type.FLOAT32),
        Arguments.of(nodes.textNode("text"), Schema.Type.STRING),
        Arguments.of(nodes.booleanNode(true), Schema.Type.BOOLEAN),
        Arguments.of(nodes.binaryNode(new byte[] {1, 2, 3}), Schema.Type.BYTES),
        Arguments.of(nodes.arrayNode().add(1).add(2).add(3), Schema.Type.ARRAY),
        Arguments.of(nodes.objectNode(), Schema.Type.STRUCT),
        Arguments.of(nodes.pojoNode(new Object()), null));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/SnowflakeSchemaEvolutionServiceTest.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 *
 * Tests for schema evolution service and DDL execution (Commit 6).
 */

package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.*;

import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import com.snowflake.kafka.connector.records.SnowflakeSinkRecord;
import java.util.*;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.InOrder;

/** Tests for SnowflakeSchemaEvolutionService */
public class SnowflakeSchemaEvolutionServiceTest {

  private static final SnowflakeMetadataConfig METADATA_CONFIG = new SnowflakeMetadataConfig();

  private SnowflakeConnectionService mockConn;
  private SnowflakeSchemaEvolutionService service;

  @BeforeEach
  public void setUp() {
    mockConn = mock(SnowflakeConnectionService.class);
    service = new SnowflakeSchemaEvolutionService(mockConn);
  }

  private static SnowflakeSinkRecord toSinkRecord(SinkRecord kafkaRecord) {
    return SnowflakeSinkRecord.from(kafkaRecord, METADATA_CONFIG, true, false);
  }

  @Test
  public void testEvolveSchemaAddColumns() {
    Schema valueSchema =
        SchemaBuilder.struct()
            .field("name", Schema.STRING_SCHEMA)
            .field("new_col", Schema.INT32_SCHEMA)
            .build();

    Struct value = new Struct(valueSchema);
    value.put("name", "Alice");
    value.put("new_col", 42);

    SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, valueSchema, value, 0);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord);

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table", Collections.emptySet(), new HashSet<>(Arrays.asList("NEW_COL")));

    service.evolveSchemaIfNeeded(items, record);

    verify(mockConn).appendColumnsToTable(eq("test_table"), anyMap());
    verify(mockConn, never()).alterNonNullableColumns(anyString(), anyList());
  }

  @Test
  public void testEvolveSchemaDropNotNull() {
    SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, null, new HashMap<>(), 0);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord);

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table", new HashSet<>(Arrays.asList("COL1", "COL2")), Collections.emptySet());

    service.evolveSchemaIfNeeded(items, record);

    ArgumentCaptor<List<String>> colsCaptor = ArgumentCaptor.forClass(List.class);
    verify(mockConn).alterNonNullableColumns(eq("test_table"), colsCaptor.capture());
    List<String> droppedCols = colsCaptor.getValue();
    assertEquals(2, droppedCols.size());
    assertTrue(droppedCols.contains("COL1"));
    assertTrue(droppedCols.contains("COL2"));
    verify(mockConn, never()).appendColumnsToTable(anyString(), anyMap());
  }

  @Test
  public void testEvolveSchemaNoDataSkipsExecution() {
    SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, null, null, 0);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord);

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table", Collections.emptySet(), Collections.emptySet());

    service.evolveSchemaIfNeeded(items, record);

    verify(mockConn, never()).appendColumnsToTable(anyString(), anyMap());
    verify(mockConn, never()).alterNonNullableColumns(anyString(), anyList());
  }

  @Test
  public void testEvolveSchemaHandlesAddColumnFailure() {
    Schema valueSchema = SchemaBuilder.struct().field("col1", Schema.STRING_SCHEMA).build();

    Struct value = new Struct(valueSchema);
    value.put("col1", "test");

    SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, valueSchema, value, 0);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord);

    doThrow(
            new com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException(
                "race", "2001"))
        .when(mockConn)
        .appendColumnsToTable(anyString(), anyMap());

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table", Collections.emptySet(), new HashSet<>(Arrays.asList("COL1")));

    assertDoesNotThrow(() -> service.evolveSchemaIfNeeded(items, record));
  }

  @Test
  public void testEvolveSchemaHandlesDropNotNullFailure() {
    SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, null, new HashMap<>(), 0);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord);

    doThrow(
            new com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException(
                "race", "2001"))
        .when(mockConn)
        .alterNonNullableColumns(anyString(), anyList());

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table", new HashSet<>(Arrays.asList("COL1")), Collections.emptySet());

    assertDoesNotThrow(() -> service.evolveSchemaIfNeeded(items, record));
  }

  @Test
  public void testEvolveSchemaAddColumnsBeforeDropNotNull() {
    Schema valueSchema =
        SchemaBuilder.struct()
            .field("existing_col", Schema.STRING_SCHEMA)
            .field("new_col", Schema.INT32_SCHEMA)
            .build();

    Struct value = new Struct(valueSchema);
    value.put("existing_col", "hello");
    value.put("new_col", 99);

    SinkRecord kafkaRecord = new SinkRecord("topic", 0, null, null, valueSchema, value, 0);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord);

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table",
            new HashSet<>(Arrays.asList("EXISTING_COL")),
            new HashSet<>(Arrays.asList("NEW_COL")));

    service.evolveSchemaIfNeeded(items, record);

    InOrder inOrder = inOrder(mockConn);
    inOrder.verify(mockConn).appendColumnsToTable(eq("test_table"), anyMap());
    inOrder.verify(mockConn).alterNonNullableColumns(eq("test_table"), anyList());
  }

  @Test
  public void testEvolveSchemaWithTransformedRecordContent() {
    // Simulates the schematization=off path where the transformed record
    // contains RECORD_CONTENT (a Map) instead of the original flat fields.
    Map<String, Object> transformedRecord = new HashMap<>();
    transformedRecord.put("RECORD_CONTENT", new HashMap<>(Map.of("city", "Hsinchu", "age", 42)));
    transformedRecord.put("RECORD_METADATA", new HashMap<>(Map.of("offset", 0)));

    SinkRecord syntheticKafkaRecord =
        new SinkRecord("topic", 0, null, null, null, transformedRecord, 0);
    SnowflakeSinkRecord syntheticRecord = toSinkRecord(syntheticKafkaRecord);

    SchemaEvolutionTargetItems items =
        new SchemaEvolutionTargetItems(
            "test_table", Collections.emptySet(), new HashSet<>(Arrays.asList("RECORD_CONTENT")));

    service.evolveSchemaIfNeeded(items, syntheticRecord);

    @SuppressWarnings("unchecked")
    ArgumentCaptor<Map<String, ColumnInfos>> schemaCaptor = ArgumentCaptor.forClass(Map.class);
    verify(mockConn).appendColumnsToTable(eq("test_table"), schemaCaptor.capture());

    Map<String, ColumnInfos> addedColumns = schemaCaptor.getValue();
    assertTrue(addedColumns.containsKey("RECORD_CONTENT"));
    assertEquals("VARIANT", addedColumns.get("RECORD_CONTENT").getColumnType());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/TableSchemaResolverTest.java
================================================
package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.assertj.core.api.Assertions.*;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.records.SnowflakeMetadataConfig;
import com.snowflake.kafka.connector.records.SnowflakeSinkRecord;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.Test;

public class TableSchemaResolverTest {

  private static final SnowflakeMetadataConfig METADATA_CONFIG = new SnowflakeMetadataConfig();

  private final TableSchemaResolver schemaResolver = new TableSchemaResolver();

  private static SnowflakeSinkRecord toSinkRecord(
      SinkRecord kafkaRecord, boolean enableColumnIdentifierNormalization) {
    return SnowflakeSinkRecord.from(
        kafkaRecord, METADATA_CONFIG, true, enableColumnIdentifierNormalization);
  }

  @Test
  public void testGetColumnTypesWithSchema_TimestampField_JacksonCanSerialize() {
    // Reproducer for PR review comment: when schematization IS enabled,
    // content map holds the raw output of convertToMap(). During schema evolution,
    // OBJECT_MAPPER.valueToTree(record.getContent()) runs on this map.
    // If the map contains a raw Instant, plain ObjectMapper (no JavaTimeModule) throws
    // InvalidDefinitionException.
    java.util.Date nearEpochDate =
        new java.util.Date(java.time.Instant.parse("1969-04-08T00:00:00Z").toEpochMilli());

    org.apache.kafka.connect.data.Schema schema =
        org.apache.kafka.connect.data.SchemaBuilder.struct()
            .field("ts", org.apache.kafka.connect.data.Timestamp.SCHEMA)
            .build();
    org.apache.kafka.connect.data.Struct struct =
        new org.apache.kafka.connect.data.Struct(schema).put("ts", nearEpochDate);

    SinkRecord kafkaRecord =
        new SinkRecord(
            "topic",
            0,
            null,
            null,
            schema,
            struct,
            0,
            System.currentTimeMillis(),
            TimestampType.CREATE_TIME);

    // enableSchematization=true so content = convertToMap() result (may contain Instant)
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, false);

    // This is the call that fails: OBJECT_MAPPER.valueToTree(record.getContent())
    // triggers Jackson serialization of the Instant without JavaTimeModule
    assertThatCode(
            () -> schemaResolver.resolveTableSchemaFromSnowflakeRecord(record, Arrays.asList("ts")))
        .doesNotThrowAnyException();
  }

  @Test
  public void testGetColumnTypesWithoutSchema_NormalizationEnabled()
      throws JsonProcessingException {
    String columnName = "test";
    ObjectMapper mapper = new ObjectMapper();
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, ?> config = Collections.singletonMap("schemas.enable", false);
    jsonConverter.configure(config, false);
    Map<String, String> jsonMap = new HashMap<>();
    jsonMap.put(columnName, "value");
    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData("topic", mapper.writeValueAsBytes(jsonMap));
    SinkRecord kafkaRecord =
        new SinkRecord(
            "topic",
            0,
            null,
            null,
            schemaAndValue.schema(),
            schemaAndValue.value(),
            0,
            System.currentTimeMillis(),
            TimestampType.CREATE_TIME);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, true);

    // With normalization=true, "test" normalizes to "TEST" in ColumnValuePair
    // So columnsToInclude should use raw normalized name "TEST"
    TableSchema tableSchema =
        schemaResolver.resolveTableSchemaFromSnowflakeRecord(
            record, Collections.singletonList("TEST"));

    assertThat(tableSchema.getColumnInfos())
        .containsExactlyInAnyOrderEntriesOf(
            Collections.singletonMap("TEST", new ColumnInfos("VARCHAR", null)));

    // Get non-existing column name should return nothing
    tableSchema =
        schemaResolver.resolveTableSchemaFromSnowflakeRecord(
            record, Collections.singletonList("NONEXISTENT"));
    assertThat(tableSchema.getColumnInfos()).isEmpty();
  }

  @Test
  public void testGetColumnTypesWithoutSchema_NormalizationDisabled()
      throws JsonProcessingException {
    String columnName = "test";
    ObjectMapper mapper = new ObjectMapper();
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, ?> config = Collections.singletonMap("schemas.enable", false);
    jsonConverter.configure(config, false);
    Map<String, String> jsonMap = new HashMap<>();
    jsonMap.put(columnName, "value");
    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData("topic", mapper.writeValueAsBytes(jsonMap));
    SinkRecord kafkaRecord =
        new SinkRecord(
            "topic",
            0,
            null,
            null,
            schemaAndValue.schema(),
            schemaAndValue.value(),
            0,
            System.currentTimeMillis(),
            TimestampType.CREATE_TIME);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, false);

    // With normalization=false, column name stays as-is: "test"
    TableSchema tableSchema =
        schemaResolver.resolveTableSchemaFromSnowflakeRecord(
            record, Collections.singletonList("test"));

    assertThat(tableSchema.getColumnInfos())
        .containsExactlyInAnyOrderEntriesOf(
            Collections.singletonMap("test", new ColumnInfos("VARCHAR", null)));
  }

  @Test
  public void testGetColumnTypesWithSchema_NormalizationEnabled() {
    JsonConverter converter = new JsonConverter();
    Map<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "true");
    converter.configure(converterConfig, false);
    SchemaAndValue schemaAndValue =
        converter.toConnectData(
            "topic", TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8));

    // With normalization=true: "regionid" → "REGIONID", "gender" → "GENDER"
    String columnName1 = "REGIONID";
    String columnName2 = "GENDER";
    SinkRecord kafkaRecord =
        new SinkRecord(
            "topic",
            0,
            null,
            null,
            schemaAndValue.schema(),
            schemaAndValue.value(),
            0,
            System.currentTimeMillis(),
            TimestampType.CREATE_TIME);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, true);

    TableSchema tableSchema =
        schemaResolver.resolveTableSchemaFromSnowflakeRecord(
            record, Arrays.asList(columnName1, columnName2));

    assertThat(tableSchema.getColumnInfos().get(columnName1).getColumnType()).isEqualTo("VARCHAR");
    assertThat(tableSchema.getColumnInfos().get(columnName1).getComments()).isEqualTo("doc");
    assertThat(tableSchema.getColumnInfos().get(columnName2).getColumnType()).isEqualTo("VARCHAR");
    assertThat(tableSchema.getColumnInfos().get(columnName2).getComments()).isNull();
  }

  @Test
  public void testGetColumnTypesWithSchema_NormalizationDisabled() {
    JsonConverter converter = new JsonConverter();
    Map<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "true");
    converter.configure(converterConfig, false);
    SchemaAndValue schemaAndValue =
        converter.toConnectData(
            "topic", TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8));

    // With normalization=false: column names stay as-is
    String columnName1 = "regionid";
    String columnName2 = "gender";
    SinkRecord kafkaRecord =
        new SinkRecord(
            "topic",
            0,
            null,
            null,
            schemaAndValue.schema(),
            schemaAndValue.value(),
            0,
            System.currentTimeMillis(),
            TimestampType.CREATE_TIME);
    SnowflakeSinkRecord record = toSinkRecord(kafkaRecord, false);

    TableSchema tableSchema =
        schemaResolver.resolveTableSchemaFromSnowflakeRecord(
            record, Arrays.asList(columnName1, columnName2));

    assertThat(tableSchema.getColumnInfos().get(columnName1).getColumnType()).isEqualTo("VARCHAR");
    assertThat(tableSchema.getColumnInfos().get(columnName1).getComments()).isEqualTo("doc");
    assertThat(tableSchema.getColumnInfos().get(columnName2).getColumnType()).isEqualTo("VARCHAR");
    assertThat(tableSchema.getColumnInfos().get(columnName2).getComments()).isNull();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/schemaevolution/ValidationResultMapperTest.java
================================================
package com.snowflake.kafka.connector.internal.schemaevolution;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.*;

import com.snowflake.kafka.connector.internal.validation.ValidationResult;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link ValidationResultMapper#mapToSchemaEvolutionItems}: extra columns become
 * columns to add, and both kinds of NOT NULL violation are merged into the set of columns whose
 * non-nullability should be dropped.
 */
class ValidationResultMapperTest {

  /** Mutable set holding the given column names. */
  private static Set<String> names(String... columns) {
    return new HashSet<>(Arrays.asList(columns));
  }

  /** Builds a structural-error result from the three column sets and maps it for the table. */
  private static SchemaEvolutionTargetItems mapStructuralError(
      Set<String> extraColumns,
      Set<String> missingNotNull,
      Set<String> nullValuedNotNull,
      String table) {
    ValidationResult structuralError =
        ValidationResult.structuralError(extraColumns, missingNotNull, nullValuedNotNull);
    return ValidationResultMapper.mapToSchemaEvolutionItems(structuralError, table);
  }

  @Test
  void mapWithExtraColumnsAndBothNotNullViolations() {
    SchemaEvolutionTargetItems mapped =
        mapStructuralError(
            names("NEW_COL1", "NEW_COL2"),
            names("REQUIRED_COL"),
            names("NULLABLE_COL"),
            "MY_TABLE");

    assertEquals("MY_TABLE", mapped.getTableName());
    assertThat(mapped.getColumnsToAdd()).containsExactlyInAnyOrder("NEW_COL1", "NEW_COL2");
    assertThat(mapped.getColumnsToDropNonNullability())
        .containsExactlyInAnyOrder("REQUIRED_COL", "NULLABLE_COL");
    assertTrue(mapped.hasDataForSchemaEvolution());
  }

  @Test
  void mapWithEmptyResult() {
    SchemaEvolutionTargetItems mapped =
        mapStructuralError(
            Collections.emptySet(), Collections.emptySet(), Collections.emptySet(), "MY_TABLE");

    assertFalse(mapped.hasDataForSchemaEvolution());
    assertThat(mapped.getColumnsToAdd()).isEmpty();
    assertThat(mapped.getColumnsToDropNonNullability()).isEmpty();
  }

  @Test
  void mapWithOnlyExtraColumns() {
    SchemaEvolutionTargetItems mapped =
        mapStructuralError(names("COL1"), Collections.emptySet(), Collections.emptySet(), "T");

    assertThat(mapped.getColumnsToAdd()).containsExactly("COL1");
    assertThat(mapped.getColumnsToDropNonNullability()).isEmpty();
  }

  @Test
  void mapWithOnlyMissingNotNull() {
    SchemaEvolutionTargetItems mapped =
        mapStructuralError(Collections.emptySet(), names("COL1"), Collections.emptySet(), "T");

    assertThat(mapped.getColumnsToAdd()).isEmpty();
    assertThat(mapped.getColumnsToDropNonNullability()).containsExactly("COL1");
  }

  @Test
  void mapWithOnlyNullValueForNotNull() {
    SchemaEvolutionTargetItems mapped =
        mapStructuralError(Collections.emptySet(), Collections.emptySet(), names("COL1"), "T");

    assertThat(mapped.getColumnsToAdd()).isEmpty();
    assertThat(mapped.getColumnsToDropNonNullability()).containsExactly("COL1");
  }

  @Test
  void mapCombinesBothNotNullViolationTypes() {
    SchemaEvolutionTargetItems mapped =
        mapStructuralError(Collections.emptySet(), names("MISSING1"), names("NULL1"), "T");

    assertThat(mapped.getColumnsToDropNonNullability())
        .containsExactlyInAnyOrder("MISSING1", "NULL1");
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/BatchOffsetFetcherTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.ChannelStatusBatch;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientSupplier;
import com.snowflake.kafka.connector.internal.streaming.v2.service.BatchOffsetFetcher;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.time.Instant;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import org.apache.kafka.common.TopicPartition;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/** Unit tests for {@link BatchOffsetFetcher}. */
class BatchOffsetFetcherTest {

  private static final String TASK_ID = "0";

  private String connectorName;
  private ExecutorService ioExecutor;
  private BatchOffsetFetcher fetcher;
  private Map<TopicPartition, TopicPartitionChannel> channels;
  private CountingClientSupplier clientSupplier;

  @BeforeEach
  void setUp() {
    // Unique name per test to avoid StreamingClientPools caching across tests
    connectorName = "test_connector_" + UUID.randomUUID().toString().substring(0, 8);

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder().connectorName(connectorName).taskId(TASK_ID).build();

    ThreadPools.registerTask(connectorName, taskConfig);
    ioExecutor = ThreadPools.getIoExecutor(connectorName);
    fetcher =
        new BatchOffsetFetcher(connectorName, TASK_ID, taskConfig, ioExecutor, TaskMetrics.noop());
    channels = new HashMap<>();

    clientSupplier = new CountingClientSupplier();
    StreamingClientFactory.setStreamingClientSupplier(clientSupplier);
  }

  @AfterEach
  void tearDown() {
    ThreadPools.closeForTask(connectorName);
    StreamingClientFactory.resetStreamingClientSupplier();
  }

  @Test
  void emptyPartitionsReturnsEmptyMap() {
    assertTrue(fetcher.getCommittedOffsets(Collections.emptySet(), channelLookup()).isEmpty());
  }

  @Test
  void groupsByPipeAndBatchesCalls() {
    TopicPartition tp0 = new TopicPartition("topicA", 0);
    TopicPartition tp1 = new TopicPartition("topicA", 1);
    TopicPartition tp2 = new TopicPartition("topicB", 0);
    TopicPartition tp3 = new TopicPartition("topicB", 1);

    registerChannel(tp0, "pipeA", "chA0", 10L);
    registerChannel(tp1, "pipeA", "chA1", 20L);
    registerChannel(tp2, "pipeB", "chB0", 30L);
    registerChannelWithNoOffset(tp3, "pipeB", "chB1");

    Map<TopicPartition, Long> result =
        fetcher.getCommittedOffsets(Set.of(tp0, tp1, tp2, tp3), channelLookup());

    assertEquals(3, result.size());
    assertEquals(11L, result.get(tp0));
    assertEquals(21L, result.get(tp1));
    assertEquals(31L, result.get(tp2));
    assertTrue(!result.containsKey(tp3), "Channel with no offset should be excluded");

    // Two pipes, so exactly 2 batch calls
    assertEquals(2, clientSupplier.getBatchCallCount());
  }

  @Test
  void uninitializedPartitionsAreSkipped() {
    TopicPartition initialized = new TopicPartition("topicA", 0);
    TopicPartition uninitialized = new TopicPartition("topicA", 1);

    registerChannel(initialized, "pipeA", "ch0", 5L);

    Map<TopicPartition, Long> result =
        fetcher.getCommittedOffsets(Set.of(initialized, uninitialized), channelLookup());

    assertEquals(1, result.size());
    assertEquals(6L, result.get(initialized));
  }

  @Test
  void sfExceptionForOnePipeDoesNotAffectOthers() {
    TopicPartition tp0 = new TopicPartition("topicA", 0);
    TopicPartition tp1 = new TopicPartition("topicB", 0);

    registerChannel(tp0, "pipeA", "ch0", 10L);
    registerChannel(tp1, "pipeB", "ch1", 20L);

    clientSupplier.setFailingPipe("pipeA");

    Map<TopicPartition, Long> result =
        fetcher.getCommittedOffsets(Set.of(tp0, tp1), channelLookup());

    assertEquals(1, result.size());
    assertEquals(21L, result.get(tp1));
  }

  @Test
  void connectorExceptionPropagates() {
    TopicPartition tp0 = new TopicPartition("topicA", 0);

    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    when(mockChannel.getChannelName()).thenReturn("ch0");
    when(mockChannel.getPipeName()).thenReturn("pipeA");
    when(mockChannel.processChannelStatus(any(ChannelStatus.class), anyBoolean()))
        .thenThrow(new SnowflakeKafkaConnectorException("ingestion error", "5030"));

    channels.put(tp0, mockChannel);
    clientSupplier.setChannelOffset("ch0", "pipeA", "10");

    assertThrows(
        SnowflakeKafkaConnectorException.class,
        () -> fetcher.getCommittedOffsets(Set.of(tp0), channelLookup()));
  }

  @Test
  void partitionsByTopicGroupsCorrectly() {
    TopicPartition tpA0 = new TopicPartition("topicA", 0);
    TopicPartition tpA1 = new TopicPartition("topicA", 1);
    TopicPartition tpB0 = new TopicPartition("topicB", 0);
    TopicPartition tpB1 = new TopicPartition("topicB", 1);

    TopicPartitionChannel chA0 = mockChannel("pipeA");
    TopicPartitionChannel chB0 = mockChannel("pipeB");

    // tpA0 and tpB0 have channels; tpA1 and tpB1 do not
    Map<TopicPartition, TopicPartitionChannel> lookup = Map.of(tpA0, chA0, tpB0, chB0);

    BatchOffsetFetcher.PartitionsByTopic result =
        BatchOffsetFetcher.PartitionsByTopic.groupByTopic(
            Set.of(tpA0, tpA1, tpB0, tpB1), tp -> Optional.ofNullable(lookup.get(tp)));

    // Initialized channels grouped by pipe
    assertEquals(2, result.pipeNameToChannels.size());
    assertEquals(Map.of(tpA0, chA0), result.pipeNameToChannels.get("pipeA"));
    assertEquals(Map.of(tpB0, chB0), result.pipeNameToChannels.get("pipeB"));

    // Uninitialized partitions grouped by topic
    assertEquals(2, result.topicToPartitionsWithoutChannels.size());
    assertEquals(Set.of(tpA1), result.topicToPartitionsWithoutChannels.get("topicA"));
    assertEquals(Set.of(tpB1), result.topicToPartitionsWithoutChannels.get("topicB"));
  }

  private static TopicPartitionChannel mockChannel(String pipeName) {
    TopicPartitionChannel ch = mock(TopicPartitionChannel.class);
    when(ch.getPipeName()).thenReturn(pipeName);
    return ch;
  }

  // -- helpers --

  private Function<TopicPartition, Optional<TopicPartitionChannel>> channelLookup() {
    return tp -> Optional.ofNullable(channels.get(tp));
  }

  private void registerChannel(
      TopicPartition topicPartition, String pipeName, String channelName, long committedOffset) {
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    when(mockChannel.getChannelName()).thenReturn(channelName);
    when(mockChannel.getPipeName()).thenReturn(pipeName);
    when(mockChannel.processChannelStatus(any(ChannelStatus.class), anyBoolean()))
        .thenReturn(committedOffset + 1);

    channels.put(topicPartition, mockChannel);
    clientSupplier.setChannelOffset(channelName, pipeName, String.valueOf(committedOffset));
  }

  private void registerChannelWithNoOffset(
      TopicPartition topicPartition, String pipeName, String channelName) {
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    when(mockChannel.getChannelName()).thenReturn(channelName);
    when(mockChannel.getPipeName()).thenReturn(pipeName);
    when(mockChannel.processChannelStatus(any(ChannelStatus.class), anyBoolean()))
        .thenReturn(NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE);

    channels.put(topicPartition, mockChannel);
  }

  /**
   * A StreamingClientSupplier that creates mock clients with configurable channel statuses and
   * tracks batch call counts.
   */
  static class CountingClientSupplier implements StreamingClientSupplier {
    private final AtomicInteger batchCallCount = new AtomicInteger(0);
    // pipeName -> (channelName -> offsetToken)
    private final Map<String, Map<String, String>> pipeChannelOffsets = new ConcurrentHashMap<>();
    private volatile String failingPipe = null;

    void setChannelOffset(String channelName, String pipeName, String offsetToken) {
      pipeChannelOffsets
          .computeIfAbsent(pipeName, k -> new ConcurrentHashMap<>())
          .put(channelName, offsetToken);
    }

    void setFailingPipe(String pipeName) {
      this.failingPipe = pipeName;
    }

    int getBatchCallCount() {
      return batchCallCount.get();
    }

    @Override
    public SnowflakeStreamingIngestClient get(
        String clientName,
        String dbName,
        String schemaName,
        String pipeName,
        StreamingClientProperties streamingClientProperties) {
      SnowflakeStreamingIngestClient client = mock(SnowflakeStreamingIngestClient.class);
      when(client.getChannelStatus(any()))
          .thenAnswer(
              invocation -> {
                batchCallCount.incrementAndGet();
                if (pipeName.equals(failingPipe)) {
                  throw new SFException(
                      "TestError", "Simulated batch failure", 500, "Internal Server Error");
                }
                List<String> names = invocation.getArgument(0);
                Map<String, ChannelStatus> statusMap = new HashMap<>();
                Map<String, String> offsets =
                    pipeChannelOffsets.getOrDefault(pipeName, Collections.emptyMap());
                for (String name : names) {
                  statusMap.put(
                      name,
                      new ChannelStatus(
                          "db",
                          "schema",
                          pipeName,
                          name,
                          "SUCCESS",
                          offsets.get(name),
                          Instant.now(),
                          0,
                          0,
                          0,
                          null,
                          null,
                          null,
                          null,
                          Instant.now()));
                }
                return new ChannelStatusBatch(statusMap);
              });
      return client;
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/ChannelStatusCheckIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.awaitility.Awaitility.await;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo;
import org.apache.kafka.connect.sink.SinkConnector;
import org.apache.kafka.connect.storage.StringConverter;
import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;

/**
 * Integration tests for channel status error handling using an embedded Kafka Connect cluster with
 * fake streaming ingest clients.
 *
 * <p>A single test instance (and therefore a single embedded cluster and a single {@link
 * FakeIngestClientSupplier}) is shared by all test methods. Each test gets its own topic, table and
 * connector, and the shared supplier is reset in {@link #setUp()} so that state configured by one
 * test cannot leak into the next.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
class ChannelStatusCheckIT {

  private static final int PARTITIONS_NUMBER = 10;

  private final FakeIngestClientSupplier fakeClientSupplier = new FakeIngestClientSupplier();
  private final ObjectMapper mapper = new ObjectMapper();

  private EmbeddedConnectCluster connectCluster;
  private String topicName;
  private String connectorName;

  /** Starts the embedded Connect cluster shared by every test in this class. */
  @BeforeAll
  void beforeAll() {
    Map<String, String> workerConfig = new HashMap<>();
    workerConfig.put("plugin.discovery", "hybrid_warn");
    // Set a short offset flush interval for faster preCommit calls
    workerConfig.put("offset.flush.interval.ms", "1000");
    connectCluster =
        new EmbeddedConnectCluster.Builder()
            .name("channel-status-check-cluster")
            .numWorkers(5)
            .workerProps(workerConfig)
            .build();
    connectCluster.start();
  }

  /** Stops the embedded Connect cluster if it was started. */
  @AfterAll
  void afterAll() {
    if (connectCluster != null) {
      connectCluster.stop();
      connectCluster = null;
    }
  }

  /** Creates a fresh topic and table for the test and installs the fake client supplier. */
  @BeforeEach
  void setUp() {
    // The supplier outlives individual tests. Clear any pre-existing error count configured by a
    // previous test; otherwise channels opened by this test would start with that count and the
    // outcome would depend on test execution order.
    fakeClientSupplier.setPreExistingErrorCount(0);
    topicName = TestUtils.randomTableName();
    connectorName = topicName + "_connector";
    connectCluster.kafka().createTopic(topicName, PARTITIONS_NUMBER);
    TestUtils.createTableWithMetadataColumn(topicName);
    StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier);
  }

  /** Removes the connector, topic and table created for the test. */
  @AfterEach
  void tearDown() {
    try {
      connectCluster.deleteConnector(connectorName);
      waitForConnectorStopped(connectorName);
      connectCluster.kafka().deleteTopic(topicName);
    } finally {
      // Always restore the real supplier and drop the table, even when the connector or topic
      // could not be removed, so that a failure here does not affect later tests.
      StreamingClientFactory.resetStreamingClientSupplier();
      TestUtils.dropTable(topicName);
    }
  }

  @Test
  void shouldContinueWorkingWhenNoChannelErrors() throws JsonProcessingException {
    // Given: connector with default config (errors.tolerance=none)
    Map<String, String> config = defaultProperties(topicName, connectorName);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // When: produce messages
    produceMessages(3000);

    // Then: connector should remain running (no errors to cause failure)
    await("Messages processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 3);

    ConnectorStateInfo connectorState = connectCluster.connectorStatus(connectorName);
    assertTrue(
        connectorState.tasks().stream().allMatch(task -> "RUNNING".equals(task.state())),
        "All tasks should be running when there are no channel errors");
  }

  @Test
  void shouldFailConnectorWhenChannelHasErrorsAndToleranceIsNone() throws JsonProcessingException {
    // Given: connector with errors.tolerance=none (default)
    Map<String, String> config = defaultProperties(topicName, connectorName);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce initial messages to ensure channels are set up
    produceMessages(3000);
    await("Initial message processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 1);

    // When: inject errors on all channels
    for (FakeSnowflakeStreamingIngestChannel channel : fakeClient.getOpenedChannels()) {
      channel.updateErrors(5, "Test error message", "95");
    }

    // Then: connector task should fail due to channel errors
    await("Connector task failed")
        .atMost(Duration.ofMinutes(2))
        .pollInterval(Duration.ofSeconds(4))
        .until(
            () -> {
              ConnectorStateInfo state = connectCluster.connectorStatus(connectorName);
              return state.tasks().stream().anyMatch(task -> "FAILED".equals(task.state()));
            });
  }

  @Test
  void shouldContinueWorkingWhenChannelHasErrorsAndToleranceIsAll() throws JsonProcessingException {
    // Given: connector with errors.tolerance=all
    Map<String, String> config = defaultProperties(topicName, connectorName);
    config.put(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, "all");
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce initial message
    produceMessages(1);
    await("Initial message processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 1);

    // When: inject errors on all channels
    for (FakeSnowflakeStreamingIngestChannel channel : fakeClient.getOpenedChannels()) {
      channel.updateErrors(5, "Test error message", "95");
    }

    // Produce more messages
    produceMessages(2);

    // Then: connector should continue running (errors are tolerated)
    await("Messages processed despite errors")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 3);

    ConnectorStateInfo connectorState = connectCluster.connectorStatus(connectorName);
    assertTrue(
        connectorState.tasks().stream().allMatch(task -> "RUNNING".equals(task.state())),
        "All tasks should remain running when errors.tolerance=all");
  }

  @Test
  void shouldContinueWorkingWithPreExistingErrorsAndToleranceIsNone()
      throws JsonProcessingException {
    // Given: Pre-existing errors are set BEFORE the connector starts (simulating channel reopen
    // scenario)
    // This simulates the case where a channel has cumulative errors from a previous connector run
    fakeClientSupplier.setPreExistingErrorCount(5);

    Map<String, String> config = defaultProperties(topicName, connectorName);
    config.put(KafkaConnectorConfigParams.ERRORS_TOLERANCE_CONFIG, "none");
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce messages
    produceMessages(5);

    // Then: connector should remain running because pre-existing errors don't count as new errors
    await("Messages processed despite pre-existing errors")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 5);

    ConnectorStateInfo connectorState = connectCluster.connectorStatus(connectorName);
    assertTrue(
        connectorState.tasks().stream().allMatch(task -> "RUNNING".equals(task.state())),
        "All tasks should be running when there are only pre-existing errors");
  }

  @Test
  void shouldFailWhenNewErrorsOccurAfterStartupWithPreExistingErrors()
      throws JsonProcessingException {
    // Given: Pre-existing errors are set BEFORE the connector starts
    fakeClientSupplier.setPreExistingErrorCount(5);

    Map<String, String> config = defaultProperties(topicName, connectorName);
    connectCluster.configureConnector(connectorName, config);
    waitForConnectorRunning(connectorName);

    FakeSnowflakeStreamingIngestClient fakeClient = waitForConnectorToOpenChannels(connectorName);

    // Produce initial message
    produceMessages(1);
    await("Initial message processed")
        .atMost(Duration.ofSeconds(30))
        .until(() -> fakeClient.getAppendedRowCount() >= 1);

    // When: NEW errors occur (error count increases from 5 to 10)
    for (FakeSnowflakeStreamingIngestChannel channel : fakeClient.getOpenedChannels()) {
      channel.updateErrors(10, "Test error message", "95");
    }

    // Then: connector task should fail due to NEW channel errors
    await("Connector task failed due to new errors")
        .atMost(Duration.ofMinutes(2))
        .pollInterval(Duration.ofSeconds(4))
        .until(
            () -> {
              ConnectorStateInfo state = connectCluster.connectorStatus(connectorName);
              return state.tasks().stream().anyMatch(task -> "FAILED".equals(task.state()));
            });
  }

  /**
   * Produces {@code count} JSON messages to the test topic, distributing them over the topic's
   * partitions in round-robin order.
   *
   * @param count number of messages to produce
   * @throws JsonProcessingException if the payload cannot be serialized
   */
  private void produceMessages(int count) throws JsonProcessingException {
    Map<String, String> payload = Map.of("key1", "value1", "key2", "value2");
    for (int i = 0; i < count; i++) {
      connectCluster
          .kafka()
          .produce(
              topicName, i % PARTITIONS_NUMBER, "key-" + i, mapper.writeValueAsString(payload));
    }
  }

  // Helper methods

  /**
   * Waits up to 30 seconds until the fake client created for the connector has at least one opened
   * channel, then returns that client.
   */
  private FakeSnowflakeStreamingIngestClient waitForConnectorToOpenChannels(String connectorName) {
    await("channelsCreated")
        .atMost(Duration.ofSeconds(30))
        // The client may not exist yet; the lookup throws until it has been created.
        .ignoreExceptions()
        .until(
            () ->
                !getFakeSnowflakeStreamingIngestClient(connectorName)
                    .getOpenedChannels()
                    .isEmpty());
    return getFakeSnowflakeStreamingIngestClient(connectorName);
  }

  /**
   * Finds the fake ingest client whose client name contains the sanitized connector name.
   *
   * @throws java.util.NoSuchElementException if no such client has been created yet
   */
  private FakeSnowflakeStreamingIngestClient getFakeSnowflakeStreamingIngestClient(
      String connectorName) {
    // Connector names are sanitized/uppercased by Utils.convertAppName() in the connector
    Map<String, String> config = new HashMap<>();
    config.put(KafkaConnectorConfigParams.NAME, connectorName);
    Utils.convertAppName(config);
    String sanitizedConnectorName = config.get(KafkaConnectorConfigParams.NAME);
    return fakeClientSupplier.getFakeIngestClients().stream()
        .filter((client) -> client.getClientName().contains(sanitizedConnectorName))
        .findFirst()
        .orElseThrow();
  }

  /** Builds the connector configuration used by the tests: one task, JSON values, no schemas. */
  private Map<String, String> defaultProperties(String topicName, String connectorName) {
    Map<String, String> config = TestUtils.transformProfileFileToConnectorConfiguration(false);
    config.put(SinkConnector.TOPICS_CONFIG, topicName);
    config.put(
        ConnectorConfig.CONNECTOR_CLASS_CONFIG, SnowflakeStreamingSinkConnector.class.getName());
    config.put(ConnectorConfig.TASKS_MAX_CONFIG, "1");
    config.put(ConnectorConfig.KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName());
    config.put(ConnectorConfig.VALUE_CONVERTER_CLASS_CONFIG, JsonConverter.class.getName());
    config.put(KafkaConnectorConfigParams.NAME, connectorName);
    config.put(KafkaConnectorConfigParams.VALUE_CONVERTER_SCHEMAS_ENABLE, "false");
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    return config;
  }

  /** Blocks until the connector and at least one of its tasks are running. */
  private void waitForConnectorRunning(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorAndAtLeastNumTasksAreRunning(
              connectorName, 1, "The connector did not start.");
    } catch (InterruptedException e) {
      // Restore the interrupt flag and keep the cause so the interruption is not lost.
      Thread.currentThread().interrupt();
      throw new IllegalStateException("Interrupted while waiting for connector to start", e);
    }
  }

  /** Blocks until the connector no longer exists in the cluster. */
  private void waitForConnectorStopped(String connectorName) {
    try {
      connectCluster
          .assertions()
          .assertConnectorDoesNotExist(connectorName, "Failed to stop the connector");
    } catch (InterruptedException e) {
      // Restore the interrupt flag and keep the cause so the interruption is not lost.
      Thread.currentThread().interrupt();
      throw new IllegalStateException("Interrupted while waiting for connector to stop", e);
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/CloseTopicPartitionChannelIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.awaitility.Awaitility.await;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.ConnectClusterBaseIT;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientFactory;
import java.time.Duration;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Verifies that deleting a connector closes every streaming channel it opened.
 *
 * <p>The Connect cluster, the fake client supplier and the wait/lookup helpers used here are
 * provided by {@link ConnectClusterBaseIT}.
 */
class CloseTopicPartitionChannelIT extends ConnectClusterBaseIT {

  private static final int PARTITIONS_NUMBER = 16;

  private String topicName;
  private String connectorName;
  private ObjectMapper mapper = new ObjectMapper();

  /** Creates the test topic, installs the fake client supplier and seeds the topic. */
  @BeforeEach
  void setUp() throws JsonProcessingException {
    topicName = TestUtils.randomTableName();
    connectorName = topicName + "_connector";
    connectCluster.kafka().createTopic(topicName, PARTITIONS_NUMBER);
    // JVM scoped Ingest client mock
    StreamingClientFactory.setStreamingClientSupplier(fakeClientSupplier);
    generateKafkaMessages();
  }

  /** Deletes the topic, restores the client supplier and drops the table and pipe. */
  @AfterEach
  void tearDown() {
    connectCluster.kafka().deleteTopic(topicName);
    StreamingClientFactory.resetStreamingClientSupplier();
    TestUtils.dropTable(topicName);
    TestUtils.dropPipe(topicName + "-STREAMING");
  }

  /** Produces exactly one JSON message to each partition of the test topic. */
  private void generateKafkaMessages() throws JsonProcessingException {
    final Map<String, String> body = Map.of("key1", "value1", "key2", "value2");

    int partition = 0;
    while (partition < PARTITIONS_NUMBER) {
      final String recordKey = "key-" + partition;
      final String recordValue = mapper.writeValueAsString(body);
      connectCluster.kafka().produce(topicName, partition, recordKey, recordValue);
      partition++;
    }
  }

  @Test
  void closeChannels() {
    // given: a running connector that has opened one channel per partition
    connectCluster.configureConnector(connectorName, defaultProperties(topicName, connectorName));
    waitForConnectorRunning(connectorName);
    waitForOpenedFakeIngestClient(connectorName);
    await("Channels created")
        .atMost(Duration.ofSeconds(30))
        .ignoreExceptions()
        .until(
            () ->
                PARTITIONS_NUMBER
                    == getOpenedFakeIngestClient(connectorName).getOpenedChannels().size());

    // when: the connector is removed
    connectCluster.deleteConnector(connectorName);
    waitForConnectorDoesNotExist(connectorName);

    // then: every channel ends up closed
    await("Channels closed")
        .atMost(Duration.ofSeconds(30))
        .until(
            () ->
                PARTITIONS_NUMBER
                    == getOpenedFakeIngestClient(connectorName).countClosedChannels());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/DefaultStreamingConfigValidatorTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.google.common.collect.ImmutableMap;
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;

/**
 * Checks which value converters {@link DefaultStreamingConfigValidator} accepts depending on the
 * schematization setting. An empty validation result means the configuration was accepted.
 */
class DefaultStreamingConfigValidatorTest {

  private static final String VALUE_CONVERTER_KEY = "value.converter";
  private static final String SCHEMATIZATION_KEY = "snowflake.enable.schematization";

  private static final String STRING_CONVERTER =
      "org.apache.kafka.connect.storage.StringConverter";
  private static final String BYTE_ARRAY_CONVERTER =
      "org.apache.kafka.connect.converters.ByteArrayConverter";
  private static final String JSON_CONVERTER = "org.apache.kafka.connect.json.JsonConverter";

  private final DefaultStreamingConfigValidator validator = new DefaultStreamingConfigValidator();

  /** Returns a mutable base configuration containing only the role name. */
  private Map<String, String> validConfig() {
    Map<String, String> base = new HashMap<>();
    base.put("snowflake.role.name", "testrole");
    return base;
  }

  /**
   * Validates the base configuration extended with the given value converter.
   *
   * @param valueConverter converter class name to put under {@code value.converter}
   * @param schematization value for {@code snowflake.enable.schematization}, or {@code null} to
   *     leave the setting out of the configuration
   * @return the validator's result for that configuration
   */
  private ImmutableMap<String, String> validateWith(String valueConverter, String schematization) {
    Map<String, String> config = validConfig();
    config.put(VALUE_CONVERTER_KEY, valueConverter);
    if (schematization != null) {
      config.put(SCHEMATIZATION_KEY, schematization);
    }
    return validator.validate(config);
  }

  @Test
  void testStringConverterAllowed_WhenSchematizationDisabled() {
    ImmutableMap<String, String> outcome = validateWith(STRING_CONVERTER, "false");

    assertTrue(
        outcome.isEmpty(), "StringConverter should be allowed when schematization is disabled");
  }

  @Test
  void testByteArrayConverterAllowed_WhenSchematizationDisabled() {
    ImmutableMap<String, String> outcome = validateWith(BYTE_ARRAY_CONVERTER, "false");

    assertTrue(
        outcome.isEmpty(), "ByteArrayConverter should be allowed when schematization is disabled");
  }

  @Test
  void testStringConverterBlocked_WhenSchematizationEnabled() {
    ImmutableMap<String, String> outcome = validateWith(STRING_CONVERTER, "true");

    assertFalse(
        outcome.isEmpty(), "StringConverter should be blocked when schematization is enabled");
  }

  @Test
  void testByteArrayConverterBlocked_WhenSchematizationEnabled() {
    ImmutableMap<String, String> outcome = validateWith(BYTE_ARRAY_CONVERTER, "true");

    assertFalse(
        outcome.isEmpty(), "ByteArrayConverter should be blocked when schematization is enabled");
  }

  @Test
  void testStringConverterBlocked_WhenSchematizationDefault() {
    // The schematization key is intentionally left out of the configuration.
    ImmutableMap<String, String> outcome = validateWith(STRING_CONVERTER, null);

    assertFalse(
        outcome.isEmpty(),
        "StringConverter should be blocked when schematization defaults to true");
  }

  @Test
  void testJsonConverterAllowed_WhenSchematizationEnabled() {
    ImmutableMap<String, String> outcome = validateWith(JSON_CONVERTER, "true");

    assertTrue(outcome.isEmpty(), "JsonConverter should be allowed regardless of schematization");
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/FakeIngestClientSupplier.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientSupplier;
import java.util.Collection;
import java.util.concurrent.ConcurrentHashMap;

/**
 * {@link StreamingClientSupplier} for tests that hands out {@link
 * FakeSnowflakeStreamingIngestClient} instances, creating at most one client per pipe name and
 * returning the same instance on later requests for that pipe.
 */
public class FakeIngestClientSupplier implements StreamingClientSupplier {

  private final ConcurrentHashMap<String, FakeSnowflakeStreamingIngestClient>
      pipeToIngestClientMap = new ConcurrentHashMap<>();

  // volatile: written by the thread configuring the test and read by whichever thread requests a
  // client, so the latest value must be visible across threads.
  private volatile long preExistingErrorCount = 0;

  /**
   * Returns the fake client registered for {@code pipeName}, creating it on first use.
   *
   * <p>A newly created client receives the currently configured pre-existing error count as its
   * default error count. The {@code dbName}, {@code schemaName} and {@code
   * streamingClientProperties} arguments are ignored, and {@code clientName} is only used when the
   * client is created.
   */
  @Override
  public SnowflakeStreamingIngestClient get(
      final String clientName,
      final String dbName,
      final String schemaName,
      final String pipeName,
      final StreamingClientProperties streamingClientProperties) {
    return pipeToIngestClientMap.computeIfAbsent(
        pipeName,
        (key) -> {
          final FakeSnowflakeStreamingIngestClient client =
              new FakeSnowflakeStreamingIngestClient(pipeName, clientName);
          client.setDefaultErrorCount(preExistingErrorCount);
          return client;
        });
  }

  /** Returns a live view of all fake clients created so far. */
  public Collection<FakeSnowflakeStreamingIngestClient> getFakeIngestClients() {
    return pipeToIngestClientMap.values();
  }

  /**
   * Sets the pre-existing error count that will be applied to all channels when they are opened.
   * This simulates the cumulative error count that persists in Snowflake across connector restarts.
   */
  public void setPreExistingErrorCount(final long errorCount) {
    this.preExistingErrorCount = errorCount;
    // Also update existing clients
    for (final FakeSnowflakeStreamingIngestClient client : pipeToIngestClientMap.values()) {
      client.setDefaultErrorCount(errorCount);
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/FakeSnowflakeStreamingIngestChannel.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static java.util.List.copyOf;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException;
import java.util.function.Predicate;

/**
 * In-memory test double for {@link SnowflakeStreamingIngestChannel}.
 *
 * <p>Appended rows are kept in a list, and the status returned by {@link #getChannelStatus()} is
 * assembled from fields that tests can change through {@link #updateErrors}, {@link
 * #setErrorCount} and {@link #setOffsetToken}. Every method that reads or writes this mutable state
 * is synchronized on the channel, so values written by one thread are visible to status reads on
 * another and a status snapshot never mixes fields from a half-applied update.
 */
public class FakeSnowflakeStreamingIngestChannel
    implements SnowflakeStreamingIngestChannel, Comparable<FakeSnowflakeStreamingIngestChannel> {

  private final String databaseName;
  private final String schemaName;
  private final String pipeName;
  private final String channelName;
  private final List<Map<String, Object>> appendedRows;

  private volatile boolean closed;
  // The fields below are guarded by this channel's monitor.
  private String offsetToken;
  private String statusCode = "SUCCESS";
  private long rowsInsertedCount;
  private long rowsParsedCount;
  private long rowsErrorCount;
  private String lastErrorOffsetTokenUpperBound;
  private String lastErrorMessage;
  private Instant lastErrorTimestamp;
  private Duration serverAvgProcessingLatency;

  /**
   * Creates a channel in database {@code "db"} and schema {@code "schema"}.
   *
   * @param pipeName pipe the channel belongs to
   * @param channelName name of the channel
   * @param parentClient not used; kept so existing callers keep compiling
   */
  public FakeSnowflakeStreamingIngestChannel(
      final String pipeName,
      final String channelName,
      final FakeSnowflakeStreamingIngestClient parentClient) {
    this("db", "schema", pipeName, channelName);
  }

  /** Creates an open channel with no appended rows and no offset token. */
  public FakeSnowflakeStreamingIngestChannel(
      final String databaseName,
      final String schemaName,
      final String pipeName,
      final String channelName) {
    this.databaseName = databaseName;
    this.schemaName = schemaName;
    this.pipeName = pipeName;
    this.channelName = channelName;
    this.appendedRows = new ArrayList<>();
  }

  @Override
  public String getDBName() {
    return databaseName;
  }

  @Override
  public String getSchemaName() {
    return schemaName;
  }

  @Override
  public String getPipeName() {
    return pipeName;
  }

  @Override
  public String getFullyQualifiedPipeName() {
    throw new UnsupportedOperationException();
  }

  /** Returns the plain channel name; this fake does not prefix it with database or schema. */
  @Override
  public String getFullyQualifiedChannelName() {
    return channelName;
  }

  @Override
  public boolean isClosed() {
    return closed;
  }

  @Override
  public String getChannelName() {
    return channelName;
  }

  /** Marks the channel as closed. Appended rows are kept. */
  @Override
  public void close() {
    this.closed = true;
  }

  /** Same as {@link #close()}; both arguments are ignored. */
  @Override
  public void close(final boolean waitForFlush, final Duration timeoutDuration)
      throws TimeoutException {
    this.close();
  }

  /** Records the row and makes {@code offsetToken} the channel's current offset token. */
  @Override
  public synchronized void appendRow(final Map<String, Object> row, final String offsetToken) {
    this.appendedRows.add(row);
    this.offsetToken = offsetToken;
  }

  /**
   * Records all rows and makes {@code endOffsetToken} the channel's current offset token. {@code
   * startOffsetToken} is ignored.
   */
  @Override
  public synchronized void appendRows(
      final Iterable<Map<String, Object>> rows,
      final String startOffsetToken,
      final String endOffsetToken) {

    for (Map<String, Object> row : rows) {
      this.appendedRows.add(row);
    }
    this.offsetToken = endOffsetToken;
  }

  /** Returns the offset token most recently set by an append or by {@link #setOffsetToken}. */
  @Override
  public synchronized String getLatestCommittedOffsetToken() {
    return offsetToken;
  }

  /**
   * Returns a snapshot of the channel's current status fields.
   *
   * <p>Synchronized so that the snapshot is consistent with concurrent {@link #updateErrors} and
   * append calls.
   */
  @Override
  public synchronized ChannelStatus getChannelStatus() {
    return new ChannelStatus(
        databaseName,
        schemaName,
        pipeName,
        channelName,
        statusCode,
        offsetToken,
        Instant.now(),
        rowsInsertedCount,
        rowsParsedCount,
        rowsErrorCount,
        lastErrorOffsetTokenUpperBound,
        lastErrorMessage,
        lastErrorTimestamp,
        serverAvgProcessingLatency,
        Instant.now());
  }

  /**
   * Replaces the reported error information and stamps the last error time with the current time.
   *
   * @param errorCount new value of the error row count (absolute, not an increment)
   * @param lastErrorMessage message to report as the last error
   * @param lastErrorOffsetTokenUpperBound offset token upper bound to report for the last error
   */
  public synchronized void updateErrors(
      long errorCount, String lastErrorMessage, String lastErrorOffsetTokenUpperBound) {
    this.rowsErrorCount = errorCount;
    this.lastErrorMessage = lastErrorMessage;
    this.lastErrorOffsetTokenUpperBound = lastErrorOffsetTokenUpperBound;
    this.lastErrorTimestamp = Instant.now();
  }

  /** Sets the reported error row count without touching the other error fields. */
  public synchronized void setErrorCount(final long errorCount) {
    this.rowsErrorCount = errorCount;
  }

  /** Overrides the channel's current offset token. */
  public synchronized void setOffsetToken(final String offsetToken) {
    this.offsetToken = offsetToken;
  }

  @Override
  public CompletableFuture<Void> waitForCommit(
      final Predicate<String> tokenChecker, final Duration timeoutDuration) {
    throw new UnsupportedOperationException();
  }

  @Override
  public CompletableFuture<Void> waitForFlush(final Duration timeoutDuration) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void initiateFlush() {
    throw new UnsupportedOperationException();
  }

  /** Returns the number of rows appended so far. */
  public synchronized int getAppendedRowsCount() {
    return this.appendedRows.size();
  }

  /** Returns an immutable copy of the rows appended so far. */
  public synchronized List<Map<String, Object>> getAppendedRows() {
    return copyOf(appendedRows);
  }

  /** Orders channels by channel name. */
  @Override
  public int compareTo(final FakeSnowflakeStreamingIngestChannel o) {
    return this.channelName.compareTo(o.getChannelName());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/FakeSnowflakeStreamingIngestClient.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.ChannelStatusBatch;
import com.snowflake.ingest.streaming.OpenChannelResult;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;

/**
 * In-memory test double for {@link SnowflakeStreamingIngestClient}.
 *
 * <p>Channels opened through {@link #openChannel(String, String)} are kept by name so tests can
 * inspect them, and {@link #getChannelStatus(List)} reports the status of those channels. Methods
 * that the fake does not support throw {@link UnsupportedOperationException}.
 */
public class FakeSnowflakeStreamingIngestClient implements SnowflakeStreamingIngestClient {

  private final String pipeName;
  private final String clientName;
  private final Map<String, FakeSnowflakeStreamingIngestChannel> openedChannels =
      new ConcurrentHashMap<>();
  private final Map<String, String> channelNameToOffsetTokens = new ConcurrentHashMap<>();
  // Shared error counts per channel name - persists across channel reopens like real Snowflake
  private final Map<String, Long> channelNameToErrorCount = new ConcurrentHashMap<>();
  // Default error count to use when no channel-specific count is set.
  // volatile: may be set by one thread and read by the thread that opens channels.
  private volatile long defaultErrorCount = 0;
  // volatile: close() and isClosed() may be called from different threads.
  private volatile boolean closed = false;

  public FakeSnowflakeStreamingIngestClient(final String pipeName, final String clientName) {
    this.pipeName = pipeName;
    this.clientName = clientName;
  }

  /** Sets the error count given to channels that have no channel-specific count. */
  public void setDefaultErrorCount(final long errorCount) {
    this.defaultErrorCount = errorCount;
  }

  /** Pre-seeds the error count used the first time {@code channelName} is opened. */
  public void setInitialErrorCountForChannel(final String channelName, final long errorCount) {
    channelNameToErrorCount.put(channelName, errorCount);
  }

  /** Returns the pre-seeded error count for the channel, or the default when none was seeded. */
  public long getErrorCountForChannel(final String channelName) {
    return channelNameToErrorCount.getOrDefault(channelName, defaultErrorCount);
  }

  /** Marks the client as closed. Opened channels are left untouched. */
  @Override
  public void close() {
    this.closed = true;
  }

  @Override
  public CompletableFuture<Void> close(final boolean waitForFlush, final Duration timeoutDuration) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void initiateFlush() {
    throw new UnsupportedOperationException();
  }

  @Override
  public OpenChannelResult openChannel(final String channelName) {
    throw new UnsupportedOperationException();
  }

  /**
   * Opens (or reopens) a channel and registers it under its name, replacing any channel previously
   * registered under that name.
   *
   * @param channelName name of the channel to open
   * @param offsetToken initial offset token of the new channel; may be {@code null}
   * @return the new channel together with its initial status
   */
  @Override
  public OpenChannelResult openChannel(final String channelName, final String offsetToken) {
    if (offsetToken != null) {
      channelNameToOffsetTokens.put(channelName, offsetToken);
    }
    // Error counts persist across channel reopens, like real Snowflake.
    // Use the existing channel's count if present, otherwise fall back to pre-seeded or default.
    FakeSnowflakeStreamingIngestChannel previous = openedChannels.get(channelName);
    final long errorCount =
        previous != null
            ? previous.getChannelStatus().getRowsErrorCount()
            : channelNameToErrorCount.getOrDefault(channelName, defaultErrorCount);
    final FakeSnowflakeStreamingIngestChannel channel =
        new FakeSnowflakeStreamingIngestChannel("db", "schema", pipeName, channelName);
    channel.setOffsetToken(offsetToken);
    channel.setErrorCount(errorCount);
    openedChannels.put(channel.getFullyQualifiedChannelName(), channel);
    return new OpenChannelResult(channel, channel.getChannelStatus());
  }

  @Override
  public void dropChannel(final String channelName) {
    throw new UnsupportedOperationException();
  }

  @Override
  public Map<String, String> getLatestCommittedOffsetTokens(final List<String> channelNames) {
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the current status of every requested channel that has been opened on this client.
   * Names of channels that were never opened are left out of the result.
   */
  @Override
  public ChannelStatusBatch getChannelStatus(final List<String> channelNames) {
    Map<String, ChannelStatus> statusMap = new HashMap<>();
    for (String name : channelNames) {
      FakeSnowflakeStreamingIngestChannel channel = openedChannels.get(name);
      if (channel != null) {
        statusMap.put(name, channel.getChannelStatus());
      }
    }
    return new ChannelStatusBatch(statusMap);
  }

  /** Returns {@code true} once {@link #close()} has been called. */
  @Override
  public boolean isClosed() {
    return closed;
  }

  @Override
  public CompletableFuture<Void> waitForFlush(final Duration timeoutDuration) {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getDBName() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getSchemaName() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getPipeName() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String getClientName() {
    return clientName;
  }

  /** Returns a copy of the list of channels registered on this client, closed ones included. */
  public List<FakeSnowflakeStreamingIngestChannel> getOpenedChannels() {
    return new ArrayList<>(openedChannels.values());
  }

  /** Returns how many of the registered channels have been closed. */
  public long countClosedChannels() {
    return openedChannels.values().stream()
        .filter(FakeSnowflakeStreamingIngestChannel::isClosed)
        .count();
  }

  /** Returns the total number of rows appended to the currently registered channels. */
  public int getAppendedRowCount() {
    return openedChannels.values().stream()
        .mapToInt(FakeSnowflakeStreamingIngestChannel::getAppendedRowsCount)
        .sum();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/InMemorySinkTaskContext.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.ErrantRecordReporter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;

/* In memory implementation of SinkTaskContext used for testing */
public class InMemorySinkTaskContext implements SinkTaskContext {
  private final Map<TopicPartition, Long> offsets = new HashMap<>();
  // Last timeout requested through timeout(long); stored only, never read by this class.
  private long timeoutMs = -1L;
  private final Set<TopicPartition> assignment;

  /**
   * @param assignment the partitions reported by {@link #assignment()}; stored by reference, not
   *     copied
   */
  public InMemorySinkTaskContext(Set<TopicPartition> assignment) {
    this.assignment = assignment;
  }

  /** Not supported by this in-memory context. */
  @Override
  public Map<String, String> configs() {
    throw new UnsupportedOperationException();
  }

  /** Records the requested offsets, overwriting earlier entries for the same partitions. */
  @Override
  public void offset(Map<TopicPartition, Long> offsets) {
    this.offsets.putAll(offsets);
  }

  /** Records the requested offset for a single partition. */
  @Override
  public void offset(TopicPartition tp, long offset) {
    this.offsets.put(tp, offset);
  }

  /** Returns the last offset set for the given partition, or -1 if not set. */
  public long offset(TopicPartition tp) {
    return this.offsets.getOrDefault(tp, -1L);
  }

  @Override
  public void timeout(long timeoutMs) {
    this.timeoutMs = timeoutMs;
  }

  @Override
  public Set<TopicPartition> assignment() {
    return this.assignment;
  }

  /** No-op. */
  @Override
  public void pause(TopicPartition... partitions) {}

  /** No-op. */
  @Override
  public void resume(TopicPartition... partitions) {}

  /** No-op. */
  @Override
  public void requestCommit() {}

  /**
   * Returns a reporter that discards every record and reports success.
   *
   * <p>The returned future is already completed with {@code null}. No executor is created per
   * report, so reporting many records does not leave idle thread pools behind.
   */
  @Override
  public ErrantRecordReporter errantRecordReporter() {
    return new ErrantRecordReporter() {
      @Override
      public Future<Void> report(SinkRecord record, Throwable error) {
        return CompletableFuture.completedFuture(null);
      }
    };
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/OpenChannelRetryPolicyTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;

import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import dev.failsafe.function.CheckedSupplier;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;

/**
 * Unit tests for {@code OpenChannelRetryPolicy.executeWithRetry}: the supplier's result is
 * returned on success, exceptions other than the HTTP 429 {@link SFException} are rethrown after a
 * single attempt, and 429 failures are retried until the supplier succeeds.
 */
public class OpenChannelRetryPolicyTest {

  private static final String EXCEPTION_429_MSG =
      "Open channel request failed: HTTP Status: 429 ErrorBody: {\n"
          + "\"status_code\" : 87,\n"
          + "\"message\" : \"Cannot open channel at this time due to a high number of pending"
          + " open channel requests on the table.\"\n"
          + "}.";

  @Mock private SnowflakeStreamingIngestChannel mockChannel;

  private final String channelName = "test_channel";

  @BeforeEach
  void setUp() {
    // NOTE(review): initMocks is deprecated in newer Mockito releases in favour of openMocks;
    // confirm the project's Mockito version before migrating.
    MockitoAnnotations.initMocks(this);
  }

  /** A supplier that succeeds immediately yields its channel unchanged. */
  @Test
  void shouldReturnChannelOnFirstAttemptSuccess() {
    // Given
    CheckedSupplier<SnowflakeStreamingIngestChannel> supplier = () -> mockChannel;

    // When
    SnowflakeStreamingIngestChannel result =
        OpenChannelRetryPolicy.executeWithRetry(supplier, channelName);

    // Then
    assertSame(mockChannel, result);
  }

  /** An exception that is not an {@link SFException} is rethrown as-is after one attempt. */
  @Test
  void shouldNotRetryOnNonSFException() {
    // Given
    IllegalArgumentException nonRetryableException = new IllegalArgumentException("Non-retryable");
    AtomicInteger attemptCount = new AtomicInteger(0);
    CheckedSupplier<SnowflakeStreamingIngestChannel> supplier =
        () -> {
          attemptCount.incrementAndGet();
          throw nonRetryableException;
        };

    // When/Then
    IllegalArgumentException thrownException =
        assertThrows(
            IllegalArgumentException.class,
            () -> OpenChannelRetryPolicy.executeWithRetry(supplier, channelName));

    assertSame(nonRetryableException, thrownException);
    // Without this check the test would also pass if the policy retried before giving up
    assertEquals(1, attemptCount.get()); // Should only attempt once
  }

  /** An {@link SFException} built with status 400 is rethrown as-is after one attempt. */
  @Test
  void shouldNotRetryOnSFExceptionWithout429() {
    // Given
    SFException nonRetryableException =
        new SFException("OPEN_CHANNEL_FAILURE", "Some other error", 400, "BAD_REQUEST");
    AtomicInteger attemptCount = new AtomicInteger(0);
    CheckedSupplier<SnowflakeStreamingIngestChannel> supplier =
        () -> {
          attemptCount.incrementAndGet();
          throw nonRetryableException;
        };

    // When/Then
    SFException thrownException =
        assertThrows(
            SFException.class,
            () -> OpenChannelRetryPolicy.executeWithRetry(supplier, channelName));

    assertSame(nonRetryableException, thrownException);
    assertEquals(1, attemptCount.get()); // Should only attempt once
  }

  /** Two consecutive 429 failures are retried and the third, successful attempt is returned. */
  @Test
  void shouldRetryMultipleTimesOn429Exception() {
    // Given
    SFException exception429 =
        new SFException("INTERNAL_ERROR", EXCEPTION_429_MSG, 429, "TOO_MANY_REQUESTS");
    AtomicInteger attemptCount = new AtomicInteger(0);

    CheckedSupplier<SnowflakeStreamingIngestChannel> supplier =
        () -> {
          int attempt = attemptCount.incrementAndGet();
          if (attempt <= 2) {
            throw exception429; // Fail first 2 attempts with 429
          }
          return mockChannel; // Succeed on 3rd attempt
        };

    // When
    SnowflakeStreamingIngestChannel result =
        OpenChannelRetryPolicy.executeWithRetry(supplier, channelName);

    // Then
    assertSame(mockChannel, result);
    assertEquals(3, attemptCount.get()); // Verify it retried 2 times before succeeding
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2AvroSchematizationIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;
import static com.snowflake.kafka.connector.internal.TestUtils.getTableContentOneRow;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration test that round-trips a Connect {@link Struct} through an {@link AvroConverter},
 * inserts the resulting record via the sink service, and checks both the resulting table schema
 * and the content of the single ingested row.
 */
public class SnowflakeSinkServiceV2AvroSchematizationIT {

  private static final int PARTITION = 0;
  private static final int START_OFFSET = 0;

  private static final String ID_INT8 = "ID_INT8";
  private static final String ID_INT8_OPTIONAL = "ID_INT8_OPTIONAL";
  private static final String ID_INT16 = "ID_INT16";
  private static final String ID_INT32 = "ID_INT32";
  private static final String ID_INT64 = "ID_INT64";
  private static final String FIRST_NAME = "FIRST_NAME";
  private static final String RATING_FLOAT32 = "RATING_FLOAT32";
  private static final String FLOAT_NAN = "FLOAT_NAN";
  private static final String FLOAT_POSITIVE_INFINITY = "FLOAT_POSITIVE_INFINITY";
  private static final String FLOAT_NEGATIVE_INFINITY = "FLOAT_NEGATIVE_INFINITY";
  private static final String RATING_FLOAT64 = "RATING_FLOAT64";
  private static final String APPROVAL = "APPROVAL";
  private static final String INFO_ARRAY_STRING = "INFO_ARRAY_STRING";
  private static final String INFO_ARRAY_INT = "INFO_ARRAY_INT";
  private static final String INFO_ARRAY_JSON = "INFO_ARRAY_JSON";
  private static final String INFO_MAP = "INFO_MAP";
  private static final String RECORD_METADATA = "RECORD_METADATA";

  /** Column name to expected column type, as passed to {@code TestUtils.checkTableSchema}. */
  private static final Map<String, String> EXPECTED_AVRO_SCHEMA = buildExpectedAvroSchema();

  private String table;
  private SnowflakeConnectionService conn;
  private String topic;
  private TopicPartition topicPartition;

  private SnowflakeSinkService service;

  /**
   * Builds the expected-schema map with plain {@code put} calls rather than double-brace
   * initialization, which would create an anonymous {@link HashMap} subclass.
   */
  private static Map<String, String> buildExpectedAvroSchema() {
    Map<String, String> expected = new HashMap<>();
    expected.put(ID_INT8, "NUMBER");
    expected.put(ID_INT8_OPTIONAL, "NUMBER");
    expected.put(ID_INT16, "NUMBER");
    expected.put(ID_INT32, "NUMBER");
    expected.put(ID_INT64, "NUMBER");
    expected.put(FIRST_NAME, "VARCHAR");
    expected.put(RATING_FLOAT32, "FLOAT");
    expected.put(FLOAT_NAN, "FLOAT");
    expected.put(FLOAT_POSITIVE_INFINITY, "FLOAT");
    expected.put(FLOAT_NEGATIVE_INFINITY, "FLOAT");
    expected.put(RATING_FLOAT64, "FLOAT");
    expected.put(APPROVAL, "BOOLEAN");
    expected.put(INFO_ARRAY_STRING, "ARRAY");
    expected.put(INFO_ARRAY_INT, "ARRAY");
    expected.put(INFO_ARRAY_JSON, "ARRAY");
    expected.put(INFO_MAP, "VARIANT");
    expected.put(RECORD_METADATA, "VARIANT");
    return expected;
  }

  @BeforeEach
  void before() {
    table = TestUtils.randomTableName();
    // The topic shares the table's name in this test
    topic = table;
    conn = TestUtils.getConnectionServiceWithEncryptedKey();
    topicPartition = new TopicPartition(topic, PARTITION);
  }

  @AfterEach
  void after() {
    try {
      // service stays null when the test fails before createService() returns
      if (service != null) {
        service.closeAll();
      }
    } finally {
      // Remove the table created by the test so repeated runs do not accumulate tables
      TestUtils.dropTable(table);
    }
  }

  /**
   * Inserts one Avro-converted record into a table that initially has only the metadata column and
   * verifies the resulting column types and row values.
   */
  @Test
  public void testSchematizationWithTableCreationAndAvroInput() throws Exception {
    // given
    conn.createTableWithOnlyMetadataColumn(table);
    SinkRecord avroRecordValue = createSinkRecord();
    service = createService();

    // when
    service.insert(Collections.singletonList(avroRecordValue));
    assertWithRetry(() -> TestUtils.getNumberOfRows(table) == 1);

    // then
    TestUtils.checkTableSchema(table, EXPECTED_AVRO_SCHEMA);

    Map<String, Object> actual = getTableContentOneRow(table);
    Assertions.assertEquals(0L, actual.get(ID_INT8));
    Assertions.assertNull(actual.get(ID_INT8_OPTIONAL));
    Assertions.assertEquals(42L, actual.get(ID_INT16));
    Assertions.assertEquals(42L, actual.get(ID_INT32));
    Assertions.assertEquals(42L, actual.get(ID_INT64));
    Assertions.assertEquals("zekai", actual.get(FIRST_NAME));
    Assertions.assertEquals(0.99, ((Number) actual.get(RATING_FLOAT32)).doubleValue(), 0.001);
    Assertions.assertTrue(
        Double.isNaN(((Number) actual.get(FLOAT_NAN)).doubleValue()),
        "Expected NaN for " + FLOAT_NAN);
    Assertions.assertTrue(
        Double.isInfinite(((Number) actual.get(FLOAT_POSITIVE_INFINITY)).doubleValue())
            && ((Number) actual.get(FLOAT_POSITIVE_INFINITY)).doubleValue() > 0,
        "Expected +Infinity for " + FLOAT_POSITIVE_INFINITY);
    Assertions.assertTrue(
        Double.isInfinite(((Number) actual.get(FLOAT_NEGATIVE_INFINITY)).doubleValue())
            && ((Number) actual.get(FLOAT_NEGATIVE_INFINITY)).doubleValue() < 0,
        "Expected -Infinity for " + FLOAT_NEGATIVE_INFINITY);
    Assertions.assertEquals(0.99, ((Number) actual.get(RATING_FLOAT64)).doubleValue(), 0.001);
    Assertions.assertEquals(true, actual.get(APPROVAL));
    Assertions.assertEquals(
        "[\"a\",\"b\"]", StringUtils.deleteWhitespace(actual.get(INFO_ARRAY_STRING).toString()));
    Assertions.assertEquals(
        "[1,2]", StringUtils.deleteWhitespace(actual.get(INFO_ARRAY_INT).toString()));
    Assertions.assertEquals(
        "[null,\"{\\\"a\\\":1,\\\"b\\\":null,\\\"c\\\":null,\\\"d\\\":\\\"89asda9s0a\\\"}\"]",
        StringUtils.deleteWhitespace(actual.get(INFO_ARRAY_JSON).toString()));
    Assertions.assertEquals(
        "{\"field\":3}", StringUtils.deleteWhitespace(actual.get(INFO_MAP).toString()));
  }

  /** Builds the sink service, starts the test partition and waits for initialization. */
  private SnowflakeSinkService createService() {
    Map<String, String> config = prepareConfig();
    SinkTaskConfig sinkTaskConfig = SinkTaskConfig.from(config);
    SnowflakeSinkService sinkService =
        StreamingSinkServiceBuilder.builder(conn, sinkTaskConfig)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    sinkService.startPartition(topicPartition);
    sinkService.awaitInitialization();
    return sinkService;
  }

  /**
   * Serializes the test struct to Avro bytes and converts it back, so the record carries the
   * schema and value produced by the converter.
   */
  private SinkRecord createSinkRecord() {
    Schema schema = prepareSchema();
    Struct data = prepareData(schema);
    AvroConverter avroConverter = prepareAvroConverter();

    byte[] converted = avroConverter.fromConnectData(topic, data.schema(), data);

    SchemaAndValue avroInputValue = avroConverter.toConnectData(topic, converted);

    return new SinkRecord(
        topic,
        PARTITION,
        Schema.STRING_SCHEMA,
        "test",
        avroInputValue.schema(),
        avroInputValue.value(),
        START_OFFSET);
  }

  /** Creates a value converter backed by a {@link MockSchemaRegistryClient}. */
  private AvroConverter prepareAvroConverter() {
    SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient();
    AvroConverter avroConverter = new AvroConverter(schemaRegistry);
    avroConverter.configure(
        Collections.singletonMap("schema.registry.url", "http://fake-url"), false);
    return avroConverter;
  }

  /** Connector configuration for streaming with the Avro value converter settings applied. */
  private Map<String, String> prepareConfig() {
    Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(false);
    config.put(
        KafkaConnectorConfigParams.VALUE_CONVERTER, "io.confluent.connect.avro.AvroConverter");
    config.put(KafkaConnectorConfigParams.VALUE_CONVERTER_SCHEMA_REGISTRY_URL, "http://fake-url");
    // Schema type inference assertions depend on client-side validation behavior
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "client_side");
    ConnectorConfigTools.setDefaultValues(config);
    return config;
  }

  /** Struct schema with one field per column checked by the test (except the metadata column). */
  private Schema prepareSchema() {
    SchemaBuilder schemaBuilder =
        SchemaBuilder.struct()
            .field(ID_INT8, Schema.INT8_SCHEMA)
            .field(ID_INT8_OPTIONAL, Schema.OPTIONAL_INT8_SCHEMA)
            .field(ID_INT16, Schema.INT16_SCHEMA)
            .field(ID_INT32, Schema.INT32_SCHEMA)
            .field(ID_INT64, Schema.INT64_SCHEMA)
            .field(FIRST_NAME, Schema.STRING_SCHEMA)
            .field(RATING_FLOAT32, Schema.FLOAT32_SCHEMA)
            .field(FLOAT_NAN, Schema.FLOAT32_SCHEMA)
            .field(FLOAT_POSITIVE_INFINITY, Schema.FLOAT32_SCHEMA)
            .field(FLOAT_NEGATIVE_INFINITY, Schema.FLOAT32_SCHEMA)
            .field(RATING_FLOAT64, Schema.FLOAT64_SCHEMA)
            .field(APPROVAL, Schema.BOOLEAN_SCHEMA)
            .field(INFO_ARRAY_STRING, SchemaBuilder.array(Schema.STRING_SCHEMA).build())
            .field(INFO_ARRAY_INT, SchemaBuilder.array(Schema.INT32_SCHEMA).build())
            .field(INFO_ARRAY_JSON, SchemaBuilder.array(Schema.OPTIONAL_STRING_SCHEMA).build())
            .field(INFO_MAP, SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build());
    return schemaBuilder.build();
  }

  /** Test values for every field except ID_INT8_OPTIONAL, which is deliberately left unset. */
  private Struct prepareData(Schema schema) {
    return new Struct(schema)
        .put(ID_INT8, (byte) 0)
        .put(ID_INT16, (short) 42)
        .put(ID_INT32, 42)
        .put(ID_INT64, 42L)
        .put(FIRST_NAME, "zekai")
        .put(RATING_FLOAT32, 0.99f)
        .put(FLOAT_NAN, Float.NaN)
        .put(FLOAT_POSITIVE_INFINITY, Float.POSITIVE_INFINITY)
        .put(FLOAT_NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY)
        .put(RATING_FLOAT64, 0.99d)
        .put(APPROVAL, true)
        .put(INFO_ARRAY_STRING, Arrays.asList("a", "b"))
        .put(INFO_ARRAY_INT, Arrays.asList(1, 2))
        .put(
            INFO_ARRAY_JSON,
            Arrays.asList(null, "{\"a\": 1, \"b\": null, \"c\": null, \"d\": \"89asda9s0a\"}"))
        .put(INFO_MAP, Collections.singletonMap("field", 3));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2BaseIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.kafka.connector.internal.TestUtils;
import org.apache.kafka.common.TopicPartition;

/**
 * Shared fixture values (table name, topic name, partition numbers and the matching {@link
 * TopicPartition} objects) for test classes that extend this class.
 *
 * <p>The fields are initialized in declaration order: {@code topic} is derived from {@code table},
 * and both {@code TopicPartition} fields are derived from {@code topic}.
 */
public abstract class SnowflakeSinkServiceV2BaseIT {

  // Generated through TestUtils.randomTableName() once per instance of the test class
  protected final String table = TestUtils.randomTableName();

  // Partition numbers used to build topicPartition and topicPartition2 below
  protected final int partition = 0;
  protected final int partition2 = 1;

  // Topic name should be same as table name. (Only for testing, not necessarily in real deployment)
  protected String topic = table;
  // Partitions 0 and 1 of the test topic
  protected TopicPartition topicPartition = new TopicPartition(topic, partition);
  protected TopicPartition topicPartition2 = new TopicPartition(topic, partition2);
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2IT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static com.snowflake.kafka.connector.internal.streaming.v2.service.PartitionChannelManager.makeChannelName;

import com.codahale.metrics.Gauge;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.metrics.MetricsUtil;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import io.confluent.connect.avro.AvroConverter;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.storage.Converter;
import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

public class SnowflakeSinkServiceV2IT extends SnowflakeSinkServiceV2BaseIT {

  // Connection obtained from TestUtils.getConnectionServiceWithEncryptedKey() when the test
  // instance is created
  private final SnowflakeConnectionService conn = TestUtils.getConnectionServiceWithEncryptedKey();
  // Assigned in setup() before each test; tests call build() on it to obtain a SinkTaskConfig
  private SinkTaskConfig.Builder configBuilder;
  // Assigned the table name in setup() and passed to TestUtils.dropPipe in afterEach()
  private String pipe;

  /**
   * Prepares a fresh config builder with validation set to {@code SERVER_SIDE} and records the pipe
   * name, which equals the table name.
   */
  @BeforeEach
  public void setup() {
    configBuilder =
        SinkTaskConfig.builderFrom(TestUtils.getConnectorConfigurationForStreaming(true))
            .validation(SnowflakeValidation.SERVER_SIDE);
    pipe = table;
  }

  /**
   * Drops the table and the pipe used by the test. The pipe drop is attempted even when dropping
   * the table throws, so a failure in the first cleanup step does not leave the pipe behind.
   */
  @AfterEach
  public void afterEach() {
    try {
      TestUtils.dropTable(table);
    } finally {
      TestUtils.dropPipe(pipe);
    }
  }

  /**
   * Closes the partition before any record is inserted, then inserts one record and waits for the
   * service to report offset 1 for that partition.
   */
  @Test
  public void testChannelCloseIngestion() throws Exception {
    final InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    final SnowflakeSinkService sinkService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();
    // opens a channel for partition 0, table and topic
    sinkService.startPartition(topicPartition);
    sinkService.awaitInitialization();

    final byte[] payload = "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8);
    final SchemaAndValue value = buildJsonConverter().toConnectData(topic, payload);
    final long firstOffset = 0;
    final SinkRecord firstRecord =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + firstOffset,
            value.schema(),
            value.value(),
            firstOffset);

    // Closing a partition == closing a channel
    sinkService.close(Collections.singletonList(topicPartition));

    // Insert while the partition is closed; the channel is expected to be created again
    sinkService.insert(firstRecord);

    TestUtils.assertWithRetry(() -> sinkService.getOffset(topicPartition) == 1, 5, 20);

    sinkService.closeAll();
  }

  /**
   * Creates a {@link JsonConverter} configured with {@code schemas.enable=false} and {@code
   * isKey=true}.
   *
   * @return the configured converter
   */
  private static @NotNull Converter buildJsonConverter() {
    final HashMap<String, String> settings = new HashMap<>();
    settings.put("schemas.enable", "false");
    final Converter jsonConverter = new JsonConverter();
    jsonConverter.configure(settings, true);
    return jsonConverter;
  }

  /**
   * Inserts a record, closes the partition, inserts the same record again and waits for the
   * service to report offset 1 for that partition.
   */
  @Test
  public void testRebalanceOpenCloseIngestion() throws Exception {
    final InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    final SnowflakeSinkService sinkService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();
    // opens a channel for partition 0, table and topic
    sinkService.startPartition(topicPartition);
    sinkService.awaitInitialization();

    final byte[] payload = "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8);
    final SchemaAndValue value = buildJsonConverter().toConnectData(topic, payload);
    final long firstOffset = 0;
    final SinkRecord firstRecord =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test_key" + firstOffset,
            value.schema(),
            value.value(),
            firstOffset);

    sinkService.insert(firstRecord);

    // Closing a partition == closing a channel
    sinkService.close(Collections.singletonList(topicPartition));

    // The duplicate is expected to be skipped since offset token 0 is fetched from Snowflake
    sinkService.insert(firstRecord);

    TestUtils.assertWithRetry(() -> sinkService.getOffset(topicPartition) == 1, 5, 20);

    sinkService.closeAll();
  }

  /**
   * Inserts one record and waits for offset 1, then inserts two more records as a batch and waits
   * for offset 3.
   */
  @Test
  public void testStreamingIngestion() throws Exception {
    final InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    final SnowflakeSinkService sinkService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();
    // opens a channel for partition 0, table and topic
    sinkService.startPartition(topicPartition);
    sinkService.awaitInitialization();

    final byte[] payload = "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8);
    final SchemaAndValue value = buildJsonConverter().toConnectData(topic, payload);

    // Three records with offsets 0..2 and keys test_key0..test_key2, all sharing one value
    final List<SinkRecord> prepared = new ArrayList<>();
    for (long kafkaOffset = 0; kafkaOffset < 3; kafkaOffset++) {
      prepared.add(
          new SinkRecord(
              topic,
              partition,
              Schema.STRING_SCHEMA,
              "test_key" + kafkaOffset,
              value.schema(),
              value.value(),
              kafkaOffset));
    }

    sinkService.insert(prepared.get(0));

    TestUtils.assertWithRetry(() -> sinkService.getOffset(topicPartition) == 1, 5, 20);

    // insert the remaining offsets as a batch and check what we committed
    sinkService.insert(Arrays.asList(prepared.get(1), prepared.get(2)));
    TestUtils.assertWithRetry(() -> sinkService.getOffset(topicPartition) == 3, 5, 20);

    sinkService.closeAll();
  }

  /**
   * Ingests records into two partitions with a JMX metrics reporter attached and verifies the row
   * count, the per-partition offsets, the registered gauges/counters and the telemetry calls made
   * when the partitions start and when the service is closed.
   *
   * <p>Checks use JUnit assertions rather than the {@code assert} keyword so that they run
   * regardless of whether the JVM was started with assertions enabled.
   */
  @Test
  public void testStreamingIngest_multipleChannelPartitions_withMetrics() throws Exception {
    // set up telemetry service spy
    SnowflakeConnectionService connectionService = Mockito.spy(this.conn);
    SnowflakeTelemetryService telemetryService = Mockito.spy(this.conn.getTelemetryClient());
    Mockito.when(connectionService.getTelemetryClient()).thenReturn(telemetryService);

    // opens a channel for partition 0, table and topic
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(connectionService, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .withMetricsJmxReporter(
                new com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter(
                    new com.codahale.metrics.MetricRegistry(), TEST_CONNECTOR_NAME))
            .build();

    final TopicPartition secondTopicPartition = new TopicPartition(topic, partition2);
    service.startPartition(topicPartition);
    service.startPartition(secondTopicPartition);
    service.awaitInitialization();

    final int recordsInPartition1 = 2;
    final int recordsInPartition2 = 5;
    List<SinkRecord> recordsPartition1 =
        TestUtils.createJsonStringSinkRecords(0, recordsInPartition1, topic, partition);

    List<SinkRecord> recordsPartition2 =
        TestUtils.createJsonStringSinkRecords(0, recordsInPartition2, topic, partition2);

    List<SinkRecord> records = new ArrayList<>(recordsPartition1);
    records.addAll(recordsPartition2);

    service.insert(records);

    TestUtils.assertWithRetry(
        () -> {
          // This is how we will trigger flush. (Mimicking poll API)
          service.insert(new ArrayList<>()); // trigger time based flush
          return TestUtils.tableSize(table) == recordsInPartition1 + recordsInPartition2;
        },
        10,
        20);

    TestUtils.assertWithRetry(
        () -> service.getOffset(topicPartition) == recordsInPartition1, 5, 20);
    TestUtils.assertWithRetry(
        () -> service.getOffset(secondTopicPartition) == recordsInPartition2, 20, 5);

    final String firstChannelName = makeChannelName(TEST_CONNECTOR_NAME, topic, partition);
    final String secondChannelName = makeChannelName(TEST_CONNECTOR_NAME, topic, partition2);

    // verify all metrics (gauges + counters)
    Map<String, Gauge> gaugeMetrics = service.getMetricRegistry(firstChannelName).get().getGauges();
    long totalMetrics =
        gaugeMetrics.size() + service.getMetricRegistry(firstChannelName).get().getCounters().size();
    long expectedMetrics = SnowflakeTelemetryChannelStatus.NUM_METRICS * 2; // two partitions
    org.junit.jupiter.api.Assertions.assertEquals(
        expectedMetrics, totalMetrics, "Unexpected number of registered gauges and counters");

    // partition 1
    verifyPartitionMetrics(
        gaugeMetrics, firstChannelName, recordsInPartition1 - 1, recordsInPartition1 - 1);
    // partition 2
    verifyPartitionMetrics(
        gaugeMetrics, secondChannelName, recordsInPartition2 - 1, recordsInPartition2 - 1);

    // verify telemetry
    Mockito.verify(telemetryService, Mockito.times(2))
        .reportKafkaPartitionStart(Mockito.any(SnowflakeTelemetryChannelCreation.class));

    service.closeAll();

    // verify metrics closed
    org.junit.jupiter.api.Assertions.assertFalse(
        service.getMetricRegistry(firstChannelName).isPresent(),
        "Metric registry should no longer be present after closeAll()");

    Mockito.verify(telemetryService, Mockito.times(2))
        .reportKafkaPartitionUsage(
            Mockito.any(SnowflakeTelemetryChannelStatus.class), Mockito.eq(true));
  }

  /**
   * Asserts the values of the two offset gauges registered for a channel.
   *
   * <p>Uses JUnit assertions instead of the {@code assert} keyword so the checks are not skipped
   * when the JVM runs with assertions disabled, and reports a missing gauge by name instead of
   * failing with a {@link NullPointerException}.
   *
   * @param metricRegistry gauges keyed by metric name
   * @param partitionChannelKey channel name used to build the metric names
   * @param offsetPersistedInSnowflake expected value of the OFFSET_PERSISTED_IN_SNOWFLAKE gauge
   * @param processedOffset expected value of the PROCESSED_OFFSET gauge
   */
  private void verifyPartitionMetrics(
      Map<String, Gauge> metricRegistry,
      String partitionChannelKey,
      long offsetPersistedInSnowflake,
      long processedOffset) {
    // offsets
    String persistedMetricName =
        MetricsUtil.channelMetricName(
            partitionChannelKey,
            MetricsUtil.OFFSET_SUB_DOMAIN,
            MetricsUtil.OFFSET_PERSISTED_IN_SNOWFLAKE);
    Gauge persistedGauge = metricRegistry.get(persistedMetricName);
    org.junit.jupiter.api.Assertions.assertNotNull(
        persistedGauge, "Missing gauge " + persistedMetricName);
    org.junit.jupiter.api.Assertions.assertEquals(
        offsetPersistedInSnowflake,
        (long) persistedGauge.getValue(),
        "Unexpected value for gauge " + persistedMetricName);

    String processedMetricName =
        MetricsUtil.channelMetricName(
            partitionChannelKey, MetricsUtil.OFFSET_SUB_DOMAIN, MetricsUtil.PROCESSED_OFFSET);
    Gauge processedGauge = metricRegistry.get(processedMetricName);
    org.junit.jupiter.api.Assertions.assertNotNull(
        processedGauge, "Missing gauge " + processedMetricName);
    org.junit.jupiter.api.Assertions.assertEquals(
        processedOffset,
        (long) processedGauge.getValue(),
        "Unexpected value for gauge " + processedMetricName);
  }

  /**
   * Maps three randomly named topics onto the single test table, ingests two records into each of
   * three partitions per topic, then waits for the expected table size and per-partition offsets.
   */
  @Test
  public void testStreamingIngest_multipleChannelPartitionsWithTopic2Table() throws Exception {
    final int partitionCount = 3;
    final int recordsInEachPartition = 2;
    final int topicCount = 3;

    // Every generated topic name is routed to the same table
    final Map<String, String> topic2Table = new HashMap<>();
    final ArrayList<String> topicNames = new ArrayList<>();
    for (int i = 0; i < topicCount; i++) {
      final String generatedName = TestUtils.randomTableName();
      topicNames.add(generatedName);
      topic2Table.put(generatedName, table);
    }
    configBuilder.topicToTableMap(topic2Table);

    final InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    final SnowflakeSinkService sinkService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();

    for (int topicIdx = 0; topicIdx < topicCount; topicIdx++) {
      final String currentTopic = topicNames.get(topicIdx);

      for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
        sinkService.startPartition(new TopicPartition(currentTopic, partitionId));
      }
      sinkService.awaitInitialization();

      final List<SinkRecord> batch = new ArrayList<>();
      for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
        batch.addAll(
            TestUtils.createJsonStringSinkRecords(
                0, recordsInEachPartition, currentTopic, partitionId));
      }

      sinkService.insert(batch);
    }

    TestUtils.assertWithRetry(
        () -> TestUtils.tableSize(table) == recordsInEachPartition * partitionCount * topicCount,
        10,
        20);

    for (int topicIdx = 0; topicIdx < topicCount; topicIdx++) {
      final int capturedTopicIdx = topicIdx;

      for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
        final int capturedPartitionId = partitionId;
        TestUtils.assertWithRetry(
            () ->
                sinkService.getOffset(
                        new TopicPartition(topicNames.get(capturedTopicIdx), capturedPartitionId))
                    == recordsInEachPartition,
            20,
            5);
      }
    }

    sinkService.closeAll();
  }

  /**
   * Starts five partitions of the test topic with a single {@code startPartitions} call, ingests
   * two records per partition, then waits for the expected table size and per-partition offsets.
   */
  @Test
  public void testStreamingIngest_startPartitionsWithMultipleChannelPartitions() throws Exception {
    final int partitionCount = 5;
    final int recordsInEachPartition = 2;

    final ArrayList<TopicPartition> allPartitions = new ArrayList<>();
    for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
      allPartitions.add(new TopicPartition(topic, partitionId));
    }
    configBuilder.topicToTableMap(Collections.singletonMap(topic, table));

    final InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    final SnowflakeSinkService sinkService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();

    sinkService.startPartitions(allPartitions);
    sinkService.awaitInitialization();

    final List<SinkRecord> batch = new ArrayList<>();
    for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
      batch.addAll(
          TestUtils.createJsonStringSinkRecords(0, recordsInEachPartition, topic, partitionId));
    }

    sinkService.insert(batch);

    TestUtils.assertWithRetry(
        () -> {
          sinkService.insert(new ArrayList<>()); // trigger time based flush
          return TestUtils.tableSize(table) == recordsInEachPartition * partitionCount;
        },
        10,
        20);

    for (int partitionId = 0; partitionId < partitionCount; partitionId++) {
      final int capturedPartitionId = partitionId;
      TestUtils.assertWithRetry(
          () ->
              sinkService.getOffset(new TopicPartition(topic, capturedPartitionId))
                  == recordsInEachPartition,
          20,
          5);
    }

    sinkService.closeAll();
  }

  /**
   * Inserts four JSON-converted records (schemaless value, value with schema, schemaless key, key
   * with schema) at consecutive offsets and waits for the partition offset to move past the last
   * one.
   */
  @Test
  public void testNativeJsonInputIngestion() throws Exception {
    final Map<String, String> schemasDisabled =
        Collections.singletonMap("schemas.enable", "false");
    final Map<String, String> schemasEnabled = Collections.singletonMap("schemas.enable", "true");
    final byte[] jsonWithoutSchema =
        TestUtils.JSON_WITHOUT_SCHEMA.getBytes(StandardCharsets.UTF_8);
    final byte[] jsonWithSchema = TestUtils.JSON_WITH_SCHEMA.getBytes(StandardCharsets.UTF_8);

    // json without schema, converted as a value
    JsonConverter schemalessValueConverter = new JsonConverter();
    schemalessValueConverter.configure(schemasDisabled, false);
    SchemaAndValue noSchemaInputValue =
        schemalessValueConverter.toConnectData(topic, jsonWithoutSchema);

    // json without schema, converted as a key
    JsonConverter schemalessKeyConverter = new JsonConverter();
    schemalessKeyConverter.configure(schemasDisabled, true);
    SchemaAndValue noSchemaInputKey =
        schemalessKeyConverter.toConnectData(topic, jsonWithoutSchema);

    // json with schema, converted as a value
    JsonConverter schemaValueConverter = new JsonConverter();
    schemaValueConverter.configure(schemasEnabled, false);
    SchemaAndValue schemaInputValue = schemaValueConverter.toConnectData(topic, jsonWithSchema);

    // json with schema, converted as a key
    JsonConverter schemaKeyConverter = new JsonConverter();
    schemaKeyConverter.configure(schemasEnabled, true);
    SchemaAndValue schemaInputKey = schemaKeyConverter.toConnectData(topic, jsonWithSchema);

    final long startOffset = 0;
    final long endOffset = 3;

    // Records with a plain string key and a converted value.
    SinkRecord noSchemaRecordValue =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            noSchemaInputValue.schema(),
            noSchemaInputValue.value(),
            startOffset);
    SinkRecord schemaRecordValue =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            schemaInputValue.schema(),
            schemaInputValue.value(),
            startOffset + 1);

    // Records with a converted key; both reuse the value converted with schema.
    SinkRecord noSchemaRecordKey =
        new SinkRecord(
            topic,
            partition,
            noSchemaInputKey.schema(),
            noSchemaInputKey.value(),
            schemaInputValue.schema(),
            schemaInputValue.value(),
            startOffset + 2);
    SinkRecord schemaRecordKey =
        new SinkRecord(
            topic,
            partition,
            schemaInputKey.schema(),
            schemaInputKey.value(),
            schemaInputValue.schema(),
            schemaInputValue.value(),
            startOffset + 3);

    InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    service.insert(noSchemaRecordValue);
    service.insert(schemaRecordValue);
    service.insert(noSchemaRecordKey);
    service.insert(schemaRecordKey);

    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == endOffset + 1, 5, 20);

    service.closeAll();
  }

  /**
   * Round-trips a wide Connect struct through an {@code AvroConverter} backed by a mock schema
   * registry, once as a value and once as a key, then inserts three records built from the results
   * and waits for the partition offset to move past the last one.
   */
  @Test
  public void testNativeAvroInputIngestion() throws Exception {
    // Schema covering numeric, boolean, string, bytes, optional, array and map fields.
    SchemaBuilder structSchema =
        SchemaBuilder.struct()
            .field("int8", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build())
            .field("int16", Schema.INT16_SCHEMA)
            .field("int32", Schema.INT32_SCHEMA)
            .field("int64", Schema.INT64_SCHEMA)
            .field("float32", Schema.FLOAT32_SCHEMA)
            .field("float64", Schema.FLOAT64_SCHEMA)
            .field("int8Min", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build())
            .field("int16Min", Schema.INT16_SCHEMA)
            .field("int32Min", Schema.INT32_SCHEMA)
            .field("int64Min", Schema.INT64_SCHEMA)
            .field("float32Min", Schema.FLOAT32_SCHEMA)
            .field("float64Min", Schema.FLOAT64_SCHEMA)
            .field("int8Max", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build())
            .field("int16Max", Schema.INT16_SCHEMA)
            .field("int32Max", Schema.INT32_SCHEMA)
            .field("int64Max", Schema.INT64_SCHEMA)
            .field("float32Max", Schema.FLOAT32_SCHEMA)
            .field("float64Max", Schema.FLOAT64_SCHEMA)
            .field("float64HighPrecision", Schema.FLOAT64_SCHEMA)
            .field("float64TenDigits", Schema.FLOAT64_SCHEMA)
            .field("float64BigDigits", Schema.FLOAT64_SCHEMA)
            .field("boolean", Schema.BOOLEAN_SCHEMA)
            .field("string", Schema.STRING_SCHEMA)
            .field("bytes", Schema.BYTES_SCHEMA)
            .field("bytesReadOnly", Schema.BYTES_SCHEMA)
            .field("int16Optional", Schema.OPTIONAL_INT16_SCHEMA)
            .field("int32Optional", Schema.OPTIONAL_INT32_SCHEMA)
            .field("int64Optional", Schema.OPTIONAL_INT64_SCHEMA)
            .field("float32Optional", Schema.OPTIONAL_FLOAT32_SCHEMA)
            .field("float64Optional", Schema.OPTIONAL_FLOAT64_SCHEMA)
            .field("booleanOptional", Schema.OPTIONAL_BOOLEAN_SCHEMA)
            .field("stringOptional", Schema.OPTIONAL_STRING_SCHEMA)
            .field("bytesOptional", Schema.OPTIONAL_BYTES_SCHEMA)
            .field("array", SchemaBuilder.array(Schema.STRING_SCHEMA).build())
            .field("map", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build())
            .field(
                "int8Optional",
                SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build())
            .field(
                "mapNonStringKeys",
                SchemaBuilder.map(Schema.INT32_SCHEMA, Schema.INT32_SCHEMA).build())
            .field(
                "mapArrayMapInt",
                SchemaBuilder.map(
                        Schema.STRING_SCHEMA,
                        SchemaBuilder.array(
                                SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA)
                                    .build())
                            .build())
                    .build());

    // Optional fields and "int8Optional" are intentionally left unset.
    Struct original =
        new Struct(structSchema.build())
            .put("int8", (byte) 12)
            .put("int16", (short) 12)
            .put("int32", 12)
            .put("int64", 12L)
            .put("float32", 12.2f)
            .put("float64", 12.2)
            .put("int8Min", Byte.MIN_VALUE)
            .put("int16Min", Short.MIN_VALUE)
            .put("int32Min", Integer.MIN_VALUE)
            .put("int64Min", Long.MIN_VALUE)
            .put("float32Min", Float.MIN_VALUE)
            .put("float64Min", Double.MIN_VALUE)
            .put("int8Max", Byte.MAX_VALUE)
            .put("int16Max", Short.MAX_VALUE)
            .put("int32Max", Integer.MAX_VALUE)
            .put("int64Max", Long.MAX_VALUE)
            .put("float32Max", Float.MAX_VALUE)
            .put("float64Max", Double.MAX_VALUE)
            .put("float64HighPrecision", 2312.4200000000001d)
            .put("float64TenDigits", 1.0d / 3.0d)
            .put("float64BigDigits", 2312.42321432655123456d)
            .put("boolean", true)
            .put("string", "foo")
            .put("bytes", ByteBuffer.wrap("foo".getBytes()))
            .put("bytesReadOnly", ByteBuffer.wrap("foo".getBytes()).asReadOnlyBuffer())
            .put("array", Arrays.asList("a", "b", "c"))
            .put("map", Collections.singletonMap("field", 1))
            .put("mapNonStringKeys", Collections.singletonMap(1, 1))
            .put(
                "mapArrayMapInt",
                Collections.singletonMap(
                    "field",
                    Arrays.asList(
                        Collections.singletonMap("field", 1),
                        Collections.singletonMap("field", 1))));

    // Both converters share one mock registry; the URL is never contacted.
    SchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient();
    Map<String, String> registryConfig =
        Collections.singletonMap("schema.registry.url", "http://fake-url");

    AvroConverter valueConverter = new AvroConverter(schemaRegistry);
    valueConverter.configure(registryConfig, false);
    byte[] serializedValue = valueConverter.fromConnectData(topic, original.schema(), original);
    SchemaAndValue avroInputValue = valueConverter.toConnectData(topic, serializedValue);

    AvroConverter keyConverter = new AvroConverter(schemaRegistry);
    keyConverter.configure(registryConfig, true);
    byte[] serializedKey = keyConverter.fromConnectData(topic, original.schema(), original);
    SchemaAndValue avroInputKey = keyConverter.toConnectData(topic, serializedKey);

    final long startOffset = 0;
    final long endOffset = 2;

    // Avro value with a plain string key.
    SinkRecord avroRecordValue =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            avroInputValue.schema(),
            avroInputValue.value(),
            startOffset);

    // Avro key with a plain string value.
    SinkRecord avroRecordKey =
        new SinkRecord(
            topic,
            partition,
            avroInputKey.schema(),
            avroInputKey.value(),
            Schema.STRING_SCHEMA,
            "test",
            startOffset + 1);

    // Key-converted Avro data used for both key and value.
    SinkRecord avroRecordKeyValue =
        new SinkRecord(
            topic,
            partition,
            avroInputKey.schema(),
            avroInputKey.value(),
            avroInputKey.schema(),
            avroInputKey.value(),
            startOffset + 2);

    configBuilder.tolerateErrors(true);
    configBuilder.dlqTopicName("DLQ_TOPIC");
    configBuilder.errorsLogEnable(true);

    InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));
    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    service.insert(avroRecordValue);
    service.insert(avroRecordKey);
    service.insert(avroRecordKeyValue);

    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == endOffset + 1, 5, 20);

    service.closeAll();
  }

  /**
   * Inserts three records whose declared schema (INT32) does not match the actual payload (a
   * string) in the value, the key, or both. With error tolerance and a DLQ topic configured, the
   * test expects all three to be handed to the error reporter, no offset to be registered, and the
   * table to stay empty.
   */
  @Test
  public void testBrokenIngestion() throws Exception { // Mismatched schema and value
    SchemaAndValue brokenInputValue = new SchemaAndValue(Schema.INT32_SCHEMA, "error");

    long startOffset = 0;

    SinkRecord brokenValue =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            brokenInputValue.schema(),
            brokenInputValue.value(),
            startOffset);

    SinkRecord brokenKey =
        new SinkRecord(
            topic,
            partition,
            brokenInputValue.schema(),
            brokenInputValue.value(),
            Schema.STRING_SCHEMA,
            "test",
            startOffset + 1);

    SinkRecord brokenKeyValue =
        new SinkRecord(
            topic,
            partition,
            brokenInputValue.schema(),
            brokenInputValue.value(),
            brokenInputValue.schema(),
            brokenInputValue.value(),
            startOffset + 2);

    configBuilder.tolerateErrors(true);
    configBuilder.dlqTopicName("DLQ_TOPIC");
    configBuilder.errorsLogEnable(true);

    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();

    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .withErrorReporter(errorReporter)
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    service.insert(brokenValue);
    service.insert(brokenKey);
    service.insert(brokenKeyValue);

    TestUtils.assertWithRetry(
        () -> service.getOffset(topicPartition) == NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE, 5, 20);

    List<InMemoryKafkaRecordErrorReporter.ReportedRecord> reportedData =
        errorReporter.getReportedRecords();

    assert reportedData.size() == 3;
    assert TestUtils.tableSize(table) == 0
        : "expected: " + 0 + " actual: " + TestUtils.tableSize(table);

    // Close the service like every sibling test does, so the started partition is not left open.
    service.closeAll();
  }

  /**
   * Inserts two records with a mismatched schema/payload (one broken value, one broken key)
   * followed by one valid record. Expects the offset to advance past all three, exactly the two
   * broken records to reach the error reporter, and one row to land in the table.
   */
  @Test
  public void testBrokenRecordIngestionFollowedUpByValidRecord()
      throws Exception { // Mismatched schema and value
    SchemaAndValue mismatched = new SchemaAndValue(Schema.INT32_SCHEMA, "error");

    // offset 0: broken value, no key
    SinkRecord brokenValue =
        new SinkRecord(topic, partition, null, null, mismatched.schema(), mismatched.value(), 0);

    // offset 1: broken key, no value
    SinkRecord brokenKey =
        new SinkRecord(topic, partition, mismatched.schema(), mismatched.value(), null, null, 1);

    // offset 2: schemaless, well-formed record
    SinkRecord correctValue =
        new SinkRecord(topic, partition, null, "key1", null, Map.of("name", "john"), 2);

    configBuilder.tolerateErrors(true);
    configBuilder.dlqTopicName("DLQ_TOPIC");
    configBuilder.errorsLogEnable(true);

    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();
    InMemorySinkTaskContext taskContext =
        new InMemorySinkTaskContext(Collections.singleton(topicPartition));

    SnowflakeSinkService service =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(taskContext)
            .withErrorReporter(errorReporter)
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    service.insert(brokenValue);
    service.insert(brokenKey);
    service.insert(correctValue);

    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == 3, 5, 20);

    List<InMemoryKafkaRecordErrorReporter.ReportedRecord> reportedData =
        errorReporter.getReportedRecords();

    assert reportedData.size() == 2;
    assert TestUtils.tableSize(table) == 1
        : "expected: " + 1 + " actual: " + TestUtils.tableSize(table);

    service.closeAll();
  }

  /* Service start -> Insert -> Close. service start -> fetch the offsetToken, compare and ingest check data */

  /**
   * Inserts one record at offset 0 through a first service instance, closes it, then inserts one
   * record at offset 1 through a second instance. Expects two rows in the table and the second
   * service to report offset 2.
   */
  @Test
  public void testStreamingIngestionWithExactlyOnceSemanticsNoOverlappingOffsets()
      throws Exception {
    final long firstOffset = 0;
    final long secondOffset = 1;

    SnowflakeSinkService firstService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    firstService.startPartition(topicPartition);
    firstService.awaitInitialization();

    Converter converter = buildJsonConverter();
    byte[] payload = "{\"name\":\"test\"}".getBytes(StandardCharsets.UTF_8);
    SchemaAndValue input = converter.toConnectData(topic, payload);

    // First record, sent through the first service.
    SinkRecord record1 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            input.schema(),
            input.value(),
            firstOffset);
    firstService.insert(record1);

    TestUtils.assertWithRetry(() -> firstService.getOffset(topicPartition) == 1, 5, 20);
    // wait for ingest
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == 1, 30, 20);

    firstService.closeAll();

    // A fresh service instance continues with the next offset.
    SnowflakeSinkService secondService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    secondService.startPartition(topicPartition);
    secondService.awaitInitialization();

    SinkRecord record2 =
        new SinkRecord(
            topic,
            partition,
            Schema.STRING_SCHEMA,
            "test",
            input.schema(),
            input.value(),
            secondOffset);
    secondService.insert(record2);

    // wait for ingest
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == 2, 30, 20);

    assert secondService.getOffset(topicPartition) == secondOffset + 1;

    secondService.closeAll();
  }

  /* Service start -> Insert -> Close. service start -> fetch the offsetToken, compare and ingest check data */
  /**
   * Inserts a first batch of records through one service instance, closes it, then sends a second
   * batch whose starting offset lies inside the first batch through a new instance. Expects the
   * table size and the final offset to equal {@code totalRecordsExpected}.
   */
  @Test
  public void testStreamingIngestionWithExactlyOnceSemanticsOverlappingOffsets() throws Exception {
    final long noOfRecords = 10;
    final long startOffsetAlreadyInserted = 5;
    // NOTE(review): presumably only the not-yet-ingested tail of the second batch is added;
    // confirm against TestUtils.createJsonStringSinkRecords and the channel's offset handling.
    final long totalRecordsExpected = noOfRecords + (noOfRecords - startOffsetAlreadyInserted);

    SnowflakeSinkService firstService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    firstService.startPartition(topicPartition);
    firstService.awaitInitialization();

    // send regular data
    List<SinkRecord> firstBatch =
        TestUtils.createJsonStringSinkRecords(0, noOfRecords, topic, partition);
    firstService.insert(firstBatch);

    TestUtils.assertWithRetry(
        () -> firstService.getOffset(topicPartition) == noOfRecords, 5, 20);

    // wait for ingest
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == noOfRecords, 30, 20);

    firstService.closeAll();

    // initialize a new sink service
    SnowflakeSinkService secondService =
        StreamingSinkServiceBuilder.builder(conn, configBuilder.build())
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    secondService.startPartition(topicPartition);
    secondService.awaitInitialization();

    List<SinkRecord> overlappingBatch =
        TestUtils.createJsonStringSinkRecords(
            startOffsetAlreadyInserted, noOfRecords, topic, partition);
    secondService.insert(overlappingBatch);

    // wait for ingest
    TestUtils.assertWithRetry(() -> TestUtils.tableSize(table) == totalRecordsExpected, 30, 20);

    assert secondService.getOffset(topicPartition) == totalRecordsExpected;

    secondService.closeAll();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2SchematizationIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration test checking that a JSON record which cannot be schematized is reported to the
 * error reporter (DLQ) instead of being ingested.
 */
public class SnowflakeSinkServiceV2SchematizationIT extends SnowflakeSinkServiceV2BaseIT {

  private final SnowflakeConnectionService conn = TestUtils.getConnectionService();
  private SinkTaskConfig sinkTaskConfig;
  // Assigned inside each test; stays null if the test fails before the service is built.
  private SnowflakeSinkService service;
  private String pipe;

  /** Builds an error-tolerant sink task config with a DLQ topic and records the pipe name. */
  @BeforeEach
  public void setup() {
    Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(false);
    sinkTaskConfig =
        SinkTaskConfig.builderFrom(config).tolerateErrors(true).dlqTopicName("dlq_topic").build();
    pipe = table;
  }

  /**
   * Closes the service (when one was created) and drops the table and pipe used by the test. The
   * drops run even when closing the service throws, so a failing test does not leave them behind.
   */
  @AfterEach
  public void teardown() {
    try {
      // Guard against an NPE that would otherwise mask the original test failure.
      if (service != null) {
        service.closeAll();
      }
    } finally {
      TestUtils.dropTable(table);
      TestUtils.dropPipe(pipe);
    }
  }

  @Test
  public void snowflakeSinkTask_put_whenJsonRecordCannotBeSchematized_sendRecordToDLQ() {
    // given
    conn.createTableWithOnlyMetadataColumn(table);

    InMemoryKafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();

    service =
        StreamingSinkServiceBuilder.builder(conn, sinkTaskConfig)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .withErrorReporter(errorReporter)
            .build();
    service.startPartition(topicPartition);
    service.awaitInitialization();

    // Create a record that cannot be schematized (array at root level)
    String notSchematizeableJsonRecord = "[{\"name\":\"sf\",\"answer\":42}]";
    SinkRecord record = createKafkaRecordWithoutSchema(notSchematizeableJsonRecord, 0);

    // when
    service.insert(record);

    // then
    Assertions.assertEquals(1, errorReporter.getReportedRecords().size());
  }

  /**
   * Helper method to create a Kafka record from a JSON string.
   *
   * @param jsonPayload JSON text to convert into the record value
   * @param offset Kafka offset assigned to the record
   * @param withSchema value used for the converter's {@code schemas.enable} setting
   * @return a sink record for the test topic/partition with key {@code "test"}
   */
  private SinkRecord createKafkaRecord(String jsonPayload, long offset, boolean withSchema) {
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", String.valueOf(withSchema));
    jsonConverter.configure(converterConfig, false);

    byte[] valueBytes = jsonPayload.getBytes(StandardCharsets.UTF_8);
    SchemaAndValue schemaAndValue = jsonConverter.toConnectData(topic, valueBytes);

    return SinkRecordBuilder.forTopicPartition(topic, partition)
        .withSchemaAndValue(schemaAndValue)
        .withOffset(offset)
        .withKey("test")
        .build();
  }

  /**
   * Convenience method to create a Kafka record from JSON without schema (schemas.enable = false)
   */
  private SinkRecord createKafkaRecordWithoutSchema(String jsonPayload, long offset) {
    return createKafkaRecord(jsonPayload, offset, false);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2Test.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.snowflake.ingest.streaming.SFException;
import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.BackpressureException;
import com.snowflake.kafka.connector.internal.streaming.v2.service.BatchOffsetFetcher;
import com.snowflake.kafka.connector.internal.streaming.v2.service.PartitionChannelManager;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;

class SnowflakeSinkServiceV2Test {

  private static final String TOPIC = "test_topic";
  private static final String CONNECTOR_NAME = "test_connector";

  private PartitionChannelManager mockChannelManager;
  private BatchOffsetFetcher mockBatchOffsetFetcher;
  private SinkTaskContext mockSinkTaskContext;
  private SnowflakeSinkServiceV2 service;

  /**
   * Creates fresh mocks for the collaborators and builds the service under test around them. The
   * connection mock reports itself as open.
   */
  @BeforeEach
  void setUp() {
    SnowflakeConnectionService openConnection = mock(SnowflakeConnectionService.class);
    when(openConnection.isClosed()).thenReturn(false);

    mockSinkTaskContext = mock(SinkTaskContext.class);
    mockBatchOffsetFetcher = mock(BatchOffsetFetcher.class);
    mockChannelManager = mock(PartitionChannelManager.class);

    // The suppliers read the fields, so they hand out the mocks created above.
    service =
        new SnowflakeSinkServiceV2(
            openConnection,
            SinkTaskConfigTestBuilder.builder().connectorName(CONNECTOR_NAME).taskId("0").build(),
            mockSinkTaskContext,
            Optional.empty(),
            () -> mockBatchOffsetFetcher,
            () -> mockChannelManager,
            TaskMetrics.noop());
  }

  /** Calls {@code ThreadPools.closeForTask} for the connector name used by these tests. */
  @AfterEach
  void tearDown() {
    ThreadPools.closeForTask(CONNECTOR_NAME);
  }

  // --- insert() skip logic ---

  /**
   * A record for a channel created with {@code mockChannel(..., true)} must not be inserted;
   * instead the task context is asked to set the partition offset to that record's offset.
   */
  @Test
  void insertSkipsRecordsForInitializingPartitions() {
    TopicPartition partition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel initializingChannel = mockChannel("ch_0", true);
    when(mockChannelManager.getChannel(partition)).thenReturn(Optional.of(initializingChannel));

    SinkRecord skipped = recordFor(TOPIC, 0, 10);
    service.insert(Collections.singletonList(skipped));

    verify(mockSinkTaskContext).offset(partition, 10);
    verify(initializingChannel, never()).insertRecord(any(), anyBoolean());
  }

  /**
   * A record for a channel created with {@code mockChannel(..., false)} is passed to the channel,
   * and the task context offset is never touched.
   */
  @Test
  void insertProcessesRecordsForReadyPartitions() {
    TopicPartition partition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel readyChannel = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(partition)).thenReturn(Optional.of(readyChannel));

    SinkRecord accepted = recordFor(TOPIC, 0, 5);
    service.insert(Collections.singletonList(accepted));

    verify(mockSinkTaskContext, never()).offset(any(TopicPartition.class), any(Long.class));
    verify(readyChannel).insertRecord(accepted, true);
  }

  /**
   * With one initializing and one ready partition in the same batch, only the ready channel
   * receives its record and only the initializing partition gets an offset reset.
   */
  @Test
  void insertHandlesMixOfInitializingAndReadyPartitions() {
    TopicPartition tpInit = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel initChannel = mockChannel("ch_0", true);
    when(mockChannelManager.getChannel(tpInit)).thenReturn(Optional.of(initChannel));

    TopicPartition tpReady = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel readyChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tpReady)).thenReturn(Optional.of(readyChannel));

    SinkRecord initRecord = recordFor(TOPIC, 0, 100);
    SinkRecord readyRecord = recordFor(TOPIC, 1, 200);
    List<SinkRecord> batch = Arrays.asList(initRecord, readyRecord);

    service.insert(batch);

    // Initializing partition: skipped and rewound.
    verify(initChannel, never()).insertRecord(any(), anyBoolean());
    verify(mockSinkTaskContext).offset(tpInit, 100);

    // Ready partition: inserted and left alone.
    verify(readyChannel).insertRecord(readyRecord, true);
    verify(mockSinkTaskContext, never()).offset(tpReady, 200);
  }

  /**
   * When several records of an initializing partition are skipped in one batch, the offset reset
   * is issued only for the first skipped record's offset.
   */
  @Test
  void insertResetsToFirstSkippedOffset() {
    TopicPartition partition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel initializingChannel = mockChannel("ch_0", true);
    when(mockChannelManager.getChannel(partition)).thenReturn(Optional.of(initializingChannel));

    SinkRecord first = recordFor(TOPIC, 0, 5);
    SinkRecord second = recordFor(TOPIC, 0, 6);
    SinkRecord third = recordFor(TOPIC, 0, 7);

    service.insert(Arrays.asList(first, second, third));

    verify(mockSinkTaskContext).offset(partition, 5);
    verify(mockSinkTaskContext, never()).offset(partition, 6);
    verify(mockSinkTaskContext, never()).offset(partition, 7);
  }

  // --- getCommittedOffsets() skip logic ---

  /**
   * Only the ready partition is forwarded to the batch offset fetcher; the fetcher's result is
   * returned unchanged.
   */
  @Test
  @SuppressWarnings("unchecked")
  void getCommittedOffsetsExcludesInitializingPartitions() {
    TopicPartition tpInit = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel initChannel = mockChannel("ch_0", true);
    when(mockChannelManager.getChannel(tpInit)).thenReturn(Optional.of(initChannel));

    TopicPartition tpReady = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel readyChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(tpReady)).thenReturn(Optional.of(readyChannel));

    Map<TopicPartition, Long> fetcherAnswer = new HashMap<>();
    fetcherAnswer.put(tpReady, 42L);
    when(mockBatchOffsetFetcher.getCommittedOffsets(any(), any(Function.class)))
        .thenReturn(fetcherAnswer);

    Set<TopicPartition> requested = new HashSet<>();
    requested.add(tpInit);
    requested.add(tpReady);

    Map<TopicPartition, Long> actual = service.getCommittedOffsets(requested);

    assertEquals(fetcherAnswer, actual);

    // Inspect which partitions were actually handed to the fetcher.
    ArgumentCaptor<Set<TopicPartition>> partitionsCaptor = ArgumentCaptor.forClass(Set.class);
    verify(mockBatchOffsetFetcher)
        .getCommittedOffsets(partitionsCaptor.capture(), any(Function.class));
    Set<TopicPartition> forwarded = partitionsCaptor.getValue();
    assertEquals(1, forwarded.size());
    assertTrue(forwarded.contains(tpReady));
  }

  /**
   * When every requested partition is initializing, the fetcher is still called, but with an empty
   * partition set, and the result is empty.
   */
  @Test
  @SuppressWarnings("unchecked")
  void getCommittedOffsetsReturnsEmptyWhenAllInitializing() {
    TopicPartition firstPartition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel firstChannel = mockChannel("ch_0", true);
    when(mockChannelManager.getChannel(firstPartition)).thenReturn(Optional.of(firstChannel));

    TopicPartition secondPartition = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel secondChannel = mockChannel("ch_1", true);
    when(mockChannelManager.getChannel(secondPartition)).thenReturn(Optional.of(secondChannel));

    when(mockBatchOffsetFetcher.getCommittedOffsets(any(), any(Function.class)))
        .thenReturn(Collections.emptyMap());

    Set<TopicPartition> requested = new HashSet<>();
    requested.add(firstPartition);
    requested.add(secondPartition);

    Map<TopicPartition, Long> actual = service.getCommittedOffsets(requested);

    assertTrue(actual.isEmpty());

    ArgumentCaptor<Set<TopicPartition>> partitionsCaptor = ArgumentCaptor.forClass(Set.class);
    verify(mockBatchOffsetFetcher)
        .getCommittedOffsets(partitionsCaptor.capture(), any(Function.class));
    assertTrue(partitionsCaptor.getValue().isEmpty());
  }

  // --- transition from initializing to ready ---

  /**
   * First delivery hits an initializing channel and is skipped with an offset reset. Once the
   * channel reports it is no longer initializing, the re-delivered record and its successor are
   * both inserted.
   */
  @Test
  void insertProcessesRecordsAfterChannelTransitionsFromInitializingToReady() {
    TopicPartition partition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel channel = mockChannel("ch_0", true);
    when(mockChannelManager.getChannel(partition)).thenReturn(Optional.of(channel));

    SinkRecord redelivered = recordFor(TOPIC, 0, 10);
    service.insert(Collections.singletonList(redelivered));

    verify(mockSinkTaskContext).offset(partition, 10);
    verify(channel, never()).insertRecord(any(), anyBoolean());

    // Channel finishes initializing — Kafka re-delivers from the rewound offset
    when(channel.isInitializing()).thenReturn(false);

    SinkRecord following = recordFor(TOPIC, 0, 11);
    service.insert(List.of(redelivered, following));

    // Expected flag values: true for the first record of the batch, false for the second.
    verify(channel).insertRecord(redelivered, true);
    verify(channel).insertRecord(following, false);
  }

  // --- startPartitions() pipe resolution (FR5) ---

  /**
   * With client validation enabled and both a table and a pipe named after the topic reported as
   * existing, {@code startPartitions} must fail with an exception whose message contains "0032".
   */
  @Test
  void startPartitionsThrowsWhenValidationEnabledAndNonDefaultPipeExists() {
    SnowflakeConnectionService connection = mock(SnowflakeConnectionService.class);
    when(connection.isClosed()).thenReturn(false);
    when(connection.pipeExist(TOPIC)).thenReturn(true);
    when(connection.tableExist(TOPIC)).thenReturn(true);

    SnowflakeSinkServiceV2 svc = buildService(connection, /* clientValidationEnabled= */ true);
    Set<TopicPartition> partitions = Set.of(new TopicPartition(TOPIC, 0));

    SnowflakeKafkaConnectorException thrown =
        assertThrows(
            SnowflakeKafkaConnectorException.class, () -> svc.startPartitions(partitions));

    assertTrue(thrown.getMessage().contains("0032"));
  }

  /**
   * With client validation enabled and no pipe named after the topic, the channel manager is
   * started with the topic mapped to {@code TOPIC + "-STREAMING"}.
   */
  @Test
  @SuppressWarnings("unchecked")
  void startPartitionsUsesDefaultPipeWhenValidationEnabledAndNoNonDefaultPipe() {
    SnowflakeConnectionService connection = mock(SnowflakeConnectionService.class);
    when(connection.isClosed()).thenReturn(false);
    when(connection.pipeExist(TOPIC)).thenReturn(false);
    when(connection.tableExist(TOPIC)).thenReturn(true);

    PartitionChannelManager channelMgr = mock(PartitionChannelManager.class);
    SnowflakeSinkServiceV2 svc =
        buildService(connection, /* clientValidationEnabled= */ true, channelMgr);

    svc.startPartitions(Set.of(new TopicPartition(TOPIC, 0)));

    // Capture the second argument passed to the channel manager and check its entry for TOPIC.
    ArgumentCaptor<Map<String, String>> pipeNamesCaptor = ArgumentCaptor.forClass(Map.class);
    verify(channelMgr).startPartitions(any(), pipeNamesCaptor.capture());
    String resolvedPipe = pipeNamesCaptor.getValue().get(TOPIC);
    assertEquals(TOPIC + "-STREAMING", resolvedPipe);
  }

  @Test
  @SuppressWarnings("unchecked")
  void startPartitionsUsesNonDefaultPipeWhenValidationDisabled() {
    // Arrange: both tableExist(TOPIC) and pipeExist(TOPIC) report true; validation is disabled.
    SnowflakeConnectionService connection = mock(SnowflakeConnectionService.class);
    when(connection.isClosed()).thenReturn(false);
    when(connection.tableExist(TOPIC)).thenReturn(true);
    when(connection.pipeExist(TOPIC)).thenReturn(true);

    PartitionChannelManager manager = mock(PartitionChannelManager.class);
    SnowflakeSinkServiceV2 sinkService =
        buildService(connection, /* clientValidationEnabled= */ false, manager);

    // Act
    sinkService.startPartitions(Set.of(new TopicPartition(TOPIC, 0)));

    // Assert: the map handed to the channel manager maps TOPIC to TOPIC itself.
    ArgumentCaptor<Map<String, String>> pipeMapCaptor = ArgumentCaptor.forClass(Map.class);
    verify(manager).startPartitions(any(), pipeMapCaptor.capture());
    Map<String, String> capturedMap = pipeMapCaptor.getValue();
    assertEquals(TOPIC, capturedMap.get(TOPIC));
  }

  // --- backpressure handling ---

  @Test
  void insertSkipsAllPartitionsAfterBackpressure() {
    // Two ready (non-initializing) channels, one per partition.
    TopicPartition partition0 = new TopicPartition(TOPIC, 0);
    TopicPartition partition1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel firstChannel = mockChannel("ch_0", false);
    TopicPartitionChannel secondChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(partition0)).thenReturn(Optional.of(firstChannel));
    when(mockChannelManager.getChannel(partition1)).thenReturn(Optional.of(secondChannel));

    // Any insert on partition 0's channel raises backpressure.
    BackpressureException backpressure =
        new BackpressureException(
            new SFException("MemoryThresholdExceeded", "backpressure", 0, ""));
    doThrow(backpressure).when(firstChannel).insertRecord(any(), anyBoolean());

    SinkRecord recordP0 = recordFor(TOPIC, 0, 100);
    SinkRecord recordP1 = recordFor(TOPIC, 1, 200);
    service.insert(Arrays.asList(recordP0, recordP1));

    // Partition 0 was attempted once; partition 1 must not be touched after the backpressure.
    verify(firstChannel).insertRecord(recordP0, true);
    verify(secondChannel, never()).insertRecord(any(), anyBoolean());

    // Each partition is rewound to the offset of its record in the batch.
    verify(mockSinkTaskContext).offset(partition0, 100L);
    verify(mockSinkTaskContext).offset(partition1, 200L);
  }

  @Test
  void insertSkipsRemainingRecordsForAllPartitionsAfterBackpressure() {
    TopicPartition partition0 = new TopicPartition(TOPIC, 0);
    TopicPartition partition1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel healthyChannel = mockChannel("ch_0", false);
    TopicPartitionChannel saturatedChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(partition0)).thenReturn(Optional.of(healthyChannel));
    when(mockChannelManager.getChannel(partition1)).thenReturn(Optional.of(saturatedChannel));

    // Only partition 1's channel raises backpressure.
    BackpressureException backpressure =
        new BackpressureException(new SFException("ReceiverSaturated", "backpressure", 0, ""));
    doThrow(backpressure).when(saturatedChannel).insertRecord(any(), anyBoolean());

    // Batch order: p0@100 (accepted), p1@200 (throws), p0@101 (expected to be skipped).
    SinkRecord p0First = recordFor(TOPIC, 0, 100);
    SinkRecord p1First = recordFor(TOPIC, 1, 200);
    SinkRecord p0Second = recordFor(TOPIC, 0, 101);
    service.insert(Arrays.asList(p0First, p1First, p0Second));

    verify(healthyChannel).insertRecord(p0First, true);
    verify(saturatedChannel).insertRecord(p1First, true);
    verify(healthyChannel, never()).insertRecord(p0Second, false);

    // p1 goes back to the record that hit backpressure, p0 to its first skipped record.
    verify(mockSinkTaskContext).offset(partition1, 200L);
    verify(mockSinkTaskContext).offset(partition0, 101L);
  }

  @Test
  void insertRewindsOnBackpressureWithInitializingPartitions() {
    // Partition 0's channel is still initializing; partition 1's channel is ready.
    TopicPartition pendingTp = new TopicPartition(TOPIC, 0);
    TopicPartition activeTp = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel pendingChannel = mockChannel("ch_0", true);
    TopicPartitionChannel activeChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(pendingTp)).thenReturn(Optional.of(pendingChannel));
    when(mockChannelManager.getChannel(activeTp)).thenReturn(Optional.of(activeChannel));

    // The ready channel raises backpressure on any insert.
    BackpressureException backpressure =
        new BackpressureException(
            new SFException("MemoryThresholdExceeded", "backpressure", 0, ""));
    doThrow(backpressure).when(activeChannel).insertRecord(any(), anyBoolean());

    SinkRecord pendingRecord = recordFor(TOPIC, 0, 100);
    SinkRecord activeRecord = recordFor(TOPIC, 1, 200);
    service.insert(Arrays.asList(pendingRecord, activeRecord));

    // The initializing channel is never written to; the ready one is attempted exactly once.
    verify(pendingChannel, never()).insertRecord(any(), anyBoolean());
    verify(activeChannel).insertRecord(activeRecord, true);

    // Both partitions are rewound to their first un-ingested offset.
    verify(mockSinkTaskContext).offset(pendingTp, 100L);
    verify(mockSinkTaskContext).offset(activeTp, 200L);
  }

  @Test
  void insertSetsCooldownAfterBackpressure() {
    TopicPartition tp0 = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel channel0 = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(tp0)).thenReturn(Optional.of(channel0));

    doThrow(
            new BackpressureException(
                new SFException("MemoryThresholdExceeded", "backpressure", 0, "")))
        .when(channel0)
        .insertRecord(any(), anyBoolean());

    // Reference instant taken before the insert so the check cannot race with the wall clock.
    Instant beforeInsert = Instant.now();
    service.insert(Collections.singletonList(recordFor(TOPIC, 0, 100)));

    // The cooldown deadline must lie strictly after the moment the insert started. The previous
    // check compared against (now - BACKPRESSURE_COOLDOWN), which also accepted deadlines that
    // were already in the past and therefore did not verify a "future" cooldown.
    assertTrue(service.backpressureUntil.isAfter(beforeInsert));
  }

  @Test
  void insertSkipsEntireBatchDuringCooldown() {
    TopicPartition partition0 = new TopicPartition(TOPIC, 0);
    TopicPartition partition1 = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel firstChannel = mockChannel("ch_0", false);
    TopicPartitionChannel secondChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(partition0)).thenReturn(Optional.of(firstChannel));
    when(mockChannelManager.getChannel(partition1)).thenReturn(Optional.of(secondChannel));

    // Put the service into an active cooldown window ending 30 seconds from now.
    Instant cooldownEnd = Instant.now().plusSeconds(30);
    service.backpressureUntil = cooldownEnd;

    SinkRecord recordP0 = recordFor(TOPIC, 0, 100);
    SinkRecord recordP1 = recordFor(TOPIC, 1, 200);
    service.insert(Arrays.asList(recordP0, recordP1));

    // While cooling down, neither channel may receive an insert.
    verify(firstChannel, never()).insertRecord(any(), anyBoolean());
    verify(secondChannel, never()).insertRecord(any(), anyBoolean());

    // Every partition in the batch is rewound to its record's offset.
    verify(mockSinkTaskContext).offset(partition0, 100L);
    verify(mockSinkTaskContext).offset(partition1, 200L);
  }

  @Test
  void insertResumesNormallyAfterCooldownExpires() {
    TopicPartition partition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel readyChannel = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(partition)).thenReturn(Optional.of(readyChannel));

    // A cooldown deadline one second in the past, i.e. already expired.
    Instant expiredDeadline = Instant.now().minusSeconds(1);
    service.backpressureUntil = expiredDeadline;

    SinkRecord onlyRecord = recordFor(TOPIC, 0, 100);
    service.insert(Collections.singletonList(onlyRecord));

    // The record reaches the channel and no offset rewind is requested.
    verify(readyChannel).insertRecord(any(), anyBoolean());
    verify(mockSinkTaskContext, never()).offset(any(TopicPartition.class), any(Long.class));
  }

  // --- recovery skip logic ---

  @Test
  void insertSkipsRemainingRecordsForPartitionAfterRecovery() {
    TopicPartition recoveringTp = new TopicPartition(TOPIC, 0);
    TopicPartition healthyTp = new TopicPartition(TOPIC, 1);
    TopicPartitionChannel recoveringChannel = mockChannel("ch_0", false);
    TopicPartitionChannel healthyChannel = mockChannel("ch_1", false);
    when(mockChannelManager.getChannel(recoveringTp)).thenReturn(Optional.of(recoveringChannel));
    when(mockChannelManager.getChannel(healthyTp)).thenReturn(Optional.of(healthyChannel));

    // Partition 0's channel answers false to every insert, which the service treats as recovery.
    when(recoveringChannel.insertRecord(any(), anyBoolean())).thenReturn(false);

    SinkRecord p0Trigger = recordFor(TOPIC, 0, 100);
    SinkRecord p1Only = recordFor(TOPIC, 1, 200);
    SinkRecord p0Second = recordFor(TOPIC, 0, 101);
    SinkRecord p0Third = recordFor(TOPIC, 0, 102);
    service.insert(Arrays.asList(p0Trigger, p1Only, p0Second, p0Third));

    // Partition 0: only offset 100 is attempted; offsets 101 and 102 are skipped.
    verify(recoveringChannel).insertRecord(p0Trigger, true);
    verify(recoveringChannel, never()).insertRecord(p0Second, false);
    verify(recoveringChannel, never()).insertRecord(p0Third, false);

    // Partition 1 is unaffected and receives its record.
    verify(healthyChannel).insertRecord(p1Only, true);

    // Only the recovering partition is rewound, back to the record that triggered recovery.
    verify(mockSinkTaskContext).offset(recoveringTp, 100L);
    verify(mockSinkTaskContext, never()).offset(healthyTp, 200L);
  }

  @Test
  void insertRewindsToFirstSkippedOffsetAfterRecoveryMidPartition() {
    TopicPartition partition = new TopicPartition(TOPIC, 0);
    TopicPartitionChannel flakyChannel = mockChannel("ch_0", false);
    when(mockChannelManager.getChannel(partition)).thenReturn(Optional.of(flakyChannel));

    // Consecutive answers: the first insert is accepted (true), the next one reports false.
    when(flakyChannel.insertRecord(any(), anyBoolean())).thenReturn(true, false);

    SinkRecord accepted = recordFor(TOPIC, 0, 100);
    SinkRecord trigger = recordFor(TOPIC, 0, 101);
    SinkRecord skipped = recordFor(TOPIC, 0, 102);
    service.insert(Arrays.asList(accepted, trigger, skipped));

    // Offsets 100 and 101 reach the channel; offset 102 does not.
    verify(flakyChannel).insertRecord(accepted, true);
    verify(flakyChannel).insertRecord(trigger, false);
    verify(flakyChannel, never()).insertRecord(skipped, false);

    // The partition is rewound to the offset of the record that triggered recovery.
    verify(mockSinkTaskContext).offset(partition, 101L);
  }

  // --- helpers ---

  /**
   * Builds a service whose channel manager is a fresh mock that the caller does not need to
   * inspect; delegates to the three-argument overload.
   */
  private SnowflakeSinkServiceV2 buildService(
      SnowflakeConnectionService conn, boolean clientValidationEnabled) {
    PartitionChannelManager throwawayManager = mock(PartitionChannelManager.class);
    return buildService(conn, clientValidationEnabled, throwawayManager);
  }

  /**
   * Builds a service for task "0" of {@code CONNECTOR_NAME} with sanitization disabled.
   *
   * @param conn connection service handed to the service under test
   * @param clientValidationEnabled {@code true} selects {@code CLIENT_SIDE} validation, {@code
   *     false} selects {@code SERVER_SIDE}
   * @param channelManager instance returned by the channel-manager supplier
   */
  private SnowflakeSinkServiceV2 buildService(
      SnowflakeConnectionService conn,
      boolean clientValidationEnabled,
      PartitionChannelManager channelManager) {
    SnowflakeValidation validationMode;
    if (clientValidationEnabled) {
      validationMode = SnowflakeValidation.CLIENT_SIDE;
    } else {
      validationMode = SnowflakeValidation.SERVER_SIDE;
    }

    SinkTaskConfig taskConfig =
        SinkTaskConfigTestBuilder.builder()
            .connectorName(CONNECTOR_NAME)
            .taskId("0")
            .validation(validationMode)
            .enableSanitization(false)
            .build();

    // The offset-fetcher supplier hands out a new mock on every call; the channel-manager
    // supplier always returns the caller-provided instance.
    return new SnowflakeSinkServiceV2(
        conn,
        taskConfig,
        mockSinkTaskContext,
        Optional.empty(),
        () -> mock(BatchOffsetFetcher.class),
        () -> channelManager,
        TaskMetrics.noop());
  }

  /**
   * Creates a mocked channel that is open, accepts every insert (returns {@code true}) and reports
   * the given name and initializing state.
   */
  private static TopicPartitionChannel mockChannel(String channelName, boolean initializing) {
    TopicPartitionChannel stub = mock(TopicPartitionChannel.class);
    when(stub.insertRecord(any(), anyBoolean())).thenReturn(true);
    when(stub.isChannelClosed()).thenReturn(false);
    when(stub.isInitializing()).thenReturn(initializing);
    when(stub.getChannelName()).thenReturn(channelName);
    return stub;
  }

  /** Builds a sink record for the given topic and partition at the given offset. */
  private static SinkRecord recordFor(String topic, int partition, long offset) {
    SinkRecord built =
        SinkRecordBuilder.forTopicPartition(topic, partition).withOffset(offset).build();
    return built;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/SnowflakeSinkServiceV2ValidationLoggingTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.spi.LoggingEvent;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for SnowflakeSinkServiceV2 pre-flight safety checks. Verifies validation configuration
 * logging for preventing data loss and task crashes.
 *
 * <p>Each test builds a service through {@link #createServiceWithConfig} while a capturing appender
 * is attached to the {@code SnowflakeSinkServiceV2} logger, then asserts on the captured events.
 */
public class SnowflakeSinkServiceV2ValidationLoggingTest {

  private TestAppender testAppender;
  private Logger logger;
  // Level the logger had before setUp() forced INFO; may be null when the level is inherited.
  private Level previousLevel;

  @BeforeEach
  public void setUp() {
    // Capture logs from SnowflakeSinkServiceV2
    logger = Logger.getLogger(SnowflakeSinkServiceV2.class);
    // Remember the configured level so tearDown() can undo the override below.
    previousLevel = logger.getLevel();
    testAppender = new TestAppender();
    logger.addAppender(testAppender);
    logger.setLevel(Level.INFO);
  }

  @AfterEach
  public void tearDown() {
    logger.removeAppender(testAppender);
    // Restore the original level (null re-enables inheritance) so other tests are unaffected.
    logger.setLevel(previousLevel);
  }

  /**
   * Test SAFE config: Validation enabled + errors.tolerance=none
   *
   * <p>Task aborts on validation failure - no data loss
   */
  @Test
  public void testSafeConfigValidationEnabledWithToleranceNone() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.CLIENT_SIDE)
            .tolerateErrors(false)
            .build();

    SnowflakeSinkServiceV2 service = createServiceWithConfig(config);
    assertNotNull(service);

    // Verify INFO log contains expected message
    assertTrue(
        testAppender.containsMessage(Level.INFO, "Client-side validation enabled"),
        "Should log INFO about validation enabled");
    assertTrue(
        testAppender.containsMessage(Level.INFO, "Validation failures will abort the task (safe"),
        "Should log that task will abort on validation failure");
  }

  /**
   * Test SAFE config: Validation enabled + errors.tolerance=all + DLQ configured
   *
   * <p>Validation errors route to DLQ - no data loss
   */
  @Test
  public void testSafeConfigValidationEnabledWithToleranceAllAndDlq() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.CLIENT_SIDE)
            .tolerateErrors(true)
            .dlqTopicName("my-dlq-topic")
            .build();

    SnowflakeSinkServiceV2 service = createServiceWithConfig(config);
    assertNotNull(service);

    // Verify INFO log contains expected message with DLQ topic name
    assertTrue(
        testAppender.containsMessage(Level.INFO, "Client-side validation enabled"),
        "Should log INFO about validation enabled");
    assertTrue(
        testAppender.containsMessage(Level.INFO, "Validation failures will route to DLQ topic"),
        "Should log that failures route to DLQ");
    assertTrue(
        testAppender.containsMessage(Level.INFO, "my-dlq-topic"), "Should log the DLQ topic name");
  }

  /**
   * Test UNSAFE config: Validation enabled + errors.tolerance=all + NO DLQ
   *
   * <p>Invalid records silently dropped - DATA LOSS
   */
  @Test
  public void testUnsafeConfigValidationEnabledWithToleranceAllNoDlq() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.CLIENT_SIDE)
            .tolerateErrors(true)
            .dlqTopicName("")
            .build();

    SnowflakeSinkServiceV2 service = createServiceWithConfig(config);
    assertNotNull(service);

    // Verify ERROR log about unsafe configuration
    assertTrue(
        testAppender.containsMessage(Level.ERROR, "UNSAFE CONFIGURATION"),
        "Should log ERROR about unsafe configuration");
    assertTrue(
        testAppender.containsMessage(Level.ERROR, "SILENTLY DROPPED"),
        "Should warn about silent data loss");
    assertTrue(
        testAppender.containsMessage(Level.ERROR, "causing data loss"),
        "Should explicitly mention data loss");
  }

  /**
   * Test: Validation disabled with ERROR_LOGGING enabled on existing table.
   *
   * <p>Should NOT warn about missing error logging when ERROR_LOGGING is present.
   */
  @Test
  public void testValidationDisabledWithErrorLoggingEnabled() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.SERVER_SIDE)
            .topicToTableMap(Map.of("topic1", "table1"))
            .build();

    SnowflakeSinkServiceV2 service =
        createServiceWithConfig(
            config,
            mockConn -> {
              when(mockConn.tableExist("table1")).thenReturn(true);
              when(mockConn.hasErrorLoggingEnabled("table1")).thenReturn(true);
            });
    assertNotNull(service);

    assertFalse(
        testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"),
        "Should NOT warn about missing error logging when it is enabled");
    assertTrue(
        testAppender.containsMessage(Level.INFO, "error table is active"),
        "Should log INFO confirming error table is active");
  }

  /**
   * Test: Validation disabled, multiple tables — one enabled, one disabled.
   *
   * <p>Verifies per-table iteration: only the disabled table gets a warning; the enabled table gets
   * an INFO confirmation.
   */
  @Test
  public void testValidationDisabledMultipleTablesPartialErrorLogging() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.SERVER_SIDE)
            .topicToTableMap(Map.of("topic_ok", "table_ok", "topic_bad", "table_bad"))
            .build();

    SnowflakeSinkServiceV2 service =
        createServiceWithConfig(
            config,
            mockConn -> {
              when(mockConn.tableExist("table_ok")).thenReturn(true);
              when(mockConn.hasErrorLoggingEnabled("table_ok")).thenReturn(true);
              when(mockConn.tableExist("table_bad")).thenReturn(true);
              when(mockConn.hasErrorLoggingEnabled("table_bad")).thenReturn(false);
            });
    assertNotNull(service);

    assertTrue(
        testAppender.containsMessage(Level.WARN, "table_bad"),
        "Should warn about the table missing ERROR_LOGGING");
    assertFalse(
        testAppender.containsMessage(Level.WARN, "table_ok"),
        "Should NOT warn about the table that has ERROR_LOGGING enabled");
    assertTrue(
        testAppender.containsMessage(Level.INFO, "table_ok"),
        "Should log INFO confirmation for the table with ERROR_LOGGING enabled");
  }

  /**
   * Test: Validation disabled WITHOUT ERROR_LOGGING on existing table.
   *
   * <p>Should warn about the specific table and suggest ALTER TABLE.
   */
  @Test
  public void testValidationDisabledWithoutErrorLogging() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.SERVER_SIDE)
            .topicToTableMap(Map.of("topic1", "table1"))
            .build();

    SnowflakeSinkServiceV2 service =
        createServiceWithConfig(
            config,
            mockConn -> {
              when(mockConn.tableExist("table1")).thenReturn(true);
              when(mockConn.hasErrorLoggingEnabled("table1")).thenReturn(false);
            });
    assertNotNull(service);

    assertTrue(testAppender.containsMessage(Level.WARN, "table1"), "Should mention the table name");
    assertTrue(
        testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"),
        "Should warn about missing error logging");
    assertTrue(
        testAppender.containsMessage(Level.WARN, "ALTER TABLE"),
        "Should suggest ALTER TABLE command");
  }

  /**
   * Test: Validation disabled, table does not exist yet.
   *
   * <p>Should NOT warn about error logging — table will be auto-created with ERROR_LOGGING = TRUE.
   */
  @Test
  public void testValidationDisabledTableNotExists() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.SERVER_SIDE)
            .topicToTableMap(Map.of("topic1", "table1"))
            .build();

    SnowflakeSinkServiceV2 service =
        createServiceWithConfig(
            config,
            mockConn -> {
              when(mockConn.tableExist("table1")).thenReturn(false);
            });
    assertNotNull(service);

    assertFalse(
        testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"),
        "Should NOT warn about error logging for non-existent table");
  }

  /**
   * Test: Validation disabled, table is Iceberg.
   *
   * <p>Should warn that Iceberg tables do not support ERROR_LOGGING and not check
   * hasErrorLoggingEnabled.
   */
  @Test
  public void testValidationDisabledIcebergTableWarning() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .validation(SnowflakeValidation.SERVER_SIDE)
            .topicToTableMap(Map.of("topic1", "iceberg_table"))
            .build();

    SnowflakeSinkServiceV2 service =
        createServiceWithConfig(
            config,
            mockConn -> {
              when(mockConn.tableExist("iceberg_table")).thenReturn(true);
              when(mockConn.isIcebergTable("iceberg_table")).thenReturn(true);
            });
    assertNotNull(service);

    assertTrue(
        testAppender.containsMessage(Level.WARN, "Iceberg table"),
        "Should warn that the table is Iceberg");
    assertTrue(
        testAppender.containsMessage(Level.WARN, "do not support ERROR_LOGGING"),
        "Should warn that Iceberg does not support ERROR_LOGGING");
    assertFalse(
        testAppender.containsMessage(Level.WARN, "does not have ERROR_LOGGING"),
        "Should NOT emit the generic missing-ERROR_LOGGING warning for Iceberg tables");
  }

  /**
   * Test: Legacy KC v3 config warning
   *
   * <p>Warns if snowflake.enable.schematization is present (not supported in KC v4)
   */
  @Test
  public void testLegacySchematizationConfigWarning() {
    SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName("test-connector")
            .taskId("0")
            .enableSchematization(true)
            .build();

    SnowflakeSinkServiceV2 service = createServiceWithConfig(config);
    assertNotNull(service);

    // Verify WARN log about legacy config
    assertTrue(
        testAppender.containsMessage(Level.WARN, "snowflake.enable.schematization"),
        "Should mention legacy config name");
    assertTrue(
        testAppender.containsMessage(Level.WARN, "not supported in KC v4"),
        "Should explain config is not supported");
    assertTrue(
        testAppender.containsMessage(Level.WARN, "ENABLE_SCHEMA_EVOLUTION"),
        "Should mention server-side schema evolution");
  }

  /** Helper to create SnowflakeSinkServiceV2 with minimal mocked dependencies. */
  private SnowflakeSinkServiceV2 createServiceWithConfig(SinkTaskConfig config) {
    return createServiceWithConfig(config, mockConn -> {});
  }

  /**
   * Helper with optional mock setup for connection service.
   *
   * @param config task configuration passed to the service constructor
   * @param mockSetup callback that adds test-specific stubbing to the mocked connection before the
   *     service is constructed
   * @return the constructed service, never {@code null}
   * @throws AssertionError wrapping the original exception if the constructor fails, so the test
   *     report shows the real cause instead of a bare null check failure
   */
  private SnowflakeSinkServiceV2 createServiceWithConfig(
      SinkTaskConfig config, Consumer<SnowflakeConnectionService> mockSetup) {
    SnowflakeConnectionService mockConn = mock(SnowflakeConnectionService.class);
    when(mockConn.isClosed()).thenReturn(false);
    when(mockConn.getTelemetryClient()).thenReturn(null);
    mockSetup.accept(mockConn);

    TaskMetrics mockMetrics = mock(TaskMetrics.class);

    try {
      return new SnowflakeSinkServiceV2(
          mockConn,
          config,
          null, // recordErrorReporter
          null, // sinkTaskContext
          java.util.Optional.empty(), // metricsJmxReporter
          mockMetrics);
    } catch (Exception e) {
      // Fail loudly with the cause attached rather than returning null and hiding the stack trace.
      throw new AssertionError("Failed to create service: " + e.getMessage(), e);
    }
  }

  /** Test appender that captures log events for verification. */
  private static class TestAppender extends AppenderSkeleton {
    private final List<LoggingEvent> events = new ArrayList<>();

    @Override
    protected void append(LoggingEvent event) {
      events.add(event);
    }

    @Override
    public void close() {
      // No-op
    }

    @Override
    public boolean requiresLayout() {
      return false;
    }

    /**
     * Returns whether any captured event has exactly the given level and a rendered message that
     * contains the given fragment.
     */
    public boolean containsMessage(Level level, String messageFragment) {
      return events.stream()
          .anyMatch(
              event ->
                  event.getLevel().equals(level)
                      && event.getRenderedMessage().contains(messageFragment));
    }

    /** Returns a defensive copy of the captured events. */
    public List<LoggingEvent> getEvents() {
      return new ArrayList<>(events);
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingClientPropertiesTest.java
================================================
/*
 * Copyright (c) 2023 Snowflake Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP;
import static com.snowflake.kafka.connector.internal.TestUtils.generatePrivateKey;
import static com.snowflake.kafka.connector.internal.TestUtils.getConnectorConfigurationForStreaming;
import static com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties.STREAMING_CLIENT_V2_PREFIX_NAME;
import static org.assertj.core.api.Assertions.assertThat;

import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SnowflakeSinkConnectorConfigBuilder;
import com.snowflake.kafka.connector.internal.PrivateKeyTool;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.SnowflakeURL;
import java.security.PrivateKey;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import org.junit.Assert;
import org.junit.jupiter.api.Test;

public class StreamingClientPropertiesTest {

  // Lower-cased parameter names. The tests below pass "EXAMPLE_PARAM1" / "EXAMPLE_PARAM2" in upper
  // case through the override map and expect these lower-cased names as keys of the resulting
  // parameterOverrides.
  private static final String EXAMPLE_PARAM1 = "EXAMPLE_PARAM1".toLowerCase();
  private static final String EXAMPLE_PARAM2 = "EXAMPLE_PARAM2".toLowerCase();

  @Test
  public void testGetValidProperties() {
    String encodedKey = Base64.getEncoder().encodeToString(generatePrivateKey().getEncoded());
    String accountUrl = "https://testaccount.us-east-1.snowflakecomputing.com";

    // Minimal connector configuration: name, task id, URL, role, user and private key.
    Map<String, String> rawConfig = new HashMap<>();
    rawConfig.put(KafkaConnectorConfigParams.NAME, "testName");
    rawConfig.put(Utils.TASK_ID, "0");
    rawConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, accountUrl);
    rawConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "testRole");
    rawConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, "testUser");
    rawConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, encodedKey);

    SinkTaskConfig taskConfig = SinkTaskConfig.from(rawConfig);
    StreamingClientProperties actual = StreamingClientProperties.from(taskConfig);

    // Exactly five client properties are expected, each derived from the configuration above.
    Properties props = actual.clientProperties;
    assertThat(props.getProperty("user")).isEqualTo("testUser");
    assertThat(props.getProperty("role")).isEqualTo("testRole");
    assertThat(props.getProperty("account")).isEqualTo("testaccount");
    String expectedHost = "testaccount.us-east-1.snowflakecomputing.com";
    assertThat(props.getProperty("host")).isEqualTo(expectedHost);
    assertThat(props.getProperty("private_key")).isEqualTo(encodedKey);
    assertThat(props).hasSize(5);

    // The client name is the V2 prefix plus the connector name; no overrides were configured.
    String expectedPrefix = STREAMING_CLIENT_V2_PREFIX_NAME + "testName";
    assertThat(actual.clientNamePrefix).isEqualTo(expectedPrefix);
    assertThat(actual.parameterOverrides).isEmpty();
  }

  @Test
  void shouldPropagateStreamingClientPropertiesFromOverrideMap() {
    // GIVEN a streaming configuration with a task id, a private key and a two-entry override map
    Map<String, String> rawConfig = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    rawConfig.put(Utils.TASK_ID, "0");
    String encodedKey = Base64.getEncoder().encodeToString(generatePrivateKey().getEncoded());
    rawConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, encodedKey);
    String overrideSpec = "EXAMPLE_PARAM1:1,EXAMPLE_PARAM2:2";
    rawConfig.put(SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, overrideSpec);

    Map<String, Object> expectedOverrides = Map.of(EXAMPLE_PARAM1, "1", EXAMPLE_PARAM2, "2");

    // WHEN the client properties are derived from that configuration
    StreamingClientProperties actual =
        StreamingClientProperties.from(SinkTaskConfig.from(rawConfig));

    // THEN both entries appear as overrides, keyed by the lower-cased parameter names
    assertThat(actual.parameterOverrides).isEqualTo(expectedOverrides);
  }

  @Test
  void explicitStreamingClientPropertiesTakePrecedenceOverOverrideMap_SingleBufferEnabled() {
    // NOTE(review): this body sets no explicit streaming-client property, so the precedence named
    // in the method title is not exercised here; it performs the same steps as
    // shouldPropagateStreamingClientPropertiesFromOverrideMap. Confirm the intended scenario.

    // GIVEN
    Map<String, String> rawConfig = SnowflakeSinkConnectorConfigBuilder.streamingConfig().build();
    rawConfig.put(Utils.TASK_ID, "0");
    String encodedKey = Base64.getEncoder().encodeToString(generatePrivateKey().getEncoded());
    rawConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY, encodedKey);
    String overrideSpec = "EXAMPLE_PARAM1:1,EXAMPLE_PARAM2:2";
    rawConfig.put(SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, overrideSpec);

    Map<String, Object> expectedOverrides = new HashMap<>();
    expectedOverrides.put(EXAMPLE_PARAM2, "2");
    expectedOverrides.put(EXAMPLE_PARAM1, "1");

    // WHEN
    SinkTaskConfig taskConfig = SinkTaskConfig.from(rawConfig);
    StreamingClientProperties actual = StreamingClientProperties.from(taskConfig);

    // THEN
    assertThat(actual.parameterOverrides).isEqualTo(expectedOverrides);
  }

  /**
   * Verifies client properties, client name prefix and parameter overrides when the override map
   * contains a single entry.
   *
   * <p>Uses AssertJ assertions rather than the Java {@code assert} keyword: {@code assert}
   * statements are skipped entirely when the JVM runs without {@code -ea}, which would let this
   * test pass without checking anything.
   */
  @Test
  public void testValidPropertiesWithOverriddenStreamingPropertiesMap() {
    Map<String, String> connectorConfig = getConnectorConfigurationForStreaming(true);
    connectorConfig.put(KafkaConnectorConfigParams.NAME, "testName");
    String testUrl = "https://testaccount.us-east-1.snowflakecomputing.com";
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME, testUrl);
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "testRole");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, "testUser");
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, "EXAMPLE_PARAM2:10000000");

    // Build the expected client properties from the same inputs the production code receives.
    SnowflakeURL parsedUrl = new SnowflakeURL(testUrl);
    Properties expectedProps = new Properties();
    expectedProps.put("user", "testUser");
    expectedProps.put("role", "testRole");
    expectedProps.put("account", parsedUrl.getAccount());
    expectedProps.put("host", parsedUrl.getUrlWithoutPort());
    String privateKeyStr = connectorConfig.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY);
    if (privateKeyStr != null) {
      // The expected key is the parsed private key re-encoded as Base64.
      String passphrase =
          connectorConfig.get(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE);
      PrivateKey privateKey = PrivateKeyTool.parsePrivateKey(privateKeyStr, passphrase);
      expectedProps.put("private_key", Base64.getEncoder().encodeToString(privateKey.getEncoded()));
    }
    String expectedClientName = STREAMING_CLIENT_V2_PREFIX_NAME + "testName";
    Map<String, Object> expectedParameterOverrides = new HashMap<>();
    expectedParameterOverrides.put(EXAMPLE_PARAM2, "10000000");

    // test get properties
    SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
    StreamingClientProperties resultProperties = StreamingClientProperties.from(config);

    // verify
    assertThat(resultProperties.clientProperties).isEqualTo(expectedProps);
    assertThat(resultProperties.clientNamePrefix).isEqualTo(expectedClientName);
    assertThat(resultProperties.parameterOverrides).isEqualTo(expectedParameterOverrides);
  }

  /**
   * Feeds two malformed override maps (entries without the {@code key:value} form used by the
   * valid tests in this class) and expects each to be rejected with a {@link
   * SnowflakeKafkaConnectorException} whose message names the override-map parameter.
   */
  @Test
  public void testInvalidStreamingClientPropertiesMap() {
    Map<String, String> connectorConfig = getConnectorConfigurationForStreaming(true);
    connectorConfig.put(KafkaConnectorConfigParams.NAME, "testName");
    connectorConfig.put(
        KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME,
        "https://testaccount.us-east-1.snowflakecomputing.com");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME, "testRole");
    connectorConfig.put(KafkaConnectorConfigParams.SNOWFLAKE_USER_NAME, "testUser");

    // Two malformed entries
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "MAX_CHANNEL_SIZE_IN_BYTES->10000000,MAX_CLIENT_LAG100");
    assertOverrideMapRejected(connectorConfig);

    // A single malformed entry
    connectorConfig.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP, "MAX_CHANNEL_SIZE_IN_BYTES->10000000");
    assertOverrideMapRejected(connectorConfig);
  }

  /**
   * Asserts that building {@link StreamingClientProperties} from the given configuration fails
   * with a {@link SnowflakeKafkaConnectorException} mentioning the override-map parameter name.
   *
   * <p>The message is checked with {@code assertThat} rather than the Java {@code assert}
   * keyword, so the check also runs when JVM assertions are disabled.
   */
  private void assertOverrideMapRejected(Map<String, String> connectorConfig) {
    try {
      SinkTaskConfig config = SinkTaskConfig.from(connectorConfig);
      StreamingClientProperties.from(config);
    } catch (SnowflakeKafkaConnectorException exception) {
      assertThat(exception.getMessage())
          .contains(KafkaConnectorConfigParams.SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP);
      return;
    }
    // Reached only when no exception was thrown
    Assert.fail("Should throw an exception");
  }

  /**
   * Checks {@code equals}/{@code hashCode} of {@link StreamingClientProperties}: two instances
   * built from configurations that differ only in the connector name must be equal, while two
   * instances with different override-map values must not be.
   *
   * <p>Uses {@code assertThat} instead of the Java {@code assert} keyword so the checks are
   * executed even when the JVM runs with assertions disabled.
   */
  @Test
  public void testStreamingClientPropertiesEquality() {
    Map<String, String> config1 = getConnectorConfigurationForStreaming(true);
    config1.put(KafkaConnectorConfigParams.NAME, "catConnector");

    Map<String, String> config2 = getConnectorConfigurationForStreaming(true);
    config2.put(KafkaConnectorConfigParams.NAME, "dogConnector");

    // get properties
    StreamingClientProperties prop1 = StreamingClientProperties.from(SinkTaskConfig.from(config1));
    StreamingClientProperties prop2 = StreamingClientProperties.from(SinkTaskConfig.from(config2));

    // Only the connector name differs: the instances are expected to be interchangeable
    assertThat(prop1).isEqualTo(prop2);
    assertThat(prop1.hashCode()).isEqualTo(prop2.hashCode());

    config1.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "max_append_request_buffer_duration_ms:1000");
    config2.put(
        SNOWFLAKE_STREAMING_CLIENT_PROVIDER_OVERRIDE_MAP,
        "max_append_request_buffer_duration_ms:10000");

    prop1 = StreamingClientProperties.from(SinkTaskConfig.from(config1));
    prop2 = StreamingClientProperties.from(SinkTaskConfig.from(config2));

    // Different override values: the instances must no longer compare equal
    assertThat(prop1).isNotEqualTo(prop2);
    assertThat(prop1.hashCode()).isNotEqualTo(prop2.hashCode());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingErrorHandlerIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;

import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.streaming.v2.SnowpipeStreamingPartitionChannel;
import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration tests for records that are detected as broken on the client side: they must be
 * routed through {@link StreamingErrorHandler} and honour the {@code errors.tolerance} setting.
 *
 * <p>Each test drives the complete path: {@link SnowpipeStreamingPartitionChannel#insertRecord} →
 * broken record detection → {@link StreamingErrorHandler#handleError} → DLQ / throw.
 */
class StreamingErrorHandlerIT {

  private static final String TOPIC = "test_topic";
  private static final int PARTITION = 0;

  /** Connector configuration keys toggled by the individual scenarios. */
  private static final String TOLERANCE_KEY = "errors.tolerance";

  private static final String DLQ_TOPIC_KEY = "errors.deadletterqueue.topic.name";

  private String channelName;
  private String pipeName;

  private SnowflakeTelemetryService mockTelemetryService;
  private InMemorySinkTaskContext sinkTaskContext;
  private ExecutorService openChannelIoExecutor;

  /** Creates per-test channel/pipe names, a mocked telemetry service and fresh collaborators. */
  @BeforeEach
  void setUp() {
    final String suffix = UUID.randomUUID().toString().substring(0, 8);
    channelName = "test_channel_" + suffix;
    pipeName = "test_pipe_" + suffix;

    openChannelIoExecutor = Executors.newSingleThreadExecutor();
    sinkTaskContext =
        new InMemorySinkTaskContext(Collections.singleton(new TopicPartition(TOPIC, PARTITION)));
    mockTelemetryService = mock(SnowflakeTelemetryService.class);
  }

  /** Stops the executor handed to the channel under test. */
  @AfterEach
  void tearDown() {
    openChannelIoExecutor.shutdownNow();
  }

  // ── errors.tolerance = NONE (default) ──────────────────────────────────────

  @Test
  void brokenRecord_toleranceNone_shouldThrowDataException() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    final SnowpipeStreamingPartitionChannel channel = createChannel(baseConfig(), dlq);
    final SinkRecord broken = buildBrokenValueRecord(0);

    final DataException failure =
        assertThrows(DataException.class, () -> channel.insertRecord(broken, true));

    // The cause should be the original SnowflakeKafkaConnectorException from convertToMap
    final Throwable cause = failure.getCause();
    assertNotNull(cause, "DataException should wrap the original conversion exception");
    assertTrue(
        cause instanceof SnowflakeKafkaConnectorException,
        "Cause should be the original SnowflakeKafkaConnectorException, got: "
            + cause.getClass().getName());
    // Nothing may reach the error reporter in this scenario
    assertEquals(0, dlq.getReportedRecords().size());
  }

  @Test
  void brokenKeyRecord_toleranceNone_shouldThrowDataException() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    final SnowpipeStreamingPartitionChannel channel = createChannel(baseConfig(), dlq);
    final SinkRecord broken = buildBrokenKeyRecord(0);

    final DataException failure =
        assertThrows(DataException.class, () -> channel.insertRecord(broken, true));

    assertNotNull(
        failure.getCause(), "DataException should wrap the original conversion exception");
    assertEquals(0, dlq.getReportedRecords().size());
  }

  // ── errors.tolerance = NONE + DLQ configured ─────────────────────

  @Test
  void brokenRecord_toleranceNone_withDLQ_shouldRouteToDlqThenThrow() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    // errors.tolerance is left untouched, i.e. at its default of "none"
    final SnowpipeStreamingPartitionChannel channel =
        createChannel(withDlqTopic(baseConfig()), dlq);
    final SinkRecord broken = buildBrokenValueRecord(0);

    final DataException failure =
        assertThrows(DataException.class, () -> channel.insertRecord(broken, true));

    assertNotNull(
        failure.getCause(), "DataException should wrap the original conversion exception");

    // The record has to be handed to the DLQ before the task fails
    assertEquals(
        1,
        dlq.getReportedRecords().size(),
        "Record should be routed to DLQ even when tolerance=none");

    final InMemoryKafkaRecordErrorReporter.ReportedRecord reported =
        dlq.getReportedRecords().get(0);
    assertEquals(broken, reported.getRecord());
    assertTrue(
        reported.getException() instanceof DataException,
        "DLQ should receive DataException wrapper");
  }

  // ── errors.tolerance = ALL + DLQ configured ────────────────────────────────

  @Test
  void brokenRecord_toleranceAll_withDLQ_shouldSendOriginalExceptionToDLQ() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    final SnowpipeStreamingPartitionChannel channel =
        createChannel(withDlqTopic(tolerateAll(baseConfig())), dlq);
    final SinkRecord broken = buildBrokenValueRecord(0);

    // Must complete without throwing
    channel.insertRecord(broken, true);

    assertEquals(1, dlq.getReportedRecords().size());

    final InMemoryKafkaRecordErrorReporter.ReportedRecord reported =
        dlq.getReportedRecords().get(0);
    assertEquals(broken, reported.getRecord());

    // DLQ should receive DataException (KCv3-compatible) with original exception as cause
    final Throwable reportedError = reported.getException();
    assertTrue(
        reportedError instanceof DataException,
        "DLQ should receive DataException wrapper, got: " + reportedError.getClass().getName());
    assertNotNull(
        reportedError.getCause(), "DataException should have the original exception as cause");
    assertTrue(
        reportedError.getCause() instanceof SnowflakeKafkaConnectorException,
        "DataException cause should be SnowflakeKafkaConnectorException, got: "
            + reportedError.getCause().getClass().getName());
  }

  @Test
  void brokenKeyRecord_toleranceAll_withDLQ_shouldSendOriginalExceptionToDLQ() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    final SnowpipeStreamingPartitionChannel channel =
        createChannel(withDlqTopic(tolerateAll(baseConfig())), dlq);
    final SinkRecord broken = buildBrokenKeyRecord(0);

    channel.insertRecord(broken, true);

    assertEquals(1, dlq.getReportedRecords().size());
    final InMemoryKafkaRecordErrorReporter.ReportedRecord reported =
        dlq.getReportedRecords().get(0);
    assertEquals(broken, reported.getRecord());

    // DLQ should receive DataException wrapper with original exception as cause
    final Throwable reportedError = reported.getException();
    assertTrue(
        reportedError instanceof DataException,
        "DLQ should receive DataException wrapper, got: " + reportedError.getClass().getName());
    assertNotNull(reportedError.getCause(), "DataException should have cause");
  }

  @Test
  void multipleBrokenRecords_toleranceAll_withDLQ_shouldSendOnlyBrokenToDLQ() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    final SnowpipeStreamingPartitionChannel channel =
        createChannel(withDlqTopic(tolerateAll(baseConfig())), dlq);

    // Offsets 0, 2 and 3 carry broken values; offset 1 is a well-formed record
    channel.insertRecord(buildBrokenValueRecord(0), true);
    channel.insertRecord(buildValidRecord(1), false);
    channel.insertRecord(buildBrokenValueRecord(2), false);
    channel.insertRecord(buildBrokenValueRecord(3), false);

    assertEquals(3, dlq.getReportedRecords().size());
  }

  // ── errors.tolerance = ALL + no DLQ → should silently drop ─────────────────

  @Test
  void brokenRecord_toleranceAll_noDLQ_shouldSilentlyDrop() {
    final InMemoryKafkaRecordErrorReporter dlq = new InMemoryKafkaRecordErrorReporter();
    // Deliberately no DLQ topic in this configuration
    final SnowpipeStreamingPartitionChannel channel =
        createChannel(tolerateAll(baseConfig()), dlq);
    final SinkRecord broken = buildBrokenValueRecord(0);

    // Must complete without throwing; nothing is reported to the error reporter
    channel.insertRecord(broken, true);

    assertEquals(0, dlq.getReportedRecords().size());
  }

  // ── Helpers ────────────────────────────────────────────────────────────────

  /** Returns a mutable copy of the shared streaming test configuration. */
  private Map<String, String> baseConfig() {
    final Map<String, String> template = TestUtils.getConnectorConfigurationForStreaming(false);
    return new HashMap<>(template);
  }

  /** Sets {@code errors.tolerance=all} on the given configuration and returns it. */
  private static Map<String, String> tolerateAll(Map<String, String> config) {
    config.put(TOLERANCE_KEY, "all");
    return config;
  }

  /** Adds a dead-letter-queue topic name to the given configuration and returns it. */
  private static Map<String, String> withDlqTopic(Map<String, String> config) {
    config.put(DLQ_TOPIC_KEY, "my-dlq-topic");
    return config;
  }

  /**
   * Builds a SinkRecord whose value is a plain String under STRING_SCHEMA; the tests rely on it
   * being treated as a broken record.
   */
  private SinkRecord buildBrokenValueRecord(long offset) {
    final SinkRecordBuilder builder = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION);
    return builder
        .withValueSchema(Schema.STRING_SCHEMA)
        .withValue("plain string - not a map")
        .withOffset(offset)
        .build();
  }

  /**
   * Builds a SinkRecord whose key is a String declared with an INT32 key schema; the tests rely on
   * it being treated as a broken record.
   */
  private SinkRecord buildBrokenKeyRecord(long offset) {
    final SinkRecordBuilder builder = SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION);
    return builder
        .withKeySchema(Schema.INT32_SCHEMA)
        .withKey("not an int")
        .withValueSchema(Schema.STRING_SCHEMA)
        .withValue("{}")
        .withOffset(offset)
        .build();
  }

  /** Builds a well-formed SinkRecord from a schemaless JSON object. */
  private SinkRecord buildValidRecord(long offset) {
    final JsonConverter converter = new JsonConverter();
    converter.configure(Collections.singletonMap("schemas.enable", "false"), false);
    final byte[] payload = "{\"name\": \"test\"}".getBytes(StandardCharsets.UTF_8);
    final SchemaAndValue converted = converter.toConnectData(TOPIC, payload);
    return SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
        .withSchemaAndValue(converted)
        .withOffset(offset)
        .build();
  }

  /**
   * Wires a {@link SnowpipeStreamingPartitionChannel} for {@code TOPIC}/{@code PARTITION} on top of
   * a fake ingest client, with a {@link StreamingErrorHandler} built from the given configuration
   * and error reporter.
   */
  private SnowpipeStreamingPartitionChannel createChannel(
      Map<String, String> config, InMemoryKafkaRecordErrorReporter errorReporter) {
    final SinkTaskConfig taskConfig = SinkTaskConfig.from(config);
    final StreamingErrorHandler handler =
        new StreamingErrorHandler(taskConfig, errorReporter, mockTelemetryService);

    final PartitionOffsetTracker tracker =
        new PartitionOffsetTracker(
            new TopicPartition(TOPIC, PARTITION), sinkTaskContext, channelName);
    final SnowflakeTelemetryChannelStatus channelStatus =
        new SnowflakeTelemetryChannelStatus(
            "test_table",
            "test_connector",
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            tracker.persistedOffsetRef(),
            tracker.processedOffsetRef(),
            tracker.consumerGroupOffsetRef());
    final FakeSnowflakeStreamingIngestClient ingestClient =
        new FakeSnowflakeStreamingIngestClient(pipeName, "test_connector");

    return new SnowpipeStreamingPartitionChannel(
        "test_table",
        channelName,
        pipeName,
        ingestClient,
        openChannelIoExecutor,
        mockTelemetryService,
        channelStatus,
        tracker,
        taskConfig,
        handler,
        TaskMetrics.noop(),
        false,
        null,
        Optional.empty());
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingManualModeIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import static com.snowflake.kafka.connector.internal.TestUtils.assertTableColumnCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertTableHasColumn;
import static com.snowflake.kafka.connector.internal.TestUtils.assertTableRowCount;
import static com.snowflake.kafka.connector.internal.TestUtils.assertWithRetry;
import static com.snowflake.kafka.connector.internal.TestUtils.getTableRows;
import static com.snowflake.kafka.connector.internal.TestUtils.tableSize;
import static java.lang.String.format;
import static org.apache.kafka.connect.data.Schema.STRING_SCHEMA;
import static org.junit.jupiter.api.Assertions.assertEquals;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Constants;
import com.snowflake.kafka.connector.InjectQueryRunner;
import com.snowflake.kafka.connector.InjectQueryRunnerExtension;
import com.snowflake.kafka.connector.InjectSnowflakeDataSourceExtension;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.storage.Converter;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;

/**
 * Integration tests for streaming ingestion in "manual mode", meaning the user creates the table
 * (and, in one scenario, the pipe) objects up front.
 *
 * <p>The outer {@code beforeEach} creates a sink service plus a table named after a random table
 * name; each nested class then adjusts the Snowflake objects for its scenario.
 */
@ExtendWith({InjectSnowflakeDataSourceExtension.class, InjectQueryRunnerExtension.class})
class StreamingManualModeIT {

  private final ObjectMapper objectMapper = new ObjectMapper();
  private final SnowflakeConnectionService conn = TestUtils.getConnectionServiceWithEncryptedKey();

  private String tableName;
  private String topicName;
  private TopicPartition topicPartition;
  private SnowflakeSinkService snowflakeSinkService;

  @InjectQueryRunner private QueryRunner queryRunner;

  /**
   * Builds the sink service with server-side validation and column identifier normalization
   * disabled, then creates the target table. The topic shares its name with the table.
   */
  @BeforeEach
  void beforeEach() throws SQLException {
    final Map<String, String> config = TestUtils.getConnectorConfigurationForStreaming(true);
    config.put(Constants.KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, "server_side");
    config.put(
        Constants.KafkaConnectorConfigParams
            .SNOWFLAKE_COMPATIBILITY_ENABLE_COLUMN_IDENTIFIER_NORMALIZATION,
        "false");
    SinkTaskConfig sinkTaskConfig = SinkTaskConfig.from(config);
    tableName = TestUtils.randomTableName();
    topicName = tableName;
    topicPartition = new TopicPartition(topicName, 0);
    snowflakeSinkService =
        StreamingSinkServiceBuilder.builder(conn, sinkTaskConfig)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
    queryRunner.execute(
        format(
            "create table %s (city varchar, age number, married boolean, has_cat boolean,"
                + " crazy_field_name boolean, skills variant, family variant)",
            tableName));
  }

  /** Drops the table and the pipe named {@code <table>-STREAMING}. */
  @AfterEach
  void afterEach() {
    TestUtils.dropTable(tableName);
    TestUtils.dropPipe(tableName + "-STREAMING");
  }

  /** Scenario: the user pre-creates both the table and a pipe that maps JSON fields to columns. */
  @Nested
  class TableAndPipeDefinedByUser {

    private String pipeName;

    /** Creates a pipe, named like the table, that projects the JSON fields into the columns. */
    @BeforeEach
    void beforeEach() throws SQLException {
      pipeName = tableName;
      queryRunner.execute(
          format(
              "CREATE OR REPLACE PIPE %s AS COPY INTO %s FROM (SELECT $1:city, $1:age,  $1:married,"
                  + " $1['has cat'] has_cat, $1['! @&$#* has Łułósżź'] crazy_field_name, $1:skills,"
                  + " $1:family FROM TABLE(DATA_SOURCE(TYPE => 'STREAMING')))",
              pipeName, tableName));
    }

    @AfterEach
    void afterEach() throws SQLException {
      TestUtils.dropPipe(pipeName);
    }

    @Test
    void test_streaming_ingestion_with_user_defined_table_and_pipe() throws Exception {

      List<SinkRecord> records = buildContentSinkRecords();
      ingestAndAwaitRows(records, 2);

      // Assert that the table has exactly 2 rows; the first row is checked value by value
      assertTableRowCount(tableName, 2);

      List<Map<String, Object>> dbRows = getTableRows(tableName);

      final Map<String, Object> firstRow = dbRows.get(0);
      makeCommonAssertions(firstRow);
      assertEquals(true, firstRow.get("HAS_CAT"));
      assertEquals(true, firstRow.get("CRAZY_FIELD_NAME"));
    }
  }

  /** Scenario: the user pre-creates only the table; no pipe is created by the test. */
  @Nested
  class DefaultPipe {

    /** Replaces the outer table with one whose column names match the JSON keys verbatim. */
    @BeforeEach
    void beforeEach() throws SQLException {
      queryRunner.execute(
          format(
              "create or replace table %s (record_metadata variant, city varchar, age number,"
                  + " married boolean, \"has cat\" boolean , \"! @&$#* has Łułósżź\" boolean,"
                  + " skills variant, family variant)",
              tableName));
    }

    @Test
    void test_streaming_ingestion_with_user_defined_table_and_default_pipe() throws Exception {
      List<SinkRecord> records = buildContentSinkRecords();
      ingestAndAwaitRows(records, 2);

      // Assert that the table has exactly 2 rows and 8 columns
      assertTableRowCount(tableName, 2);
      assertTableColumnCount(tableName, 8);
      Map<String, Object> firstRow = getTableRows(tableName).get(0);
      assertTableHasColumn(tableName, "record_metadata");
      makeCommonAssertions(firstRow);
      assertEquals(true, firstRow.get("! @&$#* has Łułósżź"));
      assertEquals(true, firstRow.get("has cat"));
    }
  }

  /**
   * Starts the partition, inserts the records and waits until the table holds {@code
   * expectedRows} rows.
   *
   * <p>The sink service is closed in a {@code finally} block so that it is also released when
   * ingestion or the retrying assertion fails, instead of staying open for the rest of the run.
   */
  private void ingestAndAwaitRows(final List<SinkRecord> records, final int expectedRows)
      throws Exception {
    try {
      snowflakeSinkService.startPartition(topicPartition);
      snowflakeSinkService.awaitInitialization();
      snowflakeSinkService.insert(records);

      // Wait for data to be ingested into the table
      assertWithRetry(() -> tableSize(tableName) == expectedRows);
    } finally {
      snowflakeSinkService.closeAll();
    }
  }

  /**
   * Builds two SinkRecords (offsets 1 and 2, keys {@code test_key1}/{@code test_key2}) that share
   * the same schemaless JSON value.
   */
  private List<SinkRecord> buildContentSinkRecords() throws JsonProcessingException {
    // this json row is sent twice to Kafka
    final byte[] jsonPayload =
        objectMapper
            .writeValueAsString(
                Map.of(
                    "city",
                    "Pcim Górny",
                    "age",
                    30,
                    "married",
                    true,
                    "has cat",
                    true,
                    "! @&$#* has Łułósżź",
                    true,
                    "skills",
                    List.of("sitting", "standing", "eating"),
                    "family",
                    Map.of("son", "Jack", "daughter", "Anna")))
            .getBytes(StandardCharsets.UTF_8);
    Converter converter = new JsonConverter();
    final Map<String, String> converterConfig = new HashMap<>();
    converterConfig.put("schemas.enable", "false");
    converter.configure(converterConfig, false);
    SchemaAndValue input = converter.toConnectData(topicName, jsonPayload);
    return List.of(
        new SinkRecord(topicName, 0, STRING_SCHEMA, "test_key1", input.schema(), input.value(), 1),
        new SinkRecord(topicName, 0, STRING_SCHEMA, "test_key2", input.schema(), input.value(), 2));
  }

  /**
   * Normalizes a value to a {@link JsonNode}: Strings and byte arrays are parsed as JSON text,
   * anything else is converted through the object mapper.
   */
  private JsonNode toJson(Object value) throws IOException {
    if (value instanceof String) {
      return objectMapper.readTree((String) value);
    }
    if (value instanceof byte[]) {
      return objectMapper.readTree((byte[]) value);
    }

    return objectMapper.valueToTree(value);
  }

  /** Checks the columns that both scenarios have in common; variants are compared as JSON. */
  private void makeCommonAssertions(final Map<String, Object> firstRow) throws IOException {
    assertEquals("Pcim Górny", firstRow.get("CITY"));
    assertEquals(30L, firstRow.get("AGE"));
    assertEquals(true, firstRow.get("MARRIED"));
    assertEquals(toJson(List.of("sitting", "standing", "eating")), toJson(firstRow.get("SKILLS")));
    assertEquals(toJson(Map.of("son", "Jack", "daughter", "Anna")), toJson(firstRow.get("FAMILY")));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/StreamingSinkServiceBuilder.java
================================================
package com.snowflake.kafka.connector.internal.streaming;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.dlq.KafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import java.util.Collections;
import java.util.Optional;
import org.apache.kafka.connect.sink.SinkTaskContext;

/**
 * Test-side builder for {@link SnowflakeSinkServiceV2}.
 *
 * <p>Connection and task configuration are mandatory; the error reporter, sink task context and
 * JMX reporter start from in-memory / empty defaults and can be replaced through the {@code
 * with*} methods. Task metrics are always the no-op implementation.
 */
public class StreamingSinkServiceBuilder {

  private final SnowflakeConnectionService conn;
  private final SinkTaskConfig config;

  private final TaskMetrics taskMetrics = TaskMetrics.noop();
  private Optional<MetricsJmxReporter> metricsJmxReporter = Optional.empty();
  private SinkTaskContext sinkTaskContext = new InMemorySinkTaskContext(Collections.emptySet());
  private KafkaRecordErrorReporter errorReporter = new InMemoryKafkaRecordErrorReporter();

  private StreamingSinkServiceBuilder(SnowflakeConnectionService conn, SinkTaskConfig config) {
    this.conn = conn;
    this.config = config;
  }

  /** Entry point: starts a builder for the given connection and task configuration. */
  public static StreamingSinkServiceBuilder builder(
      SnowflakeConnectionService conn, SinkTaskConfig config) {
    return new StreamingSinkServiceBuilder(conn, config);
  }

  /** Replaces the default in-memory error reporter. */
  public StreamingSinkServiceBuilder withErrorReporter(KafkaRecordErrorReporter errorReporter) {
    this.errorReporter = errorReporter;
    return this;
  }

  /** Replaces the default sink task context, which is created with an empty set. */
  public StreamingSinkServiceBuilder withSinkTaskContext(SinkTaskContext sinkTaskContext) {
    this.sinkTaskContext = sinkTaskContext;
    return this;
  }

  /** Supplies a JMX reporter; without this call the service receives an empty Optional. */
  public StreamingSinkServiceBuilder withMetricsJmxReporter(MetricsJmxReporter reporter) {
    this.metricsJmxReporter = Optional.of(reporter);
    return this;
  }

  /** Creates the sink service from the values collected so far. */
  public SnowflakeSinkServiceV2 build() {
    return new SnowflakeSinkServiceV2(
        this.conn,
        this.config,
        this.errorReporter,
        this.sinkTaskContext,
        this.metricsJmxReporter,
        this.taskMetrics);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/telemetry/PeriodicTelemetryReporterTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.telemetry;

import static com.snowflake.kafka.connector.internal.streaming.telemetry.PeriodicTelemetryReporter.MAX_INITIAL_JITTER_MS;
import static com.snowflake.kafka.connector.internal.telemetry.TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import net.snowflake.client.internal.jdbc.telemetry.Telemetry;
import net.snowflake.client.internal.jdbc.telemetry.TelemetryData;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link PeriodicTelemetryReporter}.
 *
 * <p>Every test wires the reporter to a real {@link SnowflakeTelemetryService} whose transport is
 * the in-memory {@link MockTelemetryClient}, and then inspects which telemetry records were
 * "sent".
 *
 * <p>The waits in this class budget for an initial delay of up to {@code MAX_INITIAL_JITTER_MS}
 * before the first report cycle. Because of that delay, no test relies on a short fixed sleep:
 * positive tests poll for the expected number of records, and negative tests first wait until the
 * reporter has demonstrably run (by counting channel-supplier invocations) before asserting that
 * nothing was sent.
 */
class PeriodicTelemetryReporterTest {

  private static final String CONNECTOR_NAME = "test-connector";
  private static final String TASK_ID = "0";
  private static final long SHORT_REPORT_INTERVAL_MS = 100L;
  private static final long MAX_WAIT_FOR_FIRST_REPORT_MS = MAX_INITIAL_JITTER_MS + 2000;
  private static final long POLL_INTERVAL_MS = 10L;
  private static final long NANOS_PER_MILLI = 1_000_000L;

  private MockTelemetryClient mockTelemetryClient;
  private SnowflakeTelemetryService telemetryService;
  private PeriodicTelemetryReporter reporter;

  @BeforeEach
  void setUp() {
    mockTelemetryClient = new MockTelemetryClient();
    telemetryService = new SnowflakeTelemetryService(mockTelemetryClient);
    telemetryService.setAppName(CONNECTOR_NAME);
    telemetryService.setTaskID(TASK_ID);
  }

  @AfterEach
  void tearDown() {
    if (reporter != null) {
      reporter.stop();
    }
    // Release the mock's sender thread; otherwise every test leaks one non-daemon thread.
    if (mockTelemetryClient != null) {
      mockTelemetryClient.shutdown();
    }
  }

  @Test
  void shouldStartAndStopWithoutErrors() {
    // Given
    Supplier<Map<String, TopicPartitionChannel>> emptySupplier = Collections::emptyMap;
    reporter = newReporter(emptySupplier);

    // When/Then
    assertDoesNotThrow(() -> reporter.start());
    assertDoesNotThrow(() -> reporter.stop());
  }

  @Test
  void shouldNotReportTelemetryWhenNoChannelsExist() throws InterruptedException {
    // Given
    AtomicLong supplierCalls = new AtomicLong();
    reporter = newReporter(countingSupplier(Collections::emptyMap, supplierCalls));

    // When
    reporter.start();

    // Wait until the reporter has actually run, so the assertion below is not vacuous
    awaitCompletedReportCycle(supplierCalls);

    // Then
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "No telemetry should be sent when there are no channels");
  }

  @Test
  void shouldNotReportTelemetryWhenChannelsSupplierReturnsNull() throws InterruptedException {
    // Given
    AtomicLong supplierCalls = new AtomicLong();
    reporter = newReporter(countingSupplier(() -> null, supplierCalls));

    // When
    reporter.start();

    // Wait until the reporter has actually run, so the assertion below is not vacuous
    awaitCompletedReportCycle(supplierCalls);

    // Then
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "No telemetry should be sent when supplier returns null");
  }

  @Test
  void shouldReportTelemetryForActiveChannels() throws InterruptedException {
    // Given
    TopicPartitionChannel mockChannel = createMockChannelWithNonEmptyStatus();
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", mockChannel);

    Supplier<Map<String, TopicPartitionChannel>> channelSupplier = () -> channels;
    reporter = newReporter(channelSupplier);

    // When
    reporter.start();

    // Wait for telemetry to be reported (accounting for jitter)
    waitForTelemetryCount(1, MAX_WAIT_FOR_FIRST_REPORT_MS);

    // Then
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 1,
        "At least one telemetry report should be sent");
  }

  @Test
  void shouldReportTelemetryForMultipleChannels() throws InterruptedException {
    // Given
    final String channelName1 = "testChannel_topic1_partition0";
    final String channelName2 = "testChannel_topic2_partition1";
    TopicPartitionChannel mockChannel1 = createMockChannelWithNonEmptyStatus(channelName1);
    TopicPartitionChannel mockChannel2 = createMockChannelWithNonEmptyStatus(channelName2);
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", mockChannel1);
    channels.put("channel2", mockChannel2);

    Supplier<Map<String, TopicPartitionChannel>> channelSupplier = () -> channels;
    reporter = newReporter(channelSupplier);

    // When
    reporter.start();

    // Wait for telemetry to be reported (at least 2 reports for 2 channels, accounting for jitter)
    waitForTelemetryCount(2, MAX_WAIT_FOR_FIRST_REPORT_MS);

    // Then
    LinkedList<TelemetryData> sentTelemetry = mockTelemetryClient.getSentTelemetryData();
    assertTrue(sentTelemetry.size() >= 2, "Telemetry should be sent for all channels");

    Set<String> reportedChannelNames =
        sentTelemetry.stream()
            .map(
                telemetryData ->
                    telemetryData
                        .getMessage()
                        .get("data")
                        .get(TOPIC_PARTITION_CHANNEL_NAME)
                        .asText())
            .collect(Collectors.toSet());

    assertEquals(2, reportedChannelNames.size(), "Telemetry should be sent for both channels");
    assertThat(reportedChannelNames).containsExactlyInAnyOrder(channelName1, channelName2);
  }

  @Test
  void shouldNotReportTelemetryForEmptyChannelStatus() throws InterruptedException {
    // Given
    TopicPartitionChannel mockChannel = createMockChannelWithEmptyStatus();
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", mockChannel);

    AtomicLong supplierCalls = new AtomicLong();
    reporter = newReporter(countingSupplier(() -> channels, supplierCalls));

    // When
    reporter.start();

    // Wait until the reporter has actually run, so the assertion below is not vacuous
    awaitCompletedReportCycle(supplierCalls);

    // Then - empty status should not be reported
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "Empty channel status should not trigger telemetry");
  }

  @Test
  void shouldNotReportTelemetryWhenChannelStatusIsNull() throws InterruptedException {
    // Given
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    when(mockChannel.getSnowflakeTelemetryChannelStatus()).thenReturn(null);
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", mockChannel);

    AtomicLong supplierCalls = new AtomicLong();
    reporter = newReporter(countingSupplier(() -> channels, supplierCalls));

    // When
    reporter.start();

    // Wait until the reporter has actually run, so the assertion below is not vacuous
    awaitCompletedReportCycle(supplierCalls);

    // Then - null status should not be reported
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().isEmpty(),
        "Null channel status should not trigger telemetry");
  }

  @Test
  void shouldContinueReportingAfterExceptionInChannelStatusRetrieval() throws InterruptedException {
    // Given
    TopicPartitionChannel failingChannel = mock(TopicPartitionChannel.class);
    when(failingChannel.getSnowflakeTelemetryChannelStatus())
        .thenThrow(new RuntimeException("Test exception"));

    TopicPartitionChannel workingChannel = createMockChannelWithNonEmptyStatus();

    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("failingChannel", failingChannel);
    channels.put("workingChannel", workingChannel);

    Supplier<Map<String, TopicPartitionChannel>> channelSupplier = () -> channels;
    reporter = newReporter(channelSupplier);

    // When
    reporter.start();

    // Wait for telemetry to be reported (accounting for jitter)
    waitForTelemetryCount(1, MAX_WAIT_FOR_FIRST_REPORT_MS);

    // Then - should still report for the working channel
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 1,
        "Telemetry should be reported despite exception in one channel");
  }

  @Test
  void shouldContinueReportingAfterExceptionInSupplier() throws InterruptedException {
    // Given
    final AtomicLong callCount = new AtomicLong(0);
    TopicPartitionChannel mockChannel = createMockChannelWithNonEmptyStatus();
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", mockChannel);

    Supplier<Map<String, TopicPartitionChannel>> flakySupplier =
        () -> {
          if (callCount.incrementAndGet() == 1) {
            throw new RuntimeException("First call fails");
          }
          return channels;
        };

    reporter = newReporter(flakySupplier);

    // When
    reporter.start();

    // Wait for telemetry to be reported (accounting for jitter + one extra interval after failure)
    waitForTelemetryCount(1, MAX_WAIT_FOR_FIRST_REPORT_MS + SHORT_REPORT_INTERVAL_MS * 2);

    // Then - should eventually report after first failure
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 1,
        "Telemetry should be reported after supplier recovers from exception");
  }

  @Test
  void shouldReportPeriodically() throws InterruptedException {
    // Given
    TopicPartitionChannel mockChannel = createMockChannelWithNonEmptyStatus();
    Map<String, TopicPartitionChannel> channels = new HashMap<>();
    channels.put("channel1", mockChannel);

    Supplier<Map<String, TopicPartitionChannel>> channelSupplier = () -> channels;
    reporter = newReporter(channelSupplier);

    // When
    reporter.start();

    // Wait for multiple report cycles (jitter + at least one more interval)
    waitForTelemetryCount(2, MAX_WAIT_FOR_FIRST_REPORT_MS + SHORT_REPORT_INTERVAL_MS * 2);

    // Then - should report multiple times
    assertTrue(
        mockTelemetryClient.getSentTelemetryData().size() >= 2,
        "Telemetry should be reported periodically");
  }

  /** Builds a reporter over the shared telemetry service using the short test interval. */
  private PeriodicTelemetryReporter newReporter(
      Supplier<Map<String, TopicPartitionChannel>> channelSupplier) {
    return new PeriodicTelemetryReporter(
        telemetryService, channelSupplier, CONNECTOR_NAME, TASK_ID, SHORT_REPORT_INTERVAL_MS);
  }

  /**
   * Wraps {@code delegate} so that every invocation is counted in {@code invocations} before the
   * delegate's result (or exception) is passed through unchanged.
   */
  private static Supplier<Map<String, TopicPartitionChannel>> countingSupplier(
      Supplier<Map<String, TopicPartitionChannel>> delegate, AtomicLong invocations) {
    return () -> {
      invocations.incrementAndGet();
      return delegate.get();
    };
  }

  /**
   * Blocks until the reporter has asked for the channel map at least twice, then allows a short
   * grace period for the mock client's asynchronous send to drain.
   *
   * <p>A second supplier invocation is used as evidence that the first cycle has finished. This
   * assumes the reporter runs its cycles sequentially (TODO confirm against
   * PeriodicTelemetryReporter). The method fails the test if the reporter never got that far, so a
   * "nothing was sent" assertion cannot pass merely because no cycle ran.
   */
  private void awaitCompletedReportCycle(AtomicLong supplierCalls) throws InterruptedException {
    final long maxWaitMs = MAX_WAIT_FOR_FIRST_REPORT_MS + SHORT_REPORT_INTERVAL_MS * 2;
    final long deadlineNanos = System.nanoTime() + maxWaitMs * NANOS_PER_MILLI;
    while (supplierCalls.get() < 2 && System.nanoTime() - deadlineNanos < 0) {
      Thread.sleep(POLL_INTERVAL_MS);
    }
    assertTrue(
        supplierCalls.get() >= 2,
        "Reporter did not complete a report cycle within " + maxWaitMs + " ms");
    Thread.sleep(SHORT_REPORT_INTERVAL_MS);
  }

  /**
   * Polls until at least {@code minCount} telemetry records were sent or {@code maxWaitMs} has
   * elapsed. Returns silently on timeout; callers assert on the count afterwards. Elapsed time is
   * measured with the monotonic {@link System#nanoTime()} so wall-clock adjustments cannot shorten
   * or extend the wait.
   */
  private void waitForTelemetryCount(int minCount, long maxWaitMs) throws InterruptedException {
    final long deadlineNanos = System.nanoTime() + maxWaitMs * NANOS_PER_MILLI;
    while (mockTelemetryClient.getSentTelemetryData().size() < minCount
        && System.nanoTime() - deadlineNanos < 0) {
      Thread.sleep(POLL_INTERVAL_MS);
    }
  }

  private TopicPartitionChannel createMockChannelWithNonEmptyStatus() {
    return createMockChannelWithNonEmptyStatus("testChannel");
  }

  /** Creates a mocked channel whose telemetry status carries the offsets 10, 5 and 15. */
  private TopicPartitionChannel createMockChannelWithNonEmptyStatus(final String channelName) {
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    SnowflakeTelemetryChannelStatus mockStatus =
        new SnowflakeTelemetryChannelStatus(
            "testTable",
            CONNECTOR_NAME,
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            new AtomicLong(10L),
            new AtomicLong(5L),
            new AtomicLong(15L));
    when(mockChannel.getSnowflakeTelemetryChannelStatus()).thenReturn(mockStatus);
    return mockChannel;
  }

  /**
   * Creates a mocked channel whose telemetry status has all three offsets set to -1, which the
   * tests in this class treat as an "empty" status that must not be reported.
   */
  private TopicPartitionChannel createMockChannelWithEmptyStatus() {
    TopicPartitionChannel mockChannel = mock(TopicPartitionChannel.class);
    SnowflakeTelemetryChannelStatus emptyStatus =
        new SnowflakeTelemetryChannelStatus(
            "testTable",
            CONNECTOR_NAME,
            "testChannel",
            System.currentTimeMillis(),
            Optional.empty(),
            new AtomicLong(-1L),
            new AtomicLong(-1L),
            new AtomicLong(-1L));
    when(mockChannel.getSnowflakeTelemetryChannelStatus()).thenReturn(emptyStatus);
    return mockChannel;
  }

  /**
   * Mock implementation of Telemetry for testing.
   *
   * <p>Records are first collected in a pending batch and moved to the "sent" list by a background
   * task when {@link #sendBatchAsync()} is called. All list access is guarded by this instance's
   * monitor.
   */
  static class MockTelemetryClient implements Telemetry {

    private final LinkedList<TelemetryData> telemetryDataList = new LinkedList<>();
    private final LinkedList<TelemetryData> sentTelemetryData = new LinkedList<>();
    private final ExecutorService executor = Executors.newSingleThreadExecutor();

    @Override
    public void addLogToBatch(TelemetryData telemetryData) {
      synchronized (this) {
        telemetryDataList.add(telemetryData);
      }
    }

    @Override
    public void close() {
      synchronized (this) {
        telemetryDataList.clear();
        sentTelemetryData.clear();
      }
    }

    @Override
    public Future<Boolean> sendBatchAsync() {
      synchronized (this) {
        // A reporter cycle may still be in flight while the test is being torn down; answer
        // "not sent" instead of letting the executor reject the task with an exception.
        if (executor.isShutdown()) {
          return java.util.concurrent.CompletableFuture.completedFuture(false);
        }
        return executor.submit(
            () -> {
              synchronized (MockTelemetryClient.this) {
                sentTelemetryData.addAll(telemetryDataList);
                telemetryDataList.clear();
              }
              return true;
            });
      }
    }

    @Override
    public void postProcess(String s, String s1, int i, Throwable throwable) {}

    /** Returns a snapshot copy of everything sent so far. */
    LinkedList<TelemetryData> getSentTelemetryData() {
      synchronized (this) {
        return new LinkedList<>(sentTelemetryData);
      }
    }

    /** Stops the background sender thread; later {@link #sendBatchAsync()} calls are no-ops. */
    void shutdown() {
      synchronized (this) {
        executor.shutdownNow();
      }
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/AppendRowWithFallbackPolicyTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.ingest.streaming.SFException;
import dev.failsafe.function.CheckedRunnable;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.MockitoAnnotations;

/**
 * Tests for {@code AppendRowWithFallbackPolicy.executeWithFallback}: the success path, translation
 * of backpressure error codes into {@code BackpressureException}, the fallback path for other
 * {@code SFException}s, and pass-through of exceptions that are not {@code SFException}s.
 */
public class AppendRowWithFallbackPolicyTest {

  private final String channelName = "test_channel";

  @BeforeEach
  void setUp() {
    MockitoAnnotations.initMocks(this);
  }

  @Test
  void shouldReturnChannelOnFirstAttemptSuccess() {
    // Given: an append action that finishes without throwing
    CheckedRunnable noOpAppend = () -> {};

    // When: it runs with a fallback that would blow up if it were ever invoked
    boolean appended =
        AppendRowWithFallbackPolicy.executeWithFallback(noOpAppend, failingFallback(), channelName);

    // Then
    assertTrue(appended, "Should return true on successful append");
  }

  @Test
  void shouldThrowBackpressureExceptionOnRetryableException() {
    // Given: an append action that always fails with a backpressure error and counts its calls
    SFException backpressureError =
        new SFException("MemoryThresholdExceeded", "Some Message", 429, "Some Stacktrace");
    AtomicInteger invocations = new AtomicInteger(0);
    CheckedRunnable alwaysSaturated =
        () -> {
          invocations.getAndIncrement();
          throw backpressureError;
        };

    // When/Then
    BackpressureException raised =
        assertThrows(
            BackpressureException.class,
            () ->
                AppendRowWithFallbackPolicy.executeWithFallback(
                    alwaysSaturated, failingFallback(), channelName));

    // Then: exactly one attempt was made, and the SDK error is preserved as the cause
    assertEquals(1, invocations.get());
    assertSame(backpressureError, raised.getCause());
    assertEquals("SDK backpressure: MemoryThresholdExceeded", raised.getMessage());
  }

  @Test
  void shouldThrowBackpressureExceptionForAllRetryableErrorCodes() {
    // Every error code below is expected to surface as a BackpressureException
    final String[] backpressureCodes = {
      "ReceiverSaturated",
      "MemoryThresholdExceeded",
      "MemoryThresholdExceededInContainer",
      "HttpRetryableClientError"
    };

    for (String code : backpressureCodes) {
      assertThrowsBackpressureException(code);
    }
  }

  /**
   * Asserts that an append failing with an {@code SFException} carrying {@code errorCode} is
   * reported as a {@code BackpressureException} that wraps the original exception.
   */
  private void assertThrowsBackpressureException(String errorCode) {
    // Given
    SFException sdkError = new SFException(errorCode, "message", 429, "stack");
    CheckedRunnable failingAppend =
        () -> {
          throw sdkError;
        };

    // When/Then
    BackpressureException raised =
        assertThrows(
            BackpressureException.class,
            () ->
                AppendRowWithFallbackPolicy.executeWithFallback(
                    failingAppend, failingFallback(), channelName));

    assertSame(sdkError, raised.getCause());
    assertEquals("SDK backpressure: " + errorCode, raised.getMessage());
  }

  @Test
  void shouldFallbackOnNonRetryableSFException() {
    // Given: an append action that fails only on its first invocation
    SFException otherSdkError =
        new SFException("NonRetryableError", "Some Message", 420, "Some Stacktrace");
    AtomicInteger invocations = new AtomicInteger(0);
    CheckedRunnable failsOnce =
        () -> {
          final int attempt = invocations.getAndIncrement();
          if (attempt == 0) {
            throw otherSdkError;
          }
        };
    AtomicInteger fallbackInvocations = new AtomicInteger(0);

    // When
    boolean appended =
        AppendRowWithFallbackPolicy.executeWithFallback(
            failsOnce, countingFallbackSupplier(fallbackInvocations), channelName);

    // Then: one attempt, one fallback call, and the result signals that the fallback fired
    assertEquals(1, invocations.get());
    assertEquals(1, fallbackInvocations.get());
    assertFalse(appended, "Should return false when fallback fired");
  }

  @Test
  void shouldNotRetryNorFallbackOnNonSFException() {
    // Given: an append action that always fails with an exception that is not an SFException
    IllegalArgumentException unrelatedFailure = new IllegalArgumentException("Non-retryable");
    AtomicInteger invocations = new AtomicInteger(0);
    CheckedRunnable alwaysFailing =
        () -> {
          invocations.getAndIncrement();
          throw unrelatedFailure;
        };

    // When/Then
    IllegalArgumentException raised =
        assertThrows(
            IllegalArgumentException.class,
            () ->
                AppendRowWithFallbackPolicy.executeWithFallback(
                    alwaysFailing, failingFallback(), channelName));

    // Then: the very same exception instance escapes after a single attempt
    assertSame(unrelatedFailure, raised);
    assertEquals(1, invocations.get());
  }

  /** Returns a fallback that throws when invoked, for scenarios where it must never run. */
  private AppendRowWithFallbackPolicy.FallbackSupplierWithException failingFallback() {
    return cause -> {
      throw new RuntimeException("Test Scenario Failure");
    };
  }

  /** Returns a fallback that does nothing except bump {@code callCounter} on each invocation. */
  private AppendRowWithFallbackPolicy.FallbackSupplierWithException countingFallbackSupplier(
      AtomicInteger callCounter) {
    return cause -> callCounter.getAndIncrement();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/BackpressureExceptionTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.ingest.streaming.SFException;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@code BackpressureException}: how it wraps an {@code SFException}, which inputs
 * {@code isRetryableError} accepts, and which causes its constructor refuses.
 */
public class BackpressureExceptionTest {

  @Test
  void shouldWrapSFExceptionWithCorrectMessage() {
    // Given: a backpressure error reported by the SDK
    final SFException sdkError =
        new SFException("ReceiverSaturated", "Server overloaded", 429, "stack");

    // When: it is wrapped
    final BackpressureException wrapped = new BackpressureException(sdkError);

    // Then: the message names the error code and the original exception is the cause
    assertEquals("SDK backpressure: ReceiverSaturated", wrapped.getMessage());
    assertSame(sdkError, wrapped.getCause());
  }

  @Test
  void shouldRecognizeReceiverSaturatedAsRetryable() {
    final boolean retryable =
        BackpressureException.isRetryableError(
            new SFException("ReceiverSaturated", "message", 429, "stack"));

    assertTrue(retryable);
  }

  @Test
  void shouldRecognizeMemoryThresholdExceededAsRetryable() {
    final boolean retryable =
        BackpressureException.isRetryableError(
            new SFException("MemoryThresholdExceeded", "message", 429, "stack"));

    assertTrue(retryable);
  }

  @Test
  void shouldRecognizeMemoryThresholdExceededInContainerAsRetryable() {
    final boolean retryable =
        BackpressureException.isRetryableError(
            new SFException("MemoryThresholdExceededInContainer", "message", 429, "stack"));

    assertTrue(retryable);
  }

  @Test
  void shouldRecognizeHttpRetryableClientErrorAsRetryable() {
    final boolean retryable =
        BackpressureException.isRetryableError(
            new SFException("HttpRetryableClientError", "message", 503, "stack"));

    assertTrue(retryable);
  }

  @Test
  void shouldRejectNonRetryableSFException() {
    // An SFException whose error code is not one of the backpressure codes
    final boolean retryable =
        BackpressureException.isRetryableError(
            new SFException("SomeOtherError", "message", 500, "stack"));

    assertFalse(retryable);
  }

  @Test
  void shouldRejectNonSFException() {
    // An exception type other than SFException
    final boolean retryable =
        BackpressureException.isRetryableError(
            new IllegalArgumentException("not an SFException"));

    assertFalse(retryable);
  }

  @Test
  void shouldRejectNullException() {
    final boolean retryable = BackpressureException.isRetryableError(null);

    assertFalse(retryable);
  }

  @Test
  void shouldRejectConstructionWithNonRetryableSFException() {
    // Given: an SDK error that is not a backpressure error
    final SFException unsupportedCause =
        new SFException("SomeOtherError", "message", 500, "stack");

    // When/Then: the constructor refuses to wrap it
    assertThrows(
        IllegalArgumentException.class, () -> new BackpressureException(unsupportedCause));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/ClientRecreationExceptionTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.ingest.streaming.SFException;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@code ClientRecreationException}: how it wraps an {@code SFException}, which inputs
 * {@code isClientInvalidError} accepts, and which causes its constructor refuses.
 */
public class ClientRecreationExceptionTest {

  @Test
  void shouldWrapSFExceptionWithCorrectMessage() {
    // Given: a client-invalid error reported by the SDK
    final SFException sdkError =
        new SFException("InvalidClientError", "Client is invalid", 409, "Conflict");

    // When: it is wrapped
    final ClientRecreationException wrapped = new ClientRecreationException(sdkError);

    // Then: the message names the error code and the original exception is the cause
    assertEquals("SDK client invalid: InvalidClientError", wrapped.getMessage());
    assertSame(sdkError, wrapped.getCause());
  }

  @Test
  void shouldRecognizeInvalidClientError() {
    final boolean clientInvalid =
        ClientRecreationException.isClientInvalidError(
            new SFException("InvalidClientError", "Client is invalid", 409, "Conflict"));

    assertTrue(clientInvalid);
  }

  @Test
  void shouldRecognizeSfApiPipeFailedOverError() {
    final boolean clientInvalid =
        ClientRecreationException.isClientInvalidError(
            new SFException(
                "SfApiPipeFailedOverError", "HTTP 410 pipe failover", 400, "Bad Request"));

    assertTrue(clientInvalid);
  }

  @Test
  void shouldRecognizeClosedClientError() {
    final boolean clientInvalid =
        ClientRecreationException.isClientInvalidError(
            new SFException("ClosedClientError", "Client is closed", 409, "Conflict"));

    assertTrue(clientInvalid);
  }

  @Test
  void shouldNotRecognizeBackpressureErrors() {
    // Backpressure error codes must not be classified as client-invalid
    final SFException receiverSaturated =
        new SFException("ReceiverSaturated", "message", 429, "stack");
    assertFalse(ClientRecreationException.isClientInvalidError(receiverSaturated));

    final SFException memoryThresholdExceeded =
        new SFException("MemoryThresholdExceeded", "message", 429, "stack");
    assertFalse(ClientRecreationException.isClientInvalidError(memoryThresholdExceeded));
  }

  @Test
  void shouldNotRecognizeChannelLevelErrors() {
    // Channel-scoped error codes must not be classified as client-invalid
    final SFException invalidChannel =
        new SFException("InvalidChannelError", "Channel invalid", 409, "Conflict");
    assertFalse(ClientRecreationException.isClientInvalidError(invalidChannel));

    final SFException closedChannel =
        new SFException("ClosedChannelError", "Channel closed", 409, "Conflict");
    assertFalse(ClientRecreationException.isClientInvalidError(closedChannel));
  }

  @Test
  void shouldNotRecognizeOtherSFException() {
    final boolean clientInvalid =
        ClientRecreationException.isClientInvalidError(
            new SFException("SomeOtherError", "message", 500, "stack"));

    assertFalse(clientInvalid);
  }

  @Test
  void shouldNotRecognizeNonSFException() {
    final boolean clientInvalid =
        ClientRecreationException.isClientInvalidError(
            new IllegalArgumentException("not an SFException"));

    assertFalse(clientInvalid);
  }

  @Test
  void shouldNotRecognizeNull() {
    final boolean clientInvalid = ClientRecreationException.isClientInvalidError(null);

    assertFalse(clientInvalid);
  }

  @Test
  void shouldRejectConstructionWithNonClientInvalidSFException() {
    // Given: an SDK error with an unrelated error code
    final SFException unrelatedError = new SFException("SomeOtherError", "message", 500, "stack");

    // When/Then: the constructor refuses to wrap it
    assertThrows(
        IllegalArgumentException.class, () -> new ClientRecreationException(unrelatedError));
  }

  @Test
  void shouldRejectConstructionWithBackpressureSFException() {
    // Given: an SDK error with a backpressure error code
    final SFException saturated = new SFException("ReceiverSaturated", "message", 429, "stack");

    // When/Then: the constructor refuses to wrap it
    assertThrows(IllegalArgumentException.class, () -> new ClientRecreationException(saturated));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/SnowpipeStreamingPartitionChannelTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel.NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.ingest.streaming.ChannelStatusBatch;
import com.snowflake.ingest.streaming.OpenChannelResult;
import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestChannel;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.config.SnowflakeValidation;
import com.snowflake.kafka.connector.internal.DescribeTableRow;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext;
import com.snowflake.kafka.connector.internal.streaming.StreamingErrorHandler;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import com.snowflake.kafka.connector.internal.streaming.v2.channel.PartitionOffsetTracker;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationMode;
import com.snowflake.kafka.connector.internal.streaming.v2.migration.Ssv1MigrationResponse;
import com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class SnowpipeStreamingPartitionChannelTest {

  // Fixed identifiers shared by every test in this class.
  private static final String CONNECTOR_NAME = "test_connector";
  private static final String TABLE_NAME = "test_table";
  private static final String TOPIC_NAME = "test_topic";
  private static final int PARTITION = 0;
  // "<topic>_<partition>"; passed as the SSv1 channel name by the migration test helper.
  private static final String SSV1_CHANNEL_NAME = TOPIC_NAME + "_" + PARTITION;

  // Regenerated with a random suffix in setUp() so each test works with distinct names.
  private String channelName;
  private String pipeName;

  // Collaborators recreated in setUp() before every test.
  private SnowflakeTelemetryService mockTelemetryService;
  private StreamingErrorHandler mockErrorHandler;
  // Single-threaded executor handed to the partition channel; shut down in tearDown().
  private ExecutorService openChannelIoExecutor;
  private TrackingIngestClientSupplier trackingClientSupplier;
  private TrackingStreamingIngestClient trackingClient;
  private InMemorySinkTaskContext sinkTaskContext;

  /**
   * Builds fresh collaborators for every test: unique channel/pipe names, Mockito mocks, an
   * in-memory sink task context for the single test partition, the tracking ingest client and a
   * single-threaded executor.
   */
  @BeforeEach
  void setUp() {
    // A random 8-character suffix keeps names unique so StreamingClientPools caching cannot
    // carry state from one test into the next.
    final String suffix = UUID.randomUUID().toString().substring(0, 8);
    pipeName = "test_pipe_" + suffix;
    channelName = "test_channel_" + suffix;

    final TopicPartition assignedPartition = new TopicPartition(TOPIC_NAME, PARTITION);
    sinkTaskContext = new InMemorySinkTaskContext(Collections.singleton(assignedPartition));

    mockErrorHandler = mock(StreamingErrorHandler.class);
    mockTelemetryService = mock(SnowflakeTelemetryService.class);

    openChannelIoExecutor = Executors.newSingleThreadExecutor();
    trackingClientSupplier = new TrackingIngestClientSupplier();
    trackingClient = new TrackingStreamingIngestClient(pipeName, trackingClientSupplier);
  }

  /**
   * Stops the per-test executor. {@code shutdownNow()} also interrupts tasks that are still
   * running, such as the latch-blocked tasks some tests submit to stall channel initialization.
   */
  @AfterEach
  void tearDown() {
    openChannelIoExecutor.shutdownNow();
  }

  /** A freshly created partition channel must not close any SDK channel during its first open. */
  @Test
  void shouldNotCloseChannelOnFirstOpen() {
    final SnowpipeStreamingPartitionChannel freshChannel = createPartitionChannel();

    // getChannel() is called only to wait for the asynchronous initialization to finish.
    freshChannel.getChannel();

    // There was no previous channel, so nothing should have been closed.
    final int observedCloseCalls = trackingClientSupplier.getCloseCallCount();
    assertEquals(0, observedCloseCalls);
  }

  /**
   * Verifies that recovering from a single non-retryable appendRow failure closes the currently
   * open channel once and then opens a replacement channel.
   */
  @Test
  void shouldCloseOpenChannelBeforeReopening() {
    // Given: A partition channel is created and its underlying channel is open
    final SnowpipeStreamingPartitionChannel partitionChannel = createPartitionChannel();
    // Wait for async init to complete
    partitionChannel.getChannel();
    assertEquals(1, trackingClientSupplier.getTotalChannelsCreated());
    // assertFalse states the expectation directly instead of negating inside assertTrue.
    assertFalse(partitionChannel.isChannelClosed(), "Channel should be open before recovery");

    // Record close count before recovery
    final int closeCountBeforeRecovery = trackingClientSupplier.getCloseCallCount();

    // When: appendRow throws SFException once, triggering the fallback that reopens the channel.
    // After recovery the fallback completes normally — no exception propagates.
    trackingClientSupplier.setNonRetryableAppendRowFailures(1);
    partitionChannel.insertRecord(buildValidRecord(0), true);

    // reopenChannel closes the old channel before opening a new one
    assertEquals(closeCountBeforeRecovery + 1, trackingClientSupplier.getCloseCallCount());
    assertEquals(2, trackingClientSupplier.getTotalChannelsCreated());
  }

  /**
   * Closing a partition channel whose initialization task is still queued must complete the close
   * future without ever opening or closing an SDK channel.
   */
  @Test
  void closeChannelAsyncCancelsInitializationBeforeChannelOpens() throws Exception {
    // Occupy the only executor thread so the init task stays queued behind this gate.
    final CountDownLatch executorGate = new CountDownLatch(1);
    openChannelIoExecutor.submit(
        () -> {
          executorGate.await();
          return null;
        });

    final SnowpipeStreamingPartitionChannel pendingChannel = createPartitionChannel();

    // Request the close while the init task has not started yet (marks it as cancelled).
    final CompletableFuture<Void> closing = pendingChannel.closeChannelAsync();

    // Release the executor: the init task now runs, observes the cancellation and aborts with a
    // CancellationException.
    executorGate.countDown();

    // The close future is expected to finish through its exceptionally branch.
    closing.get(5, TimeUnit.SECONDS);

    // Nothing was opened, so nothing could have been closed either.
    assertEquals(0, trackingClientSupplier.getTotalChannelsCreated());
    assertEquals(0, trackingClientSupplier.getCloseCallCount());
  }

  /**
   * When the initial asynchronous open fails, the first insertRecord afterwards must recover by
   * opening a new channel instead of propagating the failure.
   */
  @Test
  void reopenChannelRecoversAfterFailedAsyncInitialization() {
    // The openChannel call made during async init is configured to throw.
    trackingClientSupplier.setThrowOnOpenChannel(true);
    final SnowpipeStreamingPartitionChannel brokenAtStart = createPartitionChannel();

    // getChannel() surfaces the failed initialization as an SFException.
    assertThrows(SFException.class, () -> brokenAtStart.getChannel());
    final int createdAfterFailedInit = trackingClientSupplier.getTotalChannelsCreated();
    assertEquals(
        0,
        createdAfterFailedInit,
        "No channels should have been created since openChannel threw");

    // The failure was transient: from now on openChannel succeeds again.
    trackingClientSupplier.setThrowOnOpenChannel(false);

    // The insert goes through the Failsafe fallback, which reopens the channel. The failed init
    // future is tolerated (no close is attempted) and the record lands on the new channel, so
    // no exception reaches the test.
    final SinkRecord firstRecord = buildValidRecord(0);
    brokenAtStart.insertRecord(firstRecord, true);

    assertEquals(
        1,
        trackingClientSupplier.getTotalChannelsCreated(),
        "reopenChannel should have opened a new channel after transient init failure");
  }

  /**
   * After a successful initialization, a non-retryable appendRow failure must close the existing
   * channel and open exactly one replacement.
   */
  @Test
  void reopenChannelClosesOldChannelWhenAsyncInitSucceeded() {
    final SnowpipeStreamingPartitionChannel healthyChannel = createPartitionChannel();
    healthyChannel.getChannel();

    // Baseline: one channel opened, none closed.
    assertEquals(1, trackingClientSupplier.getTotalChannelsCreated());
    assertEquals(0, trackingClientSupplier.getCloseCallCount());

    // A single SFException from appendRow forces reopenChannel; the retry then succeeds.
    trackingClientSupplier.setNonRetryableAppendRowFailures(1);
    final SinkRecord record = buildValidRecord(0);
    healthyChannel.insertRecord(record, true);

    // The old channel is closed as part of the reopen, and a second channel is created.
    final int closeCalls = trackingClientSupplier.getCloseCallCount();
    assertEquals(1, closeCalls, "Old channel should have been closed during reopenChannel");
    final int channelsCreated = trackingClientSupplier.getTotalChannelsCreated();
    assertEquals(
        2, channelsCreated, "A new channel should have been opened during reopenChannel");
  }

  /**
   * A retryable appendRow failure must surface as a {@code BackpressureException} from
   * insertRecord and must not cause the channel to be closed or reopened.
   */
  @Test
  void insertRecordThrowsBackpressureExceptionOnRetryableError() {
    final SnowpipeStreamingPartitionChannel openChannel = createPartitionChannel();
    openChannel.getChannel();
    assertEquals(1, trackingClientSupplier.getTotalChannelsCreated());

    // The next appendRow fails with the retryable MemoryThresholdExceeded error.
    trackingClientSupplier.setRetryableAppendRowFailures(1);

    // This layer does not catch the exception; handling is left to the batch-level insert()
    // loop (Task 4).
    final SinkRecord record = buildValidRecord(0);
    final BackpressureException backpressure =
        assertThrows(BackpressureException.class, () -> openChannel.insertRecord(record, true));

    assertEquals("SDK backpressure: MemoryThresholdExceeded", backpressure.getMessage());

    // Backpressure is not channel invalidation: still one channel, and it was never closed.
    assertEquals(0, trackingClientSupplier.getCloseCallCount());
    assertEquals(1, trackingClientSupplier.getTotalChannelsCreated());
  }

  /**
   * {@code isInitializing()} must report true while the init task is still queued and false once
   * the channel has finished opening.
   */
  @Test
  void isInitializingReturnsTrueWhileChannelFutureIsPending() throws Exception {
    // Keep the single executor thread busy so channel initialization cannot start.
    final CountDownLatch executorGate = new CountDownLatch(1);
    openChannelIoExecutor.submit(
        () -> {
          executorGate.await();
          return null;
        });

    final SnowpipeStreamingPartitionChannel pendingChannel = createPartitionChannel();

    final boolean initializingWhileBlocked = pendingChannel.isInitializing();
    assertTrue(initializingWhileBlocked, "Should be initializing while future is pending");

    // Release the executor and block until the channel is available.
    executorGate.countDown();
    pendingChannel.getChannel();

    final boolean initializingAfterOpen = pendingChannel.isInitializing();
    assertFalse(initializingAfterOpen, "Should not be initializing after future completes");
  }

  /**
   * Regression test for channel invalidation recovery.
   *
   * <p>Previously an SFException from appendRow made the fallback reopen the channel and then
   * re-throw unconditionally, so the KC framework killed the task as "unrecoverable". With the
   * fix the fallback reopens the channel and returns normally, letting Failsafe run appendRow
   * again on the new channel.
   */
  @Test
  void channelInvalidationRecovery_taskSurvivesAndContinuesIngesting() {
    final SnowpipeStreamingPartitionChannel channelUnderTest = createPartitionChannel();
    channelUnderTest.getChannel();
    assertEquals(1, trackingClientSupplier.getTotalChannelsCreated());

    // Offset 0 goes through without any failure.
    channelUnderTest.insertRecord(buildValidRecord(0), true);

    // Offset 1 hits one non-retryable SFException (channel invalidation) and then succeeds on
    // the reopened channel.
    trackingClientSupplier.setNonRetryableAppendRowFailures(1);
    channelUnderTest.insertRecord(buildValidRecord(1), false);

    // Exactly one reopen happened: the old channel was closed and a second one was created.
    assertEquals(1, trackingClientSupplier.getCloseCallCount());
    assertEquals(2, trackingClientSupplier.getTotalChannelsCreated());

    // Ingestion keeps working on the replacement channel.
    for (long offset = 2; offset <= 3; offset++) {
      channelUnderTest.insertRecord(buildValidRecord(offset), false);
    }

    // The follow-up inserts did not trigger any further reopen.
    assertEquals(1, trackingClientSupplier.getCloseCallCount());
    assertEquals(2, trackingClientSupplier.getTotalChannelsCreated());
  }

  /**
   * A permanently failing channel must not cause unbounded channel churn: after
   * MAX_CONSECUTIVE_RECOVERIES (5) reopen attempts the fallback stops creating new channels.
   */
  @Test
  void channelInvalidation_stopsReopeningAfterMaxConsecutiveRecoveries() {
    final SnowpipeStreamingPartitionChannel channelUnderTest = createPartitionChannel();
    channelUnderTest.getChannel();
    assertEquals(1, trackingClientSupplier.getTotalChannelsCreated());

    // From here on every appendRow fails, i.e. the channel never becomes usable.
    trackingClientSupplier.setThrowOnAppendRow(true);

    // Push 20 records through. All of them reach the fallback, but only the first five are
    // expected to reopen the channel before the circuit breaker trips.
    for (long offset = 0; offset < 20; offset++) {
      final boolean isFirstRecord = offset == 0;
      channelUnderTest.insertRecord(buildValidRecord(offset), isFirstRecord);
    }

    // Upper bound: the initial channel plus at most five recoveries.
    final boolean withinBound = trackingClientSupplier.getTotalChannelsCreated() <= 6;
    assertTrue(
        withinBound,
        "Expected at most 6 channels (1 initial + 5 recoveries), got: "
            + trackingClientSupplier.getTotalChannelsCreated());
  }

  /**
   * Creates a sink record for the test topic/partition whose value is the schemaless JSON
   * document {@code {"name": "test"}}.
   *
   * @param offset Kafka offset to assign to the record
   * @return the built record
   */
  private SinkRecord buildValidRecord(long offset) {
    final byte[] payload = "{\"name\": \"test\"}".getBytes(StandardCharsets.UTF_8);

    // Value converter (isKey=false) with schemas disabled, so the payload is plain JSON.
    final JsonConverter converter = new JsonConverter();
    converter.configure(Collections.singletonMap("schemas.enable", "false"), false);
    final SchemaAndValue converted = converter.toConnectData(TOPIC_NAME, payload);

    return SinkRecordBuilder.forTopicPartition(TOPIC_NAME, PARTITION)
        .withSchemaAndValue(converted)
        .withOffset(offset)
        .build();
  }

  /**
   * Creates the default partition channel used by most tests: server-side validation,
   * schematization off, schema evolution off, no connection service and no SSv1 channel name.
   *
   * @return a partition channel wired to the tracking client and the per-test executor
   */
  private SnowpipeStreamingPartitionChannel createPartitionChannel() {
    final SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName(CONNECTOR_NAME)
            .taskId("0")
            .enableSchematization(false)
            .enableColumnIdentifierNormalization(true)
            .validation(SnowflakeValidation.SERVER_SIDE)
            .build();

    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final PartitionOffsetTracker tracker =
        new PartitionOffsetTracker(partition, sinkTaskContext, channelName);

    // The telemetry status shares the tracker's offset references.
    final SnowflakeTelemetryChannelStatus channelStatus =
        new SnowflakeTelemetryChannelStatus(
            TABLE_NAME,
            CONNECTOR_NAME,
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            tracker.persistedOffsetRef(),
            tracker.processedOffsetRef(),
            tracker.consumerGroupOffsetRef());

    return new SnowpipeStreamingPartitionChannel(
        TABLE_NAME,
        channelName,
        pipeName,
        trackingClient,
        openChannelIoExecutor,
        mockTelemetryService,
        channelStatus,
        tracker,
        config,
        mockErrorHandler,
        TaskMetrics.noop(),
        false,
        null,
        Optional.empty());
  }

  /** A null offset token maps to the NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE sentinel. */
  @Test
  void parseOffsetToken_nullReturnsNoOffset() {
    final String channelLabel = "test_channel";
    assertEquals(
        NO_OFFSET_TOKEN_REGISTERED_IN_SNOWFLAKE,
        SnowpipeStreamingPartitionChannel.parseOffsetToken(null, channelLabel));
  }

  /** Numeric tokens, including zero and Long.MAX_VALUE, are parsed to their long value. */
  @Test
  void parseOffsetToken_validToken() {
    final String channelLabel = "test_channel";
    final String largestToken = String.valueOf(Long.MAX_VALUE);

    assertEquals(42L, SnowpipeStreamingPartitionChannel.parseOffsetToken("42", channelLabel));
    assertEquals(0L, SnowpipeStreamingPartitionChannel.parseOffsetToken("0", channelLabel));
    assertEquals(
        Long.MAX_VALUE,
        SnowpipeStreamingPartitionChannel.parseOffsetToken(largestToken, channelLabel));
  }

  /** Non-integer tokens (text, empty string, decimal) are rejected with a ConnectException. */
  @Test
  void parseOffsetToken_invalidTokenThrows() {
    final String[] malformedTokens = {"not_a_number", "", "12.5"};
    for (final String malformedToken : malformedTokens) {
      assertThrows(
          ConnectException.class,
          () -> SnowpipeStreamingPartitionChannel.parseOffsetToken(malformedToken, "test_channel"));
    }
  }

  // --- Validation integration tests ---

  // Connection-service mock used by the validation tests. It is assigned by
  // createValidationEnabledChannel (or directly inside a test) rather than in setUp().
  private SnowflakeConnectionService mockConnService;

  /**
   * Creates a partition channel configured for client-side validation. A fresh connection-service
   * mock is stored in {@code mockConnService} and stubbed so that describing the test table
   * yields the given rows.
   *
   * @param describeResult rows returned by the stubbed {@code describeTable(TABLE_NAME)} call
   * @param enableSchematization value for the task config's schematization flag
   * @param shouldEvolveSchema schema-evolution flag passed to the channel constructor
   * @return the configured partition channel
   */
  private SnowpipeStreamingPartitionChannel createValidationEnabledChannel(
      List<DescribeTableRow> describeResult,
      boolean enableSchematization,
      boolean shouldEvolveSchema) {
    // The stub must be in place before the channel is constructed.
    mockConnService = mock(SnowflakeConnectionService.class);
    when(mockConnService.describeTable(TABLE_NAME)).thenReturn(Optional.of(describeResult));

    final SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName(CONNECTOR_NAME)
            .taskId("0")
            .enableSchematization(enableSchematization)
            .enableColumnIdentifierNormalization(true)
            .validation(SnowflakeValidation.CLIENT_SIDE)
            .build();

    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final PartitionOffsetTracker tracker =
        new PartitionOffsetTracker(partition, sinkTaskContext, channelName);
    final SnowflakeTelemetryChannelStatus channelStatus =
        new SnowflakeTelemetryChannelStatus(
            TABLE_NAME,
            CONNECTOR_NAME,
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            tracker.persistedOffsetRef(),
            tracker.processedOffsetRef(),
            tracker.consumerGroupOffsetRef());

    return new SnowpipeStreamingPartitionChannel(
        TABLE_NAME,
        channelName,
        pipeName,
        trackingClient,
        openChannelIoExecutor,
        mockTelemetryService,
        channelStatus,
        tracker,
        config,
        mockErrorHandler,
        TaskMetrics.noop(),
        shouldEvolveSchema,
        mockConnService,
        Optional.empty());
  }

  // Describe-table result for the default two-column layout: RECORD_CONTENT and RECORD_METADATA,
  // both VARIANT. NOTE(review): the trailing "Y" is presumably the nullable flag — confirm
  // against DescribeTableRow.
  private static final List<DescribeTableRow> STANDARD_TABLE_SCHEMA =
      Arrays.asList(
          new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"),
          new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"));

  /**
   * With client-side validation and the standard two-column table, a valid record is inserted
   * without the error handler being invoked.
   */
  @Test
  void validationEnabled_validRecord_insertsSuccessfully() {
    final SinkRecord validRecord = buildValidRecord(0);

    // Schematization is off, so the record is wrapped into RECORD_CONTENT/RECORD_METADATA.
    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(STANDARD_TABLE_SCHEMA, false, true);

    validatingChannel.insertRecord(validRecord, true);

    verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class));
    final int channelsCreated = trackingClientSupplier.getTotalChannelsCreated();
    assertEquals(1, channelsCreated);
  }

  /**
   * When the table lacks a column the record carries and schema evolution is enabled, the record
   * still ends up at the error handler because the stubbed describeTable never returns the added
   * column.
   */
  @Test
  void validationEnabled_extraColumn_triggersSchemaEvolution() {
    // The described table exposes only RECORD_METADATA.
    // NOTE(review): with schematization enabled the "extra" column is presumably the record's
    // own field rather than RECORD_CONTENT — confirm.
    final List<DescribeTableRow> metadataOnlySchema =
        Arrays.asList(new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"));
    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(metadataOnlySchema, true, true);

    final SinkRecord record = buildValidRecord(0);
    validatingChannel.insertRecord(record, true);

    // Evolution is attempted, but the refreshed (mocked) schema is unchanged, so validation
    // fails again and the record is reported as an error.
    verify(mockErrorHandler).handleError(any(Exception.class), eq(record));
  }

  /**
   * With schema evolution disabled, a structural mismatch is sent to the error handler and no
   * DDL (adding columns, altering nullability) is issued through the connection service.
   */
  @Test
  void validationEnabled_schemaEvolutionDisabled_structuralErrorRoutesToDlq() {
    final List<DescribeTableRow> metadataOnlySchema =
        Arrays.asList(new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"));
    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(metadataOnlySchema, true, false);

    final SinkRecord record = buildValidRecord(0);
    validatingChannel.insertRecord(record, true);

    // The record is handed to the error handler...
    verify(mockErrorHandler).handleError(any(Exception.class), eq(record));
    // ...and the table is left untouched.
    verify(mockConnService, never()).appendColumnsToTable(any(), any());
    verify(mockConnService, never()).alterNonNullableColumns(any(), any());
  }

  /**
   * If describeTable yields no result, inserting a record must not route anything to the error
   * handler, even though client-side validation is configured.
   */
  @Test
  void validationEnabled_describeTableFails_disablesValidation() {
    // describeTable returns an empty Optional, i.e. no schema is available.
    mockConnService = mock(SnowflakeConnectionService.class);
    when(mockConnService.describeTable(TABLE_NAME)).thenReturn(Optional.empty());

    final SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName(CONNECTOR_NAME)
            .taskId("0")
            .enableSchematization(true)
            .enableColumnIdentifierNormalization(true)
            .validation(SnowflakeValidation.CLIENT_SIDE)
            .build();

    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final PartitionOffsetTracker tracker =
        new PartitionOffsetTracker(partition, sinkTaskContext, channelName);
    final SnowflakeTelemetryChannelStatus channelStatus =
        new SnowflakeTelemetryChannelStatus(
            TABLE_NAME,
            CONNECTOR_NAME,
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            tracker.persistedOffsetRef(),
            tracker.processedOffsetRef(),
            tracker.consumerGroupOffsetRef());

    final SnowpipeStreamingPartitionChannel validatingChannel =
        new SnowpipeStreamingPartitionChannel(
            TABLE_NAME,
            channelName,
            pipeName,
            trackingClient,
            openChannelIoExecutor,
            mockTelemetryService,
            channelStatus,
            tracker,
            config,
            mockErrorHandler,
            TaskMetrics.noop(),
            true,
            mockConnService,
            Optional.empty());

    validatingChannel.insertRecord(buildValidRecord(0), true);

    verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class));
  }

  /**
   * A record that does not supply a value for a NOT NULL column is reported to the error handler.
   */
  @Test
  void validationEnabled_notNullColumn_detectsMissingValue() {
    // Two nullable VARIANT columns plus REQUIRED_COL, which is declared NOT NULL ("N").
    final DescribeTableRow content = new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y");
    final DescribeTableRow metadata =
        new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y");
    final DescribeTableRow required =
        new DescribeTableRow("REQUIRED_COL", "VARCHAR(100)", null, "N");
    final List<DescribeTableRow> schemaWithRequiredColumn =
        Arrays.asList(content, metadata, required);

    // Schema evolution is enabled, so it is attempted for the missing NOT NULL column.
    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(schemaWithRequiredColumn, true, true);

    // The test record has no REQUIRED_COL, which is a structural error.
    final SinkRecord record = buildValidRecord(0);
    validatingChannel.insertRecord(record, true);

    verify(mockErrorHandler).handleError(any(Exception.class), eq(record));
  }

  /**
   * A record with several fields unknown to the table must lead to an appendColumnsToTable call
   * whose column map contains CITY, AGE and COUNTRY.
   */
  @Test
  void validationEnabled_multipleExtraColumns_passesRawColumnNames() {
    final List<DescribeTableRow> metadataOnlySchema =
        Arrays.asList(new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"));
    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(metadataOnlySchema, true, true);

    // Build a schemaless JSON record with three fields that the table does not have.
    final byte[] payload =
        "{\"city\": \"Hsinchu\", \"age\": 25, \"country\": \"TW\"}"
            .getBytes(StandardCharsets.UTF_8);
    final JsonConverter converter = new JsonConverter();
    converter.configure(Collections.singletonMap("schemas.enable", "false"), false);
    final SchemaAndValue converted = converter.toConnectData(TOPIC_NAME, payload);
    final SinkRecord record =
        SinkRecordBuilder.forTopicPartition(TOPIC_NAME, PARTITION)
            .withSchemaAndValue(converted)
            .withOffset(0)
            .build();

    validatingChannel.insertRecord(record, true);

    // All three new columns must be present in the map handed to appendColumnsToTable.
    verify(mockConnService)
        .appendColumnsToTable(
            eq(TABLE_NAME),
            argThat(
                columnInfos ->
                    columnInfos != null
                        && columnInfos.containsKey("CITY")
                        && columnInfos.containsKey("AGE")
                        && columnInfos.containsKey("COUNTRY")));
  }

  /**
   * A NOT NULL identity column that the record does not populate must not cause a validation
   * error.
   */
  @Test
  void validationEnabled_identityColumnMissing_insertsSuccessfully() {
    final DescribeTableRow identityColumn =
        new DescribeTableRow(
            "ID", "NUMBER(38,0)", null, "N", null, "IDENTITY START 1 INCREMENT 1");
    final List<DescribeTableRow> schemaWithIdentity =
        Arrays.asList(
            identityColumn,
            new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"),
            new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"));

    // Schematization is off: the record only fills RECORD_CONTENT/RECORD_METADATA.
    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(schemaWithIdentity, false, true);

    validatingChannel.insertRecord(buildValidRecord(0), true);

    // The row has no ID value, yet nothing is reported to the error handler.
    verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class));
  }

  /**
   * A NOT NULL column with a default expression that the record does not populate must not cause
   * a validation error.
   */
  @Test
  void validationEnabled_defaultNotNullColumnMissing_insertsSuccessfully() {
    // CREATED_AT is NOT NULL ("N") but declares CURRENT_TIMESTAMP() as its default.
    final DescribeTableRow defaultedColumn =
        new DescribeTableRow(
            "CREATED_AT", "TIMESTAMP_NTZ(9)", null, "N", "CURRENT_TIMESTAMP()", null);
    final List<DescribeTableRow> schemaWithDefault =
        Arrays.asList(
            new DescribeTableRow("RECORD_CONTENT", "VARIANT", null, "Y"),
            new DescribeTableRow("RECORD_METADATA", "VARIANT", null, "Y"),
            defaultedColumn);

    final SnowpipeStreamingPartitionChannel validatingChannel =
        createValidationEnabledChannel(schemaWithDefault, false, true);

    validatingChannel.insertRecord(buildValidRecord(0), true);

    verify(mockErrorHandler, never()).handleError(any(Exception.class), any(SinkRecord.class));
  }

  // --- SSv1 offset migration tests ---

  /**
   * Creates a partition channel for the SSv1 migration tests (server-side validation,
   * schematization off). The SSv1 channel name is supplied to the channel for every mode except
   * {@code SKIP}, for which an empty Optional is passed.
   *
   * @param migrationMode migration mode to set on the task config
   * @param mockConn connection service handed to the channel
   * @return the configured partition channel
   */
  private SnowpipeStreamingPartitionChannel createPartitionChannelWithMigration(
      Ssv1MigrationMode migrationMode, SnowflakeConnectionService mockConn) {
    final SinkTaskConfig config =
        SinkTaskConfigTestBuilder.builder()
            .connectorName(CONNECTOR_NAME)
            .taskId("0")
            .enableSchematization(false)
            .enableColumnIdentifierNormalization(true)
            .validation(SnowflakeValidation.SERVER_SIDE)
            .ssv1MigrationMode(migrationMode)
            .build();

    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final PartitionOffsetTracker tracker =
        new PartitionOffsetTracker(partition, sinkTaskContext, channelName);
    final SnowflakeTelemetryChannelStatus channelStatus =
        new SnowflakeTelemetryChannelStatus(
            TABLE_NAME,
            CONNECTOR_NAME,
            channelName,
            System.currentTimeMillis(),
            Optional.empty(),
            tracker.persistedOffsetRef(),
            tracker.processedOffsetRef(),
            tracker.consumerGroupOffsetRef());

    return new SnowpipeStreamingPartitionChannel(
        TABLE_NAME,
        channelName,
        pipeName,
        trackingClient,
        openChannelIoExecutor,
        mockTelemetryService,
        channelStatus,
        tracker,
        config,
        mockErrorHandler,
        TaskMetrics.noop(),
        false,
        mockConn,
        // Only non-SKIP modes get an SSv1 channel name to consult.
        migrationMode != Ssv1MigrationMode.SKIP
            ? Optional.of(SSV1_CHANNEL_NAME)
            : Optional.empty());
  }

  /** In SKIP mode, opening the channel must never call the SSv1 offset migration function. */
  @Test
  void migration_skip_doesNotConsultSsv1() {
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    final SnowpipeStreamingPartitionChannel skipChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.SKIP, connMock);

    // Wait until the channel has been opened.
    skipChannel.getChannel();

    verify(connMock, never()).migrateSsv1ChannelOffset(any(), any(), any(), any());
  }

  /**
   * BEST_EFFORT: when the SSv2 channel has no offset, the migrated SSv1 offset (100) is used and
   * the Kafka offset for the partition is set to the next offset (101).
   */
  @Test
  void migration_bestEffort_usesSsv1OffsetWhenSsv2HasNone() {
    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    when(connMock.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.migrated(100L));

    final SnowpipeStreamingPartitionChannel migratingChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connMock);
    migratingChannel.getChannel();

    // The fake client reports no SSv2 offset, so the SSv1 migration function is consulted.
    verify(connMock).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName);
    // Consumption resumes right after the migrated offset: 100 + 1.
    assertEquals(101L, sinkTaskContext.offset(partition));
  }

  /**
   * BEST_EFFORT: a "channel not found" migration response does not fail channel initialization.
   */
  @Test
  void migration_bestEffort_proceedsWhenSsv1NotFound() {
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    when(connMock.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.channelNotFound());

    final SnowpipeStreamingPartitionChannel migratingChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connMock);

    // Completes without throwing: best_effort falls through to the consumer group offset.
    migratingChannel.getChannel();

    verify(connMock).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName);
  }

  /**
   * BEST_EFFORT: an SSv1 channel that exists but has no committed offset does not fail channel
   * initialization.
   */
  @Test
  void migration_bestEffort_proceedsWhenSsv1HasNoOffset() {
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    when(connMock.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.channelFoundNoOffset());

    final SnowpipeStreamingPartitionChannel migratingChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connMock);

    // Completes without throwing: there is nothing to migrate, so best_effort falls through.
    migratingChannel.getChannel();

    verify(connMock).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName);
  }

  /**
   * BEST_EFFORT: when the SSv2 open already reports an offset ("50"), the SSv1 migration function
   * is not called and the Kafka offset for the partition is set to 51.
   */
  @Test
  void migration_bestEffort_ignoresSsv1WhenSsv2HasOffset() {
    // Replace the tracking client with one whose openChannel result carries a channel status
    // with offset token "50", so the SSv2 channel appears to have a committed offset.
    trackingClient =
        new TrackingStreamingIngestClient(pipeName, trackingClientSupplier) {
          @Override
          public OpenChannelResult openChannel(String channelNameArg, String offsetToken) {
            final OpenChannelResult delegateResult =
                super.openChannel(channelNameArg, offsetToken);
            final ChannelStatus seededStatus =
                new ChannelStatus(
                    "db",
                    "schema",
                    pipeName,
                    channelNameArg,
                    "SUCCESS",
                    "50",
                    Instant.now(),
                    0,
                    0,
                    0,
                    null,
                    null,
                    null,
                    null,
                    Instant.now());
            return new OpenChannelResult(delegateResult.getChannel(), seededStatus);
          }
        };

    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);

    final SnowpipeStreamingPartitionChannel migratingChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connMock);
    migratingChannel.getChannel();

    // The SSv2 offset wins, so the system function is never invoked.
    verify(connMock, never()).migrateSsv1ChannelOffset(any(), any(), any(), any());
    // Consumption resumes right after the SSv2 offset: 50 + 1.
    assertEquals(51L, sinkTaskContext.offset(partition));
  }

  /**
   * STRICT: a successful migration response (offset 100) is applied exactly as in best_effort,
   * leaving the Kafka offset for the partition at 101.
   */
  @Test
  void migration_strict_usesSsv1OffsetWhenFound() {
    final TopicPartition partition = new TopicPartition(TOPIC_NAME, PARTITION);
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    when(connMock.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.migrated(100L));

    final SnowpipeStreamingPartitionChannel strictChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.STRICT, connMock);
    strictChannel.getChannel();

    // The SSv1 channel was found, so its offset is migrated.
    verify(connMock).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName);
    assertEquals(101L, sinkTaskContext.offset(partition));
  }

  /** STRICT: a "channel not found" migration response makes getChannel() throw. */
  @Test
  void migration_strict_throwsWhenSsv1NotFound() {
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    when(connMock.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.channelNotFound());

    final SnowpipeStreamingPartitionChannel strictChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.STRICT, connMock);

    // Unlike best_effort, strict mode refuses to fall through when the SSv1 channel is missing.
    assertThrows(ConnectException.class, strictChannel::getChannel);
  }

  /**
   * STRICT: an SSv1 channel that exists but has no committed offset does not fail channel
   * initialization.
   */
  @Test
  void migration_strict_proceedsWhenSsv1HasNoOffset() {
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);
    when(connMock.migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.channelFoundNoOffset());

    final SnowpipeStreamingPartitionChannel strictChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.STRICT, connMock);

    // No exception: the channel was found, and "nothing to migrate" is not the same as the
    // channel not existing.
    strictChannel.getChannel();

    verify(connMock).migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName);
  }

  /**
   * If the SSv2 openChannel call fails, initialization fails and the SSv1 migration function is
   * never invoked.
   */
  @Test
  void migration_ssv2OpenFails_doesNotConsultSsv1() {
    final SnowflakeConnectionService connMock = mock(SnowflakeConnectionService.class);

    // Every SSv2 openChannel call throws.
    trackingClientSupplier.setThrowOnOpenChannel(true);

    final SnowpipeStreamingPartitionChannel failingChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connMock);

    // The failed open surfaces when the channel is requested.
    assertThrows(RuntimeException.class, failingChannel::getChannel);

    // Migration is only attempted after SSv2 opened successfully, which never happened.
    verify(connMock, never()).migrateSsv1ChannelOffset(any(), any(), any(), any());
  }

  @Test
  void migration_systemFunctionFails_propagatesException() {
    final RuntimeException systemFunctionFailure =
        new RuntimeException(
            "SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET failed for ssv1Channel=" + SSV1_CHANNEL_NAME);

    SnowflakeConnectionService connectionService = mock(SnowflakeConnectionService.class);
    when(connectionService.migrateSsv1ChannelOffset(
            TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenThrow(systemFunctionFailure);

    SnowpipeStreamingPartitionChannel partitionChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connectionService);

    // Even in BEST_EFFORT mode a failing system function has to propagate. Silently continuing
    // with the consumer group offset could produce duplicates when that offset lags behind the
    // SSv1 offset.
    RuntimeException thrown =
        assertThrows(RuntimeException.class, () -> partitionChannel.getChannel());
    assertTrue(thrown.getMessage().contains("SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET"));
  }

  @Test
  void migration_bestEffort_consultsSsv1DuringReopenChannel() {
    SnowflakeConnectionService connectionService = mock(SnowflakeConnectionService.class);
    when(connectionService.migrateSsv1ChannelOffset(
            TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName))
        .thenReturn(Ssv1MigrationResponse.migrated(100L));

    // Phase 1: the initial open fails, so migration cannot run during construction.
    trackingClientSupplier.setThrowOnOpenChannel(true);
    SnowpipeStreamingPartitionChannel partitionChannel =
        createPartitionChannelWithMigration(Ssv1MigrationMode.BEST_EFFORT, connectionService);
    assertThrows(RuntimeException.class, () -> partitionChannel.getChannel());
    verify(connectionService, never()).migrateSsv1ChannelOffset(any(), any(), any(), any());

    // Phase 2: let the next openChannel succeed and force a reopen through insertRecord.
    // getChannel() re-throws the SFException from the failed init future; the
    // AppendRowWithFallbackPolicy catches it and invokes reopenChannel, whose .exceptionally()
    // handler deals with the failed init before a new channel is opened.
    trackingClientSupplier.setThrowOnOpenChannel(false);
    partitionChannel.insertRecord(buildValidRecord(0), true);

    // Block until the asynchronous reopen has finished.
    partitionChannel.getChannel();

    // Only the reopen reached the SSv1 lookup, hence exactly one invocation.
    verify(connectionService, times(1))
        .migrateSsv1ChannelOffset(TABLE_NAME, SSV1_CHANNEL_NAME, channelName, pipeName);

    // The Kafka offset is rewound to the migrated SSv1 offset + 1 (100 + 1 = 101).
    final TopicPartition topicPartition = new TopicPartition(TOPIC_NAME, PARTITION);
    assertEquals(101L, sinkTaskContext.offset(topicPartition));
  }

  /**
   * Shared state holder for the tracking test doubles: failure-injection switches that tests flip
   * and counters that tests read back for verification. All state lives in volatile or atomic
   * fields.
   */
  static class TrackingIngestClientSupplier {

    // Failure-injection switches read by the tracking client and channel.
    private volatile boolean throwOnOpenChannel;
    private volatile boolean throwOnAppendRow;
    private volatile boolean throwOnOffsetToken;

    // When non-null, openChannel waits on this latch before creating the channel.
    private volatile CountDownLatch blockOnOpenChannel;

    // Remaining number of appendRow calls that should fail with the respective error.
    private final AtomicInteger retryableAppendRowFailures = new AtomicInteger();
    private final AtomicInteger nonRetryableAppendRowFailures = new AtomicInteger();

    // Observed activity.
    private final AtomicInteger totalChannelsCreated = new AtomicInteger();
    private final AtomicInteger closeCallCount = new AtomicInteger();

    /** Number of channel close() calls recorded so far. */
    int getCloseCallCount() {
      return closeCallCount.get();
    }

    /** Number of channels recorded as created so far. */
    int getTotalChannelsCreated() {
      return totalChannelsCreated.get();
    }

    /** Records one channel close. */
    void incrementCloseCallCount() {
      closeCallCount.incrementAndGet();
    }

    /** Records one channel creation and returns the updated total. */
    int incrementChannelsCreated() {
      return totalChannelsCreated.incrementAndGet();
    }

    void setThrowOnOpenChannel(boolean throwOnOpenChannel) {
      this.throwOnOpenChannel = throwOnOpenChannel;
    }

    void setThrowOnAppendRow(boolean throwOnAppendRow) {
      this.throwOnAppendRow = throwOnAppendRow;
    }

    void setThrowOnOffsetToken(boolean throwOnOffsetToken) {
      this.throwOnOffsetToken = throwOnOffsetToken;
    }

    void setBlockOnOpenChannel(CountDownLatch latch) {
      this.blockOnOpenChannel = latch;
    }

    void setRetryableAppendRowFailures(int count) {
      retryableAppendRowFailures.set(count);
    }

    void setNonRetryableAppendRowFailures(int count) {
      nonRetryableAppendRowFailures.set(count);
    }
  }

  /**
   * Test double for the streaming ingest client. Every successful {@code openChannel} creates a
   * {@link TrackingStreamingIngestChannel} and is recorded on the shared supplier state.
   */
  static class TrackingStreamingIngestClient implements SnowflakeStreamingIngestClient {

    private final String pipeName;
    private final TrackingIngestClientSupplier supplier;
    // Channels opened through this client, keyed by channel name; a re-open replaces the entry.
    private final ConcurrentHashMap<String, TrackingStreamingIngestChannel> channels =
        new ConcurrentHashMap<>();

    TrackingStreamingIngestClient(
        final String pipeName, final TrackingIngestClientSupplier supplier) {
      this.supplier = supplier;
      this.pipeName = pipeName;
    }

    /** Opens a channel without an offset token. */
    @Override
    public OpenChannelResult openChannel(final String channelName) {
      return openChannel(channelName, null);
    }

    /**
     * Opens a tracking channel. The simulated failure is checked first, then the optional latch is
     * awaited, and only then is the creation counted and the channel registered.
     */
    @Override
    public OpenChannelResult openChannel(final String channelName, final String offsetToken) {
      if (supplier.throwOnOpenChannel) {
        throw new SFException("OpenChannelFailed", "Test simulated openChannel failure", 0, "");
      }
      awaitOpenChannelLatch();
      supplier.incrementChannelsCreated();

      final ChannelStatus openedStatus = successStatus(channelName, offsetToken);
      final TrackingStreamingIngestChannel opened =
          new TrackingStreamingIngestChannel(pipeName, channelName, supplier);
      channels.put(channelName, opened);
      return new OpenChannelResult(opened, openedStatus);
    }

    /** Waits on the supplier's latch when one is installed; restores the interrupt flag. */
    private void awaitOpenChannelLatch() {
      final CountDownLatch gate = supplier.blockOnOpenChannel;
      if (gate == null) {
        return;
      }
      try {
        gate.await();
      } catch (InterruptedException interrupted) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(interrupted);
      }
    }

    /** Builds the "SUCCESS" status returned from openChannel, echoing the given offset token. */
    private ChannelStatus successStatus(final String channelName, final String offsetToken) {
      return new ChannelStatus(
          "db",
          "schema",
          pipeName,
          channelName,
          "SUCCESS",
          offsetToken,
          Instant.now(),
          0,
          0,
          0,
          null,
          null,
          null,
          null,
          Instant.now());
    }

    /** Returns the status of every requested channel that was opened through this client. */
    @Override
    public ChannelStatusBatch getChannelStatus(final List<String> channelNames) {
      final Map<String, ChannelStatus> knownStatuses = new HashMap<>();
      for (final String requested : channelNames) {
        final TrackingStreamingIngestChannel tracked = channels.get(requested);
        if (tracked == null) {
          // Names that were never opened here are left out of the batch.
          continue;
        }
        knownStatuses.put(requested, tracked.getChannelStatus());
      }
      return new ChannelStatusBatch(knownStatuses);
    }

    @Override
    public String getPipeName() {
      return pipeName;
    }

    @Override
    public boolean isClosed() {
      return false;
    }

    // ---- No-op operations ----

    @Override
    public void close() {}

    @Override
    public void initiateFlush() {}

    // ---- Operations this test double does not support ----

    @Override
    public CompletableFuture<Void> close(
        final boolean waitForFlush, final Duration timeoutDuration) {
      throw new UnsupportedOperationException();
    }

    @Override
    public void dropChannel(final String channelName) {
      throw new UnsupportedOperationException();
    }

    @Override
    public Map<String, String> getLatestCommittedOffsetTokens(final List<String> channelNames) {
      throw new UnsupportedOperationException();
    }

    @Override
    public CompletableFuture<Void> waitForFlush(final Duration timeoutDuration) {
      throw new UnsupportedOperationException();
    }

    @Override
    public String getDBName() {
      throw new UnsupportedOperationException();
    }

    @Override
    public String getSchemaName() {
      throw new UnsupportedOperationException();
    }

    @Override
    public String getClientName() {
      throw new UnsupportedOperationException();
    }
  }

  /**
   * Test double for a streaming ingest channel. It records close() calls on the shared supplier
   * state and fails appendRow / getLatestCommittedOffsetToken on demand.
   */
  static class TrackingStreamingIngestChannel implements SnowflakeStreamingIngestChannel {

    private final String pipeName;
    private final String channelName;
    private final TrackingIngestClientSupplier supplier;
    private volatile boolean closed = false;

    TrackingStreamingIngestChannel(
        final String pipeName,
        final String channelName,
        final TrackingIngestClientSupplier supplier) {
      this.supplier = supplier;
      this.channelName = channelName;
      this.pipeName = pipeName;
    }

    /** The exception used for every simulated channel invalidation. */
    private static SFException channelInvalidated() {
      return new SFException("ChannelInvalidated", "Test simulated channel invalidation", 0, "");
    }

    /**
     * Atomically takes one failure from the given budget. Returns true when the budget was
     * positive before the call; a non-positive budget is reset to zero and yields false.
     */
    private static boolean takeOneFailure(final AtomicInteger budget) {
      final int before = budget.getAndUpdate(left -> left > 0 ? left - 1 : 0);
      return before > 0;
    }

    /**
     * Accepts the row unless a failure is configured. The retryable budget is consulted first,
     * then the non-retryable budget, then the permanent throwOnAppendRow switch.
     */
    @Override
    public void appendRow(final Map<String, Object> row, final String offsetToken) {
      if (takeOneFailure(supplier.retryableAppendRowFailures)) {
        throw new SFException("MemoryThresholdExceeded", "Test simulated backpressure", 0, "");
      }
      if (takeOneFailure(supplier.nonRetryableAppendRowFailures)) {
        throw channelInvalidated();
      }
      if (supplier.throwOnAppendRow) {
        throw channelInvalidated();
      }
    }

    /** Batch append is a no-op; none of the failure switches apply to it. */
    @Override
    public void appendRows(
        final Iterable<Map<String, Object>> rows,
        final String startOffsetToken,
        final String endOffsetToken) {}

    /** Always reports no committed offset (null) unless configured to throw. */
    @Override
    public String getLatestCommittedOffsetToken() {
      if (!supplier.throwOnOffsetToken) {
        return null;
      }
      throw channelInvalidated();
    }

    /** Returns a fresh "SUCCESS" status with a null offset token. */
    @Override
    public ChannelStatus getChannelStatus() {
      final String noOffsetToken = null;
      return new ChannelStatus(
          "db",
          "schema",
          pipeName,
          channelName,
          "SUCCESS",
          noOffsetToken,
          Instant.now(),
          0,
          0,
          0,
          null,
          null,
          null,
          null,
          Instant.now());
    }

    /** Marks the channel closed and records the call on the shared state. */
    @Override
    public void close() {
      closed = true;
      supplier.incrementCloseCallCount();
    }

    /** Both arguments are ignored; behaves exactly like {@link #close()}. */
    @Override
    public void close(final boolean waitForFlush, final Duration timeoutDuration) {
      close();
    }

    @Override
    public boolean isClosed() {
      return closed;
    }

    @Override
    public String getChannelName() {
      return channelName;
    }

    /** The plain channel name doubles as the fully qualified name in this test double. */
    @Override
    public String getFullyQualifiedChannelName() {
      return channelName;
    }

    @Override
    public String getPipeName() {
      return pipeName;
    }

    /** The plain pipe name doubles as the fully qualified name in this test double. */
    @Override
    public String getFullyQualifiedPipeName() {
      return pipeName;
    }

    @Override
    public void initiateFlush() {}

    // ---- Operations this test double does not support ----

    @Override
    public String getDBName() {
      throw new UnsupportedOperationException();
    }

    @Override
    public String getSchemaName() {
      throw new UnsupportedOperationException();
    }

    @Override
    public CompletableFuture<Void> waitForCommit(
        final Predicate<String> tokenChecker, final Duration timeoutDuration) {
      throw new UnsupportedOperationException();
    }

    @Override
    public CompletableFuture<Void> waitForFlush(final Duration timeoutDuration) {
      throw new UnsupportedOperationException();
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/StreamingClientManagerIT.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2;

import static com.snowflake.kafka.connector.Constants.DEFAULT_PIPE_NAME_SUFFIX;
import static org.assertj.core.api.Assertions.*;

import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.v2.client.StreamingClientPools;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.util.Map;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Integration tests for client sharing in {@link StreamingClientPools}: clients are looked up by
 * (connector, task, pipe) and released per task.
 *
 * <p>Each test works on two freshly created tables/pipes and two task ids whose names are salted
 * with the current time.
 */
public class StreamingClientManagerIT {

  private Map<String, String> connectorConfig;
  private SinkTaskConfig sinkTaskConfig;
  private StreamingClientProperties streamingClientProperties;
  private String testConnectorName;
  private String pipe1, pipe2;
  private String task1, task2;
  private String table1, table2;

  /** Creates the salted tables and registers the connector with {@link ThreadPools} twice. */
  @BeforeEach
  public void setUp() {
    final long salt = System.currentTimeMillis();
    // NOTE(review): the returned service is not used in this method; the call is kept in case
    // obtaining it has side effects the tests rely on - confirm before removing.
    final SnowflakeConnectionService connectionService =
        TestUtils.getConnectionServiceWithEncryptedKey();
    connectorConfig = TestUtils.getConnectorConfigurationForStreaming(true);
    sinkTaskConfig = SinkTaskConfig.from(connectorConfig);
    streamingClientProperties = StreamingClientProperties.from(sinkTaskConfig);
    table1 = "TABLE1" + salt;
    table2 = "TABLE2" + salt;
    task1 = "task1" + salt;
    task2 = "task2" + salt;
    testConnectorName = "TEST_CONNECTOR_" + salt;
    pipe1 = table1 + DEFAULT_PIPE_NAME_SUFFIX;
    pipe2 = table2 + DEFAULT_PIPE_NAME_SUFFIX;
    TestUtils.createTableWithMetadataColumn(table1);
    TestUtils.createTableWithMetadataColumn(table2);
    // Registered twice; tearDown() issues the two matching closeForTask calls.
    ThreadPools.registerTask(testConnectorName, sinkTaskConfig);
    ThreadPools.registerTask(testConnectorName, sinkTaskConfig);
  }

  /**
   * Drops the test tables, closes the clients of both tasks and releases both thread pool
   * registrations.
   *
   * <p>The steps run in the same order as before, but each later step is now guarded by {@code
   * finally}: a failing {@code dropTable} no longer skips closing the clients or leaves the
   * thread pool registrations behind for subsequent tests.
   */
  @AfterEach
  public void tearDown() {
    try {
      TestUtils.dropTable(table1);
    } finally {
      try {
        TestUtils.dropTable(table2);
      } finally {
        try {
          closeTaskClients(task1);
          closeTaskClients(task2);
        } finally {
          ThreadPools.closeForTask(testConnectorName);
          ThreadPools.closeForTask(testConnectorName);
        }
      }
    }
  }

  @Test
  public void testGetClient_FirstTime_CreatesNewClient() {
    // When
    SnowflakeStreamingIngestClient client = getClient(task1, pipe1);
    // Then
    assertThat(client).as("Client should not be null").isNotNull();
  }

  @Test
  public void testGetClient_SamePipeName_ReturnsExistingClient() {
    // Given
    SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1);

    // When
    SnowflakeStreamingIngestClient client2 = getClient(task1, pipe1);

    // Then
    assertThat(client1)
        .as("Should return the same client instance for same pipe name")
        .isEqualTo(client2);
  }

  @Test
  public void testGetClient_DifferentPipeNames_CreatesDistinctClients() {
    // When
    SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1);
    SnowflakeStreamingIngestClient client2 = getClient(task2, pipe2);
    // Then
    assertThat(client1)
        .as("Different pipe names should create different clients")
        .isNotEqualTo(client2);
  }

  @Test
  public void testGetClient_AfterClientClosed_CreatesNewClient() {
    // Given
    SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1);
    // Close the client for this task
    closeTaskClients(task1);

    // When
    SnowflakeStreamingIngestClient client2 = getClient(task1, pipe1);

    // Then
    assertThat(client1)
        .as("Should create a new client when previous task released it")
        .isNotEqualTo(client2);
  }

  @Test
  public void testClose_ExistingPipe_ClosesAndRemovesClient() {
    // Given
    SnowflakeStreamingIngestClient client = getClient(task1, pipe1);

    // When - Release the task
    closeTaskClients(task1);

    // Then - Verify new client is created for same pipe name with different task
    SnowflakeStreamingIngestClient newClient = getClient(task2, pipe1);
    assertThat(client).as("Should create new client after close").isNotEqualTo(newClient);
  }

  @Test
  public void testClose_NonExistentPipe_DoesNotThrow() {
    assertThatCode(() -> closeTaskClients("nonExistentTask")).doesNotThrowAnyException();
  }

  @Test
  public void testClose_MultipleClients_ClosesAllClients() {
    // Given - one task holding a client for each of the two pipes
    getClient(task1, pipe1);
    getClient(task1, pipe2);
    assertThat(StreamingClientPools.getClientCountForTask(testConnectorName, task1)).isEqualTo(2);
    // When
    closeTaskClients(task1);
    // Then
    assertThat(StreamingClientPools.getClientCountForTask(testConnectorName, task1)).isEqualTo(0);
  }

  @Test
  public void testProvider_ReuseAfterPartialClose_WorksCorrectly() {
    // task 1 uses 2 pipes, so it has 2 ingest clients
    SnowflakeStreamingIngestClient client1 = getClient(task1, pipe1);
    SnowflakeStreamingIngestClient client2 = getClient(task1, pipe2);

    // Task2 also uses pipe1 (shares one client with task1) in total there should only be 2 ingest
    // clients in the system
    SnowflakeStreamingIngestClient client3 = getClient(task2, pipe1);

    assertThat(client1).isEqualTo(client3);

    // When - task1 stops
    closeTaskClients(task1);

    // Then - Client1 should still be open (task2 still using it)
    SnowflakeStreamingIngestClient client1AfterRelease = getClient(task1, pipe1);

    //  should get the SAME client1 that task2 is still using
    assertThat(client1AfterRelease)
        .as("Should reuse client when another task is still using it")
        .isEqualTo(client1);
  }

  /** Fetches the client for {@code task}/{@code pipe} under the test connector name. */
  private SnowflakeStreamingIngestClient getClient(String task, String pipe) {
    return StreamingClientPools.getClient(
        testConnectorName,
        task,
        pipe,
        sinkTaskConfig,
        streamingClientProperties,
        TaskMetrics.noop());
  }

  /** Releases every client held by {@code task} under the test connector name. */
  private void closeTaskClients(String task) {
    StreamingClientPools.closeTaskClients(testConnectorName, task);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPoolTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.timeout;
import static org.mockito.Mockito.verify;

import com.snowflake.ingest.streaming.SFException;
import com.snowflake.ingest.streaming.SnowflakeStreamingIngestClient;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.io.IOException;
import java.net.URLClassLoader;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;

class StreamingClientPoolTest {

  private SinkTaskConfig connectorConfig;
  private StreamingClientProperties streamingClientProperties;

  @BeforeEach
  void setUp() {
    // Build the task config and the streaming properties derived from it; every test reuses them.
    final Map<String, String> rawConfig = TestUtils.getConnectorConfigurationForStreaming(false);
    connectorConfig = SinkTaskConfig.from(rawConfig);
    streamingClientProperties = StreamingClientProperties.from(connectorConfig);
  }

  /**
   * Resets the streaming client supplier on {@link StreamingClientFactory} after every test; the
   * tests in this class install their own suppliers via {@code setStreamingClientSupplier}.
   */
  @AfterEach
  void tearDown() {
    StreamingClientFactory.resetStreamingClientSupplier();
  }

  /** Tests for {@code StreamingClientPool.RefCountedClient} in isolation from the pool. */
  @Nested
  class RefCountedClientTest {

    /**
     * Executors created by the harnesses of the current test. Each harness used to create a
     * single-thread executor that was never shut down, leaving its worker thread alive after the
     * test; they are now collected here and shut down in {@link #shutdownHarnessExecutors()}.
     */
    private final java.util.List<java.util.concurrent.ExecutorService> harnessExecutors =
        new java.util.ArrayList<>();

    /** Shuts down every executor created by a harness during the test. */
    @AfterEach
    void shutdownHarnessExecutors() {
      // shutdown() rather than shutdownNow(): work already submitted is allowed to finish, the
      // worker thread is not interrupted, and it terminates once the queue is drained.
      harnessExecutors.forEach(java.util.concurrent.ExecutorService::shutdown);
      harnessExecutors.clear();
    }

    @Test
    void taskTracking() {
      RefCountedClientTestHarness harness = new RefCountedClientTestHarness();

      // empty initially
      assertThat(harness.refCountedClient.taskCount()).isEqualTo(0);
      assertThat(harness.refCountedClient.hasTask("task-0")).isFalse();

      // add two tasks (duplicate add is idempotent)
      harness.refCountedClient.addTask("task-0");
      harness.refCountedClient.addTask("task-1");
      harness.refCountedClient.addTask("task-0");
      assertThat(harness.refCountedClient.taskCount()).isEqualTo(2);

      // removing unknown task is a no-op
      assertThat(harness.refCountedClient.removeTask("task-unknown")).isFalse();

      // removing one of two is not the last
      assertThat(harness.refCountedClient.removeTask("task-0")).isFalse();
      assertThat(harness.refCountedClient.hasTask("task-0")).isFalse();
      assertThat(harness.refCountedClient.hasTask("task-1")).isTrue();

      // removing the final task signals empty
      assertThat(harness.refCountedClient.removeTask("task-1")).isTrue();
      assertThat(harness.refCountedClient.taskCount()).isEqualTo(0);
    }

    @Test
    void clientFuture_returns_client_on_success() {
      SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(mockClient);

      RefCountedClientTestHarness harness = new RefCountedClientTestHarness();

      assertThat(harness.refCountedClient.clientFuture.join()).isSameAs(mockClient);
    }

    @Test
    void clientFuture_exposes_original_exception_on_failure() {
      SnowflakeKafkaConnectorException originalException =
          new SnowflakeKafkaConnectorException("boom", "TEST_ERROR");
      setSupplierThrowing(originalException);

      RefCountedClientTestHarness failedHarness = new RefCountedClientTestHarness();
      assertThatThrownBy(() -> failedHarness.refCountedClient.clientFuture.join())
          .isInstanceOf(CompletionException.class)
          .hasCause(originalException);

      // a new RefCountedClient with a working supplier succeeds
      SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(mockClient);

      RefCountedClientTestHarness successHarness = new RefCountedClientTestHarness();
      assertThat(successHarness.refCountedClient.clientFuture.join()).isSameAs(mockClient);
    }

    @Test
    void clientFuture_wraps_checked_exception_in_CompletionException() {
      IOException checkedException = new IOException("disk error");
      setSupplierThrowingChecked(checkedException);

      RefCountedClientTestHarness harness = new RefCountedClientTestHarness();

      assertThatThrownBy(() -> harness.refCountedClient.clientFuture.join())
          .isInstanceOf(CompletionException.class)
          .hasCause(checkedException);
    }

    @Test
    void close_calls_close_on_client() {
      SnowflakeStreamingIngestClient mockClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(mockClient);

      RefCountedClientTestHarness harness = new RefCountedClientTestHarness();
      // Wait for creation to finish so that there is a client to close.
      harness.refCountedClient.clientFuture.join();

      harness.refCountedClient.close("test-pipe", "test-connector");

      verify(mockClient).close();
    }

    /**
     * Helper that creates a RefCountedClient with the currently-installed supplier. Must be called
     * after configuring the supplier via {@code setSupplier*} methods.
     *
     * <p>The executor backing the client is registered with the enclosing test instance so it is
     * shut down once the test has finished.
     */
    class RefCountedClientTestHarness {
      final StreamingClientPool.RefCountedClient refCountedClient;

      RefCountedClientTestHarness() {
        final java.util.concurrent.ExecutorService executor = Executors.newSingleThreadExecutor();
        harnessExecutors.add(executor);
        this.refCountedClient =
            new StreamingClientPool.RefCountedClient(
                "test-pipe",
                "test-connector",
                connectorConfig,
                streamingClientProperties,
                TaskMetrics.noop(),
                executor);
      }
    }
  }

  @Nested
  class PoolTest {

    private StreamingClientPool pool;
    private String connectorName;

    @BeforeEach
    void setUp() {
      // A random 8-character suffix gives every test its own connector name.
      final String uniqueSuffix = UUID.randomUUID().toString().substring(0, 8);
      connectorName = "test-connector-" + uniqueSuffix;

      // The connector is registered with ThreadPools before the pool under test is created.
      ThreadPools.registerTask(connectorName, connectorConfig);
      pool = new StreamingClientPool(connectorName);
    }

    /** Releases the {@link ThreadPools} registration made for this test's connector in setUp(). */
    @AfterEach
    void tearDownPool() {
      ThreadPools.closeForTask(connectorName);
    }

    /**
     * Requests a client from the pool under test for the given task and pipe and blocks on {@code
     * join()} until the asynchronous lookup has completed.
     */
    private SnowflakeStreamingIngestClient getClient(String taskId, String pipeName) {
      return pool.getClientAsync(
              taskId, pipeName, connectorConfig, streamingClientProperties, TaskMetrics.noop())
          .join();
    }

    @Test
    void getClient_creates_client_for_new_pipe() {
      SnowflakeStreamingIngestClient suppliedClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(suppliedClient);

      // The pool must hand back exactly the instance produced by the supplier.
      assertThat(getClient("task-0", "pipe-A")).isSameAs(suppliedClient);
    }

    @Test
    void getClient_reuses_client_for_same_pipe() {
      // Count how often the factory is asked to build a client.
      final AtomicInteger supplierInvocations = new AtomicInteger();
      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            supplierInvocations.incrementAndGet();
            return mock(SnowflakeStreamingIngestClient.class);
          });

      // Two different tasks ask for the same pipe.
      getClient("task-0", "pipe-A");
      getClient("task-1", "pipe-A");

      final int invocations = supplierInvocations.get();
      assertThat(invocations)
          .as("supplier should only be called once for the same pipe")
          .isEqualTo(1);
    }

    @Test
    void getClient_returns_different_clients_for_different_pipes() {
      // Every supplier call yields a fresh mock and is counted.
      final AtomicInteger supplierInvocations = new AtomicInteger();
      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            supplierInvocations.incrementAndGet();
            return mock(SnowflakeStreamingIngestClient.class);
          });

      // The same task asks for two different pipes.
      final SnowflakeStreamingIngestClient forPipeA = getClient("task-0", "pipe-A");
      final SnowflakeStreamingIngestClient forPipeB = getClient("task-0", "pipe-B");

      assertThat(forPipeA).isNotSameAs(forPipeB);
      assertThat(supplierInvocations.get()).isEqualTo(2);
    }

    @Test
    void getClientCountForTask_counts_only_that_tasks_pipes() {
      final SnowflakeStreamingIngestClient sharedMock = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(sharedMock);

      // Nothing has been requested yet.
      assertThat(pool.getClientCountForTask("task-0")).isEqualTo(0);

      // task-0 registers on pipe-A and pipe-B, task-1 only on pipe-B.
      getClient("task-0", "pipe-A");
      getClient("task-0", "pipe-B");
      getClient("task-1", "pipe-B");

      // Each task's count reflects only the pipes that task requested.
      final int task0Count = pool.getClientCountForTask("task-0");
      final int task1Count = pool.getClientCountForTask("task-1");
      assertThat(task0Count).isEqualTo(2);
      assertThat(task1Count).isEqualTo(1);
    }

    @Test
    void closeTaskClients_removes_entry_when_last_task_released() {
      SnowflakeStreamingIngestClient onlyClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(onlyClient);

      // task-0 is the sole user of pipe-A, so releasing it leaves the client without tasks.
      getClient("task-0", "pipe-A");
      pool.closeTaskClients("task-0");

      final int remainingForTask = pool.getClientCountForTask("task-0");
      assertThat(remainingForTask).isEqualTo(0);
      // Allow up to five seconds for close() to be observed on the client.
      verify(onlyClient, timeout(5000)).close();
    }

    @Test
    void closeTaskClients_keeps_client_when_other_tasks_remain() {
      SnowflakeStreamingIngestClient sharedClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(sharedClient);

      // Both tasks use pipe-A; only task-0 is released.
      getClient("task-0", "pipe-A");
      getClient("task-1", "pipe-A");
      pool.closeTaskClients("task-0");

      final int remainingForTask1 = pool.getClientCountForTask("task-1");
      assertThat(remainingForTask1).isEqualTo(1);
      // NOTE(review): a sibling test waits for close() with timeout(5000), which suggests close may
      // happen asynchronously; if so this immediate never() check could miss a late close - confirm.
      verify(sharedClient, never()).close();
    }

    @Test
    void closeTaskClients_then_getClient_creates_new_client() {
      // Every supplier call yields a fresh mock and is counted.
      final AtomicInteger supplierInvocations = new AtomicInteger();
      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            supplierInvocations.incrementAndGet();
            return mock(SnowflakeStreamingIngestClient.class);
          });

      // Acquire, release, then acquire again for the same task and pipe.
      final SnowflakeStreamingIngestClient beforeClose = getClient("task-0", "pipe-A");
      pool.closeTaskClients("task-0");
      final SnowflakeStreamingIngestClient afterClose = getClient("task-0", "pipe-A");

      // The second acquisition must not get the released instance back.
      assertThat(afterClose).isNotSameAs(beforeClose);
      assertThat(supplierInvocations.get()).isEqualTo(2);
    }

    @Test
    void closeTaskClients_for_unknown_task_does_not_throw() {
      // No explicit assertion: the test passes as long as releasing a task id that never
      // requested a client returns without throwing.
      pool.closeTaskClients("nonexistent-task");
    }

    @Test
    void getClient_removes_entry_on_failure_and_rethrows() {
      final SnowflakeKafkaConnectorException creationFailure =
          new SnowflakeKafkaConnectorException("creation failed", "TEST_ERROR");
      setSupplierThrowing(creationFailure);

      // join() reports the supplier's exception as the cause of a CompletionException.
      assertThatThrownBy(() -> getClient("task-0", "pipe-A"))
          .isInstanceOf(CompletionException.class)
          .hasCause(creationFailure);

      // After the failure the task holds no client.
      final int clientsForTask = pool.getClientCountForTask("task-0");
      assertThat(clientsForTask).isEqualTo(0);
    }

    @Test
    void getClient_after_failure_retries_creation() {
      final SnowflakeStreamingIngestClient eventualClient =
          mock(SnowflakeStreamingIngestClient.class);
      final AtomicInteger supplierInvocations = new AtomicInteger();

      // Only the very first supplier call fails; every later call returns the mock.
      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            final boolean firstCall = supplierInvocations.incrementAndGet() == 1;
            if (firstCall) {
              throw new SnowflakeKafkaConnectorException("transient", "TEST_ERROR");
            }
            return eventualClient;
          });

      // First request surfaces the creation failure.
      assertThatThrownBy(() -> getClient("task-0", "pipe-A"))
          .isInstanceOf(CompletionException.class)
          .hasCauseInstanceOf(SnowflakeKafkaConnectorException.class);

      // Second request for the same task and pipe invokes the supplier again and succeeds.
      final SnowflakeStreamingIngestClient retried = getClient("task-0", "pipe-A");
      assertThat(retried).isSameAs(eventualClient);
      assertThat(supplierInvocations.get()).isEqualTo(2);
    }

    @Test
    void recreateClient_retries_on_client_invalid_error() {
      final SnowflakeStreamingIngestClient initialClient =
          mock(SnowflakeStreamingIngestClient.class);
      final SnowflakeStreamingIngestClient replacementClient =
          mock(SnowflakeStreamingIngestClient.class);
      final AtomicInteger supplierInvocations = new AtomicInteger();

      // Call 1 builds the initial client, call 2 simulates a pipe failover (410) during the first
      // recreation attempt, and every later call returns the replacement.
      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            final int attempt = supplierInvocations.incrementAndGet();
            if (attempt == 2) {
              throw new SFException("SfApiPipeFailedOverError", "Pipe failed over", 410, "");
            }
            return attempt == 1 ? initialClient : replacementClient;
          });

      // Create initial client
      getClient("task-0", "pipe-A");

      // Recreation goes through the static StreamingClientPools entry point; its first attempt
      // hits the 410 and a further attempt is expected to follow.
      final SnowflakeStreamingIngestClient recreated =
          StreamingClientPools.recreateClient(
              connectorName,
              "task-0",
              "pipe-A",
              initialClient,
              connectorConfig,
              streamingClientProperties,
              TaskMetrics.noop());

      assertThat(recreated).isSameAs(replacementClient);
      // original + failed recreation + successful recreation
      assertThat(supplierInvocations.get()).isEqualTo(3);
    }

    @Test
    void pool_threads_inherit_context_classloader_from_pool_creator() {
      // Record the context classloader of whichever thread runs the supplier.
      final AtomicReference<ClassLoader> seenBySupplier = new AtomicReference<>();
      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            seenBySupplier.set(Thread.currentThread().getContextClassLoader());
            return mock(SnowflakeStreamingIngestClient.class);
          });

      final String isolatedConnectorName =
          "test-connector-cl-" + UUID.randomUUID().toString().substring(0, 8);

      // Simulate Kafka Connect's PluginClassLoader: install a custom context classloader on the
      // current thread while the pool is created, then restore the previous one.
      final Thread testThread = Thread.currentThread();
      final ClassLoader previousLoader = testThread.getContextClassLoader();
      final URLClassLoader pluginLikeLoader = new URLClassLoader(new java.net.URL[0], null);
      testThread.setContextClassLoader(pluginLikeLoader);
      final StreamingClientPool poolUnderCustomLoader;
      try {
        ThreadPools.registerTask(isolatedConnectorName, connectorConfig);
        poolUnderCustomLoader = new StreamingClientPool(isolatedConnectorName);
      } finally {
        testThread.setContextClassLoader(previousLoader);
      }

      try {
        // The supplier runs as part of this lookup, after the test thread's loader was restored.
        poolUnderCustomLoader
            .getClientAsync(
                "task-0", "pipe-A", connectorConfig, streamingClientProperties, TaskMetrics.noop())
            .join();

        assertThat(seenBySupplier.get())
            .as("Pool thread should have the classloader from the pool creator")
            .isSameAs(pluginLikeLoader);
      } finally {
        // Release the registration made for the isolated connector name above.
        ThreadPools.closeForTask(isolatedConnectorName);
      }
    }

    /**
     * Recreating a client that two tasks share must hand back the replacement while both tasks
     * keep a client count of one.
     */
    @Test
    void recreateClient_replaces_entry_and_preserves_tasks() {
      SnowflakeStreamingIngestClient staleClient = mock(SnowflakeStreamingIngestClient.class);
      SnowflakeStreamingIngestClient replacement = mock(SnowflakeStreamingIngestClient.class);
      AtomicInteger supplierInvocations = new AtomicInteger();

      // First supplier invocation yields the stale client, every later one the replacement.
      StreamingClientFactory.setStreamingClientSupplier(
          (name, database, schema, pipe, properties) -> {
            boolean firstInvocation = supplierInvocations.incrementAndGet() == 1;
            if (firstInvocation) {
              return staleClient;
            }
            return replacement;
          });

      // Register two tasks against the same pipe.
      getClient("task-0", "pipe-A");
      getClient("task-1", "pipe-A");
      assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1);
      assertThat(pool.getClientCountForTask("task-1")).isEqualTo(1);

      SnowflakeStreamingIngestClient recreated =
          pool.recreateClient(
              "task-0",
              "pipe-A",
              staleClient,
              connectorConfig,
              streamingClientProperties,
              TaskMetrics.noop());

      assertThat(recreated).isSameAs(replacement);
      // Neither task may lose its registration as a result of the swap.
      assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1);
      assertThat(pool.getClientCountForTask("task-1")).isEqualTo(1);
      assertThat(supplierInvocations.get()).isEqualTo(2);
    }

    /** Recreating a client must invoke {@code close()} on the client being replaced. */
    @Test
    void recreateClient_closes_old_client() {
      SnowflakeStreamingIngestClient staleClient = mock(SnowflakeStreamingIngestClient.class);
      SnowflakeStreamingIngestClient replacement = mock(SnowflakeStreamingIngestClient.class);
      AtomicInteger supplierInvocations = new AtomicInteger();

      // First supplier invocation yields the stale client, every later one the replacement.
      StreamingClientFactory.setStreamingClientSupplier(
          (name, database, schema, pipe, properties) -> {
            if (supplierInvocations.incrementAndGet() == 1) {
              return staleClient;
            }
            return replacement;
          });

      getClient("task-0", "pipe-A");

      pool.recreateClient(
          "task-0",
          "pipe-A",
          staleClient,
          connectorConfig,
          streamingClientProperties,
          TaskMetrics.noop());

      verify(staleClient).close();
    }

    /**
     * Calling recreateClient for a pipe that has no pool entry (for example because a failed
     * creation already evicted it) must create a fresh entry with the calling task registered, so
     * that closing a different task's clients leaves the entry in place.
     */
    @Test
    void recreateClient_creates_fresh_entry_and_registers_task_when_no_entry_exists() {
      SnowflakeStreamingIngestClient freshClient = mock(SnowflakeStreamingIngestClient.class);
      SnowflakeStreamingIngestClient neverPooledClient = mock(SnowflakeStreamingIngestClient.class);
      setSupplierReturning(freshClient);

      // Nothing has been requested yet, so the task owns no clients.
      assertThat(pool.getClientCountForTask("task-0")).isEqualTo(0);

      SnowflakeStreamingIngestClient recreated =
          pool.recreateClient(
              "task-0",
              "pipe-A",
              neverPooledClient,
              connectorConfig,
              streamingClientProperties,
              TaskMetrics.noop());

      assertThat(recreated).isSameAs(freshClient);
      // The caller's task must now be registered against the fresh entry.
      assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1);

      // Cleaning up an unrelated task must not evict the entry we just created.
      pool.closeTaskClients("some-other-task");
      assertThat(pool.getClientCountForTask("task-0")).isEqualTo(1);
    }

    /**
     * A second recreateClient call that still passes the already-replaced client must return the
     * current client without invoking the supplier again.
     */
    @Test
    void recreateClient_noop_if_client_already_replaced() {
      SnowflakeStreamingIngestClient staleClient = mock(SnowflakeStreamingIngestClient.class);
      SnowflakeStreamingIngestClient replacement = mock(SnowflakeStreamingIngestClient.class);
      AtomicInteger supplierInvocations = new AtomicInteger();

      // First supplier invocation yields the stale client, every later one the replacement.
      StreamingClientFactory.setStreamingClientSupplier(
          (name, database, schema, pipe, properties) -> {
            if (supplierInvocations.incrementAndGet() == 1) {
              return staleClient;
            }
            return replacement;
          });

      getClient("task-0", "pipe-A");

      // The first recreation swaps the stale client for the replacement.
      SnowflakeStreamingIngestClient afterFirstRecreate =
          pool.recreateClient(
              "task-0",
              "pipe-A",
              staleClient,
              connectorConfig,
              streamingClientProperties,
              TaskMetrics.noop());
      assertThat(afterFirstRecreate).isSameAs(replacement);

      // Passing the OLD reference again must not trigger another creation.
      SnowflakeStreamingIngestClient afterSecondRecreate =
          pool.recreateClient(
              "task-0",
              "pipe-A",
              staleClient,
              connectorConfig,
              streamingClientProperties,
              TaskMetrics.noop());
      assertThat(afterSecondRecreate).isSameAs(replacement);

      // Exactly two supplier invocations: the original creation plus one recreation.
      assertThat(supplierInvocations.get()).isEqualTo(2);
    }

    /**
     * After a recreation, a plain getClient for the same task and pipe must return the recreated
     * client and must not invoke the supplier a third time.
     */
    @Test
    void recreateClient_then_getClient_returns_new_client() {
      SnowflakeStreamingIngestClient staleClient = mock(SnowflakeStreamingIngestClient.class);
      SnowflakeStreamingIngestClient replacement = mock(SnowflakeStreamingIngestClient.class);
      AtomicInteger supplierInvocations = new AtomicInteger();

      // First supplier invocation yields the stale client, every later one the replacement.
      StreamingClientFactory.setStreamingClientSupplier(
          (name, database, schema, pipe, properties) -> {
            if (supplierInvocations.incrementAndGet() == 1) {
              return staleClient;
            }
            return replacement;
          });

      getClient("task-0", "pipe-A");

      pool.recreateClient(
          "task-0",
          "pipe-A",
          staleClient,
          connectorConfig,
          streamingClientProperties,
          TaskMetrics.noop());

      // The lookup must be served by the recreated client already held by the pool.
      SnowflakeStreamingIngestClient lookedUp = getClient("task-0", "pipe-A");
      assertThat(lookedUp).isSameAs(replacement);
      assertThat(supplierInvocations.get()).isEqualTo(2);
    }

    /**
     * Two tasks that concurrently ask to recreate the same stale client must end up with the same
     * new client, and the supplier must run only once for the recreation.
     *
     * <p>All waits are bounded so that a regression fails the test instead of hanging the build,
     * and the blocking supplier is always released so no pool thread stays parked after the test.
     */
    @Test
    void recreateClient_concurrent_callers_only_creates_once() throws Exception {
      SnowflakeStreamingIngestClient oldClient = mock(SnowflakeStreamingIngestClient.class);
      AtomicInteger supplierCallCount = new AtomicInteger();
      CountDownLatch supplierStarted = new CountDownLatch(1);
      CountDownLatch supplierProceed = new CountDownLatch(1);

      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            int count = supplierCallCount.incrementAndGet();
            if (count == 1) {
              // First call returns oldClient immediately
              return oldClient;
            }
            // Second call (recreation) blocks until signaled
            supplierStarted.countDown();
            try {
              supplierProceed.await();
            } catch (InterruptedException e) {
              Thread.currentThread().interrupt();
              throw new RuntimeException(e);
            }
            return mock(SnowflakeStreamingIngestClient.class);
          });

      getClient("task-0", "pipe-A");
      getClient("task-1", "pipe-A");

      // Launch two concurrent recreateClient calls
      CompletableFuture<SnowflakeStreamingIngestClient> future1 =
          CompletableFuture.supplyAsync(
              () ->
                  pool.recreateClient(
                      "task-0",
                      "pipe-A",
                      oldClient,
                      connectorConfig,
                      streamingClientProperties,
                      TaskMetrics.noop()));
      CompletableFuture<SnowflakeStreamingIngestClient> future2 =
          CompletableFuture.supplyAsync(
              () ->
                  pool.recreateClient(
                      "task-1",
                      "pipe-A",
                      oldClient,
                      connectorConfig,
                      streamingClientProperties,
                      TaskMetrics.noop()));

      try {
        // Wait (bounded) for the recreation supplier to start; only one should start.
        assertThat(supplierStarted.await(30, java.util.concurrent.TimeUnit.SECONDS))
            .as("recreation supplier should have been invoked")
            .isTrue();
      } finally {
        // Release the supplier even on failure so the blocked thread can finish.
        supplierProceed.countDown();
      }

      SnowflakeStreamingIngestClient result1 =
          future1.get(30, java.util.concurrent.TimeUnit.SECONDS);
      SnowflakeStreamingIngestClient result2 =
          future2.get(30, java.util.concurrent.TimeUnit.SECONDS);

      // Both callers should get the same new client
      assertThat(result1).isSameAs(result2);
      // Supplier should have been called exactly twice (original + one recreation)
      assertThat(supplierCallCount.get()).isEqualTo(2);
    }

    /**
     * Client creation for two different pipes must run in parallel: both supplier invocations have
     * to be in flight before either is allowed to complete.
     *
     * <p>The wait for both suppliers is bounded. If creation were serialized, the first supplier
     * would block on {@code proceed} and the second would never start; an unbounded wait would
     * then hang forever instead of reporting the failure. The latch is released in {@code finally}
     * so the blocked supplier thread is freed on every path.
     */
    @Test
    void getClient_parallel_for_different_pipes_creates_concurrently() throws Exception {
      CountDownLatch bothStarted = new CountDownLatch(2);
      CountDownLatch proceed = new CountDownLatch(1);

      StreamingClientFactory.setStreamingClientSupplier(
          (clientName, dbName, schemaName, pipeName, props) -> {
            bothStarted.countDown();
            try {
              proceed.await();
            } catch (InterruptedException e) {
              Thread.currentThread().interrupt();
              throw new RuntimeException(e);
            }
            return mock(SnowflakeStreamingIngestClient.class);
          });

      CompletableFuture<SnowflakeStreamingIngestClient> futureA =
          pool.getClientAsync(
              "task-0", "pipe-A", connectorConfig, streamingClientProperties, TaskMetrics.noop());
      CompletableFuture<SnowflakeStreamingIngestClient> futureB =
          pool.getClientAsync(
              "task-1", "pipe-B", connectorConfig, streamingClientProperties, TaskMetrics.noop());

      try {
        // Both suppliers should have started before either completes
        assertThat(bothStarted.await(30, java.util.concurrent.TimeUnit.SECONDS))
            .as("both client creations should be in flight at the same time")
            .isTrue();
      } finally {
        // Unblock any supplier that did start, regardless of the assertion outcome.
        proceed.countDown();
      }

      SnowflakeStreamingIngestClient clientA =
          futureA.get(30, java.util.concurrent.TimeUnit.SECONDS);
      SnowflakeStreamingIngestClient clientB =
          futureB.get(30, java.util.concurrent.TimeUnit.SECONDS);

      assertThat(clientA).isNotSameAs(clientB);
    }
  }

  /** Installs a client supplier that ignores its arguments and always returns {@code client}. */
  private void setSupplierReturning(SnowflakeStreamingIngestClient client) {
    StreamingClientFactory.setStreamingClientSupplier(
        (name, database, schema, pipe, properties) -> {
          return client;
        });
  }

  /** Installs a client supplier that throws {@code exception} on every invocation. */
  private void setSupplierThrowing(RuntimeException exception) {
    StreamingClientFactory.setStreamingClientSupplier(
        (name, database, schema, pipe, properties) -> {
          // The same exception instance is rethrown each time the supplier is called.
          throw exception;
        });
  }

  /**
   * Installs a client supplier that throws {@code checkedException} on every invocation, using
   * {@code sneakyThrow} so the lambda does not have to declare it.
   */
  @SuppressWarnings("unchecked")
  private void setSupplierThrowingChecked(Exception checkedException) {
    StreamingClientFactory.setStreamingClientSupplier(
        (name, database, schema, pipe, properties) -> {
          sneakyThrow(checkedException);
          // Never reached: sneakyThrow always throws. Present only to satisfy the compiler.
          return null;
        });
  }

  /**
   * Rethrows {@code exception} as-is without the caller having to declare it, even when it is a
   * checked exception. Used for testing CompletionException unwrapping.
   *
   * <p>The cast to the generic type {@code E} is unchecked, so no runtime type check is performed
   * and the original exception instance is what propagates.
   */
  private static <E extends Throwable> void sneakyThrow(Exception exception) throws E {
    @SuppressWarnings("unchecked")
    E disguised = (E) exception;
    throw disguised;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/client/StreamingClientPoolsTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.client;

import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.metrics.TaskMetrics;
import com.snowflake.kafka.connector.internal.streaming.StreamingClientProperties;
import com.snowflake.kafka.connector.internal.streaming.v2.service.ThreadPools;
import java.util.UUID;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/** Tests for the static entry points of {@code StreamingClientPools}. */
class StreamingClientPoolsTest {

  private static final String TASK_ID = "test-task";
  private static final String PIPE_NAME = "pipe-A";

  private SinkTaskConfig sinkTaskConfig;
  private StreamingClientProperties streamingClientProperties;
  private String connectorName;

  /** Builds a streaming config and registers a uniquely named connector with the thread pools. */
  @BeforeEach
  void setUp() {
    sinkTaskConfig = SinkTaskConfig.from(TestUtils.getConnectorConfigurationForStreaming(false));
    streamingClientProperties = StreamingClientProperties.from(sinkTaskConfig);
    // A random suffix gives every test its own connector name.
    String uniqueSuffix = UUID.randomUUID().toString().substring(0, 8);
    connectorName = "test-connector-pools-" + uniqueSuffix;
    ThreadPools.registerTask(connectorName, sinkTaskConfig);
  }

  /** Resets the supplier override and releases the clients and thread pools used by the test. */
  @AfterEach
  void tearDown() {
    StreamingClientFactory.resetStreamingClientSupplier();
    StreamingClientPools.closeTaskClients(connectorName, TASK_ID);
    ThreadPools.closeForTask(connectorName);
  }

  /**
   * A runtime exception thrown by the client supplier must reach the caller of {@code getClient}
   * as the very same instance.
   */
  @Test
  void getClient_unwraps_CompletionException_and_throws_original_RuntimeException() {
    SnowflakeKafkaConnectorException creationFailure =
        new SnowflakeKafkaConnectorException("creation failed", "TEST_ERROR");
    StreamingClientFactory.setStreamingClientSupplier(
        (name, database, schema, pipe, properties) -> {
          throw creationFailure;
        });

    assertThatThrownBy(
            () ->
                StreamingClientPools.getClient(
                    connectorName,
                    TASK_ID,
                    PIPE_NAME,
                    sinkTaskConfig,
                    streamingClientProperties,
                    TaskMetrics.noop()))
        .isSameAs(creationFailure);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/streaming/v2/service/PartitionChannelManagerTest.java
================================================
package com.snowflake.kafka.connector.internal.streaming.v2.service;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.config.SinkTaskConfigTestBuilder;
import com.snowflake.kafka.connector.internal.streaming.channel.TopicPartitionChannel;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import org.apache.kafka.common.TopicPartition;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@code PartitionChannelManager}. The manager under test is wired with a channel
 * builder that hands out Mockito mocks and records them per topic partition.
 */
class PartitionChannelManagerTest {

  private static final String CONNECTOR_NAME = "test_connector";
  private static final String TASK_ID = "0";
  private static final String TOPIC = "test_topic";

  private PartitionChannelManager manager;
  private Map<TopicPartition, TopicPartitionChannel> createdChannels;

  @BeforeEach
  void setUp() {
    createdChannels = new HashMap<>();

    // Every channel the manager asks for is a stubbed mock remembered in createdChannels.
    PartitionChannelManager.PartitionChannelBuilder recordingBuilder =
        (partition, table, channelName, pipeName) -> {
          TopicPartitionChannel stub = mock(TopicPartitionChannel.class);
          when(stub.getChannelName()).thenReturn(channelName);
          when(stub.getPipeName()).thenReturn(pipeName);
          when(stub.closeChannelAsync()).thenReturn(CompletableFuture.completedFuture(null));
          when(stub.waitForLastProcessedRecordCommitted())
              .thenReturn(CompletableFuture.completedFuture(null));
          createdChannels.put(partition, stub);
          return stub;
        };

    manager = new PartitionChannelManager(testConfig(Collections.emptyMap()), recordingBuilder);
  }

  // --- makeChannelName ---

  @Test
  void makeChannelNameConcatenatesWithUnderscores() {
    String channelName = PartitionChannelManager.makeChannelName("myConnector", "myTopic", 3);

    assertEquals("myConnector_myTopic_3", channelName);
  }

  // --- startPartitions ---

  @Test
  void startPartitionsRegistersChannelsInMap() {
    TopicPartition first = partitionOfTopic(0);
    TopicPartition second = partitionOfTopic(1);

    manager.startPartitions(Arrays.asList(first, second), pipeMapFor(TOPIC));

    assertEquals(2, manager.getPartitionChannels().size());
    assertTrue(manager.getChannel(first).isPresent());
    assertTrue(manager.getChannel(second).isPresent());
  }

  @Test
  void startPartitionsPassesCorrectNamesToBuilder() {
    Map<String, String> seenByBuilder = new HashMap<>();
    PartitionChannelManager.PartitionChannelBuilder capturingBuilder =
        (partition, table, channelName, pipeName) -> {
          seenByBuilder.put("tableName", table);
          seenByBuilder.put("channelName", channelName);
          seenByBuilder.put("pipeName", pipeName);
          TopicPartitionChannel stub = mock(TopicPartitionChannel.class);
          when(stub.getChannelName()).thenReturn(channelName);
          return stub;
        };
    PartitionChannelManager capturingManager =
        new PartitionChannelManager(testConfig(Collections.emptyMap()), capturingBuilder);

    capturingManager.startPartitions(
        Collections.singletonList(partitionOfTopic(7)), pipeMapFor(TOPIC));

    assertEquals(TOPIC, seenByBuilder.get("tableName"));
    assertEquals(
        PartitionChannelManager.makeChannelName(CONNECTOR_NAME, TOPIC, 7),
        seenByBuilder.get("channelName"));
    assertEquals("pipe_" + TOPIC, seenByBuilder.get("pipeName"));
  }

  @Test
  void startPartitionsUsesTopicToTableMapForTableName() {
    Map<String, String> topicToTable = new HashMap<>();
    topicToTable.put("raw_topic", "mapped_table");

    Map<String, String> seenByBuilder = new HashMap<>();
    PartitionChannelManager.PartitionChannelBuilder capturingBuilder =
        (partition, table, channelName, pipeName) -> {
          seenByBuilder.put("tableName", table);
          seenByBuilder.put("pipeName", pipeName);
          TopicPartitionChannel stub = mock(TopicPartitionChannel.class);
          when(stub.getChannelName()).thenReturn(channelName);
          return stub;
        };
    PartitionChannelManager managerWithMapping =
        new PartitionChannelManager(testConfig(topicToTable), capturingBuilder);

    // The pipe map is keyed by the mapped table name, not by the raw topic name.
    managerWithMapping.startPartitions(
        Collections.singletonList(new TopicPartition("raw_topic", 0)), pipeMapFor("mapped_table"));

    assertEquals("mapped_table", seenByBuilder.get("tableName"));
    assertEquals("pipe_mapped_table", seenByBuilder.get("pipeName"));
  }

  // --- getChannel ---

  @Test
  void getChannelByTopicPartitionReturnsChannel() {
    TopicPartition partition = partitionOfTopic(0);
    startSinglePartition(partition);

    Optional<TopicPartitionChannel> found = manager.getChannel(partition);

    assertTrue(found.isPresent());
    assertSame(createdChannels.get(partition), found.get());
  }

  @Test
  void getChannelByStringReturnsChannel() {
    TopicPartition partition = partitionOfTopic(0);
    startSinglePartition(partition);

    Optional<TopicPartitionChannel> found =
        manager.getChannel(PartitionChannelManager.makeChannelName(CONNECTOR_NAME, TOPIC, 0));

    assertTrue(found.isPresent());
    assertSame(createdChannels.get(partition), found.get());
  }

  @Test
  void getChannelReturnsEmptyForUnknownPartition() {
    Optional<TopicPartitionChannel> found =
        manager.getChannel(new TopicPartition("no_such_topic", 99));

    assertFalse(found.isPresent());
  }

  @Test
  void getChannelByStringReturnsEmptyForUnknownName() {
    Optional<TopicPartitionChannel> found = manager.getChannel("nonexistent_channel");

    assertFalse(found.isPresent());
  }

  // --- close (subset) ---

  @Test
  void closeRemovesOnlyRequestedPartitions() {
    TopicPartition kept0 = partitionOfTopic(0);
    TopicPartition closed = partitionOfTopic(1);
    TopicPartition kept2 = partitionOfTopic(2);
    startPartitions(kept0, closed, kept2);

    manager.close(Collections.singletonList(closed));

    assertFalse(manager.getChannel(closed).isPresent());
    assertTrue(manager.getChannel(kept0).isPresent());
    assertTrue(manager.getChannel(kept2).isPresent());
    assertEquals(2, manager.getPartitionChannels().size());
  }

  @Test
  void closeCallsCloseChannelAsyncOnRequestedPartitions() {
    TopicPartition closed = partitionOfTopic(0);
    TopicPartition untouched = partitionOfTopic(1);
    startPartitions(closed, untouched);

    manager.close(Collections.singletonList(closed));

    verify(createdChannels.get(closed)).closeChannelAsync();
    verify(createdChannels.get(untouched), never()).closeChannelAsync();
  }

  @Test
  void closeHandlesUnknownPartitionsGracefully() {
    TopicPartition known = partitionOfTopic(0);
    startSinglePartition(known);

    // The second partition was never started on this manager.
    manager.close(Arrays.asList(known, new TopicPartition("unknown", 99)));

    assertFalse(manager.getChannel(known).isPresent());
    assertEquals(0, manager.getPartitionChannels().size());
  }

  @Test
  void closeWithEmptyCollectionIsNoop() {
    TopicPartition started = partitionOfTopic(0);
    startSinglePartition(started);

    manager.close(Collections.emptyList());

    assertTrue(manager.getChannel(started).isPresent());
    assertEquals(1, manager.getPartitionChannels().size());
  }

  // --- closeAll ---

  @Test
  void closeAllClosesAllChannelsAndClearsMap() {
    TopicPartition first = partitionOfTopic(0);
    TopicPartition second = partitionOfTopic(1);
    startPartitions(first, second);

    manager.closeAll();

    assertTrue(manager.getPartitionChannels().isEmpty());
    verify(createdChannels.get(first)).closeChannelAsync();
    verify(createdChannels.get(second)).closeChannelAsync();
  }

  @Test
  void closeAllOnEmptyManagerIsNoop() {
    manager.closeAll();

    assertTrue(manager.getPartitionChannels().isEmpty());
  }

  // --- waitForAllChannelsToCommitData ---

  @Test
  void waitForAllChannelsCallsFlushOnEveryChannel() {
    TopicPartition first = partitionOfTopic(0);
    TopicPartition second = partitionOfTopic(1);
    startPartitions(first, second);

    manager.waitForAllChannelsToCommitData();

    verify(createdChannels.get(first)).waitForLastProcessedRecordCommitted();
    verify(createdChannels.get(second)).waitForLastProcessedRecordCommitted();
  }

  @Test
  void waitForAllChannelsOnEmptyManagerIsNoop() {
    manager.waitForAllChannelsToCommitData();

    assertTrue(manager.getPartitionChannels().isEmpty());
  }

  // --- helpers ---

  /** Starts exactly one partition on the shared manager. */
  private void startSinglePartition(TopicPartition topicPartition) {
    startPartitions(topicPartition);
  }

  /**
   * Starts the given partitions on the shared manager, mapping each distinct topic name to the
   * pipe name {@code "pipe_" + topic}.
   */
  private void startPartitions(TopicPartition... partitions) {
    Map<String, String> tableToPipe = new HashMap<>();
    for (TopicPartition each : partitions) {
      tableToPipe.computeIfAbsent(each.topic(), table -> "pipe_" + table);
    }
    manager.startPartitions(Arrays.asList(partitions), tableToPipe);
  }

  /** Returns partition {@code index} of the default test topic. */
  private static TopicPartition partitionOfTopic(int index) {
    return new TopicPartition(TOPIC, index);
  }

  /** Returns a fresh mutable map with the single entry {@code table -> "pipe_" + table}. */
  private static Map<String, String> pipeMapFor(String table) {
    Map<String, String> tableToPipe = new HashMap<>();
    tableToPipe.put(table, "pipe_" + table);
    return tableToPipe;
  }

  /** Builds a task config for the test connector with sanitization disabled. */
  private static SinkTaskConfig testConfig(Map<String, String> topicToTableMap) {
    return SinkTaskConfigTestBuilder.builder()
        .connectorName(CONNECTOR_NAME)
        .taskId(TASK_ID)
        .topicToTableMap(topicToTableMap)
        .enableSanitization(false)
        .build();
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryChannelStatusTest.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

import static com.snowflake.kafka.connector.internal.TestUtils.TEST_CONNECTOR_NAME;
import static com.snowflake.kafka.connector.internal.metrics.MetricsUtil.channelMetricPrefix;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import com.codahale.metrics.MetricRegistry;
import com.snowflake.kafka.connector.internal.metrics.MetricsJmxReporter;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.node.ObjectNode;
import org.junit.Test;
import org.mockito.Mockito;

/**
 * Tests for {@code SnowflakeTelemetryChannelStatus}: JMX metric registration and removal, the
 * no-reporter case, and the validation failure counter written by {@code dumpTo}.
 */
public class SnowflakeTelemetryChannelStatusTest {
  private final String tableName = "tableName";
  private final String connectorName = "connectorName";
  private final String channelName = "channelName";

  @Test
  public void testRegisterAndUnregisterJmxMetrics() {
    MetricRegistry metricRegistry = Mockito.spy(MetricRegistry.class);
    MetricsJmxReporter metricsJmxReporter =
        Mockito.spy(new MetricsJmxReporter(metricRegistry, TEST_CONNECTOR_NAME));
    int expectedMetricCount = (int) SnowflakeTelemetryChannelStatus.NUM_METRICS;

    SnowflakeTelemetryChannelStatus status = newStatus(Optional.of(metricsJmxReporter));

    // Construction registers NUM_METRICS metrics; start() is NOT called (handled at task level).
    verify(metricsJmxReporter, times(0)).start();
    verify(metricRegistry, times(expectedMetricCount))
        .register(Mockito.anyString(), Mockito.any());

    // Registration must not trigger a removeMatching-style scan.
    verify(metricsJmxReporter, times(0)).removeMetricsFromRegistry(Mockito.anyString());

    // Unregistering removes metrics one by one: NUM_METRICS individual remove calls.
    status.tryUnregisterChannelJMXMetrics();
    verify(metricRegistry, times(expectedMetricCount)).remove(Mockito.anyString());
  }

  @Test
  public void testDisabledJmx() {
    MetricRegistry metricRegistry = Mockito.spy(MetricRegistry.class);
    MetricsJmxReporter metricsJmxReporter =
        Mockito.spy(new MetricsJmxReporter(metricRegistry, TEST_CONNECTOR_NAME));
    String metricPrefix = channelMetricPrefix(channelName);

    // The status is built WITHOUT a reporter, so the spies above must stay untouched.
    SnowflakeTelemetryChannelStatus statusWithoutJmx = newStatus(Optional.empty());

    verify(metricsJmxReporter, times(0)).start();
    verify(metricRegistry, times(0)).register(Mockito.anyString(), Mockito.any());
    verify(metricsJmxReporter, times(0)).removeMetricsFromRegistry(metricPrefix);

    statusWithoutJmx.tryUnregisterChannelJMXMetrics();
    verify(metricsJmxReporter, times(0)).removeMetricsFromRegistry(metricPrefix);
  }

  @Test
  public void testValidationFailureCountInDumpTo() {
    SnowflakeTelemetryChannelStatus status = newStatus(Optional.empty());

    // A fresh status reports zero validation failures.
    ObjectNode beforeIncrements = new ObjectMapper().createObjectNode();
    status.dumpTo(beforeIncrements);
    assertEquals(
        0, beforeIncrements.get(TelemetryConstants.VALIDATION_FAILURE_COUNT).asLong());

    // Three increments must be reflected by the next dump.
    for (int i = 0; i < 3; i++) {
      status.incValidationFailureCount();
    }

    ObjectNode afterIncrements = new ObjectMapper().createObjectNode();
    status.dumpTo(afterIncrements);
    assertEquals(3, afterIncrements.get(TelemetryConstants.VALIDATION_FAILURE_COUNT).asLong());
  }

  /**
   * Creates a status for the fixture table, connector and channel names with the given optional
   * JMX reporter; the three offset holders are fresh {@code AtomicLong(-1)} instances.
   */
  private SnowflakeTelemetryChannelStatus newStatus(Optional<MetricsJmxReporter> jmxReporter) {
    return new SnowflakeTelemetryChannelStatus(
        tableName,
        connectorName,
        channelName,
        1234,
        jmxReporter,
        new AtomicLong(-1),
        new AtomicLong(-1),
        new AtomicLong(-1));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/telemetry/SnowflakeTelemetryServiceTest.java
================================================
package com.snowflake.kafka.connector.internal.telemetry;

import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.KEY_CONVERTER;
import static com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams.VALUE_CONVERTER;
import static com.snowflake.kafka.connector.internal.telemetry.SnowflakeTelemetryService.INGESTION_METHOD;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.ingest.streaming.ChannelStatus;
import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.Constants.KafkaConnectorConfigParams;
import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.internal.SnowflakeErrors;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.IngestionMethodConfig;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelCreation;
import com.snowflake.kafka.connector.internal.streaming.telemetry.SnowflakeTelemetryChannelStatus;
import java.time.Duration;
import java.time.Instant;
import java.util.LinkedList;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import net.snowflake.client.internal.jdbc.telemetry.Telemetry;
import net.snowflake.client.internal.jdbc.telemetry.TelemetryData;
import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.JsonNode;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

public class SnowflakeTelemetryServiceTest {

  // Converter class name that testReportKafkaConnectStart stores under KEY_CONVERTER.
  public static final String KAFKA_STRING_CONVERTER =
      "org.apache.kafka.connect.storage.StringConverter";
  // Converter class name that testReportKafkaConnectStart stores under VALUE_CONVERTER.
  public static final String KAFKA_CONFLUENT_AVRO_CONVERTER =
      "io.confluent.connect.avro.AvroConverter";
  // Wall-clock time (ms) captured in setUp(); used as the lower bound for reported timestamps.
  private long startTime;
  // Telemetry double, recreated in setUp(), that records what the service under test logs.
  private MockTelemetryClient mockTelemetryClient;

  /** Resets per-test state: records the test start time and installs a fresh telemetry mock. */
  @BeforeEach
  void setUp() {
    startTime = System.currentTimeMillis();
    mockTelemetryClient = new MockTelemetryClient();
  }

  /**
   * Checks the KAFKA_START telemetry message: envelope fields, ingestion method, start time, JDK
   * details, converters, and that private-key settings are left out of the payload.
   */
  @ParameterizedTest
  @EnumSource(value = IngestionMethodConfig.class)
  public void testReportKafkaConnectStart(IngestionMethodConfig ingestionMethodConfig) {
    // given: a connector config with explicit key/value converters
    Map<String, String> config = createConnectorConfig();
    config.put(KEY_CONVERTER, KAFKA_STRING_CONVERTER);
    config.put(KafkaConnectorConfigParams.VALUE_CONVERTER, KAFKA_CONFLUENT_AVRO_CONVERTER);
    SnowflakeTelemetryService telemetryService = createSnowflakeTelemetryService(config);

    // when
    telemetryService.reportKafkaConnectStart(System.currentTimeMillis(), config);

    // then: the mock recorded exactly one message
    LinkedList<TelemetryData> recorded = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, recorded.size());

    JsonNode message = recorded.get(0).getMessage();
    String expectedType = SnowflakeTelemetryService.TelemetryType.KAFKA_START.toString();
    assertEquals(expectedType, message.get("type").asText());
    assertEquals("kafka_connector", message.get("source").asText());
    assertEquals(Utils.VERSION, message.get("version").asText());

    String reportedIngestionMethod = sentTelemetryDataField(INGESTION_METHOD);
    assertEquals(ingestionMethodConfig.toString(), reportedIngestionMethod);

    // The reported start time must lie between test setup and now.
    JsonNode payload = message.get("data");
    assertTrue(
        payload.get(TelemetryConstants.START_TIME).asLong() <= System.currentTimeMillis()
            && payload.get(TelemetryConstants.START_TIME).asLong() >= this.startTime);

    assertNotNull(payload.get("jdk_version"));
    assertNotNull(payload.get("jdk_distribution"));

    validateKeyAndValueConverter(payload);

    // Non-sensitive connection settings are expected in the payload.
    assertTrue(payload.has(KafkaConnectorConfigParams.SNOWFLAKE_DATABASE_NAME));
    assertTrue(payload.has(KafkaConnectorConfigParams.SNOWFLAKE_SCHEMA_NAME));
    assertTrue(payload.has(KafkaConnectorConfigParams.SNOWFLAKE_URL_NAME));
    assertTrue(payload.has(KafkaConnectorConfigParams.SNOWFLAKE_ROLE_NAME));

    // The private key and its passphrase must never be reported.
    assertFalse(payload.has(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY));
    assertFalse(payload.has(KafkaConnectorConfigParams.SNOWFLAKE_PRIVATE_KEY_PASSPHRASE));
  }

  /** Checks that an explicitly configured validation mode is echoed in the start telemetry. */
  @Test
  public void testReportKafkaConnectStart_clientValidationExplicitlySet() {
    final String validationMode = "server_side";
    Map<String, String> config = createConnectorConfig();
    config.put(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION, validationMode);
    SnowflakeTelemetryService telemetryService = createSnowflakeTelemetryService(config);

    telemetryService.reportKafkaConnectStart(System.currentTimeMillis(), config);

    LinkedList<TelemetryData> recorded = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, recorded.size());

    JsonNode payload = recorded.get(0).getMessage().get("data");
    String reportedMode = payload.get(KafkaConnectorConfigParams.SNOWFLAKE_VALIDATION).asText();
    assertEquals(validationMode, reportedMode);
  }

  /**
   * Checks the KAFKA_STOP telemetry message: envelope fields, the ingestion method ordinal, and a
   * start time that lies between test setup and now.
   */
  @ParameterizedTest
  @EnumSource(value = IngestionMethodConfig.class)
  public void testReportKafkaConnectStop(IngestionMethodConfig ingestionMethodConfig) {
    // given
    Map<String, String> connectorConfig = createConnectorConfig();
    SnowflakeTelemetryService snowflakeTelemetryService =
        createSnowflakeTelemetryService(connectorConfig);

    // when
    snowflakeTelemetryService.reportKafkaConnectStop(System.currentTimeMillis());

    // then
    LinkedList<TelemetryData> sentData = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, sentData.size());

    JsonNode allNode = sentData.get(0).getMessage();
    assertEquals(
        SnowflakeTelemetryService.TelemetryType.KAFKA_STOP.toString(),
        allNode.get("type").asText());
    assertEquals("kafka_connector", allNode.get("source").asText());
    assertEquals(Utils.VERSION, allNode.get("version").asText());

    JsonNode dataNode = allNode.get("data");
    assertNotNull(dataNode);
    assertTrue(dataNode.has(INGESTION_METHOD));
    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(ingestionMethodConfig.ordinal(), dataNode.get(INGESTION_METHOD).asInt());
    assertTrue(
        dataNode.get(TelemetryConstants.START_TIME).asLong() <= System.currentTimeMillis()
            && dataNode.get(TelemetryConstants.START_TIME).asLong() >= this.startTime);
  }

  /**
   * Checks the KAFKA_FATAL_ERROR telemetry message: envelope fields, the ingestion method
   * ordinal, a timestamp between test setup and now, and the reported error detail.
   */
  @ParameterizedTest
  @EnumSource(value = IngestionMethodConfig.class)
  public void testReportKafkaConnectFatalError(IngestionMethodConfig ingestionMethodConfig) {
    // given
    Map<String, String> connectorConfig = createConnectorConfig();
    SnowflakeTelemetryService snowflakeTelemetryService =
        createSnowflakeTelemetryService(connectorConfig);
    String expectedException =
        SnowflakeErrors.ERROR_0003.getException("test exception").getMessage();

    // when
    snowflakeTelemetryService.reportKafkaConnectFatalError(expectedException);

    // validate data sent
    LinkedList<TelemetryData> sentData = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, sentData.size());

    JsonNode allNode = sentData.get(0).getMessage();
    assertEquals(
        SnowflakeTelemetryService.TelemetryType.KAFKA_FATAL_ERROR.toString(),
        allNode.get("type").asText());
    assertEquals("kafka_connector", allNode.get("source").asText());
    assertEquals(Utils.VERSION, allNode.get("version").asText());

    JsonNode dataNode = allNode.get("data");
    assertNotNull(dataNode);
    assertTrue(dataNode.has(INGESTION_METHOD));
    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(ingestionMethodConfig.ordinal(), dataNode.get(INGESTION_METHOD).asInt());
    assertTrue(
        dataNode.get(TelemetryConstants.UNIX_TIME).asLong() <= System.currentTimeMillis()
            && dataNode.get(TelemetryConstants.UNIX_TIME).asLong() >= this.startTime);
    assertEquals(expectedException, dataNode.get(TelemetryConstants.ERROR_DETAIL).asText());
  }

  /**
   * Checks that the four-argument fatal-error report carries the error text together with the
   * channel, table and pipe names it was given.
   */
  @Test
  public void testReportKafkaConnectFatalErrorWithChannelContext() {
    Map<String, String> config = createConnectorConfig();
    SnowflakeTelemetryService telemetryService = createSnowflakeTelemetryService(config);

    telemetryService.reportKafkaConnectFatalError("test error", "myChannel", "myTable", "myPipe");

    LinkedList<TelemetryData> recorded = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, recorded.size());

    JsonNode payload = recorded.get(0).getMessage().get("data");
    String reportedError = payload.get(TelemetryConstants.ERROR_DETAIL).asText();
    assertEquals("test error", reportedError);
    String reportedChannel = payload.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME).asText();
    assertEquals("myChannel", reportedChannel);
    String reportedTable = payload.get(TelemetryConstants.TABLE_NAME).asText();
    assertEquals("myTable", reportedTable);
    String reportedPipe = payload.get(TelemetryConstants.PIPE_NAME).asText();
    assertEquals("myPipe", reportedPipe);
  }

  /**
   * Checks the KAFKA_CHANNEL_USAGE telemetry message built from a
   * {@link SnowflakeTelemetryChannelStatus}: offsets, names, counters incremented by the test,
   * and the fields copied from an SDK {@link ChannelStatus}.
   */
  @ParameterizedTest
  @EnumSource(value = IngestionMethodConfig.class)
  public void testReportKafkaPartitionUsage(IngestionMethodConfig ingestionMethodConfig) {
    // given
    Map<String, String> connectorConfig = createConnectorConfig();
    SnowflakeTelemetryService snowflakeTelemetryService =
        createSnowflakeTelemetryService(connectorConfig);

    // expected values
    final String expectedTableName = "tableName";
    final String expectedConnectorName = "connectorName";
    final String expectedTpChannelName = "channelName";
    final long expectedTpChannelCreationTime = 1234;
    final long expectedProcessedOffset = 1;
    final long expectedOffsetPersistedInSnowflake = 4;
    final long expectedLatestConsumerOffset = 5;

    SnowflakeTelemetryChannelStatus channelStatus =
        new SnowflakeTelemetryChannelStatus(
            expectedTableName,
            expectedConnectorName,
            expectedTpChannelName,
            expectedTpChannelCreationTime,
            Optional.empty(),
            new AtomicLong(expectedOffsetPersistedInSnowflake),
            new AtomicLong(expectedProcessedOffset),
            new AtomicLong(expectedLatestConsumerOffset));

    // Three tolerated errors; asserted below as ERROR_TOLERATED_COUNT == 3.
    channelStatus.incErrorToleratedCount();
    channelStatus.incErrorToleratedCount();
    channelStatus.incErrorToleratedCount();
    channelStatus.updateFromChannelStatus(
        new ChannelStatus(
            "testDb",
            "testSchema",
            "testPipe",
            expectedTpChannelName,
            "SUCCESS",
            "0",
            Instant.now(),
            100,
            105,
            2,
            "42",
            "some error",
            Instant.parse("2026-03-24T00:00:00Z"),
            Duration.ofMillis(45),
            Instant.now()));

    // Recovery count works without JMX
    channelStatus.incRecoveryCount();
    channelStatus.incRecoveryCount();

    // Keep the base-type reference so the same report overload is selected as before.
    SnowflakeTelemetryBasicInfo partitionUsage = channelStatus;

    // when
    snowflakeTelemetryService.reportKafkaPartitionUsage(partitionUsage, false);

    // then
    LinkedList<TelemetryData> sentData = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, sentData.size());

    JsonNode allNode = sentData.get(0).getMessage();
    assertEquals("kafka_connector", allNode.get("source").asText());
    assertEquals(Utils.VERSION, allNode.get("version").asText());

    JsonNode dataNode = allNode.get("data");
    assertNotNull(dataNode);
    assertTrue(dataNode.has(INGESTION_METHOD));
    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(ingestionMethodConfig.ordinal(), dataNode.get(INGESTION_METHOD).asInt());
    assertEquals(
        expectedProcessedOffset, dataNode.get(TelemetryConstants.PROCESSED_OFFSET).asLong());
    assertEquals(expectedTableName, dataNode.get(TelemetryConstants.TABLE_NAME).asText());

    assertEquals(
        expectedTpChannelCreationTime,
        dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME).asLong());
    assertTrue(
        dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CLOSE_TIME).asLong()
                <= System.currentTimeMillis()
            && dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CLOSE_TIME).asLong()
                >= this.startTime);
    assertEquals(
        SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_USAGE.toString(),
        allNode.get("type").asText());
    assertEquals(
        expectedLatestConsumerOffset,
        dataNode.get(TelemetryConstants.LATEST_CONSUMER_OFFSET).asLong());
    assertEquals(
        expectedOffsetPersistedInSnowflake,
        dataNode.get(TelemetryConstants.OFFSET_PERSISTED_IN_SNOWFLAKE).asLong());
    assertEquals(
        expectedTpChannelName,
        dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME).asText());
    assertEquals(expectedConnectorName, dataNode.get(TelemetryConstants.CONNECTOR_NAME).asText());

    // Error-tolerated count
    assertEquals(3, dataNode.get(TelemetryConstants.ERROR_TOLERATED_COUNT).asLong());

    // Channel recovery count (works without JMX)
    assertEquals(2, dataNode.get(TelemetryConstants.CHANNEL_RECOVERY_COUNT).asLong());

    // Validation disabled flag
    assertFalse(dataNode.get(TelemetryConstants.VALIDATION_DISABLED).asBoolean());

    // SDK ChannelStatus fields
    assertEquals(100, dataNode.get(TelemetryConstants.ROWS_INSERTED_COUNT).asLong());
    assertEquals(105, dataNode.get(TelemetryConstants.ROWS_PARSED_COUNT).asLong());
    assertEquals(2, dataNode.get(TelemetryConstants.ROWS_ERROR_COUNT).asLong());
    assertEquals(45, dataNode.get(TelemetryConstants.SERVER_AVG_PROCESSING_LATENCY_MS).asLong());

    // SDK ChannelStatus identity and error fields
    assertEquals("testDb", dataNode.get(TelemetryConstants.DATABASE_NAME).asText());
    assertEquals("testSchema", dataNode.get(TelemetryConstants.SCHEMA_NAME).asText());
    assertEquals("testPipe", dataNode.get(TelemetryConstants.PIPE_NAME).asText());
    assertEquals("SUCCESS", dataNode.get(TelemetryConstants.STATUS_CODE).asText());
    assertFalse(dataNode.has("last_error_message")); // omitted for privacy
    assertEquals(
        "2026-03-24T00:00:00Z", dataNode.get(TelemetryConstants.LAST_ERROR_TIMESTAMP).asText());
    assertEquals(
        "42", dataNode.get(TelemetryConstants.LAST_ERROR_OFFSET_TOKEN_UPPER_BOUND).asText());

    // Backpressure/fallback counts (0 since not incremented in this test)
    assertEquals(0, dataNode.get(TelemetryConstants.BACKPRESSURE_RETRY_COUNT).asLong());
    assertEquals(0, dataNode.get(TelemetryConstants.APPEND_ROW_FALLBACK_COUNT).asLong());

    // Schema evolution failure count
    assertEquals(0, dataNode.get(TelemetryConstants.SCHEMA_EVOLUTION_FAILURE_COUNT).asLong());
  }

  /**
   * Checks the KAFKA_CHANNEL_START telemetry message built from a
   * {@link SnowflakeTelemetryChannelCreation}: envelope fields, the ingestion method ordinal,
   * table name, channel name and channel creation time.
   */
  @ParameterizedTest
  @EnumSource(value = IngestionMethodConfig.class)
  public void testReportKafkaPartitionStart(IngestionMethodConfig ingestionMethodConfig) {
    // given
    Map<String, String> connectorConfig = createConnectorConfig();
    SnowflakeTelemetryService snowflakeTelemetryService =
        createSnowflakeTelemetryService(connectorConfig);

    final String expectedTableName = "tableName";
    final String expectedChannelName = "channelName";
    final long expectedChannelCreationTime = 1234;

    SnowflakeTelemetryChannelCreation channelCreation =
        new SnowflakeTelemetryChannelCreation(
            expectedTableName, expectedChannelName, expectedChannelCreationTime);
    channelCreation.setReuseTable(true);

    // Keep the base-type reference so the same report overload is selected as before.
    SnowflakeTelemetryBasicInfo partitionCreation = channelCreation;

    // when
    snowflakeTelemetryService.reportKafkaPartitionStart(partitionCreation);

    // then
    LinkedList<TelemetryData> sentData = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, sentData.size());

    JsonNode allNode = sentData.get(0).getMessage();
    assertEquals("kafka_connector", allNode.get("source").asText());
    assertEquals(Utils.VERSION, allNode.get("version").asText());

    JsonNode dataNode = allNode.get("data");
    assertNotNull(dataNode);
    assertTrue(dataNode.has(INGESTION_METHOD));
    // assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(ingestionMethodConfig.ordinal(), dataNode.get(INGESTION_METHOD).asInt());
    assertEquals(expectedTableName, dataNode.get(TelemetryConstants.TABLE_NAME).asText());
    assertEquals(
        expectedChannelCreationTime,
        dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_CREATION_TIME).asLong());
    assertEquals(
        SnowflakeTelemetryService.TelemetryType.KAFKA_CHANNEL_START.toString(),
        allNode.get("type").asText());
    assertEquals(
        expectedChannelName,
        dataNode.get(TelemetryConstants.TOPIC_PARTITION_CHANNEL_NAME).asText());
  }

  /** Returns the connector configuration these tests start from, as supplied by TestUtils. */
  private Map<String, String> createConnectorConfig() {
    Map<String, String> streamingConfig = TestUtils.getConnectorConfigurationForStreaming(false);
    return streamingConfig;
  }

  /**
   * Creates the service under test on top of the current mock client, runs
   * {@code ConnectorConfigTools.setDefaultValues} on the given map (mutating it), and tags the
   * service with a fixed application name and task id.
   */
  private SnowflakeTelemetryService createSnowflakeTelemetryService(
      Map<String, String> connectorConfig) {
    SnowflakeTelemetryService service = new SnowflakeTelemetryService(mockTelemetryClient);
    ConnectorConfigTools.setDefaultValues(connectorConfig);

    service.setAppName("TEST_APP");
    service.setTaskID("1");
    return service;
  }

  /**
   * Asserts that exactly one telemetry message was recorded and returns the text value of
   * {@code field} from that message's "data" node.
   */
  private String sentTelemetryDataField(String field) {
    LinkedList<TelemetryData> recorded = this.mockTelemetryClient.getSentTelemetryData();
    assertEquals(1, recorded.size());
    JsonNode payload = recorded.get(0).getMessage().get("data");
    JsonNode value = payload.get(field);
    return value.asText();
  }

  /**
   * Asserts that the payload reports the key and value converters used by these tests, comparing
   * class names case-insensitively.
   */
  private void validateKeyAndValueConverter(JsonNode dataNode) {
    assertTrue(dataNode.has(KEY_CONVERTER));
    String reportedKeyConverter = dataNode.get(KEY_CONVERTER).asText();
    assertTrue(reportedKeyConverter.equalsIgnoreCase(KAFKA_STRING_CONVERTER));

    assertTrue(dataNode.has(VALUE_CONVERTER));
    String reportedValueConverter = dataNode.get(VALUE_CONVERTER).asText();
    assertTrue(reportedValueConverter.equalsIgnoreCase(KAFKA_CONFLUENT_AVRO_CONVERTER));
  }

  /**
   * In-memory {@link Telemetry} double. Entries passed to {@link #addLogToBatch} are buffered and
   * exposed through {@link #getSentTelemetryData()}; nothing leaves the process.
   */
  public static class MockTelemetryClient implements Telemetry {

    // Entries added since the last getSentTelemetryData() call.
    private final LinkedList<TelemetryData> telemetryDataList;

    // Everything handed out so far by getSentTelemetryData().
    private final LinkedList<TelemetryData> sentTelemetryData;

    // Backs sendBatchAsync(). The worker is a daemon thread because close() does not shut the
    // executor down; a non-daemon worker per mock instance would linger and could delay JVM exit.
    private final ExecutorService executor =
        Executors.newSingleThreadExecutor(
            runnable -> {
              Thread worker = new Thread(runnable, "mock-telemetry-client");
              worker.setDaemon(true);
              return worker;
            });

    public MockTelemetryClient() {
      this.telemetryDataList = new LinkedList<>();
      this.sentTelemetryData = new LinkedList<>();
    }

    /** Buffers the entry until the next {@link #getSentTelemetryData()} call. */
    @Override
    public void addLogToBatch(TelemetryData telemetryData) {
      this.telemetryDataList.add(telemetryData);
    }

    /** Drops both the buffered and the already-exposed entries. */
    @Override
    public void close() {
      this.telemetryDataList.clear();
      this.sentTelemetryData.clear();
    }

    /** Returns a future that completes with {@code true}; the buffered entries are not touched. */
    @Override
    public Future<Boolean> sendBatchAsync() {
      return executor.submit(() -> true);
    }

    /** Intentionally a no-op. */
    @Override
    public void postProcess(String s, String s1, int i, Throwable throwable) {}

    /**
     * Moves all buffered entries into the "sent" list and returns that list.
     *
     * <p>The returned list is the live internal list and accumulates across calls until
     * {@link #close()} is invoked.
     */
    public LinkedList<TelemetryData> getSentTelemetryData() {
      this.sentTelemetryData.addAll(telemetryDataList);
      this.telemetryDataList.clear();
      return sentTelemetryData;
    }
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/validation/DataValidationUtilTest.java
================================================
/*
 * COPIED FROM SNOWFLAKE INGEST SDK V1
 * Source: snowflake-ingest-java/src/test/java/net/snowflake/ingest/streaming/internal/DataValidationUtilTest.java
 *
 * Modifications:
 * - Iceberg-specific tests removed (5 test methods)
 * - Package changed to com.snowflake.kafka.connector.internal.validation
 * - Added buildString() helper method inline (was in TestUtils)
 *
 * Copyright (c) 2024 Snowflake Computing Inc. All rights reserved.
 */

package com.snowflake.kafka.connector.internal.validation;

import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.BYTES_16_MB;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.BYTES_8_MB;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.isAllowedSemiStructuredType;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseArray;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseArrayNew;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseBigDecimal;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseBinary;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseBoolean;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseDate;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseObject;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseObjectNew;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseReal;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseString;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseTime;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseTimestamp;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseVariant;
import static com.snowflake.kafka.connector.internal.validation.DataValidationUtil.validateAndParseVariantNew;
import static java.time.ZoneOffset.UTC;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.OffsetTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.TimeZone;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import org.junit.Assert;
import org.junit.Test;

public class DataValidationUtilTest {
  private static final ObjectMapper objectMapper = new ObjectMapper();

  /** Returns {@code str} appended to an initially empty buffer {@code count} times. */
  private static String buildString(String str, int count) {
    StringBuilder repeated = new StringBuilder(count);
    int remaining = count;
    while (remaining > 0) {
      repeated.append(str);
      remaining--;
    }
    return repeated.toString();
  }

  /**
   * Runs {@code action} and asserts that it throws an {@link SFExceptionValidation} whose vendor
   * code equals the message code of {@code expectedErrorCode}.
   *
   * @param expectedErrorCode error code the thrown exception must carry
   * @param expectedExceptionMessage exact message expected, or {@code null} to skip that check
   * @param action code that is expected to throw
   */
  private void expectErrorCodeAndMessage(
      ErrorCode expectedErrorCode, String expectedExceptionMessage, Runnable action) {
    try {
      action.run();
    } catch (SFExceptionValidation e) {
      assertEquals(expectedErrorCode.getMessageCode(), e.getVendorCode());
      if (expectedExceptionMessage != null) {
        assertEquals(expectedExceptionMessage, e.getMessage());
      }
      return;
    } catch (Exception e) {
      // Attach the unexpected exception as the cause so the test report shows what was thrown,
      // instead of only printing it to stderr.
      throw new AssertionError("Invalid error thrown: " + e, e);
    }
    Assert.fail("Expected Exception");
  }

  /** Expects {@code action} to fail with {@code expectedErrorCode}; the message is not checked. */
  private void expectError(ErrorCode expectedErrorCode, Runnable action) {
    final String anyMessage = null; // null makes the delegate skip the message comparison
    expectErrorCodeAndMessage(expectedErrorCode, anyMessage, action);
  }

  /**
   * Exercises {@code validateAndParseDate} with java.time objects, date and timestamp strings,
   * and integer strings, then checks that unsupported or out-of-range inputs are rejected with
   * the expected error code.
   */
  @Test
  public void testValidateAndParseDate() {
    // java.time inputs on 1970-01-10 all yield 9, whatever the offset or zone
    // (presumably days since 1970-01-01 — confirm against DataValidationUtil).
    assertEquals(9, validateAndParseDate("COL", LocalDate.of(1970, 1, 10), 0));
    assertEquals(9, validateAndParseDate("COL", LocalDateTime.of(1970, 1, 10, 1, 0), 0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            OffsetDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneOffset.of("-07:00")),
            0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            OffsetDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneOffset.of("+07:00")),
            0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL",
            ZonedDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneId.of("America/Los_Angeles")),
            0));
    assertEquals(
        9,
        validateAndParseDate(
            "COL", ZonedDateTime.of(1970, 1, 10, 1, 0, 34, 123456789, ZoneId.of("Asia/Tokyo")), 0));
    assertEquals(19380, validateAndParseDate("COL", Instant.ofEpochMilli(1674478926000L), 0));

    // String inputs: surrounding whitespace, a time part, an offset or a zone id do not change
    // the resulting value.
    assertEquals(-923, validateAndParseDate("COL", "1967-06-23", 0));
    assertEquals(-923, validateAndParseDate("COL", "  1967-06-23 \t\n", 0));
    assertEquals(-923, validateAndParseDate("COL", "1967-06-23T01:01:01", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00+07:00", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00-07:00", 0));
    assertEquals(
        18464, validateAndParseDate("COL", "2020-07-21T23:31:00-07:00[America/Los_Angeles]", 0));
    assertEquals(18464, validateAndParseDate("COL", "2020-07-21T23:31:00+09:00[Asia/Tokyo]", 0));

    // Test integer-stored date
    // The same instant written with 10, 13, 16 and 19 digits yields the same value.
    assertEquals(19380, validateAndParseDate("COL", "1674478926", 0));
    assertEquals(19380, validateAndParseDate("COL", "1674478926000", 0));
    assertEquals(19380, validateAndParseDate("COL", "1674478926000000", 0));
    assertEquals(19380, validateAndParseDate("COL", "1674478926000000000", 0));

    // Time input is not supported
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "20:57:01", 0));

    // Test values out of range
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseDate("COL", LocalDateTime.of(10000, 2, 2, 2, 2), 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseDate("COL", LocalDateTime.of(-10000, 2, 2, 2, 2), 0));

    // Test forbidden values
    // Unsupported Java types are expected to raise INVALID_FORMAT_ROW; unparseable strings are
    // expected to raise INVALID_VALUE_ROW.
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", new Object(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", LocalTime.now(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", OffsetTime.now(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", new java.util.Date(), 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", false, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "foo", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseDate("COL", "1.0", 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 'c', 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 1, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 1L, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", 1.25, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseDate("COL", BigInteger.valueOf(1), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseDate("COL", BigDecimal.valueOf(1.25), 0));
  }

  /**
   * Exercises {@code validateAndParseTime} with time strings, integer strings and java.time
   * objects at scales 0, 1 and 9, then checks that dates, timestamps and unsupported types are
   * rejected with the expected error code.
   */
  @Test
  public void testValidateAndParseTime() {
    // Test local time
    // The third argument is the scale: the same input yields 46926, 469260 and 46926000000000
    // at scales 0, 1 and 9.
    assertEquals(46920, validateAndParseTime("COL", "13:02", 0, 0).longValueExact());
    assertEquals(46920, validateAndParseTime("COL", "  13:02 \t\n", 0, 0).longValueExact());
    assertEquals(46926, validateAndParseTime("COL", "13:02:06", 0, 0).longValueExact());
    assertEquals(469260, validateAndParseTime("COL", "13:02:06", 1, 0).longValueExact());
    assertEquals(46926000000000L, validateAndParseTime("COL", "13:02:06", 9, 0).longValueExact());

    // Fractional digits beyond the requested scale are dropped (.1234 -> .1 at scale 1).
    assertEquals(46926, validateAndParseTime("COL", "13:02:06.1234", 0, 0).longValueExact());
    assertEquals(469261, validateAndParseTime("COL", "13:02:06.1234", 1, 0).longValueExact());
    assertEquals(
        46926123400000L, validateAndParseTime("COL", "13:02:06.1234", 9, 0).longValueExact());

    assertEquals(46926, validateAndParseTime("COL", "13:02:06.123456789", 0, 0).longValueExact());
    assertEquals(469261, validateAndParseTime("COL", "13:02:06.123456789", 1, 0).longValueExact());
    assertEquals(
        46926123456789L, validateAndParseTime("COL", "13:02:06.123456789", 9, 0).longValueExact());

    // Test that offset time does not make any difference
    assertEquals(
        46926123456789L,
        validateAndParseTime("COL", "13:02:06.123456789+09:00", 9, 0).longValueExact());
    assertEquals(
        46926123456789L,
        validateAndParseTime("COL", "13:02:06.123456789-09:00", 9, 0).longValueExact());

    // Test integer-stored time and scale guessing
    // Inputs with 10, 13, 16 and 19 digits all resolve to the same time of day.
    assertEquals(46926L, validateAndParseTime("COL", "1674478926", 0, 0).longValueExact());
    assertEquals(46926L, validateAndParseTime("COL", "1674478926123", 0, 0).longValueExact());
    assertEquals(46926L, validateAndParseTime("COL", "1674478926123456", 0, 0).longValueExact());
    assertEquals(46926L, validateAndParseTime("COL", "1674478926123456789", 0, 0).longValueExact());

    assertEquals(469260L, validateAndParseTime("COL", "1674478926", 1, 0).longValueExact());
    assertEquals(469261L, validateAndParseTime("COL", "1674478926123", 1, 0).longValueExact());
    assertEquals(469261L, validateAndParseTime("COL", "1674478926123456", 1, 0).longValueExact());
    assertEquals(
        469261L, validateAndParseTime("COL", "1674478926123456789", 1, 0).longValueExact());

    assertEquals(46926000000000L, validateAndParseTime("COL", "1674478926", 9, 0).longValueExact());
    assertEquals(
        46926123000000L, validateAndParseTime("COL", "1674478926123", 9, 0).longValueExact());
    assertEquals(
        46926123456000L, validateAndParseTime("COL", "1674478926123456", 9, 0).longValueExact());
    assertEquals(
        46926123456789L, validateAndParseTime("COL", "1674478926123456789", 9, 0).longValueExact());

    // Test Java objects
    assertEquals(
        46926123456789L,
        validateAndParseTime("COL", LocalTime.of(13, 2, 6, 123456789), 9, 0).longValueExact());
    assertEquals(
        46926123456789L,
        validateAndParseTime(
                "COL", OffsetTime.of(13, 2, 6, 123456789, ZoneOffset.of("+09:00")), 9, 0)
            .longValueExact());

    // Dates and timestamps are forbidden
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "2023-01-19", 9, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTime("COL", "2023-01-19T14:23:55.878137", 9, 0));

    // Test forbidden values
    // Unsupported Java types are expected to raise INVALID_FORMAT_ROW; unparseable strings are
    // expected to raise INVALID_VALUE_ROW.
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", LocalDate.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", LocalDateTime.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", OffsetDateTime.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", ZonedDateTime.now(), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", Instant.now(), 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", new Date(), 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", 1.5f, 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", 1.5, 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "1.5", 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "1.0", 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", new Object(), 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", false, 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "", 3, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTime("COL", "foo", 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", java.sql.Time.valueOf("20:57:00"), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", java.sql.Date.valueOf("2010-11-03"), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTime("COL", java.sql.Timestamp.valueOf("2010-11-03 20:57:00"), 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", BigInteger.ZERO, 3, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", BigDecimal.ZERO, 3, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseTime("COL", 'c', 3, 0));
  }

  /**
   * Covers {@code validateAndParseTimestamp}: ISO-8601 strings, integer-encoded epochs whose unit
   * is inferred from their magnitude, values the validator must reject as out of range, and Java
   * types that are not accepted as timestamp input.
   */
  @Test
  public void testValidateAndParseTimestamp() throws ParseException {
    TimestampWrapper wrapper =
        DataValidationUtil.validateAndParseTimestamp(
            "COL", "2021-01-01T01:00:00.123+01:00", 4, UTC, false, 0);
    assertEquals(1609459200, wrapper.getEpochSecond());
    assertEquals(123000000, wrapper.getFraction());
    assertEquals(3600, wrapper.getTimezoneOffsetSeconds());
    assertEquals(1500, wrapper.getTimeZoneIndex());

    // Leading/trailing whitespace around the literal must not matter
    wrapper = validateAndParseTimestamp("COL", "  2021-01-01T01:00:00.123 \t\n", 9, UTC, true, 0);
    assertEquals(1609462800, wrapper.getEpochSecond());
    assertEquals(123000000, wrapper.getFraction());
    assertEquals(new BigInteger("1609462800123000000"), wrapper.toBinary(false));

    // Test integer-stored time and scale guessing
    SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    df.setTimeZone(TimeZone.getTimeZone("UTC"));

    // Just above the seconds range: read as milliseconds
    assertEquals(
        BigInteger.valueOf(df.parse("1971-01-01 00:00:00.001").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31536000001", 9, UTC, true, 0).toBinary(false));

    // Largest value still read as milliseconds
    assertEquals(
        BigInteger.valueOf(df.parse("2969-05-02 23:59:59.999").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31535999999999", 9, UTC, true, 0).toBinary(false));

    // Smallest value read as microseconds
    assertEquals(
        BigInteger.valueOf(df.parse("1971-01-01 00:00:00.000").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31536000000000", 9, UTC, true, 0).toBinary(false));

    // One below the nanosecond threshold asserted next, so expected to be read as microseconds.
    // The value has sub-millisecond precision, hence the expected nanoseconds are spelled out
    // directly instead of going through SimpleDateFormat.
    assertEquals(
        new BigInteger("31535999999999999").multiply(BigInteger.valueOf(1000)),
        validateAndParseTimestamp("COL", "31535999999999999", 9, UTC, true, 0).toBinary(false));

    // Smallest value read as nanoseconds
    assertEquals(
        BigInteger.valueOf(df.parse("1971-01-01 00:00:00.000").getTime())
            .multiply(BigInteger.valueOf(1000000)),
        validateAndParseTimestamp("COL", "31536000000000000", 9, UTC, true, 0).toBinary(false));

    // Time input is not supported
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "20:57:01", 3, UTC, false, 0));

    // Test values out of range
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () ->
            validateAndParseTimestamp(
                "COL", LocalDateTime.of(10000, 2, 2, 2, 2), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", LocalDateTime.of(0, 2, 2, 2, 2), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", LocalDateTime.of(-1, 2, 2, 2, 2), 3, UTC, false, 0));

    // Test forbidden values
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", LocalTime.now(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", OffsetTime.now(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", new Date(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", 1.5f, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", 1.5, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "1.5", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "1.0", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", false, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseTimestamp("COL", "", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseTimestamp("COL", "foo", 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseTimestamp("COL", java.sql.Time.valueOf("20:57:00"), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseTimestamp(
                "COL", java.sql.Date.valueOf("2010-11-03"), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseTimestamp(
                "COL", java.sql.Timestamp.valueOf("2010-11-03 20:57:00"), 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", BigInteger.ZERO, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", BigDecimal.ZERO, 3, UTC, false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseTimestamp("COL", 'c', 3, UTC, false, 0));
  }

  /**
   * Verifies that numeric epoch inputs ({@code int} / {@code long} literals) yield the same
   * timestamp as the equivalent decimal string.
   */
  @Test
  public void testValidateAndParseTimestamp_integerEpoch() {
    // Reference value shared by the first and the last check
    final TimestampWrapper secondsFromString =
        validateAndParseTimestamp("COL", "1709712000", 9, UTC, true, 0);

    // int holding an epoch in seconds
    assertEquals(
        secondsFromString.toBinary(false),
        validateAndParseTimestamp("COL", 1709712000, 9, UTC, true, 0).toBinary(false));

    // long holding an epoch in milliseconds
    assertEquals(
        validateAndParseTimestamp("COL", "1709712000000", 9, UTC, true, 0).toBinary(false),
        validateAndParseTimestamp("COL", 1709712000000L, 9, UTC, true, 0).toBinary(false));

    // epoch zero
    assertEquals(
        validateAndParseTimestamp("COL", "0", 9, UTC, true, 0).toBinary(false),
        validateAndParseTimestamp("COL", 0, 9, UTC, true, 0).toBinary(false));

    // negative epoch, i.e. a point in time before 1970
    assertEquals(
        validateAndParseTimestamp("COL", "-86400", 9, UTC, true, 0).toBinary(false),
        validateAndParseTimestamp("COL", -86400, 9, UTC, true, 0).toBinary(false));

    // TIMESTAMP_LTZ-style call (fifth argument false) with a long epoch: same epoch second
    assertEquals(
        secondsFromString.getEpochSecond(),
        validateAndParseTimestamp("COL", 1709712000L, 9, UTC, false, 0).getEpochSecond());
  }

  /**
   * Covers {@code validateAndParseBigDecimal}: plain and scientific-notation strings (with
   * surrounding whitespace, either exponent case and negative exponents), Java numeric types,
   * values of magnitude 10^37, and inputs that must be rejected.
   *
   * <p>Scientific-notation results are compared through {@code toBigInteger()}, so those
   * assertions only pin the integral part of the parsed value.
   */
  @Test
  public void testValidateAndParseBigDecimal() {
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", "1", 0));
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", "  1 \t\n ", 0));
    assertEquals(
        new BigDecimal("1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "1e3", 0).toBigInteger());
    assertEquals(
        new BigDecimal("1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "  1e3 \t\n", 0).toBigInteger());
    // Upper-case exponent marker with a positive exponent
    assertEquals(
        new BigDecimal("1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "1E3", 0).toBigInteger());
    assertEquals(
        new BigDecimal("-1000").toBigInteger(),
        validateAndParseBigDecimal("COL", "-1e3", 0).toBigInteger());
    assertEquals(
        new BigDecimal("1").toBigInteger(),
        validateAndParseBigDecimal("COL", "1e0", 0).toBigInteger());
    assertEquals(
        new BigDecimal("-1").toBigInteger(),
        validateAndParseBigDecimal("COL", "-1e0", 0).toBigInteger());
    assertEquals(
        new BigDecimal("123").toBigInteger(),
        validateAndParseBigDecimal("COL", "1.23e2", 0).toBigInteger());
    assertEquals(
        new BigDecimal("123.4").toBigInteger(),
        validateAndParseBigDecimal("COL", "1.234e2", 0).toBigInteger());
    assertEquals(
        new BigDecimal("0.1234").toBigInteger(),
        validateAndParseBigDecimal("COL", "1.234e-1", 0).toBigInteger());
    assertEquals(
        new BigDecimal("0.1234").toBigInteger(),
        validateAndParseBigDecimal("COL", "1234e-5", 0).toBigInteger());
    assertEquals(
        new BigDecimal("0.1234").toBigInteger(),
        validateAndParseBigDecimal("COL", "1234E-5", 0).toBigInteger());

    // Java numeric types
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", 1, 0));
    assertEquals(new BigDecimal("1.0"), validateAndParseBigDecimal("COL", 1D, 0));
    assertEquals(new BigDecimal("1"), validateAndParseBigDecimal("COL", 1L, 0));
    assertEquals(new BigDecimal("1.0"), validateAndParseBigDecimal("COL", 1F, 0));
    assertEquals(
        BigDecimal.valueOf(10).pow(37),
        validateAndParseBigDecimal("COL", BigDecimal.valueOf(10).pow(37), 0));
    assertEquals(
        BigDecimal.valueOf(-1).multiply(BigDecimal.valueOf(10).pow(37)),
        validateAndParseBigDecimal(
            "COL", BigInteger.valueOf(-1).multiply(BigInteger.valueOf(10).pow(37)), 0));

    // Test forbidden values
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBigDecimal("COL", "honk", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBigDecimal("COL", "0x22", 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", true, 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", false, 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", new Object(), 0));
    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", 'a', 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBigDecimal("COL", new byte[4], 0));
  }

  /**
   * Covers {@code validateAndParseString}: the byte-length limit, the optional character-length
   * limit, rejection of strings containing an unpaired surrogate, and unsupported Java types.
   */
  @Test
  public void testValidateAndParseString() {
    assertEquals("honk", validateAndParseString("COL", "honk", Optional.empty(), 0));

    // Byte-length limit: the limit itself and one below are accepted, one above is rejected
    final String atLimit = buildString("a", BYTES_16_MB);
    final String belowLimit = buildString("a", BYTES_16_MB - 1);
    for (String accepted : new String[] {atLimit, belowLimit}) {
      assertEquals(accepted, validateAndParseString("COL", accepted, Optional.empty(), 0));
    }
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseString("COL", atLimit + "a", Optional.empty(), 0));

    // Character-length limit counts characters, not bytes: each of these is one character
    // although their UTF-8 encodings take 1, 2, 3 and 4 bytes respectively
    for (String oneCharacter : new String[] {"a", "č", "❄", "🍞"}) {
      assertEquals(oneCharacter, validateAndParseString("COL", oneCharacter, Optional.of(1), 0));
    }

    // Values longer than the character limit are rejected, including non-string inputs
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseString("COL", "a🍞", Optional.of(1), 0));
    for (Object tooLong : new Object[] {"12345", false, 12345, 1.2345}) {
      expectError(
          ErrorCode.INVALID_VALUE_ROW,
          () -> validateAndParseString("COL", tooLong, Optional.of(4), 0));
    }

    // A lone surrogate cannot be encoded as UTF-8, so the string is rejected
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseString("COL", "foo\uD800bar", Optional.empty(), 0));

    // Unsupported Java types
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseString("COL", new Object(), Optional.empty(), 0));
    for (Object rawArray : new Object[] {new byte[] {}, new char[] {}}) {
      expectError(
          ErrorCode.INVALID_FORMAT_ROW,
          () -> validateAndParseString("COL", rawArray, Optional.of(4), 0));
    }
  }

  /**
   * Covers VARIANT parsing through both {@code validateAndParseVariant} and {@code
   * validateAndParseVariantNew}: accepted JSON documents and Java values, preservation of the
   * textual number format, custom serializers, blank input, malformed or unsupported input, and
   * object keys containing NUL characters.
   */
  @Test
  public void testValidateAndParseVariant() throws Exception {
    final String type = "variant";

    assertJson(type, "1", 1);
    assertJson(type, "1", "1");
    assertJson(type, "1", " 1 \n");
    assertJson(type, "{\"key\":1}", "{\"key\":1}");
    assertJson(type, "{\"key\":1}", " { \"key\": 1 } ");

    // Numbers must keep their input format; these cases skip the old implementation
    assertJson(type, "{\"key\":1111111.1111111}", "   {\"key\": 1111111.1111111}    \t\n", false);
    final String[] exponentNumbers = {
      "11.111111111111e8",
      "11.111111111111e-8",
      "11.111111111111E8",
      "11.111111111111E-8",
      "11111111111111e8",
      "11111111111111e-8",
      "11111111111111E8",
      "11111111111111E-8"
    };
    for (String number : exponentNumbers) {
      assertJson(
          type, "{\"key\":" + number + "}", "   {\"key\": " + number + "   }    \t\n", false);
    }

    // Custom serializers
    assertJson(type, "[-128,0,127]", new byte[] {Byte.MIN_VALUE, 0, Byte.MAX_VALUE});
    assertJson(
        type,
        "\"2022-09-28T03:04:12.123456789-07:00\"",
        ZonedDateTime.of(2022, 9, 28, 3, 4, 12, 123456789, ZoneId.of("America/Los_Angeles")));

    // Valid JSON tokens, given as Java values and as JSON text
    assertJson(type, "null", null);
    assertJson(type, "null", "null");
    assertJson(type, "true", true);
    assertJson(type, "true", "true");
    assertJson(type, "false", false);
    assertJson(type, "false", "false");

    assertJson(type, "[]", "[]");
    assertJson(type, "{}", "{}");
    assertJson(type, "[\"foo\",1,null]", "[\"foo\",1,null]");
    assertJson(type, "\"\"", "\"\"");

    // Blank input yields null rather than an empty string
    for (String blank : new String[] {"", "  "}) {
      assertNull(validateAndParseVariant("COL", blank, 0));
      assertNull(validateAndParseVariantNew("COL", blank, 0));
    }

    // Strings with an unpaired surrogate and malformed JSON documents are invalid values
    for (String json : new String[] {"\"foo\uD800bar\"", "{null}", "}{", "foo"}) {
      expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", json, 0));
      expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", json, 0));
    }

    // Java values of unsupported types, also when nested inside a list or a map
    final Object[] unsupportedValues = {
      readTree("{}"),
      new Object(),
      new Date(),
      Collections.singletonList(new Object()),
      Collections.singletonList(Collections.singletonMap("foo", new Object())),
      Collections.singletonMap(new Object(), "foo"),
      Collections.singletonMap("foo", new Object())
    };
    for (Object value : unsupportedValues) {
      expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariant("COL", value, 0));
      expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseVariantNew("COL", value, 0));
    }

    // Keys containing NUL characters (JSON text). A leading NUL or a single such key is
    // accepted; the rejected documents each hold a key that differs from a sibling key only by
    // trailing NUL characters.
    assertJson(type, "{\"key\":0,\"\\u0000key\":1}", "{\"key\":0,\"\\u0000key\":1}", false);
    assertJson(type, "{\"key\\u0000\":0}", "{\"key\\u0000\":0}", false);
    final String[] collidingDocuments = {
      "{\"key\": 0, \"key\\u0000\": 1}",
      "{\"key\": 0, \"key\\u0000\\u0000\": 1}",
      "{\"key\": {\"key\": {\"key\": 0, \"key\\u0000\": 1}}}"
    };
    for (String json : collidingDocuments) {
      expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", json, 0));
    }

    // The same NUL-key cases, given as Java maps
    final Map<String, Integer> leadingNulKey = new HashMap<>();
    leadingNulKey.put("key", 0);
    leadingNulKey.put("\u0000key", 1);
    assertJson(type, "{\"key\":0,\"\\u0000key\":1}", leadingNulKey, false);

    for (String trailingNulKey : new String[] {"key\u0000", "key\u0000\u0000"}) {
      final Map<String, Integer> keys = new HashMap<>();
      keys.put("key", 0);
      keys.put(trailingNulKey, 1);
      expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", keys, 0));
    }

    // Map keys or values with an unpaired surrogate cannot be ingested
    final Map<String, Integer> invalidKey = new HashMap<>();
    invalidKey.put("foo\uD800bar", 1);
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", invalidKey, 0));

    final Map<String, String> invalidValue = new HashMap<>();
    invalidValue.put("key", "foo\uD800bar");
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", invalidValue, 0));
  }

  /**
   * Convenience overload of {@link #assertJson(String, String, Object, boolean)} that checks the
   * expected output against both the new and the old parsing implementation.
   */
  private void assertJson(String colType, String expectedValue, Object value) {
    final boolean alsoTestOldApproach = true;
    assertJson(colType, expectedValue, value, alsoTestOldApproach);
  }

  /**
   * Asserts that {@code value} is serialized to {@code expectedValue} by the parser matching
   * {@code colType}.
   *
   * @param colType one of "variant", "array" or "object" (case-insensitive); anything else fails
   *     the test
   * @param expectedValue the exact string the parser is expected to return
   * @param value the input handed to the parser
   * @param alsoTestOldApproach when true, the non-{@code New} parser variant is checked as well
   */
  private void assertJson(
      String colType, String expectedValue, Object value, boolean alsoTestOldApproach) {
    if (colType.equalsIgnoreCase("variant")) {
      assertEquals(expectedValue, validateAndParseVariantNew("COL", value, 0));
      if (alsoTestOldApproach) {
        assertEquals(expectedValue, validateAndParseVariant("COL", value, 0));
      }
      return;
    }

    if (colType.equalsIgnoreCase("array")) {
      assertEquals(expectedValue, validateAndParseArrayNew("COL", value, 0));
      if (alsoTestOldApproach) {
        assertEquals(expectedValue, validateAndParseArray("COL", value, 0));
      }
      return;
    }

    if (colType.equalsIgnoreCase("object")) {
      assertEquals(expectedValue, validateAndParseObjectNew("COL", value, 0));
      if (alsoTestOldApproach) {
        assertEquals(expectedValue, validateAndParseObject("COL", value, 0));
      }
      return;
    }

    Assert.fail("Unexpected colType " + colType);
  }

  /**
   * Covers ARRAY parsing through both {@code validateAndParseArray} and {@code
   * validateAndParseArrayNew}: scalars, JSON text, Java arrays and lists, null-like input,
   * strings with an unpaired surrogate, and unsupported Java types.
   */
  @Test
  public void testValidateAndParseArray() throws Exception {
    final String type = "array";

    // Scalar input is expected to come back wrapped in a one-element array
    assertJson(type, "[1]", 1);
    assertJson(type, "[1]", "1");
    assertJson(type, "[\"1\"]", "\"1\"");
    assertJson(type, "[1.1e10]", " 1.1e10 ", false);

    // JSON text, Java arrays and lists
    assertJson(type, "[1,2,3]", "  [1, 2, 3] \t\n");
    assertJson(type, "[1,2,3]", new int[] {1, 2, 3});
    assertJson(type, "[\"a\",\"b\",\"c\"]", new String[] {"a", "b", "c"});
    assertJson(type, "[1,2,3]", new Object[] {1, 2, 3});
    assertJson(type, "[1,null,3]", new Object[] {1, null, 3});
    assertJson(type, "[[1,2,3],null,[4,5,6]]", new Object[][] {{1, 2, 3}, null, {4, 5, 6}});
    assertJson(type, "[1,2,3]", Arrays.asList(1, 2, 3));
    assertJson(type, "[[1,2,3],2,3]", Arrays.asList(Arrays.asList(1, 2, 3), 2, 3));

    // Blank strings, the JSON null token and a Java null all become [null]
    for (Object nullLike : new Object[] {"", " ", "null", null}) {
      assertJson(type, "[null]", nullLike);
    }

    // A string with an unpaired surrogate, and text that is not valid JSON, are invalid values
    for (String json : new String[] {"\"foo\uD800bar\"", "foo"}) {
      expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArray("COL", json, 0));
      expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArrayNew("COL", json, 0));
    }

    // Java values of unsupported types, also when nested inside a list or a map
    final Object[] unsupportedValues = {
      readTree("[]"),
      new Object(),
      new Date(),
      Collections.singletonList(new Object()),
      Collections.singletonList(Collections.singletonMap("foo", new Object())),
      Collections.singletonMap(new Object(), "foo"),
      Collections.singletonMap("foo", new Object())
    };
    for (Object value : unsupportedValues) {
      expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArray("COL", value, 0));
      expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseArrayNew("COL", value, 0));
    }
  }

  /**
   * Covers OBJECT parsing through both {@code validateAndParseObject} and {@code
   * validateAndParseObjectNew}: accepted JSON objects, an oversized document, strings with an
   * unpaired surrogate, JSON that is valid but not an object, and unsupported Java types.
   */
  @Test
  public void testValidateAndParseObject() throws Exception {
    assertJson("object", "{}", " { } ");
    assertJson("object", "{\"key\":1}", "{\"key\":1}");
    assertJson("object", "{\"key\":1}", " { \"key\" : 1 } ");
    assertJson("object", "{\"key\":111.111}", " { \"key\" : 111.111 } ");
    // Exponent notation is only checked against the new implementation
    assertJson("object", "{\"key\":111.111e6}", " { \"key\" : 111.111e6 } ", false);
    assertJson("object", "{\"key\":111.111E6}", " { \"key\" : 111.111E6 } ", false);
    assertJson("object", "{\"key\":111.111e-6}", " { \"key\" : 111.111e-6 } ", false);
    assertJson("object", "{\"key\":111.111E-6}", " { \"key\" : 111.111E-6 } ", false);

    // A document carrying a 20,000,000-character string must be rejected as too large
    final String tooLargeObject =
        objectMapper.writeValueAsString(
            Collections.singletonMap("key", StringUtils.repeat('a', 20000000)));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", tooLargeObject, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", tooLargeObject, 0));

    // Test that invalid UTF-8 strings cannot be ingested
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseObject("COL", "{\"foo\": \"foo\uD800bar\"}", 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseObjectNew("COL", "{\"foo\": \"foo\uD800bar\"}", 0));

    // Test forbidden values
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "", 0));

    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", readTree("{}"), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", readTree("{}"), 0));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "[]", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "[]", 0));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "1", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "1", 0));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", 1, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", 1, 0));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", 1.5, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", 1.5, 0));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", false, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", false, 0));

    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", new Object(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", new Object(), 0));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", "foo", 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", "foo", 0));

    expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObject("COL", new Date(), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseObjectNew("COL", new Date(), 0));

    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseObject("COL", Collections.singletonList(new Object()), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseObjectNew("COL", Collections.singletonList(new Object()), 0));

    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseObject(
                "COL",
                Collections.singletonList(Collections.singletonMap("foo", new Object())),
                0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () ->
            validateAndParseObjectNew(
                "COL",
                Collections.singletonList(Collections.singletonMap("foo", new Object())),
                0));

    // Unsupported type used as a map key
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseObject("COL", Collections.singletonMap(new Object(), "foo"), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseObjectNew("COL", Collections.singletonMap(new Object(), "foo"), 0));

    // Unsupported type used as a map value
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseObject("COL", Collections.singletonMap("foo", new Object()), 0));
    expectError(
        ErrorCode.INVALID_FORMAT_ROW,
        () -> validateAndParseObjectNew("COL", Collections.singletonMap("foo", new Object()), 0));
  }

  /**
   * Verifies that the {@code New} parsers reject JSON containing an object with a repeated key,
   * whether that object is the document root, nested in another object, or an array element.
   */
  @Test
  public void testValidateDuplicateKeys() {
    final String flatObject = "{\"key\":1, \"key\":2}";
    final String nestedObject = "{\"key\":1, \"nested\":{\"key\":2, \"key\":3}}";
    final String objectInArray = "[{\"key\":1, \"key\":2}]";

    // duplicate keys directly in the root object
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", flatObject, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", flatObject, 0));

    // duplicate keys inside a nested object
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObjectNew("COL", nestedObject, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", nestedObject, 0));

    // duplicate keys inside an object that is an array element
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArrayNew("COL", objectInArray, 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariantNew("COL", objectInArray, 0));
  }

  /**
   * Verifies that a map whose serialized form is too large is rejected by the variant, array and
   * object parsers.
   */
  @Test
  public void testTooLargeVariant() {
    // The value of "b" is 16 characters short of 16 * 1024 * 1024; the remaining JSON text of
    // the document is {"a":"11","b":""}
    final int fillerLength = 16 * 1024 * 1024 - 16;
    final char[] filler = new char[fillerLength];
    Arrays.fill(filler, 'c');

    final Map<String, Object> oversized = new HashMap<>();
    oversized.put("a", "11");
    oversized.put("b", String.valueOf(filler));

    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseVariant("COL", oversized, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseArray("COL", oversized, 0));
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseObject("COL", oversized, 0));
  }

  /**
   * Verifies that the size limit of semi-structured values is applied to the encoded byte length
   * rather than to the number of characters, and pins the exact error messages.
   */
  @Test
  public void testTooLargeMultiByteSemiStructuredValues() {
    // 9M characters stay below the limit when counted as characters, but 'Č' takes two bytes,
    // so the encoded value exceeds it (see the lengths in the expected messages)
    final char[] twoByteChars = new char[9 * 1024 * 1024];
    Arrays.fill(twoByteChars, 'Č');

    final Map<String, Object> oversized = new HashMap<>();
    oversized.put("a", String.valueOf(twoByteChars));

    // Part of the message shared by all three column types
    final String messagePrefix =
        "The given row cannot be converted to the internal format due to invalid value: Value"
            + " cannot be ingested into Snowflake column COL of type ";

    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        messagePrefix
            + "VARIANT, rowIndex:0, reason: Variant too long: length=18874376 maxLength=16777152",
        () -> validateAndParseVariant("COL", oversized, 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        messagePrefix
            + "ARRAY, rowIndex:0, reason: Array too large. length=18874378 maxLength=16777152",
        () -> validateAndParseArray("COL", oversized, 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        messagePrefix
            + "OBJECT, rowIndex:0, reason: Object too large. length=18874376 maxLength=16777152",
        () -> validateAndParseObject("COL", oversized, 0));
  }

  /**
   * Checks which Java values {@code isAllowedSemiStructuredType} accepts. Scalars, arrays, lists
   * and maps built from the sampled types must pass; any structure containing a plain {@code
   * Object} as element, map key or map value must be refused.
   */
  @Test
  public void testValidVariantType() {
    // Primitive values; the compiler boxes them when they are stored as Object
    Object[] primitives = {(byte) 1, (short) 1, 1, 1L, 1.25f, 1.25d, false, 'c'};

    // Explicitly boxed primitives
    Object[] boxed = {
      Byte.valueOf((byte) 1),
      Short.valueOf((short) 1),
      Integer.valueOf(1),
      Long.valueOf(1L),
      Float.valueOf(1.25f),
      Double.valueOf(1.25d),
      Boolean.valueOf(false),
      Character.valueOf('c')
    };

    // Arrays of primitives
    Object[] primitiveArrays = {
      new byte[] {1},
      new short[] {1},
      new int[] {1},
      new long[] {1L},
      new float[] {1.25f},
      new double[] {1.25d},
      new boolean[] {false},
      new char[] {'c'}
    };

    // Single-element lists of boxed primitives
    Object[] primitiveLists = {
      Collections.singletonList((byte) 1),
      Collections.singletonList((short) 1),
      Collections.singletonList(1),
      Collections.singletonList(1L),
      Collections.singletonList(1.25f),
      Collections.singletonList(1.25d),
      Collections.singletonList(false),
      Collections.singletonList('c')
    };

    // Arbitrary-precision numbers, alone and inside arrays and lists
    Object[] bigNumbers = {
      new BigInteger("1"),
      new BigInteger[] {new BigInteger("1")},
      Collections.singletonList(new BigInteger("1")),
      new BigDecimal("1.25"),
      new BigDecimal[] {new BigDecimal("1.25")},
      Collections.singletonList(new BigDecimal("1.25"))
    };

    // Strings
    Object[] strings = {"foo", new String[] {"foo"}, Collections.singletonList("foo")};

    // java.time values, alone and inside arrays and lists
    Object[] temporals = {
      LocalTime.now(),
      OffsetTime.now(),
      LocalDate.now(),
      LocalDateTime.now(),
      ZonedDateTime.now(),
      OffsetDateTime.now(),
      new LocalTime[] {LocalTime.now()},
      new OffsetTime[] {OffsetTime.now()},
      new LocalDate[] {LocalDate.now()},
      new LocalDateTime[] {LocalDateTime.now()},
      new ZonedDateTime[] {ZonedDateTime.now()},
      new OffsetDateTime[] {OffsetDateTime.now()},
      Collections.singletonList(LocalTime.now()),
      Collections.singletonList(OffsetTime.now()),
      Collections.singletonList(LocalDate.now()),
      Collections.singletonList(LocalDateTime.now()),
      Collections.singletonList(ZonedDateTime.now()),
      Collections.singletonList(OffsetDateTime.now())
    };

    Object[][] acceptedGroups = {
      primitives, boxed, primitiveArrays, primitiveLists, bigNumbers, strings, temporals
    };
    for (Object[] group : acceptedGroups) {
      for (Object candidate : group) {
        Assert.assertTrue(isAllowedSemiStructuredType(candidate));
      }
    }

    // Deeply nested mixed collections; the "unsupported" twins differ only by one plain Object
    Object[] supportedArray = {
      1,
      false,
      new BigInteger("1"),
      LocalDateTime.now(),
      new Object[] {new Object[] {new Object[] {LocalDateTime.now(), false}}}
    };
    Object[] unsupportedArray = {
      1,
      false,
      new BigInteger("1"),
      LocalDateTime.now(),
      new Object[] {new Object[] {new Object[] {new Object(), false}}}
    };
    Object supportedList =
        Arrays.asList(
            new BigInteger("1"),
            "foo",
            false,
            Arrays.asList(13, Arrays.asList(Arrays.asList(false, 'c'))));
    Object unsupportedList =
        Arrays.asList(
            new BigInteger("1"),
            "foo",
            false,
            Arrays.asList(13, Arrays.asList(Arrays.asList(new Object(), 'c'))));

    Assert.assertTrue(isAllowedSemiStructuredType(supportedArray));
    Assert.assertFalse(isAllowedSemiStructuredType(unsupportedArray));
    Assert.assertTrue(isAllowedSemiStructuredType(supportedList));
    Assert.assertFalse(isAllowedSemiStructuredType(unsupportedList));

    // Maps: plain entries first, then the nested structures above as map values
    Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonMap("foo", "bar")));
    Assert.assertFalse(isAllowedSemiStructuredType(Collections.singletonMap(new Object(), "foo")));
    Assert.assertFalse(isAllowedSemiStructuredType(Collections.singletonMap("foo", new Object())));
    Assert.assertTrue(
        isAllowedSemiStructuredType(Collections.singletonMap("foo", supportedArray)));
    Assert.assertFalse(
        isAllowedSemiStructuredType(Collections.singletonMap("foo", unsupportedArray)));
    Assert.assertTrue(isAllowedSemiStructuredType(Collections.singletonMap("foo", supportedList)));
    Assert.assertFalse(
        isAllowedSemiStructuredType(Collections.singletonMap("foo", unsupportedList)));
  }

  /**
   * Exercises {@code validateAndParseBinary} with byte arrays and hex strings: valid inputs must
   * come back as the expected bytes, inputs longer than the allowed length or malformed hex must
   * fail with {@code INVALID_VALUE_ROW}, and inputs of any other Java type must fail with {@code
   * INVALID_FORMAT_ROW}.
   */
  @Test
  public void testValidateAndParseBinary() throws DecoderException {
    final Optional<Integer> noLimit = Optional.empty();

    // Byte arrays are returned with the same content
    final byte[] honk = "honk".getBytes(StandardCharsets.UTF_8);
    assertArrayEquals(honk, validateAndParseBinary("COL", honk.clone(), noLimit, 0));
    assertArrayEquals(
        new byte[] {-1, 0, 1}, validateAndParseBinary("COL", new byte[] {-1, 0, 1}, noLimit, 0));

    // Hex strings are decoded, with or without surrounding whitespace
    final String hex = "1234567890abcdef"; // pragma: allowlist secret NOT A SECRET
    final byte[] decoded = Hex.decodeHex(hex);
    assertArrayEquals(decoded, validateAndParseBinary("COL", hex, noLimit, 0));
    assertArrayEquals(decoded, validateAndParseBinary("COL", "  " + hex + " \t\n", noLimit, 0));

    // Arrays of exactly 8 MB and one byte less are accepted
    for (int size : new int[] {BYTES_8_MB, BYTES_8_MB - 1}) {
      byte[] zeros = new byte[size];
      assertArrayEquals(zeros, validateAndParseBinary("COL", zeros, noLimit, 0));
    }

    // Too large arrays should be rejected
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseBinary("COL", new byte[1], Optional.of(0), 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseBinary("COL", new byte[BYTES_8_MB + 1], Optional.empty(), 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseBinary("COL", new byte[8], Optional.of(7), 0));
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> validateAndParseBinary("COL", "aabb", Optional.of(1), 0));

    // Strings that are not valid hex should fail
    for (String malformedHex : new String[] {"000", "abcg", "c"}) {
      expectError(
          ErrorCode.INVALID_VALUE_ROW,
          () -> validateAndParseBinary("COL", malformedHex, Optional.empty(), 0));
    }

    // Unsupported data types should fail
    Object[] unsupportedInputs = {
      Arrays.asList((byte) 1, (byte) 2, (byte) 3), 1, 12, 1.5, BigInteger.ONE, false, new Object()
    };
    for (Object unsupported : unsupportedInputs) {
      expectError(
          ErrorCode.INVALID_FORMAT_ROW,
          () -> validateAndParseBinary("COL", unsupported, Optional.empty(), 0));
    }
  }

  /**
   * Exercises {@code validateAndParseReal}: numbers and numeric strings must yield the expected
   * double, the special strings for NaN and infinity are matched case-insensitively, and other
   * inputs must be rejected.
   */
  @Test
  public void testValidateAndParseReal() throws Exception {
    // Delta passed to assertEquals: results must match the expected double exactly
    final double exact = 0;

    // From number types
    Object[] numericInputs = {1.23f, 1.23, 1.23d, new BigDecimal("1.23")};
    for (Object number : numericInputs) {
      assertEquals(1.23d, validateAndParseReal("COL", number, 0), exact);
    }

    // Special values, in mixed case and with surrounding whitespace
    assertEquals(Double.NaN, validateAndParseReal("COL", "Nan", 0), exact);
    assertEquals(Double.POSITIVE_INFINITY, validateAndParseReal("COL", "inF", 0), exact);
    for (String negativeInfinity : new String[] {"-inF", " -inF \t\n"}) {
      assertEquals(
          Double.NEGATIVE_INFINITY, validateAndParseReal("COL", negativeInfinity, 0), exact);
    }

    // From string
    for (String plain : new String[] {"   1.23 \t\n", "1.23"}) {
      assertEquals(1.23d, validateAndParseReal("COL", plain, 0), exact);
    }
    for (String scientific : new String[] {"1.23E2", "1.23e2"}) {
      assertEquals(123d, validateAndParseReal("COL", scientific, 0), exact);
    }

    // Test forbidden values
    expectError(ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseReal("COL", "foo", 0));
    for (Object forbidden : new Object[] {'c', new Object(), false, true}) {
      expectError(ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseReal("COL", forbidden, 0));
    }
  }

  /**
   * Exercises {@code validateAndParseBoolean}: the accepted spellings of true/false and numeric
   * inputs must map to 1 or 0, while unsupported Java types and unrecognized strings must be
   * rejected.
   */
  @Test
  public void testValidateAndParseBoolean() {
    // Inputs expected to parse as 1; all of them are submitted with row index 0
    Object[] truthy = {
      true,
      "true",
      "True",
      "TruE",
      "t",
      "yes",
      "YeS",
      "y",
      "on",
      "1",
      "  true \t\n",
      1.1,
      -1.1,
      -10,
      10
    };
    for (Object truthyInput : truthy) {
      assertEquals(1, validateAndParseBoolean("COL", truthyInput, 0));
    }

    // Inputs expected to parse as 0; each one is submitted with its own position as row index
    Object[] falsy = {false, "false", "False", "FalsE", "f", "no", "NO", "n", "off", "0", 0};
    for (int rowIndex = 0; rowIndex < falsy.length; rowIndex++) {
      assertEquals(0, validateAndParseBoolean("COL", falsy[rowIndex], rowIndex));
    }

    // Test forbidden values
    for (Object wrongType : new Object[] {new Object(), 't', 'f', new int[] {}}) {
      expectError(
          ErrorCode.INVALID_FORMAT_ROW, () -> validateAndParseBoolean("COL", wrongType, 0));
    }
    for (String unrecognized : new String[] {"foobar", ""}) {
      expectError(
          ErrorCode.INVALID_VALUE_ROW, () -> validateAndParseBoolean("COL", unrecognized, 0));
    }
  }

  /**
   * Checks the complete exception text produced for every column type in both failure modes: a
   * Java type that is not accepted at all ({@code INVALID_FORMAT_ROW}) and a value of an accepted
   * type that is malformed ({@code INVALID_VALUE_ROW}).
   */
  @Test
  public void testExceptionMessages() {
    // Fragments shared by the expected messages. A wrong-type message reads
    // typeHead + <column type> + allowed + <type list>; a bad-value message reads
    // valueHead + <column type> + reason + <explanation>.
    final String typeHead =
        "The given row cannot be converted to the internal format: Object of type java.lang.Object"
            + " cannot be ingested into Snowflake column COL of type ";
    final String allowed = ", rowIndex:0. Allowed Java types: ";
    final String valueHead =
        "The given row cannot be converted to the internal format due to invalid value: Value"
            + " cannot be ingested into Snowflake column COL of type ";
    final String reason = ", rowIndex:0, reason: ";
    final String formatsTail = " for the list of supported formats";
    final String streamingDocs =
        "https://docs.snowflake.com/en/user-guide/data-load-snowpipe-streaming-overview";
    final String notValidValue = "Not a valid value, see " + streamingDocs + formatsTail;
    final String dateTimeTypes = "String, LocalDate, LocalDateTime, ZonedDateTime, OffsetDateTime";
    final String semiStructuredTypes =
        "String, Primitive data types and their arrays, java.time.*, List<T>, Map<String, T>, T[]";

    // BOOLEAN
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "BOOLEAN" + allowed + "boolean, Number, String",
        () -> validateAndParseBoolean("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead
            + "BOOLEAN"
            + reason
            + "Not a valid boolean, see"
            + " https://docs.snowflake.com/en/sql-reference/data-types-logical.html#conversion-to-boolean"
            + formatsTail,
        () -> validateAndParseBoolean("COL", "abc", 0));

    // TIME
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "TIME" + allowed + "String, LocalTime, OffsetTime",
        () -> validateAndParseTime("COL", new Object(), 10, 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "TIME" + reason + "Not a valid time, see " + streamingDocs + formatsTail,
        () -> validateAndParseTime("COL", "abc", 10, 0));

    // DATE
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "DATE" + allowed + dateTimeTypes,
        () -> validateAndParseDate("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "DATE" + reason + notValidValue,
        () -> validateAndParseDate("COL", "abc", 0));

    // TIMESTAMP_NTZ, TIMESTAMP_LTZ, TIMESTAMP_TZ (in that order). The three variants expect the
    // same text; the boolean is the fifth argument of validateAndParseTimestamp, and the LTZ and
    // TZ cases pass identical arguments.
    for (boolean timestampFlag : new boolean[] {true, false, false}) {
      expectErrorCodeAndMessage(
          ErrorCode.INVALID_FORMAT_ROW,
          typeHead + "TIMESTAMP" + allowed + dateTimeTypes,
          () -> validateAndParseTimestamp("COL", new Object(), 3, UTC, timestampFlag, 0));
      expectErrorCodeAndMessage(
          ErrorCode.INVALID_VALUE_ROW,
          valueHead + "TIMESTAMP" + reason + notValidValue,
          () -> validateAndParseTimestamp("COL", "abc", 3, UTC, timestampFlag, 0));
    }

    // NUMBER
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead
            + "NUMBER"
            + allowed
            + "int, long, byte, short, float, double, BigDecimal, BigInteger, String",
        () -> validateAndParseBigDecimal("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "NUMBER" + reason + "Not a valid number",
        () -> validateAndParseBigDecimal("COL", "abc", 0));

    // REAL
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "REAL" + allowed + "Number, String",
        () -> validateAndParseReal("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "REAL" + reason + "Not a valid decimal number",
        () -> validateAndParseReal("COL", "abc", 0));

    // STRING
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "STRING" + allowed + "String, Number, boolean, char",
        () -> validateAndParseString("COL", new Object(), Optional.empty(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead
            + "STRING"
            + reason
            + "String too long: length=3 characters maxLength=2 characters",
        () -> validateAndParseString("COL", "abc", Optional.of(2), 0));

    // BINARY
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "BINARY" + allowed + "byte[], String",
        () -> validateAndParseBinary("COL", new Object(), Optional.empty(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "BINARY" + reason + "Binary too long: length=2 maxLength=1",
        () -> validateAndParseBinary("COL", new byte[] {1, 2}, Optional.of(1), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "BINARY" + reason + "Not a valid hex string",
        () -> validateAndParseBinary("COL", "ghi", Optional.empty(), 0));

    // VARIANT
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "VARIANT" + allowed + semiStructuredTypes,
        () -> validateAndParseVariant("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "VARIANT" + reason + "Not a valid JSON",
        () -> validateAndParseVariant("COL", "][", 0));

    // ARRAY
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "ARRAY" + allowed + semiStructuredTypes,
        () -> validateAndParseArray("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "ARRAY" + reason + "Not a valid JSON",
        () -> validateAndParseArray("COL", "][", 0));

    // OBJECT
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_FORMAT_ROW,
        typeHead + "OBJECT" + allowed + semiStructuredTypes,
        () -> validateAndParseObject("COL", new Object(), 0));
    expectErrorCodeAndMessage(
        ErrorCode.INVALID_VALUE_ROW,
        valueHead + "OBJECT" + reason + "Not a valid JSON",
        () -> validateAndParseObject("COL", "}{", 0));
  }

  // ================ validateAndParseVariantAsObject ================

  /** A JSON object string is expected to come back as a {@code Map} holding the parsed fields. */
  @Test
  public void testValidateAndParseVariantAsObject_jsonObject() {
    final String json = "{\"a\":1,\"b\":true}";

    Object parsed = DataValidationUtil.validateAndParseVariantAsObject("COL", json, 0);

    Assert.assertTrue(parsed instanceof Map);
    Map<?, ?> fields = (Map<?, ?>) parsed;
    assertEquals(1, fields.get("a"));
    assertEquals(true, fields.get("b"));
  }

  /** A JSON array string is expected to come back as a {@code List} of the parsed elements. */
  @Test
  public void testValidateAndParseVariantAsObject_jsonArray() {
    Object parsed = DataValidationUtil.validateAndParseVariantAsObject("COL", "[1,2,3]", 0);

    Assert.assertTrue(parsed instanceof java.util.List);
    Object expected = Arrays.asList(1, 2, 3);
    assertEquals(expected, parsed);
  }

  /** JSON scalars (number, boolean, quoted string) are expected to map to plain Java values. */
  @Test
  public void testValidateAndParseVariantAsObject_primitive() {
    Object number = DataValidationUtil.validateAndParseVariantAsObject("COL", "42", 0);
    assertEquals(42, number);

    Object flag = DataValidationUtil.validateAndParseVariantAsObject("COL", "true", 0);
    assertEquals(true, flag);

    Object text = DataValidationUtil.validateAndParseVariantAsObject("COL", "\"hello\"", 0);
    assertEquals("hello", text);
  }

  /** Empty and whitespace-only strings are expected to yield {@code null}. */
  @Test
  public void testValidateAndParseVariantAsObject_missingNode() {
    for (String blank : new String[] {"", "  "}) {
      assertNull(DataValidationUtil.validateAndParseVariantAsObject("COL", blank, 0));
    }
  }

  /** A string that is not valid JSON must be rejected with {@code INVALID_VALUE_ROW}. */
  @Test
  public void testValidateAndParseVariantAsObject_invalidJson() {
    final String malformed = "not_json";
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> DataValidationUtil.validateAndParseVariantAsObject("COL", malformed, 0));
  }

  /** A Java {@code Map} input is expected to come back as a {@code Map} with the same entry. */
  @Test
  public void testValidateAndParseVariantAsObject_nativePassthrough() {
    Map<String, Object> input = Collections.singletonMap("key", "value");

    Object output = DataValidationUtil.validateAndParseVariantAsObject("COL", input, 0);

    Assert.assertTrue(output instanceof Map);
    Map<?, ?> outputMap = (Map<?, ?>) output;
    assertEquals("value", outputMap.get("key"));
  }

  // ================ validateAndParseArrayAsList ================

  /** A JSON array string is expected to be parsed into the equivalent list. */
  @Test
  public void testValidateAndParseArrayAsList_jsonArray() {
    java.util.List<?> expected = Arrays.asList(1, 2, 3);
    java.util.List<?> parsed = DataValidationUtil.validateAndParseArrayAsList("COL", "[1,2,3]", 0);
    assertEquals(expected, parsed);
  }

  /** A JSON value that is not an array is expected to be wrapped into a single-element list. */
  @Test
  public void testValidateAndParseArrayAsList_nonArrayWrapped() {
    final String scalarJson = "\"hello\"";
    java.util.List<?> wrapped =
        DataValidationUtil.validateAndParseArrayAsList("COL", scalarJson, 0);
    assertEquals(Collections.singletonList("hello"), wrapped);
  }

  /** A Java list input is expected to come back as a list with the same elements. */
  @Test
  public void testValidateAndParseArrayAsList_nativeList() {
    java.util.List<?> input = Arrays.asList(10, 20);
    java.util.List<?> output = DataValidationUtil.validateAndParseArrayAsList("COL", input, 0);
    assertEquals(Arrays.asList(10, 20), output);
  }

  /** A string that is not valid JSON must be rejected with {@code INVALID_VALUE_ROW}. */
  @Test
  public void testValidateAndParseArrayAsList_invalidJson() {
    final String malformed = "not_json";
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> DataValidationUtil.validateAndParseArrayAsList("COL", malformed, 0));
  }

  // ================ validateAndFormatTimestamp ================

  /**
   * An {@code int} epoch value, formatted with the boolean argument set to {@code true}, is
   * expected to render as a timestamp without zone suffix.
   */
  @Test
  public void testValidateAndFormatTimestamp_integerEpochNtz() {
    // 1705312800 seconds since the epoch is 2024-01-15T10:00:00 UTC
    final int epochSeconds = 1705312800;
    assertEquals(
        "2024-01-15T10:00",
        DataValidationUtil.validateAndFormatTimestamp("COL", epochSeconds, UTC, true, 0));
  }

  /** Same expectation as the {@code int} epoch case, but with the epoch supplied as a long. */
  @Test
  public void testValidateAndFormatTimestamp_longEpochNtz() {
    // 1705312800 seconds since the epoch is 2024-01-15T10:00:00 UTC
    final long epochSeconds = 1705312800L;
    assertEquals(
        "2024-01-15T10:00",
        DataValidationUtil.validateAndFormatTimestamp("COL", epochSeconds, UTC, true, 0));
  }

  /**
   * An {@code int} epoch value, formatted with the boolean argument set to {@code false}, is
   * expected to keep the {@code Z} zone suffix.
   */
  @Test
  public void testValidateAndFormatTimestamp_integerEpochLtz() {
    // 1705312800 seconds since the epoch is 2024-01-15T10:00:00 UTC
    final int epochSeconds = 1705312800;
    assertEquals(
        "2024-01-15T10:00Z",
        DataValidationUtil.validateAndFormatTimestamp("COL", epochSeconds, UTC, false, 0));
  }

  /** A timestamp string carrying an explicit offset is expected to be returned unchanged. */
  @Test
  public void testValidateAndFormatTimestamp_stringPassthrough() {
    // String input with explicit timezone
    final String withOffset = "2024-01-15T13:45:30+05:00";
    assertEquals(
        withOffset,
        DataValidationUtil.validateAndFormatTimestamp("COL", withOffset, UTC, false, 0));
  }

  /** A string that is not a timestamp must be rejected with {@code INVALID_VALUE_ROW}. */
  @Test
  public void testValidateAndFormatTimestamp_invalidString() {
    final String garbage = "not_a_timestamp";
    expectError(
        ErrorCode.INVALID_VALUE_ROW,
        () -> DataValidationUtil.validateAndFormatTimestamp("COL", garbage, UTC, true, 0));
  }

  /**
   * Parses {@code value} with the shared {@code objectMapper}, rethrowing the checked {@code
   * JsonProcessingException} as a {@code RuntimeException} so callers need no throws clause.
   */
  private JsonNode readTree(String value) {
    final JsonNode parsed;
    try {
      parsed = objectMapper.readTree(value);
    } catch (JsonProcessingException parseFailure) {
      throw new RuntimeException(parseFailure);
    }
    return parsed;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/validation/RowValidatorTest.java
================================================
/*
 * Copyright (c) 2026 Snowflake Computing Inc. All rights reserved.
 *
 * Tests for the validation integration layer (Commit 4).
 */

package com.snowflake.kafka.connector.internal.validation;

import static org.junit.jupiter.api.Assertions.*;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.*;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;

/** Tests for RowValidator, ColumnSchema, and ValidationResult */
public class RowValidatorTest {

  // ================ ColumnSchema Tests ================

  /**
   * A {@code NUMBER(38,0)} row flagged "Y" is expected to parse to a nullable FIXED/SB16 column
   * with precision 38 and scale 0.
   */
  @Test
  public void testColumnSchemaParseNumber() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL1", "NUMBER(38,0)", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    // Identity and type mapping
    assertEquals("COL1", parsed.getName());
    assertEquals(ColumnLogicalType.FIXED, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.SB16, parsed.getPhysicalType());
    // Nullability and numeric parameters
    assertTrue(parsed.isNullable());
    assertEquals(38, parsed.getPrecision());
    assertEquals(0, parsed.getScale());
  }

  /**
   * A {@code VARCHAR(16777216)} row flagged "N" is expected to parse to a non-nullable TEXT/LOB
   * column whose byte length is 16777216 rather than four times the character length.
   */
  @Test
  public void testColumnSchemaParseVarchar() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL2", "VARCHAR(16777216)", "N");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals("COL2", parsed.getName());
    assertEquals(ColumnLogicalType.TEXT, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.LOB, parsed.getPhysicalType());
    assertFalse(parsed.isNullable());
    assertEquals(16777216, parsed.getLength());
    // The byte length is expected to stay at 16777216 (16MB), not 16777216 * 4 = 64MB.
    assertEquals(16777216, parsed.getByteLength());
  }

  /** A small {@code VARCHAR(1000)} is expected to report byte length 4000, i.e. length * 4. */
  @Test
  public void testColumnSchemaParseVarcharSmall() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL3", "VARCHAR(1000)", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals("COL3", parsed.getName());
    assertEquals(ColumnLogicalType.TEXT, parsed.getLogicalType());
    assertEquals(1000, parsed.getLength());
    // 1000 characters * 4 bytes stays below the cap, so no capping is expected here.
    assertEquals(4000, parsed.getByteLength());
  }

  /** {@code TIMESTAMP_NTZ(9)} is expected to parse to TIMESTAMP_NTZ/SB8 with scale 9. */
  @Test
  public void testColumnSchemaParseTimestampNtz() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL3", "TIMESTAMP_NTZ(9)", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals("COL3", parsed.getName());
    assertEquals(ColumnLogicalType.TIMESTAMP_NTZ, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.SB8, parsed.getPhysicalType());
    assertEquals(9, parsed.getScale());
  }

  /** {@code BINARY(8388608)} is expected to parse to BINARY/BINARY with byte length 8388608. */
  @Test
  public void testColumnSchemaParseBinary() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL4", "BINARY(8388608)", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals("COL4", parsed.getName());
    assertEquals(ColumnLogicalType.BINARY, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.BINARY, parsed.getPhysicalType());
    assertEquals(8388608, parsed.getByteLength());
  }

  /** {@code VARIANT} is expected to parse to logical type VARIANT with physical type LOB. */
  @Test
  public void testColumnSchemaParseVariant() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL5", "VARIANT", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals("COL5", parsed.getName());
    assertEquals(ColumnLogicalType.VARIANT, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.LOB, parsed.getPhysicalType());
  }

  /** A bare {@code ARRAY} is expected to parse to logical type ARRAY with physical type LOB. */
  @Test
  public void testColumnSchemaParseArray() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL6", "ARRAY", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals(ColumnLogicalType.ARRAY, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.LOB, parsed.getPhysicalType());
  }

  /** {@code BOOLEAN} is expected to parse to logical type BOOLEAN with physical type SB1. */
  @Test
  public void testColumnSchemaParseBoolean() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL7", "BOOLEAN", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals(ColumnLogicalType.BOOLEAN, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.SB1, parsed.getPhysicalType());
  }

  /**
   * A type string this test treats as unknown ({@code GEOGRAPHY}) is expected to parse without
   * throwing and to leave both the logical and the physical type {@code null}.
   */
  @Test
  public void testColumnSchemaParseUnknownType() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL8", "GEOGRAPHY", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertNull(parsed.getLogicalType());
    assertNull(parsed.getPhysicalType());
  }

  // ================ ValidationResult Tests ================

  /** {@code ValidationResult.valid()} is valid and reports no error or schema-evolution need. */
  @Test
  public void testValidationResultValid() {
    ValidationResult outcome = ValidationResult.valid();

    assertTrue(outcome.isValid());
    // None of the error flags may be raised on a valid result.
    assertFalse(outcome.hasTypeError());
    assertFalse(outcome.hasStructuralError());
    assertFalse(outcome.needsSchemaEvolution());
  }

  /**
   * {@code ValidationResult.typeError} yields an invalid result that exposes the column name and
   * message it was built with, and reports the error type {@code "type_error"}.
   */
  @Test
  public void testValidationResultTypeError() {
    ValidationResult outcome = ValidationResult.typeError("COL1", "Invalid type");

    // Flags
    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertFalse(outcome.hasStructuralError());
    // Payload
    assertEquals("COL1", outcome.getColumnName());
    assertEquals("Invalid type", outcome.getValueError());
    assertEquals("type_error", outcome.getErrorType());
  }

  /**
   * {@code ValidationResult.structuralError} built from three non-empty sets yields an invalid
   * result that needs schema evolution, keeps the size of each set, and reports the error type
   * {@code "structural_error"}.
   */
  @Test
  public void testValidationResultStructuralError() {
    Set<String> extra = new HashSet<>();
    extra.add("EXTRA1");
    extra.add("EXTRA2");
    Set<String> missing = new HashSet<>();
    missing.add("REQUIRED1");
    Set<String> nulled = new HashSet<>();
    nulled.add("COL2");

    ValidationResult outcome = ValidationResult.structuralError(extra, missing, nulled);

    // Flags
    assertFalse(outcome.isValid());
    assertFalse(outcome.hasTypeError());
    assertTrue(outcome.hasStructuralError());
    assertTrue(outcome.needsSchemaEvolution());
    // Payload sizes mirror the input sets.
    assertEquals(2, outcome.getExtraColNames().size());
    assertEquals(1, outcome.getMissingNotNullColNames().size());
    assertEquals(1, outcome.getNullValueForNotNullColNames().size());
    assertEquals("structural_error", outcome.getErrorType());
  }

  /**
   * A structural error built from three empty sets is still invalid and still flagged as
   * structural, but is expected not to request schema evolution.
   */
  @Test
  public void testValidationResultEmptyStructuralError() {
    Set<String> none = Collections.emptySet();

    ValidationResult outcome = ValidationResult.structuralError(none, none, none);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasStructuralError());
    // Nothing to add or relax, so no evolution is expected.
    assertFalse(outcome.needsSchemaEvolution());
  }

  // ================ RowValidator Tests ================

  /** A string for a TEXT column plus an integer for a FIXED column is expected to validate. */
  @Test
  public void testValidateRowValid() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100));
    columns.put("COL2", createColumnSchema("COL2", ColumnLogicalType.FIXED, true, 38, 0, null));

    Map<String, Object> record = new HashMap<>();
    record.put("COL1", "test value");
    record.put("COL2", 123);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
  }

  /** A row key that is absent from the schema is expected to be reported as an extra column. */
  @Test
  public void testValidateRowExtraColumn() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100));

    Map<String, Object> record = new HashMap<>();
    record.put("COL1", "test value");
    // COL2 has no entry in the schema map above.
    record.put("COL2", "extra column");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasStructuralError());
    assertTrue(outcome.getExtraColNames().contains("COL2"));
  }

  /** An empty row against a schema with one NOT NULL column reports that column as missing. */
  @Test
  public void testValidateRowMissingNotNull() {
    // Third argument false marks the column NOT NULL.
    ColumnSchema requiredText =
        createColumnSchema("COL1", ColumnLogicalType.TEXT, false, null, null, 100);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", requiredText);

    // Deliberately left empty: COL1 is not supplied at all.
    Map<String, Object> emptyRecord = new HashMap<>();

    ValidationResult outcome = new RowValidator(columns).validateRow(emptyRecord);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasStructuralError());
    assertTrue(outcome.getMissingNotNullColNames().contains("COL1"));
  }

  /** An explicit null for a NOT NULL column is reported as a null-value-for-NOT-NULL error. */
  @Test
  public void testValidateRowNullInNotNull() {
    // Third argument false marks the column NOT NULL.
    ColumnSchema requiredText =
        createColumnSchema("COL1", ColumnLogicalType.TEXT, false, null, null, 100);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", requiredText);

    // The key is present, but its value is null.
    Map<String, Object> record = new HashMap<>();
    record.put("COL1", null);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasStructuralError());
    assertTrue(outcome.getNullValueForNotNullColNames().contains("COL1"));
  }

  /** A non-numeric string sent to a FIXED column yields a type error naming that column. */
  @Test
  public void testValidateRowInvalidType() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", createColumnSchema("COL1", ColumnLogicalType.FIXED, true, 38, 0, null));

    // A string that cannot be read as a number, aimed at the numeric column.
    Map<String, Object> record = new HashMap<>();
    record.put("COL1", "not a number");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("COL1", outcome.getColumnName());
    assertNotNull(outcome.getValueError());
  }

  /**
   * A row key containing a space validates when the schema map holds the identical key. Per the
   * original author's note, names are expected to be normalized by the caller
   * (SnowflakeSinkRecord) and compared as-is by RowValidator.
   */
  @Test
  public void testValidateRowMatchingColumnName() {
    final String spacedName = "COL NAME";

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put(
        spacedName, createColumnSchema(spacedName, ColumnLogicalType.TEXT, true, null, null, 100));

    Map<String, Object> record = new HashMap<>();
    record.put(spacedName, "test value");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
  }

  /** A column whose logical type is {@code null} makes {@code validateSchema} throw. */
  @Test
  public void testValidateSchemaUnsupportedType() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    // Second argument null: the column carries no logical type.
    columns.put("COL1", createColumnSchema("COL1", null, true, null, null, null));

    assertThrows(SFExceptionValidation.class, () -> RowValidator.validateSchema(columns));
  }

  /** A TEXT column built with the collation {@code "en-ci"} makes {@code validateSchema} throw. */
  @Test
  public void testValidateSchemaCollatedColumn() {
    ColumnSchema withCollation =
        new ColumnSchema(
            "COL1",
            ColumnLogicalType.TEXT,
            ColumnPhysicalType.LOB,
            true,
            null,
            null,
            100,
            400,
            "en-ci"); // last argument is the collation
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", withCollation);

    assertThrows(SFExceptionValidation.class, () -> RowValidator.validateSchema(columns));
  }

  /** A schema of TEXT, FIXED and VARIANT columns passes {@code validateSchema} without throwing. */
  @Test
  public void testValidateSchemaValid() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100));
    columns.put("COL2", createColumnSchema("COL2", ColumnLogicalType.FIXED, true, 38, 0, null));
    columns.put(
        "COL3", createColumnSchema("COL3", ColumnLogicalType.VARIANT, true, null, null, null));

    assertDoesNotThrow(() -> RowValidator.validateSchema(columns));
  }

  /**
   * A row containing an empty-string key next to a valid column is expected to be rejected. Only
   * {@code isValid() == false} is asserted; the error category is not pinned by this test.
   */
  @Test
  public void testValidateRowEmptyColumnName() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100));

    Map<String, Object> record = new HashMap<>();
    record.put("", "value"); // the empty column name under test
    record.put("COL1", "test value");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    // NOTE(review): presumably surfaced as an extra column — confirm against RowValidator.
    assertFalse(outcome.isValid());
  }

  /**
   * A row with a spaces-only key and a tab/newline key next to a valid column is expected to be
   * rejected. Only {@code isValid() == false} is asserted; the error category is not pinned.
   */
  @Test
  public void testValidateRowWhitespaceColumnName() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", createColumnSchema("COL1", ColumnLogicalType.TEXT, true, null, null, 100));

    Map<String, Object> record = new HashMap<>();
    record.put("   ", "value"); // three spaces
    record.put("\t\n", "value2"); // tab followed by newline
    record.put("COL1", "test value");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    // NOTE(review): presumably surfaced as extra columns — confirm against RowValidator.
    assertFalse(outcome.isValid());
  }

  // ================ Code Review Fix Tests ================

  /**
   * A structured OBJECT type such as {@code OBJECT(a NUMBER(38,0), b VARCHAR(16777216))} must be
   * rejected by {@code ColumnSchema.fromDescribeTableRow} with an IllegalArgumentException whose
   * message names the unsupported structured form and points at the unstructured alternative.
   */
  @Test
  public void testStructuredObjectTypeRejected() throws SQLException {
    ResultSet describeRow =
        mockDescribeTableRow("COL1", "OBJECT(a NUMBER(38,0), b VARCHAR(16777216))", "Y");

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> ColumnSchema.fromDescribeTableRow(describeRow));

    String message = thrown.getMessage();
    assertTrue(message.contains("Structured OBJECT types are not supported"));
    assertTrue(message.contains("unstructured OBJECT"));
  }

  /**
   * A structured ARRAY type such as {@code ARRAY(INT)} must be rejected by {@code
   * ColumnSchema.fromDescribeTableRow} with an IllegalArgumentException whose message names the
   * unsupported structured form and points at the unstructured alternative.
   */
  @Test
  public void testStructuredArrayTypeRejected() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL1", "ARRAY(INT)", "Y");

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> ColumnSchema.fromDescribeTableRow(describeRow));

    String message = thrown.getMessage();
    assertTrue(message.contains("Structured ARRAY types are not supported"));
    assertTrue(message.contains("unstructured ARRAY"));
  }

  /** A bare {@code OBJECT} type string parses to logical type OBJECT with physical type LOB. */
  @Test
  public void testUnstructuredObjectTypeAccepted() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL1", "OBJECT", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals(ColumnLogicalType.OBJECT, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.LOB, parsed.getPhysicalType());
  }

  /** A bare {@code ARRAY} type string parses to logical type ARRAY with physical type LOB. */
  @Test
  public void testUnstructuredArrayTypeAccepted() throws SQLException {
    ResultSet describeRow = mockDescribeTableRow("COL1", "ARRAY", "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals(ColumnLogicalType.ARRAY, parsed.getLogicalType());
    assertEquals(ColumnPhysicalType.LOB, parsed.getPhysicalType());
  }

  /**
   * A type string with nested parentheses, {@code OBJECT(a NUMBER(38,0), b VARCHAR(100))}, must
   * fail with the structured-type message and not with a "Malformed type string" message. Per the
   * original author's note this guards the parameter extraction, which has to span up to the last
   * closing parenthesis instead of stopping at the first one.
   */
  @Test
  public void testNestedTypeParsingWithLastIndexOf() throws SQLException {
    ResultSet describeRow =
        mockDescribeTableRow("COL1", "OBJECT(a NUMBER(38,0), b VARCHAR(100))", "Y");

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> ColumnSchema.fromDescribeTableRow(describeRow));

    // The failure must be the structured-type rejection, not a parse failure.
    String message = thrown.getMessage();
    assertTrue(message.contains("Structured OBJECT types are not supported"));
    assertFalse(message.contains("Malformed type string"));
  }

  /**
   * A NOT NULL column that is absent from the row must be reported as missing, must request schema
   * evolution, and must not count as an unresolvable error. The original author notes this matches
   * KC v3, which dropped NOT NULL for both missing and null values.
   */
  @Test
  public void testMissingNotNullColumnTriggersSchemaEvolution() {
    // Third argument false marks the column NOT NULL.
    ColumnSchema requiredNumber =
        createColumnSchema("COL1", ColumnLogicalType.FIXED, false, 38, 0, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", requiredNumber);

    // Deliberately left empty: COL1 is not supplied at all.
    Map<String, Object> emptyRecord = new HashMap<>();

    ValidationResult outcome = new RowValidator(columns).validateRow(emptyRecord);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasStructuralError());
    assertEquals(1, outcome.getMissingNotNullColNames().size());
    assertTrue(outcome.getMissingNotNullColNames().contains("COL1"));

    // Resolvable through schema evolution rather than a hard failure.
    assertTrue(outcome.needsSchemaEvolution());
    assertFalse(outcome.hasUnresolvableError());
  }

  /**
   * A NOT NULL column that is present with a null value must be reported in the
   * null-value-for-NOT-NULL set, must request schema evolution, and must not count as an
   * unresolvable error.
   */
  @Test
  public void testNullNotNullColumnTriggersSchemaEvolution() {
    // Third argument false marks the column NOT NULL.
    ColumnSchema requiredNumber =
        createColumnSchema("COL1", ColumnLogicalType.FIXED, false, 38, 0, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", requiredNumber);

    // The key is present, but its value is null.
    Map<String, Object> record = new HashMap<>();
    record.put("COL1", null);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasStructuralError());
    assertEquals(1, outcome.getNullValueForNotNullColNames().size());
    assertTrue(outcome.getNullValueForNotNullColNames().contains("COL1"));

    // Resolvable through schema evolution rather than a hard failure.
    assertTrue(outcome.needsSchemaEvolution());
    assertFalse(outcome.hasUnresolvableError());
  }

  /**
   * Null values for nullable columns are acceptable: a row that sets a nullable FIXED column and a
   * nullable TEXT column to null must validate with no structural or type error.
   */
  @Test
  public void testNullValueInNullableColumnIsValid() {
    // Third argument true marks both columns as nullable.
    ColumnSchema nullableNumber =
        createColumnSchema("COL1", ColumnLogicalType.FIXED, true, 38, 0, null);
    ColumnSchema nullableText =
        createColumnSchema("COL2", ColumnLogicalType.TEXT, true, null, null, 100);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", nullableNumber);
    columns.put("COL2", nullableText);

    Map<String, Object> record = new HashMap<>();
    record.put("COL1", null);
    record.put("COL2", null);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    assertFalse(outcome.hasStructuralError());
    assertFalse(outcome.hasTypeError());
  }

  /** A nullable FIXED column that receives an actual integer value must validate. */
  @Test
  public void testNullableColumnWithValue() {
    // Third argument true marks the column as nullable.
    ColumnSchema nullableNumber =
        createColumnSchema("COL1", ColumnLogicalType.FIXED, true, 38, 0, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("COL1", nullableNumber);

    Map<String, Object> record = new HashMap<>();
    record.put("COL1", 42);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
  }

  /**
   * Guards against integer overflow when deriving a VARCHAR byte length. The declared length is
   * {@code Integer.MAX_VALUE / 2}, for which an {@code int} multiplication by 4 would overflow;
   * the parsed byte length must instead be 16777216 and the declared length must be kept as-is.
   */
  @Test
  public void testVarcharLargeValueNoOverflow() throws SQLException {
    // Roughly one billion characters, standing in for a corrupted DESCRIBE TABLE result.
    final int hugeLength = Integer.MAX_VALUE / 2;
    final String typeText = "VARCHAR(" + hugeLength + ")";
    ResultSet describeRow = mockDescribeTableRow("COL1", typeText, "Y");

    ColumnSchema parsed = ColumnSchema.fromDescribeTableRow(describeRow);

    assertEquals(16777216, parsed.getByteLength()); // expected 16MB cap
    assertEquals(hugeLength, parsed.getLength()); // declared length preserved
  }

  // ================ Server-Filled Column Tests (FR7) ================

  /** A row that omits a NOT NULL autoincrement column is expected to validate. */
  @Test
  public void testValidateRow_missingIdentityColumn_passes() {
    ColumnSchema identityColumn =
        new ColumnSchema(
            "ID",
            ColumnLogicalType.FIXED,
            ColumnPhysicalType.SB16,
            false,
            38,
            0,
            null,
            null,
            null,
            false,
            true); // NOT NULL, autoincrement=true
    ColumnSchema dataColumn =
        createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100);

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("ID", identityColumn);
    columns.put("DATA", dataColumn);

    // ID is left out on purpose; the server is expected to fill it.
    Map<String, Object> record = new HashMap<>();
    record.put("DATA", "hello");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid(), "Record should be valid when identity column is omitted");
  }

  /** A row that omits a NOT NULL column carrying a default is expected to validate. */
  @Test
  public void testValidateRow_missingDefaultNotNullColumn_passes() {
    ColumnSchema dataColumn =
        createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100);
    ColumnSchema createdAtColumn =
        new ColumnSchema(
            "CREATED_AT",
            ColumnLogicalType.TIMESTAMP_NTZ,
            ColumnPhysicalType.SB8,
            false,
            null,
            9,
            null,
            null,
            null,
            true,
            false); // NOT NULL, hasDefault=true

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("DATA", dataColumn);
    columns.put("CREATED_AT", createdAtColumn);

    // CREATED_AT is left out on purpose; the server is expected to fill it.
    Map<String, Object> record = new HashMap<>();
    record.put("DATA", "hello");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid(), "Record should be valid when default NOT NULL column is omitted");
  }

  /**
   * A NOT NULL column with neither a default nor autoincrement is still required: omitting it
   * must invalidate the row and list the column as missing.
   */
  @Test
  public void testValidateRow_missingRegularNotNullColumn_stillFails() {
    ColumnSchema requiredColumn =
        new ColumnSchema(
            "REQUIRED",
            ColumnLogicalType.TEXT,
            ColumnPhysicalType.LOB,
            false,
            null,
            null,
            100,
            400,
            null,
            false,
            false); // NOT NULL, no default, no autoincrement

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("REQUIRED", requiredColumn);

    // Deliberately left empty: nothing on the server side can fill REQUIRED.
    Map<String, Object> emptyRecord = new HashMap<>();

    ValidationResult outcome = new RowValidator(columns).validateRow(emptyRecord);

    assertFalse(
        outcome.isValid(), "Record should be invalid when regular NOT NULL column is missing");
    assertTrue(outcome.getMissingNotNullColNames().contains("REQUIRED"));
  }

  /**
   * With one autoincrement column and two defaulted columns (all NOT NULL) plus one nullable
   * column, a row that supplies only the nullable column is expected to validate.
   */
  @Test
  public void testValidateRow_mixedServerFilledAndRegularColumns() {
    ColumnSchema idColumn =
        new ColumnSchema(
            "ID",
            ColumnLogicalType.FIXED,
            ColumnPhysicalType.SB16,
            false,
            38,
            0,
            null,
            null,
            null,
            false,
            true); // autoincrement
    ColumnSchema dataColumn =
        createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100);
    ColumnSchema createdAtColumn =
        new ColumnSchema(
            "CREATED_AT",
            ColumnLogicalType.TIMESTAMP_NTZ,
            ColumnPhysicalType.SB8,
            false,
            null,
            9,
            null,
            null,
            null,
            true,
            false); // default
    ColumnSchema statusColumn =
        new ColumnSchema(
            "STATUS",
            ColumnLogicalType.FIXED,
            ColumnPhysicalType.SB16,
            false,
            38,
            0,
            null,
            null,
            null,
            true,
            false); // default

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("ID", idColumn);
    columns.put("DATA", dataColumn);
    columns.put("CREATED_AT", createdAtColumn);
    columns.put("STATUS", statusColumn);

    // Only DATA is supplied; ID, CREATED_AT and STATUS are left for the server to fill.
    Map<String, Object> record = new HashMap<>();
    record.put("DATA", "hello");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(
        outcome.isValid(),
        "Record should be valid when only server-filled NOT NULL columns are missing");
  }

  /** Supplying an explicit value for an autoincrement column is still accepted. */
  @Test
  public void testValidateRow_explicitValueForIdentityColumn_passes() {
    ColumnSchema identityColumn =
        new ColumnSchema(
            "ID",
            ColumnLogicalType.FIXED,
            ColumnPhysicalType.SB16,
            false,
            38,
            0,
            null,
            null,
            null,
            false,
            true); // autoincrement
    ColumnSchema dataColumn =
        createColumnSchema("DATA", ColumnLogicalType.TEXT, true, null, null, 100);

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("ID", identityColumn);
    columns.put("DATA", dataColumn);

    // The caller provides ID itself instead of leaving it to the server.
    Map<String, Object> record = new HashMap<>();
    record.put("ID", 42);
    record.put("DATA", "hello");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(
        outcome.isValid(), "Record should be valid when identity column is explicitly provided");
  }

  /**
   * Checks {@code isServerFilled}, {@code isAutoincrement} and {@code hasDefault} for three
   * columns: one built with autoincrement only, one with a default only, and one with neither.
   */
  @Test
  public void testColumnSchema_isServerFilled() {
    // Case 1: last two constructor arguments (false, true) -> autoincrement only.
    ColumnSchema identity =
        new ColumnSchema(
            "ID",
            ColumnLogicalType.FIXED,
            ColumnPhysicalType.SB16,
            false,
            38,
            0,
            null,
            null,
            null,
            false,
            true);
    assertTrue(identity.isServerFilled());
    assertTrue(identity.isAutoincrement());
    assertFalse(identity.hasDefault());

    // Case 2: last two constructor arguments (true, false) -> default only.
    ColumnSchema defaulted =
        new ColumnSchema(
            "TS",
            ColumnLogicalType.TIMESTAMP_NTZ,
            ColumnPhysicalType.SB8,
            false,
            null,
            9,
            null,
            null,
            null,
            true,
            false);
    assertTrue(defaulted.isServerFilled());
    assertFalse(defaulted.isAutoincrement());
    assertTrue(defaulted.hasDefault());

    // Case 3: a column from the createColumnSchema helper is expected to have neither.
    ColumnSchema plain = createColumnSchema("REG", ColumnLogicalType.TEXT, false, null, null, 100);
    assertFalse(plain.isServerFilled());
    assertFalse(plain.isAutoincrement());
    assertFalse(plain.hasDefault());
  }

  /**
   * Validating a hex string for a BINARY column replaces the row's value with the decoded {@code
   * byte[]}: {@code "FFFFFFFF"} becomes four {@code 0xFF} bytes in the same row map.
   */
  @Test
  public void testValidateRowBinaryHexStringConvertedToByteArray() {
    ColumnSchema binaryColumn =
        new ColumnSchema(
            "BIN_COL",
            ColumnLogicalType.BINARY,
            ColumnPhysicalType.BINARY,
            true,
            null,
            null,
            null,
            8388608,
            null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BIN_COL", binaryColumn);

    Map<String, Object> record = new HashMap<>();
    record.put("BIN_COL", "FFFFFFFF");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    // The row map itself must now hold bytes where the hex string used to be.
    byte[] expectedBytes = {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF};
    Object stored = record.get("BIN_COL");
    assertInstanceOf(byte[].class, stored);
    assertArrayEquals(expectedBytes, (byte[]) stored);
  }

  /** A {@code byte[]} given to a BINARY column is still a {@code byte[]} with equal content. */
  @Test
  public void testValidateRowBinaryByteArrayPassthrough() {
    ColumnSchema binaryColumn =
        new ColumnSchema(
            "BIN_COL",
            ColumnLogicalType.BINARY,
            ColumnPhysicalType.BINARY,
            true,
            null,
            null,
            null,
            8388608,
            null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BIN_COL", binaryColumn);

    byte[] suppliedBytes = {0x01, 0x02, 0x03};
    Map<String, Object> record = new HashMap<>();
    record.put("BIN_COL", suppliedBytes);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    Object stored = record.get("BIN_COL");
    assertInstanceOf(byte[].class, stored);
    assertArrayEquals(suppliedBytes, (byte[]) stored);
  }

  /** An empty string for a BINARY column validates and is replaced by a zero-length byte array. */
  @Test
  public void testValidateRowBinaryEmptyHexString() {
    ColumnSchema binaryColumn =
        new ColumnSchema(
            "BIN_COL",
            ColumnLogicalType.BINARY,
            ColumnPhysicalType.BINARY,
            true,
            null,
            null,
            null,
            8388608,
            null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BIN_COL", binaryColumn);

    Map<String, Object> record = new HashMap<>();
    record.put("BIN_COL", "");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    Object stored = record.get("BIN_COL");
    assertInstanceOf(byte[].class, stored);
    assertArrayEquals(new byte[0], (byte[]) stored);
  }

  /** A hex string with an odd number of digits ("FFF") yields a type error on the BINARY column. */
  @Test
  public void testValidateRowBinaryOddLengthHexStringFails() {
    ColumnSchema binaryColumn =
        new ColumnSchema(
            "BIN_COL",
            ColumnLogicalType.BINARY,
            ColumnPhysicalType.BINARY,
            true,
            null,
            null,
            null,
            8388608,
            null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BIN_COL", binaryColumn);

    // Three hex digits cannot be split into whole bytes.
    Map<String, Object> record = new HashMap<>();
    record.put("BIN_COL", "FFF");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("BIN_COL", outcome.getColumnName());
  }

  /** Lowercase hex digits decode like uppercase: {@code "ffffffff"} gives four 0xFF bytes. */
  @Test
  public void testValidateRowBinaryLowercaseHexString() {
    ColumnSchema binaryColumn =
        new ColumnSchema(
            "BIN_COL",
            ColumnLogicalType.BINARY,
            ColumnPhysicalType.BINARY,
            true,
            null,
            null,
            null,
            8388608,
            null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BIN_COL", binaryColumn);

    Map<String, Object> record = new HashMap<>();
    record.put("BIN_COL", "ffffffff");

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    byte[] expectedBytes = {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF};
    Object stored = record.get("BIN_COL");
    assertInstanceOf(byte[].class, stored);
    assertArrayEquals(expectedBytes, (byte[]) stored);
  }

  // ================ VARCHAR Map/List serialization Tests ================

  /**
   * A Map sent to a VARCHAR column is replaced in the row by its JSON text. The original author
   * notes that the SSv1/SSv2 SDKs serialize complex objects via Jackson inside appendRow(), and
   * that RowValidator has to reproduce that behaviour.
   */
  @Test
  public void testValidateRowVarcharMapSerializedToJson() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put(
        "STR_COL", createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, null));

    Map<String, Object> payload = new LinkedHashMap<>();
    payload.put("key", "value");
    Map<String, Object> record = new HashMap<>();
    record.put("STR_COL", payload);

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    assertEquals("{\"key\":\"value\"}", record.get("STR_COL"));
  }

  /** A List sent to a VARCHAR column is replaced in the row by its JSON array text. */
  @Test
  public void testValidateRowVarcharListSerializedToJson() {
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put(
        "STR_COL", createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, null));

    Map<String, Object> record = new HashMap<>();
    record.put("STR_COL", Arrays.asList(1, 2, 3));

    ValidationResult outcome = new RowValidator(columns).validateRow(record);

    assertTrue(outcome.isValid());
    assertEquals("[1,2,3]", record.get("STR_COL"));
  }

  /** A Map containing another Map, supplied for VARCHAR, is serialized to nested JSON. */
  @Test
  public void testValidateRowVarcharNestedMapSerializedToJson() {
    Map<String, Object> inner = new LinkedHashMap<>();
    inner.put("b", 1);
    Map<String, Object> outer = new LinkedHashMap<>();
    outer.put("a", inner);
    Map<String, Object> payload = new HashMap<>();
    payload.put("STR_COL", outer);

    ColumnSchema textColumn =
        createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("STR_COL", textColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    assertEquals("{\"a\":{\"b\":1}}", payload.get("STR_COL"));
  }

  /** A Map whose JSON form is longer than the VARCHAR(N) limit is reported as a type error. */
  @Test
  public void testValidateRowVarcharMapExceedsLengthLimit() {
    final Integer maxLength = 5;

    Map<String, Object> mapValue = new LinkedHashMap<>();
    // Serialized form {"key":"value"} is 15 characters, which is over the limit of 5.
    mapValue.put("key", "value");
    Map<String, Object> payload = new HashMap<>();
    payload.put("STR_COL", mapValue);

    ColumnSchema boundedTextColumn =
        createColumnSchema("STR_COL", ColumnLogicalType.TEXT, true, null, null, maxLength);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("STR_COL", boundedTextColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
  }

  // ================ Boolean Normalization Tests ================

  /**
   * Integer 0 supplied for a BOOLEAN column is replaced by {@code Boolean.FALSE}. Integer 0/1 has
   * to be normalized before reaching the SSv2 SDK: the SDK only accepts Boolean for BOOLEAN
   * columns, and Integer inputs are silently dropped without this normalization.
   */
  @Test
  public void testValidateRowBooleanIntegerZeroNormalizedToFalse() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("BOOL_COL", 0);

    ColumnSchema booleanColumn =
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BOOL_COL", booleanColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    assertEquals(Boolean.FALSE, payload.get("BOOL_COL"));
  }

  /** Integer 1 supplied for a BOOLEAN column is replaced by {@code Boolean.TRUE}. */
  @Test
  public void testValidateRowBooleanIntegerOneNormalizedToTrue() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("BOOL_COL", 1);

    ColumnSchema booleanColumn =
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BOOL_COL", booleanColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    assertEquals(Boolean.TRUE, payload.get("BOOL_COL"));
  }

  /** Native Boolean inputs stay valid and come back as an equal Boolean value. */
  @Test
  public void testValidateRowBooleanNativeBooleanPassthrough() {
    ColumnSchema booleanColumn =
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BOOL_COL", booleanColumn);
    RowValidator rowValidator = new RowValidator(columns);

    Boolean[] nativeInputs = {Boolean.TRUE, Boolean.FALSE};
    for (Boolean nativeInput : nativeInputs) {
      Map<String, Object> payload = new HashMap<>();
      payload.put("BOOL_COL", nativeInput);

      ValidationResult outcome = rowValidator.validateRow(payload);

      assertTrue(outcome.isValid());
      Object stored = payload.get("BOOL_COL");
      assertInstanceOf(Boolean.class, stored);
      assertEquals(nativeInput, stored);
    }
  }

  /**
   * The string tokens true/yes/on and false/no/off are replaced by the matching Boolean
   * (previously accepted as String by SDK).
   */
  @Test
  public void testValidateRowBooleanStringTokensNormalizedToBoolean() {
    ColumnSchema booleanColumn =
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BOOL_COL", booleanColumn);
    RowValidator rowValidator = new RowValidator(columns);

    // Insertion-ordered so the truthy tokens are still exercised before the falsy ones.
    Map<String, Boolean> expectedByToken = new LinkedHashMap<>();
    expectedByToken.put("true", Boolean.TRUE);
    expectedByToken.put("yes", Boolean.TRUE);
    expectedByToken.put("on", Boolean.TRUE);
    expectedByToken.put("false", Boolean.FALSE);
    expectedByToken.put("no", Boolean.FALSE);
    expectedByToken.put("off", Boolean.FALSE);

    for (Map.Entry<String, Boolean> tokenCase : expectedByToken.entrySet()) {
      String token = tokenCase.getKey();
      Boolean expected = tokenCase.getValue();

      Map<String, Object> payload = new HashMap<>();
      payload.put("BOOL_COL", token);
      ValidationResult outcome = rowValidator.validateRow(payload);

      assertTrue(outcome.isValid(), "Expected valid for input: " + token);
      String mismatchPrefix = expected ? "Expected TRUE for: " : "Expected FALSE for: ";
      assertEquals(expected, payload.get("BOOL_COL"), mismatchPrefix + token);
    }
  }

  /**
   * An empty Map, an empty List and a non-boolean string are each reported as a type error on the
   * BOOLEAN column.
   */
  @Test
  public void testValidateRowBooleanInvalidInputProducesTypeError() {
    ColumnSchema booleanColumn =
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BOOL_COL", booleanColumn);
    RowValidator rowValidator = new RowValidator(columns);

    Object[] invalidInputs = {new HashMap<>(), new ArrayList<>(), "not_a_bool"};
    for (Object candidate : invalidInputs) {
      Map<String, Object> payload = new HashMap<>();
      payload.put("BOOL_COL", candidate);

      ValidationResult outcome = rowValidator.validateRow(payload);

      assertFalse(outcome.isValid(), "Expected type error for input: " + candidate);
      assertTrue(outcome.hasTypeError(), "Expected type error for input: " + candidate);
      assertEquals("BOOL_COL", outcome.getColumnName());
    }
  }

  /**
   * Numeric values other than 0/1 are rejected for BOOLEAN with a type error. SSv1 SDK's
   * DataValidationUtil.validateAndParseBoolean does accept any Number directly, but in KC v3 the
   * record mapper first converts all values to Strings, and SSv1's convertStringToBoolean only
   * accepts "0"/"1"/"true"/"false"/"yes"/"no"/"on"/"off". "42" is outside that set and is
   * rejected, so RowValidator pre-rejects non-0/1 Numbers to match end-to-end KC v3 behavior.
   */
  @Test
  public void testValidateRowBooleanNonZeroOneIntegerProducesTypeError() {
    ColumnSchema booleanColumn =
        createColumnSchema("BOOL_COL", ColumnLogicalType.BOOLEAN, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BOOL_COL", booleanColumn);
    RowValidator rowValidator = new RowValidator(columns);

    // A mix of boxed Integer and Long values, none of which is 0 or 1.
    Object[] nonBooleanNumbers = {42, -1, 999, 2L, -100L};
    for (Object number : nonBooleanNumbers) {
      Map<String, Object> payload = new HashMap<>();
      payload.put("BOOL_COL", number);

      ValidationResult outcome = rowValidator.validateRow(payload);

      assertFalse(outcome.isValid(), "Expected type error for numeric input: " + number);
      assertTrue(outcome.hasTypeError(), "Expected type error for numeric input: " + number);
      assertEquals("BOOL_COL", outcome.getColumnName());
    }
  }

  // ================ VARIANT normalization (String → native object) ================

  /**
   * JSON object string sent to VARIANT is parsed back to a Map so the SSv2 SDK stores it as a
   * native VARIANT object, not a JSON-quoted string.
   */
  @Test
  public void testValidateRowVariantJsonObjectStringNormalizedToMap() {
    Map<String, ColumnSchema> schema = new HashMap<>();
    schema.put("V", createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null));
    RowValidator validator = new RowValidator(schema);

    Map<String, Object> row = new HashMap<>();
    row.put("V", "{\"a\":1}");
    ValidationResult result = validator.validateRow(row);

    assertTrue(result.isValid());
    // assertInstanceOf turns a null or wrongly-typed value into a regular assertion failure; the
    // former eagerly-built message called getClass() on the value and threw NPE when it was null.
    Map<?, ?> normalized =
        assertInstanceOf(Map.class, row.get("V"), "Expected VARIANT value to be a Map");
    assertEquals(1, normalized.size());
    assertEquals(1, normalized.get("a"));
  }

  /**
   * JSON array string sent to VARIANT is parsed back to a List so the SSv2 SDK stores it as a
   * native array.
   */
  @Test
  public void testValidateRowVariantJsonArrayStringNormalizedToList() {
    Map<String, ColumnSchema> schema = new HashMap<>();
    schema.put("V", createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null));
    RowValidator validator = new RowValidator(schema);

    Map<String, Object> row = new HashMap<>();
    row.put("V", "[1,2,3]");
    ValidationResult result = validator.validateRow(row);

    assertTrue(result.isValid());
    // assertInstanceOf turns a null or wrongly-typed value into a regular assertion failure; the
    // former eagerly-built message called getClass() on the value and threw NPE when it was null.
    List<?> normalized =
        assertInstanceOf(List.class, row.get("V"), "Expected VARIANT value to be a List");
    assertEquals(Arrays.asList(1, 2, 3), normalized);
  }

  /** A native Map supplied for VARIANT is left in the row as the very same instance. */
  @Test
  public void testValidateRowVariantNativeObjectPassthrough() {
    Map<String, Object> nativeMap = new HashMap<>();
    nativeMap.put("key", "value");
    Map<String, Object> payload = new HashMap<>();
    payload.put("V", nativeMap);

    ColumnSchema variantColumn =
        createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("V", variantColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    assertSame(nativeMap, payload.get("V"), "Native Map should not be replaced");
  }

  /** A string that is not JSON, supplied for VARIANT, is reported as a type error on column V. */
  @Test
  public void testValidateRowVariantInvalidJsonStringProducesTypeError() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("V", "not valid json");

    ColumnSchema variantColumn =
        createColumnSchema("V", ColumnLogicalType.VARIANT, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("V", variantColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("V", outcome.getColumnName());
  }

  // ================ ARRAY normalization (String → List) ================

  /**
   * JSON array string sent to ARRAY is parsed back to a List so the SSv2 SDK stores it as a proper
   * array, not a single-element array wrapping the literal string.
   */
  @Test
  public void testValidateRowArrayJsonStringNormalizedToList() {
    Map<String, ColumnSchema> schema = new HashMap<>();
    schema.put("A", createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null));
    RowValidator validator = new RowValidator(schema);

    Map<String, Object> row = new HashMap<>();
    row.put("A", "[1,2,3]");
    ValidationResult result = validator.validateRow(row);

    assertTrue(result.isValid());
    // assertInstanceOf turns a null or wrongly-typed value into a regular assertion failure; the
    // former eagerly-built message called getClass() on the value and threw NPE when it was null.
    List<?> normalized =
        assertInstanceOf(List.class, row.get("A"), "Expected ARRAY value to be a List");
    assertEquals(Arrays.asList(1, 2, 3), normalized);
  }

  /**
   * Non-array JSON string sent to ARRAY is wrapped in a single-element List (matching
   * validateAndParseArray behavior which wraps non-arrays into single-element arrays).
   */
  @Test
  public void testValidateRowArrayNonArrayJsonStringWrappedInList() {
    Map<String, ColumnSchema> schema = new HashMap<>();
    schema.put("A", createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null));
    RowValidator validator = new RowValidator(schema);

    Map<String, Object> row = new HashMap<>();
    row.put("A", "\"hello\""); // JSON string (not an array)
    ValidationResult result = validator.validateRow(row);

    assertTrue(result.isValid());
    // assertInstanceOf turns a null or wrongly-typed value into a regular assertion failure; the
    // former eagerly-built message called getClass() on the value and threw NPE when it was null.
    List<?> normalized =
        assertInstanceOf(List.class, row.get("A"), "Expected ARRAY value to be a List");
    assertEquals(Arrays.asList("hello"), normalized);
  }

  /** A native List supplied for ARRAY is left in the row as the very same instance. */
  @Test
  public void testValidateRowArrayNativeListPassthrough() {
    List<Integer> nativeList = Arrays.asList(10, 20, 30);
    Map<String, Object> payload = new HashMap<>();
    payload.put("A", nativeList);

    ColumnSchema arrayColumn =
        createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("A", arrayColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    assertSame(nativeList, payload.get("A"), "Native List should not be replaced");
  }

  /** A string that is not JSON, supplied for ARRAY, is reported as a type error on column A. */
  @Test
  public void testValidateRowArrayInvalidJsonStringProducesTypeError() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("A", "not_json");

    ColumnSchema arrayColumn =
        createColumnSchema("A", ColumnLogicalType.ARRAY, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("A", arrayColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("A", outcome.getColumnName());
  }

  // ================ OBJECT validation Tests ================

  /** A string that is not JSON, supplied for OBJECT, is reported as a type error on column O. */
  @Test
  public void testValidateRowObjectInvalidJsonStringProducesTypeError() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("O", "not_json");

    ColumnSchema objectColumn =
        createColumnSchema("O", ColumnLogicalType.OBJECT, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("O", objectColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("O", outcome.getColumnName());
  }

  /** A well-formed JSON array string is still rejected for OBJECT because it is not an object. */
  @Test
  public void testValidateRowObjectArrayJsonStringProducesTypeError() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("O", "[1,2,3]");

    ColumnSchema objectColumn =
        createColumnSchema("O", ColumnLogicalType.OBJECT, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("O", objectColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("O", outcome.getColumnName());
  }

  /** A well-formed JSON object string supplied for OBJECT passes validation. */
  @Test
  public void testValidateRowObjectValidJsonStringAccepted() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("O", "{\"key\":\"value\"}");

    ColumnSchema objectColumn =
        createColumnSchema("O", ColumnLogicalType.OBJECT, true, null, null, null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("O", objectColumn);

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
  }

  /** A string that is not valid hex, supplied for BINARY, is reported as a type error. */
  @Test
  public void testValidateRowBinaryInvalidHexStringFails() {
    ColumnSchema binaryColumn =
        new ColumnSchema(
            "BIN_COL",
            ColumnLogicalType.BINARY,
            ColumnPhysicalType.BINARY,
            true,
            null,
            null,
            null,
            8388608,
            null);
    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("BIN_COL", binaryColumn);

    Map<String, Object> payload = new HashMap<>();
    payload.put("BIN_COL", "not-valid-hex!");

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("BIN_COL", outcome.getColumnName());
  }

  // ================ Timestamp normalization Tests ================

  /**
   * An Integer epoch supplied for TIMESTAMP_NTZ is replaced by an ISO timestamp string. The SSv2
   * SDK passes raw integers to the Snowflake backend, which interprets them using the channel's
   * default timezone (America/Los_Angeles) instead of UTC; SSv1 SDK converts epochs to UTC
   * client-side.
   */
  @Test
  public void testValidateRowTimestampNtzIntegerEpochNormalized() {
    // 1705312800 = 2024-01-15T10:00:00Z
    final int epoch = 1705312800;
    Map<String, Object> payload = new HashMap<>();
    payload.put("TS", epoch);

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ));

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    Object stored = payload.get("TS");
    assertInstanceOf(String.class, stored, "Integer epoch should be normalized to String");
    assertEquals("2024-01-15T10:00", stored);
  }

  /** A Long epoch supplied for TIMESTAMP_NTZ is normalized exactly like the Integer case. */
  @Test
  public void testValidateRowTimestampNtzLongEpochNormalized() {
    final long epoch = 1705312800L;
    Map<String, Object> payload = new HashMap<>();
    payload.put("TS", epoch);

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ));

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    Object stored = payload.get("TS");
    assertInstanceOf(String.class, stored, "Long epoch should be normalized to String");
    assertEquals("2024-01-15T10:00", stored);
  }

  /** A valid timestamp string supplied for TIMESTAMP_NTZ passes and is left unchanged. */
  @Test
  public void testValidateRowTimestampNtzStringPassthrough() {
    final String timestampText = "2024-01-15T13:45:30";
    Map<String, Object> payload = new HashMap<>();
    payload.put("TS", timestampText);

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ));

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    assertEquals("2024-01-15T13:45:30", payload.get("TS"));
  }

  /** An Integer epoch supplied for TIMESTAMP_LTZ becomes an ISO string carrying the UTC offset. */
  @Test
  public void testValidateRowTimestampLtzIntegerEpochNormalized() {
    final int epoch = 1705312800;
    Map<String, Object> payload = new HashMap<>();
    payload.put("TS", epoch);

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_LTZ));

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertTrue(outcome.isValid());
    Object stored = payload.get("TS");
    assertInstanceOf(String.class, stored, "Integer epoch should be normalized to String");
    assertEquals("2024-01-15T10:00Z", stored);
  }

  /** A string that is not a timestamp, supplied for TIMESTAMP_NTZ, is reported as a type error. */
  @Test
  public void testValidateRowTimestampNtzInvalidStringRejects() {
    Map<String, Object> payload = new HashMap<>();
    payload.put("TS", "not_a_timestamp");

    Map<String, ColumnSchema> columns = new HashMap<>();
    columns.put("TS", createTimestampColumnSchema("TS", ColumnLogicalType.TIMESTAMP_NTZ));

    ValidationResult outcome = new RowValidator(columns).validateRow(payload);

    assertFalse(outcome.isValid());
    assertTrue(outcome.hasTypeError());
    assertEquals("TS", outcome.getColumnName());
  }

  // ================ Helper Methods ================

  /**
   * Builds a Mockito {@link ResultSet} whose {@code getString} answers for the labels {@code
   * "name"}, {@code "type"} and {@code "null?"} are the given values.
   *
   * @param name value returned for {@code getString("name")}
   * @param type value returned for {@code getString("type")}
   * @param nullable value returned for {@code getString("null?")}
   * @return the stubbed mock
   * @throws SQLException declared because the stubbed {@code getString} signature declares it
   */
  private ResultSet mockDescribeTableRow(String name, String type, String nullable)
      throws SQLException {
    ResultSet describeRow = Mockito.mock(ResultSet.class);
    Mockito.doReturn(name).when(describeRow).getString("name");
    Mockito.doReturn(type).when(describeRow).getString("type");
    Mockito.doReturn(nullable).when(describeRow).getString("null?");
    return describeRow;
  }

  /**
   * Builds a {@link ColumnSchema} for tests, deriving the physical type and byte length from the
   * logical type and character length.
   *
   * <p>Physical type: {@code null} for a {@code null} logical type, SB16 for FIXED, SB1 for
   * BOOLEAN, and LOB for everything else (TEXT included). Byte length is four times {@code length},
   * or {@code null} when no length is given. The last constructor argument is always {@code null}.
   *
   * @param name column name
   * @param logicalType logical type, may be {@code null}
   * @param nullable nullability flag passed through to the schema
   * @param precision precision passed through, may be {@code null}
   * @param scale scale passed through, may be {@code null}
   * @param length character length, may be {@code null}
   * @return the assembled column schema
   */
  private ColumnSchema createColumnSchema(
      String name,
      ColumnLogicalType logicalType,
      boolean nullable,
      Integer precision,
      Integer scale,
      Integer length) {
    final ColumnPhysicalType physicalType;
    if (logicalType == null) {
      physicalType = null;
    } else if (logicalType == ColumnLogicalType.FIXED) {
      physicalType = ColumnPhysicalType.SB16;
    } else if (logicalType == ColumnLogicalType.BOOLEAN) {
      physicalType = ColumnPhysicalType.SB1;
    } else {
      // TEXT and every remaining logical type map to LOB.
      physicalType = ColumnPhysicalType.LOB;
    }

    Integer byteLength = null;
    if (length != null) {
      byteLength = length * 4;
    }

    return new ColumnSchema(
        name, logicalType, physicalType, nullable, precision, scale, length, byteLength, null);
  }

  /**
   * Builds a nullable {@link ColumnSchema} for a timestamp column: physical type SB8, scale 9, and
   * every other optional attribute {@code null}.
   *
   * @param name column name
   * @param logicalType timestamp logical type to assign
   * @return the assembled column schema
   */
  private ColumnSchema createTimestampColumnSchema(String name, ColumnLogicalType logicalType) {
    final boolean nullable = true;
    final Integer scale = 9;
    return new ColumnSchema(
        name, logicalType, ColumnPhysicalType.SB8, nullable, null, scale, null, null, null);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/internal/validation/SqlIdentifierNormalizerTest.java
================================================
package com.snowflake.kafka.connector.internal.validation;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

/** Tests for {@code SqlIdentifierNormalizer.normalizeSqlIdentifier}. */
public class SqlIdentifierNormalizerTest {

  /** Unquoted identifiers come back uppercased; a backslash-escaped space becomes a space. */
  @ParameterizedTest
  @CsvSource({
    // Plain unquoted identifiers: uppercased
    "city, CITY",
    "myCol, MYCOL",
    "ABC, ABC",
    "a_b_c, A_B_C",
    // Unquoted identifier containing an escaped space
    "col\\ name, COL NAME",
  })
  public void testUnquotedIdentifiers(String input, String expected) {
    String actual = SqlIdentifierNormalizer.normalizeSqlIdentifier(input);
    assertEquals(expected, actual);
  }

  /** Quoted identifiers lose the surrounding quotes, keep their case, and unescape "" to ". */
  @ParameterizedTest
  @CsvSource({
    // Surrounding quotes stripped, case preserved
    "'\"city\"', city",
    "'\"MyCol\"', MyCol",
    "'\"ABC\"', ABC",
    "'\"col name\"', col name",
    // Doubled double-quotes inside the quoted identifier
    "'\"col\"\"name\"', col\"name",
    "'\"\"\"city\"\"\"', \"city\"",
  })
  public void testQuotedIdentifiers(String input, String expected) {
    String actual = SqlIdentifierNormalizer.normalizeSqlIdentifier(input);
    assertEquals(expected, actual);
  }

  /** The empty string normalizes to the empty string. */
  @Test
  public void testEmptyString() {
    String actual = SqlIdentifierNormalizer.normalizeSqlIdentifier("");
    assertEquals("", actual);
  }

  /** A one-character unquoted identifier is uppercased. */
  @Test
  public void testSingleChar() {
    String actual = SqlIdentifierNormalizer.normalizeSqlIdentifier("a");
    assertEquals("A", actual);
  }

  /** A lone double-quote character is returned as-is. */
  @Test
  public void testSingleQuote() {
    // One double-quote on its own cannot form a quoted identifier, so it is handled as unquoted.
    String actual = SqlIdentifierNormalizer.normalizeSqlIdentifier("\"");
    assertEquals("\"", actual);
  }

  /** A pair of double-quotes with nothing between them normalizes to the empty string. */
  @Test
  public void testEmptyQuotedIdentifier() {
    String actual = SqlIdentifierNormalizer.normalizeSqlIdentifier("\"\"");
    assertEquals("", actual);
  }

  /** Normalizing the same input twice yields equal results, both the uppercased form. */
  @Test
  public void testCacheReturnsSameResult() {
    String firstCall = SqlIdentifierNormalizer.normalizeSqlIdentifier("cached_test");
    String secondCall = SqlIdentifierNormalizer.normalizeSqlIdentifier("cached_test");
    assertEquals(firstCall, secondCall);
    assertEquals("CACHED_TEST", firstCall);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/mock/MockResultSetForSizeTest.java
================================================
package com.snowflake.kafka.connector.mock;

import java.io.InputStream;
import java.io.Reader;
import java.math.BigDecimal;
import java.net.URL;
import java.sql.*;
import java.util.Calendar;
import java.util.Map;

public class MockResultSetForSizeTest implements ResultSet {
  private final int size;
  private int currentIndex;

  /**
   * Creates a mock result set with the cursor index at 0.
   *
   * @param size number of times {@link #next()} will return {@code true}
   */
  public MockResultSetForSizeTest(int size) {
    this.currentIndex = 0;
    this.size = size;
  }

  /**
   * Advances the cursor index while it is below {@code size}.
   *
   * @return {@code true} if the index was advanced, {@code false} once {@code size} is reached
   */
  @Override
  public boolean next() throws SQLException {
    if (currentIndex >= size) {
      return false;
    }
    currentIndex++;
    return true;
  }

  /** No-op stub. */
  @Override
  public void close() throws SQLException {}

  /** Stub: always returns {@code false}. */
  @Override
  public boolean wasNull() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public String getString(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code false}. */
  @Override
  public boolean getBoolean(final int columnIndex) throws SQLException {
    return false;
  }

  /** Stub: always returns 0. */
  @Override
  public byte getByte(final int columnIndex) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public short getShort(final int columnIndex) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public int getInt(final int columnIndex) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public long getLong(final int columnIndex) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public float getFloat(final int columnIndex) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public double getDouble(final int columnIndex) throws SQLException {
    return 0;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public BigDecimal getBigDecimal(final int columnIndex, final int scale) throws SQLException {
    return null;
  }

  /** Stub: always returns a new empty byte array (not {@code null}). */
  @Override
  public byte[] getBytes(final int columnIndex) throws SQLException {
    return new byte[0];
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Date getDate(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Time getTime(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Timestamp getTimestamp(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public InputStream getAsciiStream(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public InputStream getUnicodeStream(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public InputStream getBinaryStream(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public String getString(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code false}. */
  @Override
  public boolean getBoolean(final String columnLabel) throws SQLException {
    return false;
  }

  /** Stub: always returns 0. */
  @Override
  public byte getByte(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public short getShort(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public int getInt(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public long getLong(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public float getFloat(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public double getDouble(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public BigDecimal getBigDecimal(final String columnLabel, final int scale) throws SQLException {
    return null;
  }

  /** Stub: always returns a new empty byte array (not {@code null}). */
  @Override
  public byte[] getBytes(final String columnLabel) throws SQLException {
    return new byte[0];
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Date getDate(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Time getTime(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Timestamp getTimestamp(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public InputStream getAsciiStream(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public InputStream getUnicodeStream(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public InputStream getBinaryStream(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public SQLWarning getWarnings() throws SQLException {
    return null;
  }

  /** No-op stub. */
  @Override
  public void clearWarnings() throws SQLException {}

  /** Stub: always returns {@code null}. */
  @Override
  public String getCursorName() throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public ResultSetMetaData getMetaData() throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Object getObject(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Object getObject(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns 0, whatever label is passed. */
  @Override
  public int findColumn(final String columnLabel) throws SQLException {
    return 0;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Reader getCharacterStream(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public Reader getCharacterStream(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public BigDecimal getBigDecimal(final int columnIndex) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code null}. */
  @Override
  public BigDecimal getBigDecimal(final String columnLabel) throws SQLException {
    return null;
  }

  /** Stub: always returns {@code false}; the cursor index is not consulted. */
  @Override
  public boolean isBeforeFirst() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}; the cursor index is not consulted. */
  @Override
  public boolean isAfterLast() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}; the cursor index is not consulted. */
  @Override
  public boolean isFirst() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}; the cursor index is not consulted. */
  @Override
  public boolean isLast() throws SQLException {
    return false;
  }

  /** No-op stub; the cursor index is not changed. */
  @Override
  public void beforeFirst() throws SQLException {}

  /** No-op stub; the cursor index is not changed. */
  @Override
  public void afterLast() throws SQLException {}

  /** Stub: always returns {@code false}; the cursor index is not changed. */
  @Override
  public boolean first() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}; the cursor index is not changed. */
  @Override
  public boolean last() throws SQLException {
    return false;
  }

  /** Stub: always returns 0; the cursor index is not reported. */
  @Override
  public int getRow() throws SQLException {
    return 0;
  }

  /** Stub: always returns {@code false}; the cursor index is not changed. */
  @Override
  public boolean absolute(final int row) throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}; the cursor index is not changed. */
  @Override
  public boolean relative(final int rows) throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}; the cursor index is not changed. */
  @Override
  public boolean previous() throws SQLException {
    return false;
  }

  /** No-op stub; the argument is ignored. */
  @Override
  public void setFetchDirection(final int direction) throws SQLException {}

  /** Stub: always returns 0. */
  @Override
  public int getFetchDirection() throws SQLException {
    return 0;
  }

  /** No-op stub; the argument is ignored. */
  @Override
  public void setFetchSize(final int rows) throws SQLException {}

  /** Stub: always returns 0. */
  @Override
  public int getFetchSize() throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public int getType() throws SQLException {
    return 0;
  }

  /** Stub: always returns 0. */
  @Override
  public int getConcurrency() throws SQLException {
    return 0;
  }

  /** Stub: always returns {@code false}. */
  @Override
  public boolean rowUpdated() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}. */
  @Override
  public boolean rowInserted() throws SQLException {
    return false;
  }

  /** Stub: always returns {@code false}. */
  @Override
  public boolean rowDeleted() throws SQLException {
    return false;
  }

  /** No-op stub; the argument is ignored. */
  @Override
  public void updateNull(final int columnIndex) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateBoolean(final int columnIndex, final boolean x) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateByte(final int columnIndex, final byte x) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateShort(final int columnIndex, final short x) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateInt(final int columnIndex, final int x) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateLong(final int columnIndex, final long x) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateFloat(final int columnIndex, final float x) throws SQLException {}

  /** No-op stub; the arguments are ignored. */
  @Override
  public void updateDouble(final int columnIndex, final double x) throws SQLException {}

  @Override
  public void updateBigDecimal(final int columnIndex, final BigDecimal x) throws SQLException {}

  @Override
  public void updateString(final int columnIndex, final String x) throws SQLException {}

  @Override
  public void updateBytes(final int columnIndex, final byte[] x) throws SQLException {}

  @Override
  public void updateDate(final int columnIndex, final Date x) throws SQLException {}

  @Override
  public void updateTime(final int columnIndex, final Time x) throws SQLException {}

  @Override
  public void updateTimestamp(final int columnIndex, final Timestamp x) throws SQLException {}

  @Override
  public void updateAsciiStream(final int columnIndex, final InputStream x, final int length)
      throws SQLException {}

  @Override
  public void updateBinaryStream(final int columnIndex, final InputStream x, final int length)
      throws SQLException {}

  @Override
  public void updateCharacterStream(final int columnIndex, final Reader x, final int length)
      throws SQLException {}

  @Override
  public void updateObject(final int columnIndex, final Object x, final int scaleOrLength)
      throws SQLException {}

  @Override
  public void updateObject(final int columnIndex, final Object x) throws SQLException {}

  // Label-based column updaters: every overload below has an empty body, so the supplied
  // column label and value are discarded and nothing is stored.
  @Override
  public void updateNull(final String columnLabel) throws SQLException {}

  @Override
  public void updateBoolean(final String columnLabel, final boolean x) throws SQLException {}

  @Override
  public void updateByte(final String columnLabel, final byte x) throws SQLException {}

  @Override
  public void updateShort(final String columnLabel, final short x) throws SQLException {}

  @Override
  public void updateInt(final String columnLabel, final int x) throws SQLException {}

  @Override
  public void updateLong(final String columnLabel, final long x) throws SQLException {}

  @Override
  public void updateFloat(final String columnLabel, final float x) throws SQLException {}

  @Override
  public void updateDouble(final String columnLabel, final double x) throws SQLException {}

  @Override
  public void updateBigDecimal(final String columnLabel, final BigDecimal x) throws SQLException {}

  @Override
  public void updateString(final String columnLabel, final String x) throws SQLException {}

  @Override
  public void updateBytes(final String columnLabel, final byte[] x) throws SQLException {}

  @Override
  public void updateDate(final String columnLabel, final Date x) throws SQLException {}

  @Override
  public void updateTime(final String columnLabel, final Time x) throws SQLException {}

  @Override
  public void updateTimestamp(final String columnLabel, final Timestamp x) throws SQLException {}

  // The stream/reader arguments of the overloads below are neither read nor closed.
  @Override
  public void updateAsciiStream(final String columnLabel, final InputStream x, final int length)
      throws SQLException {}

  @Override
  public void updateBinaryStream(final String columnLabel, final InputStream x, final int length)
      throws SQLException {}

  @Override
  public void updateCharacterStream(final String columnLabel, final Reader reader, final int length)
      throws SQLException {}

  @Override
  public void updateObject(final String columnLabel, final Object x, final int scaleOrLength)
      throws SQLException {}

  @Override
  public void updateObject(final String columnLabel, final Object x) throws SQLException {}

  // Row-level mutation and insert-row navigation stubs: all bodies are empty, so calling
  // them has no effect and never throws.
  @Override
  public void insertRow() throws SQLException {}

  @Override
  public void updateRow() throws SQLException {}

  @Override
  public void deleteRow() throws SQLException {}

  @Override
  public void refreshRow() throws SQLException {}

  @Override
  public void cancelRowUpdates() throws SQLException {}

  @Override
  public void moveToInsertRow() throws SQLException {}

  @Override
  public void moveToCurrentRow() throws SQLException {}

  // Always null: no owning Statement is tracked by these stubs.
  @Override
  public Statement getStatement() throws SQLException {
    return null;
  }

  // Object/Ref/Blob/Clob/Array getters, by index and by label: each ignores its arguments
  // and returns null, so callers must be prepared for a null result.
  @Override
  public Object getObject(final int columnIndex, final Map<String, Class<?>> map)
      throws SQLException {
    return null;
  }

  @Override
  public Ref getRef(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public Blob getBlob(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public Clob getClob(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public Array getArray(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public Object getObject(final String columnLabel, final Map<String, Class<?>> map)
      throws SQLException {
    return null;
  }

  @Override
  public Ref getRef(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public Blob getBlob(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public Clob getClob(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public Array getArray(final String columnLabel) throws SQLException {
    return null;
  }

  // Calendar-aware temporal getters and URL getters: the column reference and the Calendar
  // are ignored and null is returned in every case.
  @Override
  public Date getDate(final int columnIndex, final Calendar cal) throws SQLException {
    return null;
  }

  @Override
  public Date getDate(final String columnLabel, final Calendar cal) throws SQLException {
    return null;
  }

  @Override
  public Time getTime(final int columnIndex, final Calendar cal) throws SQLException {
    return null;
  }

  @Override
  public Time getTime(final String columnLabel, final Calendar cal) throws SQLException {
    return null;
  }

  @Override
  public Timestamp getTimestamp(final int columnIndex, final Calendar cal) throws SQLException {
    return null;
  }

  @Override
  public Timestamp getTimestamp(final String columnLabel, final Calendar cal) throws SQLException {
    return null;
  }

  @Override
  public URL getURL(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public URL getURL(final String columnLabel) throws SQLException {
    return null;
  }

  // Ref/Blob/Clob/Array/RowId accessors: updaters have empty bodies (arguments discarded)
  // and the RowId getters always return null.
  @Override
  public void updateRef(final int columnIndex, final Ref x) throws SQLException {}

  @Override
  public void updateRef(final String columnLabel, final Ref x) throws SQLException {}

  @Override
  public void updateBlob(final int columnIndex, final Blob x) throws SQLException {}

  @Override
  public void updateBlob(final String columnLabel, final Blob x) throws SQLException {}

  @Override
  public void updateClob(final int columnIndex, final Clob x) throws SQLException {}

  @Override
  public void updateClob(final String columnLabel, final Clob x) throws SQLException {}

  @Override
  public void updateArray(final int columnIndex, final Array x) throws SQLException {}

  @Override
  public void updateArray(final String columnLabel, final Array x) throws SQLException {}

  @Override
  public RowId getRowId(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public RowId getRowId(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public void updateRowId(final int columnIndex, final RowId x) throws SQLException {}

  @Override
  public void updateRowId(final String columnLabel, final RowId x) throws SQLException {}

  // Always 0.
  @Override
  public int getHoldability() throws SQLException {
    return 0;
  }

  // Unconditionally false: this method does not consult any state, so it keeps returning
  // false no matter what was called before it.
  @Override
  public boolean isClosed() throws SQLException {
    return false;
  }

  // National-character and SQLXML accessors: updaters have empty bodies (arguments
  // discarded); every getter ignores its column reference and returns null.
  @Override
  public void updateNString(final int columnIndex, final String nString) throws SQLException {}

  @Override
  public void updateNString(final String columnLabel, final String nString) throws SQLException {}

  @Override
  public void updateNClob(final int columnIndex, final NClob nClob) throws SQLException {}

  @Override
  public void updateNClob(final String columnLabel, final NClob nClob) throws SQLException {}

  @Override
  public NClob getNClob(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public NClob getNClob(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public SQLXML getSQLXML(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public SQLXML getSQLXML(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public void updateSQLXML(final int columnIndex, final SQLXML xmlObject) throws SQLException {}

  @Override
  public void updateSQLXML(final String columnLabel, final SQLXML xmlObject) throws SQLException {}

  @Override
  public String getNString(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public String getNString(final String columnLabel) throws SQLException {
    return null;
  }

  @Override
  public Reader getNCharacterStream(final int columnIndex) throws SQLException {
    return null;
  }

  @Override
  public Reader getNCharacterStream(final String columnLabel) throws SQLException {
    return null;
  }

  // Stream/reader updaters taking an explicit long length: all bodies are empty, so the
  // supplied stream or reader is never read or closed and the length is ignored.
  @Override
  public void updateNCharacterStream(final int columnIndex, final Reader x, final long length)
      throws SQLException {}

  @Override
  public void updateNCharacterStream(
      final String columnLabel, final Reader reader, final long length) throws SQLException {}

  @Override
  public void updateAsciiStream(final int columnIndex, final InputStream x, final long length)
      throws SQLException {}

  @Override
  public void updateBinaryStream(final int columnIndex, final InputStream x, final long length)
      throws SQLException {}

  @Override
  public void updateCharacterStream(final int columnIndex, final Reader x, final long length)
      throws SQLException {}

  @Override
  public void updateAsciiStream(final String columnLabel, final InputStream x, final long length)
      throws SQLException {}

  @Override
  public void updateBinaryStream(final String columnLabel, final InputStream x, final long length)
      throws SQLException {}

  @Override
  public void updateCharacterStream(
      final String columnLabel, final Reader reader, final long length) throws SQLException {}

  @Override
  public void updateBlob(final int columnIndex, final InputStream inputStream, final long length)
      throws SQLException {}

  @Override
  public void updateBlob(final String columnLabel, final InputStream inputStream, final long length)
      throws SQLException {}

  @Override
  public void updateClob(final int columnIndex, final Reader reader, final long length)
      throws SQLException {}

  @Override
  public void updateClob(final String columnLabel, final Reader reader, final long length)
      throws SQLException {}

  @Override
  public void updateNClob(final int columnIndex, final Reader reader, final long length)
      throws SQLException {}

  @Override
  public void updateNClob(final String columnLabel, final Reader reader, final long length)
      throws SQLException {}

  // Stream/reader updaters without a length argument: all bodies are empty, so the
  // supplied stream or reader is never read or closed.
  @Override
  public void updateNCharacterStream(final int columnIndex, final Reader x) throws SQLException {}

  @Override
  public void updateNCharacterStream(final String columnLabel, final Reader reader)
      throws SQLException {}

  @Override
  public void updateAsciiStream(final int columnIndex, final InputStream x) throws SQLException {}

  @Override
  public void updateBinaryStream(final int columnIndex, final InputStream x) throws SQLException {}

  @Override
  public void updateCharacterStream(final int columnIndex, final Reader x) throws SQLException {}

  @Override
  public void updateAsciiStream(final String columnLabel, final InputStream x)
      throws SQLException {}

  @Override
  public void updateBinaryStream(final String columnLabel, final InputStream x)
      throws SQLException {}

  @Override
  public void updateCharacterStream(final String columnLabel, final Reader reader)
      throws SQLException {}

  @Override
  public void updateBlob(final int columnIndex, final InputStream inputStream)
      throws SQLException {}

  @Override
  public void updateBlob(final String columnLabel, final InputStream inputStream)
      throws SQLException {}

  @Override
  public void updateClob(final int columnIndex, final Reader reader) throws SQLException {}

  @Override
  public void updateClob(final String columnLabel, final Reader reader) throws SQLException {}

  @Override
  public void updateNClob(final int columnIndex, final Reader reader) throws SQLException {}

  @Override
  public void updateNClob(final String columnLabel, final Reader reader) throws SQLException {}

  // Typed getters: the requested type is ignored and null is returned, so no conversion
  // to T is ever attempted.
  @Override
  public <T> T getObject(final int columnIndex, final Class<T> type) throws SQLException {
    return null;
  }

  @Override
  public <T> T getObject(final String columnLabel, final Class<T> type) throws SQLException {
    return null;
  }

  // Returns null instead of throwing for any requested interface; consistent with
  // isWrapperFor() below, which always answers false.
  @Override
  public <T> T unwrap(final Class<T> iface) throws SQLException {
    return null;
  }

  @Override
  public boolean isWrapperFor(final Class<?> iface) throws SQLException {
    return false;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/records/ConverterTest.java
================================================
package com.snowflake.kafka.connector.records;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.internal.SnowflakeKafkaConnectorException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Decimal;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.header.ConnectHeaders;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.storage.SimpleHeaderConverter;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for the static conversion helpers of {@code KafkaRecordConverter}: {@code
 * convertToMap}, {@code convertKey} and {@code convertHeaders}.
 *
 * <p>Inputs are built either by round-tripping JSON through a schemaless Kafka Connect {@link
 * JsonConverter} or by constructing {@link Struct}/{@link ConnectHeaders} values directly.
 */
class ConverterTest {

  private static final ObjectMapper mapper = new ObjectMapper();

  @Test
  void testConnectJsonConverter_MapInt64() throws JsonProcessingException {
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, ?> config = Collections.singletonMap("schemas.enable", false);
    jsonConverter.configure(config, false);
    Map<String, Object> jsonMap = new HashMap<>();
    // Value will map to int64.
    jsonMap.put("test", Integer.MAX_VALUE);
    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData("test", mapper.writeValueAsBytes(jsonMap));
    Map<String, Object> result =
        KafkaRecordConverter.convertToMap(schemaAndValue.schema(), schemaAndValue.value());

    Map<String, Object> expected = new HashMap<>();
    expected.put("test", (long) Integer.MAX_VALUE);
    assertEquals(expected, result);
  }

  @Test
  void testConnectJsonConverter_MapBigDecimal() throws JsonProcessingException {
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, ?> config = Collections.singletonMap("schemas.enable", false);
    jsonConverter.configure(config, false);
    // Use a BigDecimal that fits within precision limits
    Map<String, Object> jsonMap = new HashMap<>();
    jsonMap.put("test", new BigDecimal("12345678901234567890"));
    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData("test", mapper.writeValueAsBytes(jsonMap));
    Map<String, Object> result =
        KafkaRecordConverter.convertToMap(schemaAndValue.schema(), schemaAndValue.value());

    // The value travels through JSON, which does not carry a BigDecimal type, so this test
    // deliberately asserts only that a non-null numeric value comes out — not its exact type
    // or magnitude.
    assertNotNull(result.get("test"));
    assertInstanceOf(
        Number.class,
        result.get("test"),
        "Expected Number but got: " + result.get("test").getClass());
  }

  @Test
  void testConvertMapWithNestedValues() throws JsonProcessingException {
    JsonConverter jsonConverter = new JsonConverter();
    Map<String, ?> config = Collections.singletonMap("schemas.enable", false);
    jsonConverter.configure(config, false);

    Map<String, Object> nestedMap = new HashMap<>();
    nestedMap.put("nested", "value");

    Map<String, Object> jsonMap = new HashMap<>();
    jsonMap.put("outer", nestedMap);
    jsonMap.put("simple", "text");

    SchemaAndValue schemaAndValue =
        jsonConverter.toConnectData("test", mapper.writeValueAsBytes(jsonMap));
    Map<String, Object> result =
        KafkaRecordConverter.convertToMap(schemaAndValue.schema(), schemaAndValue.value());

    assertEquals("text", result.get("simple"));
    assertInstanceOf(Map.class, result.get("outer"));
    @SuppressWarnings("unchecked")
    Map<String, Object> outerMap = (Map<String, Object>) result.get("outer");
    assertEquals("value", outerMap.get("nested"));
  }

  @Test
  void testConvertHeaders() {
    // Uses the imported ConnectHeaders, like the other header tests in this class.
    ConnectHeaders headers = new ConnectHeaders();
    headers.addString("stringHeader", "value");
    headers.addInt("intHeader", 42);
    headers.addBoolean("boolHeader", true);

    Map<String, String> result = KafkaRecordConverter.convertHeaders(headers);

    // Every header value is expected in its string form, whatever its original type.
    assertEquals("value", result.get("stringHeader"));
    assertEquals("42", result.get("intHeader"));
    assertEquals("true", result.get("boolHeader"));
  }

  @Test
  void testConvertKey() {
    // Test string key
    Object stringKeyResult = KafkaRecordConverter.convertKey(Schema.STRING_SCHEMA, "testKey");
    assertEquals("testKey", stringKeyResult);

    // Test int key
    Object intKeyResult = KafkaRecordConverter.convertKey(Schema.INT32_SCHEMA, 123);
    assertEquals(123, intKeyResult);

    // Test null key
    Object nullKeyResult = KafkaRecordConverter.convertKey(Schema.OPTIONAL_STRING_SCHEMA, null);
    assertNull(nullKeyResult);
  }

  @SuppressWarnings("resource")
  @Test
  void testConvertHeaders_WithSimpleHeaderConverter() {
    // Test that headers converted by SimpleHeaderConverter are properly handled
    // This covers the scenario where raw JSON header bytes are first converted by
    // SimpleHeaderConverter
    SimpleHeaderConverter headerConverter = new SimpleHeaderConverter();
    String rawHeader = "{\"f1\": \"1970-03-22T00:00:00.000Z\", \"f2\": true}";

    SchemaAndValue schemaAndValue =
        headerConverter.toConnectHeader(
            "test", "h1", rawHeader.getBytes(StandardCharsets.US_ASCII));

    // Whatever schema SimpleHeaderConverter infers, the header is re-added with it unchanged.
    ConnectHeaders headers = new ConnectHeaders();
    headers.add("h1", schemaAndValue);

    Map<String, String> result = KafkaRecordConverter.convertHeaders(headers);

    // The header value should contain the JSON structure as a string
    assertNotNull(result.get("h1"));
    assertTrue(result.get("h1").contains("f1"));
    assertTrue(result.get("h1").contains("f2"));
  }

  @Test
  void testConvertHeaders_WithTimestampLogicalType() {
    // Test headers with Timestamp logical type
    ConnectHeaders headers = new ConnectHeaders();
    java.util.Date timestampValue =
        new java.util.Date(80 * 24 * 60 * 60 * 1000L); // 80 days from epoch

    headers.add("timestampHeader", timestampValue, Timestamp.SCHEMA);
    headers.add("boolHeader", true, Schema.BOOLEAN_SCHEMA);

    Map<String, String> result = KafkaRecordConverter.convertHeaders(headers);

    // Timestamp is formatted as ISO-8601 with millisecond precision via ISO_DATE_TIME_FORMAT
    assertEquals("1970-03-22T00:00:00.000Z", result.get("timestampHeader"));
    assertEquals("true", result.get("boolHeader"));
  }

  @Test
  void testConvertHeaders_WithDateLogicalType() {
    // Test headers with Date logical type
    ConnectHeaders headers = new ConnectHeaders();
    // Create a date value (80 days from epoch = 1970-03-22)
    java.util.Date dateValue = new java.util.Date(80 * 24 * 60 * 60 * 1000L);

    headers.add("dateHeader", dateValue, Date.SCHEMA);

    Map<String, String> result = KafkaRecordConverter.convertHeaders(headers);

    // Date should be formatted as ISO date-time string
    assertEquals("1970-03-22T00:00:00.000Z", result.get("dateHeader"));
  }

  @Test
  void testConvertStructWithAllTypes() {
    // Test conversion of Struct with all supported types (equivalent to old RecordContentTest)
    Schema schema =
        SchemaBuilder.struct()
            .field("int8", SchemaBuilder.int8().defaultValue((byte) 2).doc("int8 field").build())
            .field("int16", Schema.INT16_SCHEMA)
            .field("int32", Schema.INT32_SCHEMA)
            .field("int64", Schema.INT64_SCHEMA)
            .field("float32", Schema.FLOAT32_SCHEMA)
            .field("float64", Schema.FLOAT64_SCHEMA)
            .field("boolean", Schema.BOOLEAN_SCHEMA)
            .field("string", Schema.STRING_SCHEMA)
            .field("bytes", Schema.BYTES_SCHEMA)
            .field("array", SchemaBuilder.array(Schema.STRING_SCHEMA).build())
            .field("map", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build())
            .field(
                "mapNonStringKeys",
                SchemaBuilder.map(Schema.INT32_SCHEMA, Schema.INT32_SCHEMA).build())
            .build();

    // Explicit charset so the byte fixture does not depend on the JVM's default encoding.
    Struct original =
        new Struct(schema)
            .put("int8", (byte) 12)
            .put("int16", (short) 12)
            .put("int32", 12)
            .put("int64", 12L)
            .put("float32", 12.2f)
            .put("float64", 12.2)
            .put("boolean", true)
            .put("string", "foo")
            .put("bytes", ByteBuffer.wrap("foo".getBytes(StandardCharsets.UTF_8)))
            .put("array", Arrays.asList("a", "b", "c"))
            .put("map", Collections.singletonMap("field", 1))
            .put("mapNonStringKeys", Collections.singletonMap(1, 1));

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, original);

    assertEquals((byte) 12, result.get("int8"));
    assertEquals((short) 12, result.get("int16"));
    assertEquals(12, result.get("int32"));
    assertEquals(12L, result.get("int64"));
    assertEquals(12.2f, result.get("float32"));
    assertEquals(12.2, result.get("float64"));
    assertEquals(true, result.get("boolean"));
    assertEquals("foo", result.get("string"));
    assertArrayEquals("foo".getBytes(StandardCharsets.UTF_8), (byte[]) result.get("bytes"));
    assertEquals(Arrays.asList("a", "b", "c"), result.get("array"));

    @SuppressWarnings("unchecked")
    Map<String, Object> mapResult = (Map<String, Object>) result.get("map");
    assertEquals(1, mapResult.get("field"));

    // Non-string keys are encoded as [[key, value], ...]
    @SuppressWarnings("unchecked")
    List<List<Object>> mapNonStringKeysResult = (List<List<Object>>) result.get("mapNonStringKeys");
    assertEquals(1, mapNonStringKeysResult.size());
    assertEquals(Arrays.asList(1, 1), mapNonStringKeysResult.get(0));
  }

  @Test
  void testConvertValue_WithDefaultValue() {
    // Test that default values are returned when struct field value is null
    Schema fieldSchema = SchemaBuilder.int32().optional().defaultValue(123).build();
    Schema schema = SchemaBuilder.struct().field("field", fieldSchema).build();

    Struct struct = new Struct(schema);
    struct.put("field", null);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);
    assertEquals(123, result.get("field"));
  }

  @Test
  void testConvertReadOnlyByteBuffer() {
    // Test conversion of read-only ByteBuffer
    // Explicit charset so the byte fixture does not depend on the JVM's default encoding.
    byte[] original = "bytes".getBytes(StandardCharsets.UTF_8);
    ByteBuffer buffer = ByteBuffer.wrap(original).asReadOnlyBuffer();

    Schema schema = SchemaBuilder.struct().field("bytesField", Schema.BYTES_SCHEMA).build();

    Struct struct = new Struct(schema).put("bytesField", buffer);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertArrayEquals(original, (byte[]) result.get("bytesField"));
  }

  @Test
  void testConvertToMap_WithInvalidInput_ThrowsException() {
    // Test that invalid inputs throw exceptions
    assertThrows(
        SnowflakeKafkaConnectorException.class,
        () -> KafkaRecordConverter.convertToMap(Schema.STRING_SCHEMA, "not a map or struct"));
  }

  @Test
  void testConvertKey_WithTypeMismatch_ThrowsException() {
    // Test that type mismatch throws exception
    assertThrows(
        SnowflakeKafkaConnectorException.class,
        () -> KafkaRecordConverter.convertKey(Schema.INT32_SCHEMA, "not an int"));
  }

  @Test
  void testConvertDecimal() {
    // Test Decimal logical type conversion
    Schema decimalSchema = Decimal.schema(2);
    BigDecimal value = new BigDecimal("123.45");

    Schema schema = SchemaBuilder.struct().field("decimal", decimalSchema).build();
    Struct struct = new Struct(schema).put("decimal", value);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertEquals(value, result.get("decimal"));
  }

  @Test
  void testConvertDecimal_ExceedsPrecision_ReturnsString() {
    // Test that BigDecimal exceeding max precision is converted to string
    // (the literal below has 39 digits).
    Schema decimalSchema = Decimal.schema(0);
    BigDecimal value = new BigDecimal("999999999999999999999999999999999999999");

    Schema schema = SchemaBuilder.struct().field("decimal", decimalSchema).build();
    Struct struct = new Struct(schema).put("decimal", value);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertEquals(value.toString(), result.get("decimal"));
  }

  @Test
  void testConvertTime() {
    // Test Time logical type conversion
    // Use a fixed time that will work regardless of local timezone
    // The Time logical type represents milliseconds since midnight, formatted with HH:mm:ss.SSSXXX
    java.util.Date timeValue = new java.util.Date(0L); // midnight UTC

    Schema schema = SchemaBuilder.struct().field("time", Time.SCHEMA).build();
    Struct struct = new Struct(schema).put("time", timeValue);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertNotNull(result.get("time"));
    // Only the presence of ':' separators is checked, so the assertion holds for any zone
    // offset the formatter might apply.
    String timeResult = result.get("time").toString();
    assertTrue(timeResult.contains(":"), "Time should contain colons: " + timeResult);
  }

  @Test
  void testConvertFloatSpecialValues() {
    // Test Float special values (NaN, Infinity)
    Schema schema =
        SchemaBuilder.struct()
            .field("nan", Schema.FLOAT32_SCHEMA)
            .field("posInf", Schema.FLOAT32_SCHEMA)
            .field("negInf", Schema.FLOAT32_SCHEMA)
            .build();

    Struct struct =
        new Struct(schema)
            .put("nan", Float.NaN)
            .put("posInf", Float.POSITIVE_INFINITY)
            .put("negInf", Float.NEGATIVE_INFINITY);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertEquals("NaN", result.get("nan"));
    assertEquals("Inf", result.get("posInf"));
    assertEquals("-Inf", result.get("negInf"));
  }

  @Test
  void testConvertDoubleSpecialValues() {
    // Test Double special values (NaN, Infinity)
    Schema schema =
        SchemaBuilder.struct()
            .field("nan", Schema.FLOAT64_SCHEMA)
            .field("posInf", Schema.FLOAT64_SCHEMA)
            .field("negInf", Schema.FLOAT64_SCHEMA)
            .build();

    Struct struct =
        new Struct(schema)
            .put("nan", Double.NaN)
            .put("posInf", Double.POSITIVE_INFINITY)
            .put("negInf", Double.NEGATIVE_INFINITY);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertEquals("NaN", result.get("nan"));
    assertEquals("Inf", result.get("posInf"));
    assertEquals("-Inf", result.get("negInf"));
  }

  /**
   * Regression test for issue #1334.
   *
   * <p>Timestamps close to the Unix epoch (roughly within ±1 year of 1970-01-01, i.e. an
   * epoch-millisecond magnitude below about 3.15e10 — 11 digits or fewer) were corrupted when
   * ingested into Snowflake. The old code serialised them as an epoch-millisecond string (e.g.
   * "-23155200000"). Snowflake's integer-stored date auto-detection mistook that 11-digit value
   * for epoch <em>seconds</em>, shifting the stored timestamp by three orders of magnitude (~1236
   * AD instead of 1969).
   *
   * <p>The fix formats the {@code java.util.Date} as an ISO-8601 string, so Snowflake never
   * triggers its numeric epoch-length heuristic, and the value is safe for Jackson serialization in
   * all downstream paths (including legacy mode and schema evolution).
   */
  @Test
  void testConvertToMap_TimestampNearEpoch_ReturnsIsoString() {
    // 1969-04-08 is 268 days before the Unix epoch; its epoch-ms value is -23155200000
    // (268 * 86400000), an 11-digit number that Snowflake's auto-detection misreads as epoch
    // seconds.
    java.util.Date nearEpochDate =
        new java.util.Date(Instant.parse("1969-04-08T00:00:00Z").toEpochMilli());

    Schema schema = SchemaBuilder.struct().field("ts", Timestamp.SCHEMA).build();
    Struct struct = new Struct(schema).put("ts", nearEpochDate);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    // Must be an ISO-8601 string — not the epoch-ms string that triggers Snowflake's
    // auto-detection bug, and not a raw Instant that breaks plain Jackson ObjectMapper.
    assertInstanceOf(
        String.class,
        result.get("ts"),
        "Timestamp near epoch must be an ISO-8601 string. Got: " + result.get("ts"));
    assertEquals("1969-04-08T00:00:00.000Z", result.get("ts"));
  }

  @Test
  void testConvertNullValue() {
    // Test null value handling with optional schema
    Schema schema = SchemaBuilder.struct().field("optional", Schema.OPTIONAL_STRING_SCHEMA).build();

    Struct struct = new Struct(schema).put("optional", null);

    Map<String, Object> result = KafkaRecordConverter.convertToMap(schema, struct);

    assertNull(result.get("optional"));
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/records/SnowflakeSinkRecordTest.java
================================================
package com.snowflake.kafka.connector.records;

import static com.snowflake.kafka.connector.Utils.TABLE_COLUMN_METADATA;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.snowflake.kafka.connector.builder.SinkRecordBuilder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.header.ConnectHeaders;
import org.apache.kafka.connect.header.Headers;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/**
 * Unit tests for {@code SnowflakeSinkRecord.from(...)}.
 *
 * <p>The tests cover four areas: record state classification (valid / broken / tombstone),
 * metadata field selection driven by {@code SnowflakeMetadataConfig}, legacy (non-schematized)
 * wrapping of the payload under {@code RECORD_CONTENT}, and column-name normalization.
 *
 * <p>NOTE(review): judging by the test names and in-line comments, the two trailing boolean
 * arguments of {@code from(...)} are "enable schematization" and "enable column-name
 * normalization", in that order - confirm against {@code SnowflakeSinkRecord}.
 */
class SnowflakeSinkRecordTest {

  private static final String TOPIC = "test";
  private static final int PARTITION = 0;

  // Default metadata configuration, shared by tests that do not care about metadata selection.
  private final SnowflakeMetadataConfig metadataConfig = new SnowflakeMetadataConfig();
  // Schemaless JSON converter used to turn JSON literals into Connect data.
  private final JsonConverter jsonConverter = createJsonConverter();

  @Test
  void testValidRecord_WithJsonMap() {
    // Test creating a valid record from JSON map
    SchemaAndValue schemaAndValue = toConnectData("{\"name\": \"test\", \"value\": 123}");

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    assertTrue(record.isValid());
    assertFalse(record.isBroken());
    assertFalse(record.isTombstone());
    assertNull(record.getBrokenReason());
    assertEquals(SnowflakeSinkRecord.RecordState.VALID, record.getState());

    Map<String, Object> content = record.getContent();
    assertEquals("test", content.get("name"));
    assertEquals(123L, content.get("value"));
  }

  @Test
  void testValidRecord_WithStruct() {
    // Test creating a valid record from Struct with multiple types
    Schema schema =
        SchemaBuilder.struct()
            .field("int8", Schema.INT8_SCHEMA)
            .field("int16", Schema.INT16_SCHEMA)
            .field("int32", Schema.INT32_SCHEMA)
            .field("int64", Schema.INT64_SCHEMA)
            .field("float32", Schema.FLOAT32_SCHEMA)
            .field("float64", Schema.FLOAT64_SCHEMA)
            .field("boolean", Schema.BOOLEAN_SCHEMA)
            .field("string", Schema.STRING_SCHEMA)
            .field("bytes", Schema.BYTES_SCHEMA)
            .field("array", SchemaBuilder.array(Schema.STRING_SCHEMA).build())
            .field("map", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.INT32_SCHEMA).build())
            .build();

    // Explicit charset so the fixture does not depend on the platform default encoding.
    Struct struct =
        new Struct(schema)
            .put("int8", (byte) 12)
            .put("int16", (short) 12)
            .put("int32", 12)
            .put("int64", 12L)
            .put("float32", 12.2f)
            .put("float64", 12.2)
            .put("boolean", true)
            .put("string", "foo")
            .put("bytes", ByteBuffer.wrap("foo".getBytes(StandardCharsets.UTF_8)))
            .put("array", Arrays.asList("a", "b", "c"))
            .put("map", Collections.singletonMap("field", 1));

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(schema)
            .withValue(struct)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    assertTrue(record.isValid());
    assertFalse(record.isBroken());
    assertFalse(record.isTombstone());

    Map<String, Object> content = record.getContent();
    assertEquals((byte) 12, content.get("int8"));
    assertEquals((short) 12, content.get("int16"));
    assertEquals(12, content.get("int32"));
    assertEquals(12L, content.get("int64"));
    assertEquals(12.2f, content.get("float32"));
    assertEquals(12.2, content.get("float64"));
    assertEquals(true, content.get("boolean"));
    assertEquals("foo", content.get("string"));
    assertArrayEquals("foo".getBytes(StandardCharsets.UTF_8), (byte[]) content.get("bytes"));
    assertEquals(Arrays.asList("a", "b", "c"), content.get("array"));
    // NOTE(review): the "map" field is populated above but never asserted; add an assertion once
    // the converted representation of MAP values is confirmed.
  }

  @Test
  void testTombstoneRecord() {
    // Test creating a tombstone record (null value)
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(null)
            .withValue(null)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    assertFalse(record.isValid());
    assertFalse(record.isBroken());
    assertTrue(record.isTombstone());
    assertNull(record.getBrokenReason());
    assertEquals(SnowflakeSinkRecord.RecordState.TOMBSTONE, record.getState());
    assertTrue(record.getContent().isEmpty());
  }

  @Test
  void testBrokenRecord_WithInvalidKeySchema() {
    // Test creating a broken record when key doesn't match schema
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withKeySchema(Schema.INT32_SCHEMA)
            .withKey("not an int") // String doesn't match INT32_SCHEMA
            .withValueSchema(Schema.STRING_SCHEMA)
            .withValue("{}")
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    assertFalse(record.isValid());
    assertTrue(record.isBroken());
    assertFalse(record.isTombstone());
    assertNotNull(record.getBrokenReason());
    assertEquals(SnowflakeSinkRecord.RecordState.BROKEN, record.getState());
  }

  @Test
  void testBrokenRecord_WithInvalidValue() {
    // Test creating a broken record when value cannot be converted
    // Using a String value with STRING_SCHEMA but convertToMap expects Map or Struct
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(Schema.STRING_SCHEMA)
            .withValue("just a plain string")
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    // Record should be broken because convertToMap cannot handle plain String
    assertTrue(record.isBroken());
    assertFalse(record.isValid());
    assertFalse(record.isTombstone());
    assertNotNull(record.getBrokenReason());
    assertEquals(SnowflakeSinkRecord.RecordState.BROKEN, record.getState());
  }

  @Test
  void testGetContentWithMetadata_WhenIncludeMetadataTrue() {
    // Test that metadata is included when flag is true
    SnowflakeSinkRecord record =
        createRecordFromJson("{\"name\": \"test\"}", createMetadataConfigWithAll());

    Map<String, Object> contentWithMetadata = record.getContentWithMetadata(true);

    assertNotNull(contentWithMetadata.get(TABLE_COLUMN_METADATA));
    assertEquals("test", contentWithMetadata.get("name"));
  }

  @Test
  void testGetContentWithMetadata_WhenIncludeMetadataFalse() {
    // Test that metadata is NOT included when flag is false
    SnowflakeSinkRecord record = createRecordFromJson("{\"name\": \"test\"}", metadataConfig);

    Map<String, Object> contentWithMetadata = record.getContentWithMetadata(false);

    assertFalse(contentWithMetadata.containsKey(TABLE_COLUMN_METADATA));
    assertEquals("test", contentWithMetadata.get("name"));
  }

  @Test
  void testMetadataContainsKey() {
    // Test that metadata contains the key
    SchemaAndValue schemaAndValue = toConnectData("{\"name\": \"test\"}");

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withKeySchema(Schema.STRING_SCHEMA)
            .withKey("myKey")
            .withSchemaAndValue(schemaAndValue)
            .build();

    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, createMetadataConfigWithAll(), true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertEquals("myKey", metadata.get("key"));
  }

  @Test
  void testFullMetadataFields() {
    // Test that all metadata fields are present when configured
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.all", "true");
    config.put("snowflake.metadata.createtime", "true");
    config.put("snowflake.metadata.topic", "true");
    config.put("snowflake.metadata.offset.and.partition", "true");
    config.put("snowflake.streaming.metadata.connectorPushTime", "true");
    SnowflakeMetadataConfig fullMetadataConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    long createTime = 1234567890L;
    long offset = 10L;

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withKeySchema(Schema.STRING_SCHEMA)
            .withKey("testKey")
            .withSchemaAndValue(schemaAndValue)
            .withOffset(offset)
            .withTimestamp(createTime, TimestampType.CREATE_TIME)
            .build();

    // A fixed push time is injected so the expected metadata value is deterministic.
    Instant connectorPushTime = Instant.ofEpochMilli(9876543210L);
    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, fullMetadataConfig, connectorPushTime, true, false);

    Map<String, Object> metadata = record.getMetadata();

    // Verify all metadata fields
    assertEquals(TOPIC, metadata.get("topic"));
    assertEquals(offset, metadata.get("offset"));
    assertEquals(PARTITION, metadata.get("partition"));
    assertEquals("testKey", metadata.get("key"));
    assertEquals(createTime, metadata.get("CreateTime"));
    assertEquals(connectorPushTime.toEpochMilli(), metadata.get("SnowflakeConnectorPushTime"));
  }

  @ParameterizedTest(name = "timestamp type {0} should produce metadata key {1}")
  @MethodSource("timestampTypeTestCases")
  void testMetadataWithTimestampType(TimestampType timestampType, String expectedMetadataKey) {
    // The metadata key under which the record timestamp is stored depends on the timestamp type.
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.createtime", "true");
    SnowflakeMetadataConfig timestampConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");
    long timestamp = 1609459200000L; // 2021-01-01 00:00:00 UTC

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .withTimestamp(timestamp, timestampType)
            .build();

    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, timestampConfig, true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertEquals(timestamp, metadata.get(expectedMetadataKey));
  }

  @Test
  void testMetadataWithHeaders() {
    // Test metadata includes headers with various types
    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    Headers headers = new ConnectHeaders();
    headers.addString("stringHeader", "testHeaderValue");
    headers.addInt("intHeader", 42);
    headers.addBoolean("boolHeader", true);

    SinkRecord kafkaRecord = createSinkRecordWithHeaders(schemaAndValue, headers, "key");
    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, createMetadataConfigWithAll(), true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertNotNull(metadata.get("headers"));

    // Non-string header values are expected in their string form.
    @SuppressWarnings("unchecked")
    Map<String, String> headersMap = (Map<String, String>) metadata.get("headers");
    assertEquals("testHeaderValue", headersMap.get("stringHeader"));
    assertEquals("42", headersMap.get("intHeader"));
    assertEquals("true", headersMap.get("boolHeader"));
  }

  @Test
  void testMetadataWithComplexHeaders() {
    // Test metadata with headers containing JSON-like complex values
    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    Headers headers = new ConnectHeaders();
    headers.addString("objectAsJsonStringHeader", "{\"key1\":\"value1\",\"key2\":\"value2\"}");
    headers.addString("header2", "testheaderstring");

    SinkRecord kafkaRecord = createSinkRecordWithHeaders(schemaAndValue, headers, "key");
    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, createMetadataConfigWithAll(), true, false);

    Map<String, Object> metadata = record.getMetadata();
    // Fail with an assertion (not an NPE) if the headers entry is missing.
    assertNotNull(metadata.get("headers"));

    @SuppressWarnings("unchecked")
    Map<String, String> headersMap = (Map<String, String>) metadata.get("headers");
    assertEquals(
        "{\"key1\":\"value1\",\"key2\":\"value2\"}", headersMap.get("objectAsJsonStringHeader"));
    assertEquals("testheaderstring", headersMap.get("header2"));
  }

  @Test
  void testContentWithArray() {
    // A JSON array value is exposed as a List with the same elements in order.
    SnowflakeSinkRecord record =
        createRecordFromJson("{\"key\": [\"a\", \"b\", \"c\"]}", metadataConfig);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();

    @SuppressWarnings("unchecked")
    List<String> arrayValue = (List<String>) content.get("key");
    assertEquals(Arrays.asList("a", "b", "c"), arrayValue);
  }

  @Test
  void testContentWithEmptyArray() {
    // An empty JSON array is exposed as an empty (non-null) List.
    SnowflakeSinkRecord record = createRecordFromJson("{\"key\": []}", metadataConfig);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();

    @SuppressWarnings("unchecked")
    List<Object> arrayValue = (List<Object>) content.get("key");
    // Fail with an assertion (not an NPE) if the key is missing.
    assertNotNull(arrayValue);
    assertTrue(arrayValue.isEmpty());
  }

  @Test
  void testEmptyContentWithMetadata() {
    SnowflakeSinkRecord record = createRecordFromJson("{}", createMetadataConfigWithAll());

    assertTrue(record.isValid());
    // Content should be empty (no data fields)
    assertTrue(record.getContent().isEmpty());

    // But metadata should still be present
    Map<String, Object> contentWithMetadata = record.getContentWithMetadata(true);
    assertNotNull(contentWithMetadata.get(TABLE_COLUMN_METADATA));
  }

  @Test
  void testContentWithKeyValue() {
    // A data field literally named "key" must be visible both with and without metadata.
    SnowflakeSinkRecord record =
        createRecordFromJson("{\"key\": \"value\"}", createMetadataConfigWithAll());

    assertTrue(record.isValid());

    Map<String, Object> content = record.getContent();
    assertEquals("value", content.get("key"));

    Map<String, Object> contentWithMetadata = record.getContentWithMetadata(true);
    assertEquals("value", contentWithMetadata.get("key"));
    assertNotNull(contentWithMetadata.get(TABLE_COLUMN_METADATA));
  }

  @Test
  void testConnectorPushTime_WhenDisabled_NotPresent() {
    // Test that SnowflakeConnectorPushTime is NOT present when disabled
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.all", "true");
    config.put("snowflake.streaming.metadata.connectorPushTime", "false");
    SnowflakeMetadataConfig disabledPushTimeConfig = new SnowflakeMetadataConfig(config);

    SnowflakeSinkRecord record =
        createRecordFromJson("{\"data\": \"value\"}", disabledPushTimeConfig);

    Map<String, Object> metadata = record.getMetadata();
    assertFalse(metadata.containsKey("SnowflakeConnectorPushTime"));
  }

  @Test
  void testMetadata_WhenCreateTimeDisabled_NotPresent() {
    // Test that CreateTime is NOT present when snowflake.metadata.createtime=false
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.createtime", "false");
    config.put("snowflake.metadata.topic", "true");
    config.put("snowflake.metadata.offset.and.partition", "true");
    SnowflakeMetadataConfig noCreateTimeConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    long createTime = 1234567890L;

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .withTimestamp(createTime, TimestampType.CREATE_TIME)
            .build();

    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, noCreateTimeConfig, true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertFalse(metadata.containsKey("CreateTime"));
    assertTrue(metadata.containsKey("topic"));
    assertTrue(metadata.containsKey("offset"));
    assertTrue(metadata.containsKey("partition"));
  }

  @Test
  void testMetadata_WhenTopicDisabled_NotPresent() {
    // Test that topic is NOT present when snowflake.metadata.topic=false
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.createtime", "true");
    config.put("snowflake.metadata.topic", "false");
    config.put("snowflake.metadata.offset.and.partition", "true");
    SnowflakeMetadataConfig noTopicConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .withTimestamp(System.currentTimeMillis(), TimestampType.CREATE_TIME)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, noTopicConfig, true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertFalse(metadata.containsKey("topic"));
    assertTrue(metadata.containsKey("CreateTime"));
    assertTrue(metadata.containsKey("offset"));
    assertTrue(metadata.containsKey("partition"));
  }

  @Test
  void testMetadata_WhenOffsetAndPartitionDisabled_NotPresent() {
    // Test that offset/partition are NOT present when snowflake.metadata.offset.and.partition=false
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.createtime", "true");
    config.put("snowflake.metadata.topic", "true");
    config.put("snowflake.metadata.offset.and.partition", "false");
    SnowflakeMetadataConfig noOffsetPartitionConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .withTimestamp(System.currentTimeMillis(), TimestampType.CREATE_TIME)
            .build();

    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, noOffsetPartitionConfig, true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertFalse(metadata.containsKey("offset"));
    assertFalse(metadata.containsKey("partition"));
    assertTrue(metadata.containsKey("topic"));
    assertTrue(metadata.containsKey("CreateTime"));
  }

  @Test
  void testMetadata_WhenAllFieldsDisabled_EmptyMetadata() {
    // Test that when all metadata fields are disabled, metadata has minimal content
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.createtime", "false");
    config.put("snowflake.metadata.topic", "false");
    config.put("snowflake.metadata.offset.and.partition", "false");
    config.put("snowflake.streaming.metadata.connectorPushTime", "false");
    SnowflakeMetadataConfig allDisabledConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    // Create SinkRecord directly without key to avoid SinkRecordBuilder's default key
    SinkRecord kafkaRecord =
        new SinkRecord(
            TOPIC,
            PARTITION,
            null, // keySchema
            null, // key
            schemaAndValue.schema(),
            schemaAndValue.value(),
            0,
            System.currentTimeMillis(),
            TimestampType.CREATE_TIME);

    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, allDisabledConfig, true, false);

    Map<String, Object> metadata = record.getMetadata();
    assertFalse(metadata.containsKey("offset"));
    assertFalse(metadata.containsKey("partition"));
    assertFalse(metadata.containsKey("topic"));
    assertFalse(metadata.containsKey("CreateTime"));
    assertFalse(metadata.containsKey("SnowflakeConnectorPushTime"));
    assertFalse(metadata.containsKey("key"));
  }

  @Test
  void testMetadata_WhenAllFieldsExplicitlyDisabled_ContentWithMetadataHasNoMetadataColumn() {
    // Test that when ALL individual metadata fields are disabled AND there's no key/headers,
    // the metadata map is empty and not added to content
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.all", "false");
    config.put("snowflake.metadata.createtime", "false");
    config.put("snowflake.metadata.topic", "false");
    config.put("snowflake.metadata.offset.and.partition", "false");
    config.put("snowflake.streaming.metadata.connectorPushTime", "false");
    SnowflakeMetadataConfig allDisabledConfig = new SnowflakeMetadataConfig(config);

    SchemaAndValue schemaAndValue = toConnectData("{\"data\": \"value\"}");

    // Create SinkRecord without key and without timestamp to ensure metadata is truly empty
    SinkRecord kafkaRecord =
        new SinkRecord(
            TOPIC,
            PARTITION,
            null, // keySchema
            null, // key
            schemaAndValue.schema(),
            schemaAndValue.value(),
            0,
            null, // no timestamp
            TimestampType.NO_TIMESTAMP_TYPE);

    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, allDisabledConfig, true, false);

    // When ALL individual metadata fields are disabled and no key present, metadata should be empty
    assertTrue(record.getMetadata().isEmpty());

    // Even when includeAllMetadata is true, no metadata column should be added because metadata is
    // empty
    Map<String, Object> contentWithMetadata = record.getContentWithMetadata(true);
    assertFalse(contentWithMetadata.containsKey(TABLE_COLUMN_METADATA));
    assertEquals("value", contentWithMetadata.get("data"));
  }

  @Test
  void testTimestamp_WhenNoTimestampType_NotPresent() {
    // Test that timestamp is NOT present when TimestampType is NO_TIMESTAMP_TYPE
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.createtime", "true");
    SnowflakeMetadataConfig timestampConfig = new SnowflakeMetadataConfig(config);

    // Create record without timestamp (NO_TIMESTAMP_TYPE is default in builder)
    SnowflakeSinkRecord record = createRecordFromJson("{\"data\": \"value\"}", timestampConfig);

    Map<String, Object> metadata = record.getMetadata();
    assertFalse(metadata.containsKey("CreateTime"));
    assertFalse(metadata.containsKey("LogAppendTime"));
  }

  @Test
  void testLegacyMode_WithTimestampStruct_JacksonCanSerialize() {
    // Reproducer for PR review comment: when enableSchematization=false,
    // wrapAsRecordContent() serializes via plain ObjectMapper (no JavaTimeModule).
    // If convertToMap returns a raw Instant, MAPPER.writeValueAsString() will throw
    // InvalidDefinitionException.
    java.util.Date nearEpochDate =
        new java.util.Date(Instant.parse("1969-04-08T00:00:00Z").toEpochMilli());

    Schema schema =
        SchemaBuilder.struct().field("ts", org.apache.kafka.connect.data.Timestamp.SCHEMA).build();
    Struct struct = new Struct(schema).put("ts", nearEpochDate);

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(schema)
            .withValue(struct)
            .build();

    // enableSchematization=false triggers wrapAsRecordContent → Jackson serialization
    SnowflakeSinkRecord record =
        SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, false);

    // Must not be broken — Jackson must be able to serialize the converted value
    assertFalse(
        record.isBroken(), "Record should not be broken but was: " + record.getBrokenReason());
    assertTrue(record.isValid());
    assertTrue(record.getContent().containsKey("RECORD_CONTENT"));
  }

  @Test
  void testLegacyMode_WithJsonMap_WrapsInRecordContent() {
    // In legacy mode the whole JSON object is nested under the single RECORD_CONTENT key.
    SchemaAndValue schemaAndValue = toConnectData("{\"name\": \"test\", \"value\": 123}");
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();
    assertTrue(content.containsKey("RECORD_CONTENT"));
    assertEquals(1, content.size());
    @SuppressWarnings("unchecked")
    Map<String, Object> recordContent = (Map<String, Object>) content.get("RECORD_CONTENT");
    assertEquals("test", recordContent.get("name"));
    assertEquals(123L, recordContent.get("value"));
  }

  @Test
  void testLegacyMode_WithPlainString_WrapsInRecordContent() {
    // In legacy mode a plain string value is valid and stored as-is under RECORD_CONTENT.
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(Schema.STRING_SCHEMA)
            .withValue("just a plain string")
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();
    assertTrue(content.containsKey("RECORD_CONTENT"));
    assertEquals("just a plain string", content.get("RECORD_CONTENT"));
  }

  @Test
  void testLegacyMode_WithByteArray_WrapsInRecordContent() {
    // In legacy mode a raw byte[] value is valid and stored unchanged under RECORD_CONTENT.
    byte[] bytes = "hello".getBytes(StandardCharsets.UTF_8);
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(Schema.BYTES_SCHEMA)
            .withValue(bytes)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();
    assertTrue(content.containsKey("RECORD_CONTENT"));
    assertArrayEquals(bytes, (byte[]) content.get("RECORD_CONTENT"));
  }

  @Test
  void testLegacyMode_TombstoneStillWorks() {
    // A null value is still classified as a tombstone in legacy mode.
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(null)
            .withValue(null)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, false, true);

    assertTrue(record.isTombstone());
  }

  @Test
  void testSchematizedMode_WithPlainString_StillBroken() {
    // Counterpart of the legacy plain-string test: with schematization on, the record is broken.
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(Schema.STRING_SCHEMA)
            .withValue("just a plain string")
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    assertTrue(record.isBroken());
  }

  @Test
  void testNormalizationEnabled_UppercasesColumnNames() {
    SchemaAndValue schemaAndValue = toConnectData("{\"city\": \"Hsinchu\", \"Country\": \"TW\"}");
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, true);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();
    // Unquoted identifiers are uppercased
    assertTrue(content.containsKey("CITY"));
    assertTrue(content.containsKey("COUNTRY"));
    assertFalse(content.containsKey("city"));
    assertFalse(content.containsKey("Country"));
  }

  @Test
  void testNormalizationDisabled_PreservesColumnNames() {
    SchemaAndValue schemaAndValue = toConnectData("{\"city\": \"Hsinchu\", \"Country\": \"TW\"}");
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, false);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();
    // Column names preserved as-is
    assertTrue(content.containsKey("city"));
    assertTrue(content.containsKey("Country"));
    assertFalse(content.containsKey("CITY"));
    assertFalse(content.containsKey("COUNTRY"));
  }

  @Test
  void testNormalizationEnabled_QuotedIdentifierPreservesCase() {
    // Quoted SQL identifiers strip quotes and preserve case
    Schema schema =
        SchemaBuilder.struct()
            .field("\"MyCol\"", Schema.STRING_SCHEMA)
            .field("simple", Schema.STRING_SCHEMA)
            .build();
    Struct struct = new Struct(schema).put("\"MyCol\"", "value1").put("simple", "value2");

    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withValueSchema(schema)
            .withValue(struct)
            .build();

    SnowflakeSinkRecord record = SnowflakeSinkRecord.from(kafkaRecord, metadataConfig, true, true);

    assertTrue(record.isValid());
    Map<String, Object> content = record.getContent();
    // Quoted "MyCol" → strips quotes → MyCol (case preserved)
    assertTrue(content.containsKey("MyCol"));
    // Unquoted simple → SIMPLE (uppercased)
    assertTrue(content.containsKey("SIMPLE"));

    // Schema field names should also be normalized
    Schema normalizedSchema = record.getSchema();
    assertNotNull(normalizedSchema);
    assertNotNull(normalizedSchema.field("MyCol"));
    assertNotNull(normalizedSchema.field("SIMPLE"));
    assertNull(normalizedSchema.field("\"MyCol\""));
    assertNull(normalizedSchema.field("simple"));
  }

  /** Builds a value-side {@link JsonConverter} configured with {@code schemas.enable=false}. */
  private static JsonConverter createJsonConverter() {
    JsonConverter converter = new JsonConverter();
    converter.configure(Collections.singletonMap("schemas.enable", false), false);
    return converter;
  }

  /** Argument source pairing each timestamp type with the metadata key expected for it. */
  private static Stream<Arguments> timestampTypeTestCases() {
    return Stream.of(
        Arguments.of(TimestampType.CREATE_TIME, "CreateTime"),
        Arguments.of(TimestampType.LOG_APPEND_TIME, "LogAppendTime"));
  }

  /** Converts a JSON literal into Connect data via the shared schemaless converter. */
  private SchemaAndValue toConnectData(String jsonPayload) {
    return jsonConverter.toConnectData(TOPIC, jsonPayload.getBytes(StandardCharsets.UTF_8));
  }

  /** Creates a metadata config with only {@code snowflake.metadata.all=true} set. */
  private SnowflakeMetadataConfig createMetadataConfigWithAll() {
    Map<String, String> config = new HashMap<>();
    config.put("snowflake.metadata.all", "true");
    return new SnowflakeMetadataConfig(config);
  }

  /**
   * Builds a {@code SnowflakeSinkRecord} from a JSON literal using the given metadata config,
   * passing {@code true, false} as the trailing flags of {@code from(...)}.
   */
  private SnowflakeSinkRecord createRecordFromJson(String json, SnowflakeMetadataConfig config) {
    SchemaAndValue schemaAndValue = toConnectData(json);
    SinkRecord kafkaRecord =
        SinkRecordBuilder.forTopicPartition(TOPIC, PARTITION)
            .withSchemaAndValue(schemaAndValue)
            .build();
    return SnowflakeSinkRecord.from(kafkaRecord, config, true, false);
  }

  /**
   * Creates a {@link SinkRecord} at offset 0 with a string key, no timestamp, and the supplied
   * headers.
   */
  private SinkRecord createSinkRecordWithHeaders(
      SchemaAndValue schemaAndValue, Headers headers, String key) {
    return new SinkRecord(
        TOPIC,
        PARTITION,
        Schema.STRING_SCHEMA,
        key,
        schemaAndValue.schema(),
        schemaAndValue.value(),
        0,
        null,
        TimestampType.NO_TIMESTAMP_TYPE,
        headers);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/BaseIcebergIT.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

import static com.snowflake.kafka.connector.internal.TestUtils.executeQueryAndCollectResult;
import static com.snowflake.kafka.connector.internal.TestUtils.executeQueryWithParameter;

import com.snowflake.kafka.connector.internal.SnowflakeConnectionService;
import com.snowflake.kafka.connector.internal.TestUtils;
import java.sql.ResultSet;
import java.util.function.Function;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;

/**
 * Base class for Iceberg integration tests. It owns one shared {@link SnowflakeConnectionService}
 * per test class and offers helpers to create, drop and query Iceberg tables through it.
 */
public class BaseIcebergIT {

  /** Connection shared by all tests of the class; assigned in {@link #setup()}. */
  protected static SnowflakeConnectionService snowflakeDatabase;

  /** Opens the shared connection before any test of the class runs. */
  @BeforeAll
  public static void setup() {
    snowflakeDatabase = TestUtils.getConnectionServiceWithEncryptedKey();
  }

  /**
   * Closes the shared connection once all tests of the class have run.
   *
   * <p>The field is still {@code null} if {@link #setup()} failed before assigning it; the guard
   * keeps such a failure from being followed by a {@link NullPointerException} here.
   */
  @AfterAll
  public static void teardown() {
    if (snowflakeDatabase != null) {
      snowflakeDatabase.close();
    }
  }

  /**
   * Creates (or replaces) an Iceberg table with the given column definitions.
   *
   * @param tableName table name, passed as the statement parameter for {@code identifier(?)}
   * @param columnClause column definitions, concatenated verbatim into the DDL
   * @param icebergVersion format version; rendered as {@code ordinal() + 1}, so the numeric value
   *     depends on the declaration order of {@link IcebergVersion}
   */
  protected static void createIcebergTableWithColumnClause(
      String tableName, String columnClause, IcebergVersion icebergVersion) {
    String query =
        "create or replace iceberg table identifier(?) ("
            + columnClause
            + ") "
            + "external_volume = 'test_exvol' "
            + "catalog = 'SNOWFLAKE' "
            + "base_location = 'it' iceberg_version = "
            + (icebergVersion.ordinal() + 1)
            + ";";
    doExecuteQueryWithParameter(query, tableName);
  }

  /** Runs {@code query} on the shared connection with {@code tableName} as its parameter. */
  private static void doExecuteQueryWithParameter(String query, String tableName) {
    executeQueryWithParameter(snowflakeDatabase.getConnection(), query, tableName);
  }

  /**
   * Drops the Iceberg table if it exists.
   *
   * @param tableName table name, passed as the statement parameter for {@code identifier(?)}
   */
  protected static void dropIcebergTable(String tableName) {
    String query = "drop iceberg table if exists identifier(?)";
    doExecuteQueryWithParameter(query, tableName);
  }

  /**
   * Runs a query on the shared connection and maps its {@link ResultSet} with the given collector.
   *
   * @param tableName table name forwarded to {@code TestUtils.executeQueryAndCollectResult}
   * @param query SQL text to execute
   * @param resultCollector converts the result set into the returned value
   * @param <T> type produced by the collector
   * @return whatever {@code TestUtils.executeQueryAndCollectResult} returns for the collector
   */
  protected static <T> T select(
      String tableName, String query, Function<ResultSet, T> resultCollector) {
    return executeQueryAndCollectResult(
        snowflakeDatabase.getConnection(), query, tableName, resultCollector);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergIngestionIT.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

import static com.snowflake.kafka.connector.internal.TestUtils.getConnectorConfigurationForStreaming;

import com.snowflake.kafka.connector.ConnectorConfigTools;
import com.snowflake.kafka.connector.config.SinkTaskConfig;
import com.snowflake.kafka.connector.dlq.InMemoryKafkaRecordErrorReporter;
import com.snowflake.kafka.connector.internal.SnowflakeSinkService;
import com.snowflake.kafka.connector.internal.TestUtils;
import com.snowflake.kafka.connector.internal.streaming.InMemorySinkTaskContext;
import com.snowflake.kafka.connector.internal.streaming.StreamingSinkServiceBuilder;
import com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord;
import com.snowflake.kafka.connector.streaming.iceberg.sql.RecordWithMetadata;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.header.ConnectHeaders;
import org.apache.kafka.connect.header.Headers;
import org.apache.kafka.connect.json.JsonConverter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;

/**
 * Shared fixture for Iceberg ingestion integration tests. Each test gets a randomly named table
 * (also used as the topic name), a streaming sink service wired to an in-memory error reporter,
 * and helpers to build Kafka records and read the ingested rows back.
 */
public abstract class IcebergIngestionIT extends BaseIcebergIT {

  private static final int PARTITION = 0;

  /** Selects every row of the table bound to {@code identifier(?)}, ordered by Kafka offset. */
  private static final String SELECT_ALL_SORT_BY_OFFSET =
      "WITH extracted_data AS ("
          + "SELECT *, RECORD_METADATA:\"offset\"::number AS offset_extracted "
          + "FROM identifier(?) "
          + ") "
          + "SELECT * FROM extracted_data "
          + "ORDER BY offset_extracted asc;";

  private String topic;

  protected String tableName;
  protected TopicPartition topicPartition;
  protected SnowflakeSinkService service;
  protected InMemoryKafkaRecordErrorReporter kafkaRecordErrorReporter;

  /**
   * Override in subclasses to create the target Iceberg table before the service starts. KCv4
   * requires pre-created Iceberg tables; auto-creation is not supported for them.
   */
  protected void createIcebergTable() {}

  /**
   * Picks a random table/topic name, lets the subclass create the table, and builds the sink
   * service for a single topic partition.
   */
  @BeforeEach
  public void setUp() {
    tableName = TestUtils.randomTableName();
    topic = tableName;
    topicPartition = new TopicPartition(topic, PARTITION);

    // The table must exist before the service is built (see createIcebergTable()).
    createIcebergTable();

    Map<String, String> config = getConnectorConfigurationForStreaming(false);
    ConnectorConfigTools.setDefaultValues(config);
    SinkTaskConfig sinkTaskConfig =
        SinkTaskConfig.builderFrom(config)
            .tolerateErrors(false)
            .dlqTopicName("test_DLQ")
            .topicToTableMap(Collections.singletonMap(topic, tableName))
            .build();

    kafkaRecordErrorReporter = new InMemoryKafkaRecordErrorReporter();
    service =
        StreamingSinkServiceBuilder.builder(snowflakeDatabase, sinkTaskConfig)
            .withErrorReporter(kafkaRecordErrorReporter)
            .withSinkTaskContext(new InMemorySinkTaskContext(Collections.singleton(topicPartition)))
            .build();
  }

  /**
   * Closes the sink service (if one was created) and drops the test table.
   *
   * <p>The drop runs in a {@code finally} block so the table is removed even when closing the
   * service throws.
   */
  @AfterEach
  public void tearDown() {
    try {
      if (service != null) {
        service.closeAll();
      }
    } finally {
      dropIcebergTable(tableName);
    }
  }

  /**
   * Retries, via {@code TestUtils.assertWithRetry}, until the service reports {@code targetOffset}
   * for the test's topic partition.
   *
   * @param targetOffset offset the service is expected to report
   * @throws Exception propagated from {@code TestUtils.assertWithRetry}
   */
  protected void waitForOffset(long targetOffset) throws Exception {
    TestUtils.assertWithRetry(() -> service.getOffset(topicPartition) == targetOffset);
  }

  /**
   * Builds a {@link SinkRecord} for the test topic from a JSON string.
   *
   * <p>The record has the string key {@code "test"}, a fixed set of headers (one per primitive
   * header type), the current wall-clock time as its timestamp and {@link
   * TimestampType#CREATE_TIME}.
   *
   * @param jsonString JSON text converted into the record value
   * @param offset Kafka offset assigned to the record
   * @param withSchema value for the converter's {@code schemas.enable} setting
   * @return the assembled sink record
   */
  protected SinkRecord createKafkaRecord(String jsonString, long offset, boolean withSchema) {
    JsonConverter converter = new JsonConverter();
    converter.configure(
        Collections.singletonMap("schemas.enable", Boolean.toString(withSchema)), false);
    SchemaAndValue inputValue =
        converter.toConnectData(topic, jsonString.getBytes(StandardCharsets.UTF_8));
    Headers headers = new ConnectHeaders();
    headers.addBoolean("booleanHeader", true);
    headers.addString("stringHeader", "test");
    headers.addInt("intHeader", 123);
    headers.addDouble("doubleHeader", 1.234);
    headers.addFloat("floatHeader", 1.234f);
    headers.addLong("longHeader", 123L);
    headers.addShort("shortHeader", (short) 123);
    return new SinkRecord(
        topic,
        PARTITION,
        Schema.STRING_SCHEMA,
        "test",
        inputValue.schema(),
        inputValue.value(),
        offset,
        System.currentTimeMillis(),
        TimestampType.CREATE_TIME,
        headers);
  }

  /**
   * Reads all rows of the test table, ordered by offset, and maps each row to a {@link
   * ComplexJsonRecord} together with its metadata.
   *
   * @return the mapped rows in ascending offset order
   */
  protected List<RecordWithMetadata<ComplexJsonRecord>>
      selectAllComplexJsonRecordFromRecordContent() {
    return select(tableName, SELECT_ALL_SORT_BY_OFFSET, ComplexJsonRecord::fromRecordContentColumn);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergIngestionIntoVariantIT.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

import static com.snowflake.kafka.connector.streaming.iceberg.IcebergVersion.V3;
import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonPayload;
import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonRecordValueExample;
import static org.assertj.core.api.Assertions.assertThat;

import com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord;
import com.snowflake.kafka.connector.streaming.iceberg.sql.MetadataRecord;
import com.snowflake.kafka.connector.streaming.iceberg.sql.RecordWithMetadata;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;

/**
 * Ingests schemaless JSON records into an Iceberg V3 table whose metadata and content columns are
 * both {@code VARIANT}, then checks the stored rows.
 */
public class IcebergIngestionIntoVariantIT extends IcebergIngestionIT {

  /** Creates the target table with two VARIANT columns, using Iceberg version V3. */
  @Override
  protected void createIcebergTable() {
    final String variantColumns = "RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT";
    createIcebergTableWithColumnClause(tableName, variantColumns, V3);
  }

  /**
   * Inserts records at offsets 0, 1, 2 and the first offset beyond {@code Integer.MAX_VALUE},
   * waiting after each batch for the service to report the next offset, then verifies the table.
   */
  @Test
  void shouldInsertRecordsLegacyBagOfBits() throws Exception {
    final boolean schemasEnabled = false;
    final String payload = complexJsonPayload;
    final long firstOffsetPastInt = 1L + Integer.MAX_VALUE;

    service.insert(
        Arrays.asList(
            createKafkaRecord(payload, 0, schemasEnabled),
            createKafkaRecord(payload, 1, schemasEnabled)));
    waitForOffset(2);

    service.insert(Collections.singletonList(createKafkaRecord(payload, 2, schemasEnabled)));
    waitForOffset(3);

    service.insert(
        Collections.singletonList(createKafkaRecord(payload, firstOffsetPastInt, schemasEnabled)));
    waitForOffset(firstOffsetPastInt + 1);

    assertRecordsInTable(Arrays.asList(0L, 1L, 2L, firstOffsetPastInt));
  }

  /**
   * Verifies that the table holds exactly one row per expected offset, that every row's content
   * equals the example record, that offsets appear in the expected order, and that each row's
   * metadata carries the test topic, partition, key and a push time.
   */
  private void assertRecordsInTable(List<Long> expectedOffsets) {
    final int expectedRowCount = expectedOffsets.size();
    final List<RecordWithMetadata<ComplexJsonRecord>> rows =
        selectAllComplexJsonRecordFromRecordContent();

    assertThat(rows)
        .hasSize(expectedRowCount)
        .extracting(RecordWithMetadata::getRecord)
        .containsOnly(complexJsonRecordValueExample);

    final List<MetadataRecord> metadata =
        rows.stream().map(RecordWithMetadata::getMetadata).collect(Collectors.toList());

    assertThat(metadata)
        .extracting(MetadataRecord::getOffset)
        .containsExactlyElementsOf(expectedOffsets);
    assertThat(metadata).hasSize(expectedRowCount).allMatch(this::describesTestRecord);
  }

  /** Returns whether the metadata matches the topic, partition and key used by the fixture. */
  private boolean describesTestRecord(MetadataRecord candidate) {
    return candidate.getTopic().equals(topicPartition.topic())
        && candidate.getPartition().equals(topicPartition.partition())
        && candidate.getKey().equals("test")
        && candidate.getSnowflakeConnectorPushTime() != null;
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergIngestionNoSchemaEvolutionIT.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

import static com.snowflake.kafka.connector.streaming.iceberg.IcebergVersion.V2;
import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonPayload;
import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonRecordValueExample;
import static com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord.complexJsonWithSchema;
import static org.assertj.core.api.Assertions.assertThat;

import com.snowflake.kafka.connector.Utils;
import com.snowflake.kafka.connector.streaming.iceberg.sql.ComplexJsonRecord;
import com.snowflake.kafka.connector.streaming.iceberg.sql.MetadataRecord;
import com.snowflake.kafka.connector.streaming.iceberg.sql.RecordWithMetadata;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class IcebergIngestionNoSchemaEvolutionIT extends IcebergIngestionIT {

  private static final String PRIMITIVE_JSON_RECORD_CONTENT_OBJECT_SCHEMA =
      "object("
          + "id_int8 NUMBER(10,0),"
          + "id_int16 NUMBER(10,0),"
          + "id_int32 NUMBER(10,0),"
          + "id_int64 NUMBER(19,0),"
          + "description STRING,"
          + "rating_float32 FLOAT,"
          + "rating_float64 FLOAT,"
          + "approval BOOLEAN"
          + ")";

  private static final String COMPLEX_JSON_RECORD_CONTENT_OBJECT_SCHEMA =
      "object("
          + "id_int8 NUMBER(10,0),"
          + "id_int16 NUMBER(10,0),"
          + "id_int32 NUMBER(10,0),"
          + "id_int64 NUMBER(19,0),"
          + "description STRING,"
          + "rating_float32 FLOAT,"
          + "rating_float64 FLOAT,"
          + "approval BOOLEAN,"
          + "array1 ARRAY(LONG),"
          + "array2 ARRAY(STRING),"
          + "array3 ARRAY(BOOLEAN),"
          + "array4 ARRAY(LONG),"
          + "array5 ARRAY(ARRAY(LONG)),"
          + "nestedRecord "
          + PRIMITIVE_JSON_RECORD_CONTENT_OBJECT_SCHEMA
          + ","
          + "nestedRecord2 "
          + PRIMITIVE_JSON_RECORD_CONTENT_OBJECT_SCHEMA
          + ")";

  @Override
  protected void createIcebergTable() {
    createIcebergTableWithColumnClause(
        tableName,
        Utils.TABLE_COLUMN_METADATA
            + " "
            + IcebergDDLTypes.ICEBERG_METADATA_OBJECT_SCHEMA
            + ", "
            + Utils.TABLE_COLUMN_CONTENT
            + " "
            + COMPLEX_JSON_RECORD_CONTENT_OBJECT_SCHEMA,
        V2);
  }

  private static Stream<Arguments> prepareData() {
    return Stream.of(
        Arguments.of("Complex JSON with schema", complexJsonWithSchema, true),
        Arguments.of("Complex JSON without schema", complexJsonPayload, false));
  }

  @ParameterizedTest(name = "{0}")
  @MethodSource("prepareData")
  void shouldInsertRecords(String description, String message, boolean withSchema)
      throws Exception {
    long overMaxIntOffset = (long) Integer.MAX_VALUE + 1;
    service.insert(
        Arrays.asList(
            createKafkaRecord(message, 0, withSchema), createKafkaRecord(message, 1, withSchema)));
    waitForOffset(2);
    service.insert(Collections.singletonList(createKafkaRecord(message, 2, withSchema)));
    waitForOffset(3);
    service.insert(
        Collections.singletonList(createKafkaRecord(message, overMaxIntOffset, withSchema)));
    waitForOffset(overMaxIntOffset + 1);

    assertRecordsInTable(Arrays.asList(0L, 1L, 2L, overMaxIntOffset));
  }

  private void assertRecordsInTable(List<Long> expectedOffsets) {
    List<RecordWithMetadata<ComplexJsonRecord>> recordsWithMetadata =
        selectAllComplexJsonRecordFromRecordContent();
    assertThat(recordsWithMetadata)
        .hasSize(expectedOffsets.size())
        .extracting(RecordWithMetadata::getRecord)
        .containsOnly(complexJsonRecordValueExample);
    List<MetadataRecord> metadataRecords =
        recordsWithMetadata.stream()
            .map(RecordWithMetadata::getMetadata)
            .collect(Collectors.toList());
    assertThat(metadataRecords)
        .extracting(MetadataRecord::getOffset)
        .containsExactlyElementsOf(expectedOffsets);
    assertThat(metadataRecords)
        .hasSize(expectedOffsets.size())
        .allMatch(
            record ->
                record.getTopic().equals(topicPartition.topic())
                    && record.getPartition().equals(topicPartition.partition())
                    && record.getKey().equals("test")
                    && record.getSnowflakeConnectorPushTime() != null);
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/IcebergVersion.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg;

/**
 * Iceberg format versions that the Iceberg integration tests can request when creating a table.
 *
 * <p>Declaration order is significant: {@code BaseIcebergIT.createIcebergTableWithColumnClause}
 * writes the {@code iceberg_version} DDL value as {@code ordinal() + 1}, so the constants must stay
 * in ascending version order without gaps.
 */
public enum IcebergVersion {
  V1,
  V2,
  V3
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/ComplexJsonRecord.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Utils;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.assertj.core.api.Assertions;

/**
 * Test value object for the complex JSON payload used by the Iceberg integration tests: eight
 * primitive fields, five arrays and two nested {@link PrimitiveJsonRecord} objects. It also holds
 * the JSON fixtures and the logic that maps query results back into instances.
 */
public class ComplexJsonRecord {

  /** Contents of the {@code complexJsonPayload.json} test resource. */
  public static final String complexJsonPayload =
      loadJsonResource("/com/snowflake/kafka/connector/complexJsonPayload.json");

  /** Contents of the {@code complexJsonWithSchema.json} test resource. */
  public static final String complexJsonWithSchema =
      loadJsonResource("/com/snowflake/kafka/connector/complexJsonWithSchema.json");

  /** Mapper that ignores JSON properties this class does not declare. */
  private static final ObjectMapper MAPPER =
      new ObjectMapper().configure(FAIL_ON_UNKNOWN_PROPERTIES, false);

  /** Expected record value that the tests compare every ingested row against. */
  public static final ComplexJsonRecord complexJsonRecordValueExample =
      new ComplexJsonRecord(
          8L,
          16L,
          32L,
          64L,
          "dogs are the best",
          0.5,
          0.25,
          true,
          List.of(1, 2, 3),
          List.of("a", "b", "c"),
          List.of(true),
          List.of(1, 4),
          List.of(List.of(7, 8, 9), List.of(10, 11, 12)),
          PrimitiveJsonRecord.primitiveJsonRecordValueExample,
          PrimitiveJsonRecord.primitiveJsonRecordValueExample);

  /**
   * Reads a classpath resource fully and decodes it as UTF-8.
   *
   * @param resourcePath resource path resolved via {@code Class#getResourceAsStream}
   * @return the resource contents
   * @throws RuntimeException if the resource is missing or cannot be read
   */
  private static String loadJsonResource(final String resourcePath) {
    try (InputStream is = ComplexJsonRecord.class.getResourceAsStream(resourcePath)) {
      if (is == null) {
        throw new RuntimeException("Resource not found: " + resourcePath);
      }
      return new String(is.readAllBytes(), StandardCharsets.UTF_8);
    } catch (IOException e) {
      throw new RuntimeException("Failed to load resource: " + resourcePath, e);
    }
  }

  private final Long idInt8;

  private final Long idInt16;

  private final Long idInt32;

  private final Long idInt64;

  private final String description;

  private final Double ratingFloat32;

  private final Double ratingFloat64;

  private final Boolean approval;

  private final List<Integer> array1;
  private final List<String> array2;
  private final List<Boolean> array3;
  private final List<Integer> array4;
  private final List<List<Integer>> array5;

  private final PrimitiveJsonRecord nestedRecord;
  private final PrimitiveJsonRecord nestedRecord2;

  /** Creates a record; also used by Jackson, which binds each argument to its JSON property. */
  @JsonCreator
  public ComplexJsonRecord(
      @JsonProperty("id_int8") Long idInt8,
      @JsonProperty("id_int16") Long idInt16,
      @JsonProperty("id_int32") Long idInt32,
      @JsonProperty("id_int64") Long idInt64,
      @JsonProperty("description") String description,
      @JsonProperty("rating_float32") Double ratingFloat32,
      @JsonProperty("rating_float64") Double ratingFloat64,
      @JsonProperty("approval") Boolean approval,
      @JsonProperty("array1") List<Integer> array1,
      @JsonProperty("array2") List<String> array2,
      @JsonProperty("array3") List<Boolean> array3,
      @JsonProperty("array4") List<Integer> array4,
      @JsonProperty("array5") List<List<Integer>> array5,
      @JsonProperty("nestedRecord") PrimitiveJsonRecord nestedRecord,
      @JsonProperty("nestedRecord2") PrimitiveJsonRecord nestedRecord2) {
    this.idInt8 = idInt8;
    this.idInt16 = idInt16;
    this.idInt32 = idInt32;
    this.idInt64 = idInt64;
    this.description = description;
    this.ratingFloat32 = ratingFloat32;
    this.ratingFloat64 = ratingFloat64;
    this.approval = approval;
    this.array1 = array1;
    this.array2 = array2;
    this.array3 = array3;
    this.array4 = array4;
    this.array5 = array5;
    this.nestedRecord = nestedRecord;
    this.nestedRecord2 = nestedRecord2;
  }

  /**
   * Maps every row of the result set to a record plus its metadata.
   *
   * <p>For each row, the {@code Utils.TABLE_COLUMN_CONTENT} column is parsed as JSON into a {@link
   * ComplexJsonRecord}, and the metadata is read from the same row via {@link
   * PrimitiveJsonRecord#fromMetadataSingleRow(ResultSet)}.
   *
   * @param resultSet result set to iterate; it is not closed here
   * @return one entry per row, in result-set order
   */
  public static List<RecordWithMetadata<ComplexJsonRecord>> fromRecordContentColumn(
      ResultSet resultSet) {
    List<RecordWithMetadata<ComplexJsonRecord>> records = new ArrayList<>();

    try {
      while (resultSet.next()) {
        String jsonString = resultSet.getString(Utils.TABLE_COLUMN_CONTENT);
        ComplexJsonRecord record = MAPPER.readValue(jsonString, ComplexJsonRecord.class);
        MetadataRecord metadata = PrimitiveJsonRecord.fromMetadataSingleRow(resultSet);
        records.add(RecordWithMetadata.of(metadata, record));
      }
    } catch (SQLException | IOException e) {
      // Pass the exception as the cause so the test failure keeps the original stack trace.
      Assertions.fail("Couldn't map ResultSet to ComplexJsonRecord: " + e.getMessage(), e);
    }
    return records;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    ComplexJsonRecord that = (ComplexJsonRecord) o;
    return Objects.equals(idInt8, that.idInt8)
        && Objects.equals(idInt16, that.idInt16)
        && Objects.equals(idInt32, that.idInt32)
        && Objects.equals(idInt64, that.idInt64)
        && Objects.equals(description, that.description)
        && Objects.equals(ratingFloat32, that.ratingFloat32)
        && Objects.equals(ratingFloat64, that.ratingFloat64)
        && Objects.equals(approval, that.approval)
        && Objects.equals(array1, that.array1)
        && Objects.equals(array2, that.array2)
        && Objects.equals(array3, that.array3)
        && Objects.equals(array4, that.array4)
        && Objects.equals(array5, that.array5)
        && Objects.equals(nestedRecord, that.nestedRecord)
        && Objects.equals(nestedRecord2, that.nestedRecord2);
  }

  @Override
  public int hashCode() {
    return Objects.hash(
        idInt8,
        idInt16,
        idInt32,
        idInt64,
        description,
        ratingFloat32,
        ratingFloat64,
        approval,
        array1,
        array2,
        array3,
        array4,
        array5,
        nestedRecord,
        nestedRecord2);
  }

  @Override
  public String toString() {
    return "ComplexJsonRecord{"
        + "idInt8="
        + idInt8
        + ", idInt16="
        + idInt16
        + ", idInt32="
        + idInt32
        + ", idInt64="
        + idInt64
        + ", description='"
        + description
        + '\''
        + ", ratingFloat32="
        + ratingFloat32
        + ", ratingFloat64="
        + ratingFloat64
        + ", approval="
        + approval
        + ", array1="
        + array1
        + ", array2="
        + array2
        + ", array3="
        + array3
        + ", array4="
        + array4
        + ", array5="
        + array5
        + ", nestedRecord="
        + nestedRecord
        + ", nestedRecord2="
        + nestedRecord2
        + '}';
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/MetadataRecord.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Map;
import java.util.Objects;

/**
 * Immutable test representation of the Kafka metadata stored with an ingested row. Jackson
 * creates instances through the annotated constructor.
 */
public class MetadataRecord {
  private final Long offset;
  private final String topic;
  private final Integer partition;
  private final String key;
  private final Integer schemaId;
  private final Integer keySchemaId;
  private final Long createTime;
  private final Long logAppendTime;
  private final Long snowflakeConnectorPushTime;
  private final Map<String, String> headers;

  /** Creates a metadata record; each argument is bound to the JSON property named on it. */
  @JsonCreator
  public MetadataRecord(
      @JsonProperty("offset") Long offset,
      @JsonProperty("topic") String topic,
      @JsonProperty("partition") Integer partition,
      @JsonProperty("key") String key,
      @JsonProperty("schema_id") Integer schemaId,
      @JsonProperty("key_schema_id") Integer keySchemaId,
      @JsonProperty("CreateTime") Long createTime,
      @JsonProperty("LogAppendTime") Long logAppendTime,
      @JsonProperty("SnowflakeConnectorPushTime") Long snowflakeConnectorPushTime,
      @JsonProperty("headers") Map<String, String> headers) {
    this.offset = offset;
    this.topic = topic;
    this.partition = partition;
    this.key = key;
    this.schemaId = schemaId;
    this.keySchemaId = keySchemaId;
    this.createTime = createTime;
    this.logAppendTime = logAppendTime;
    this.snowflakeConnectorPushTime = snowflakeConnectorPushTime;
    this.headers = headers;
  }

  /** @return value of the {@code offset} property */
  public Long getOffset() {
    return this.offset;
  }

  /** @return value of the {@code topic} property */
  public String getTopic() {
    return this.topic;
  }

  /** @return value of the {@code partition} property */
  public Integer getPartition() {
    return this.partition;
  }

  /** @return value of the {@code key} property */
  public String getKey() {
    return this.key;
  }

  /** @return value of the {@code schema_id} property */
  public Integer getSchemaId() {
    return this.schemaId;
  }

  /** @return value of the {@code key_schema_id} property */
  public Integer getKeySchemaId() {
    return this.keySchemaId;
  }

  /** @return value of the {@code CreateTime} property */
  public Long getCreateTime() {
    return this.createTime;
  }

  /** @return value of the {@code LogAppendTime} property */
  public Long getLogAppendTime() {
    return this.logAppendTime;
  }

  /** @return value of the {@code SnowflakeConnectorPushTime} property */
  public Long getSnowflakeConnectorPushTime() {
    return this.snowflakeConnectorPushTime;
  }

  /** @return value of the {@code headers} property */
  public Map<String, String> getHeaders() {
    return this.headers;
  }

  /** Two records are equal when they have the same class and all ten fields are equal. */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    final MetadataRecord other = (MetadataRecord) o;
    return Objects.equals(this.offset, other.offset)
        && Objects.equals(this.topic, other.topic)
        && Objects.equals(this.partition, other.partition)
        && Objects.equals(this.key, other.key)
        && Objects.equals(this.schemaId, other.schemaId)
        && Objects.equals(this.keySchemaId, other.keySchemaId)
        && Objects.equals(this.createTime, other.createTime)
        && Objects.equals(this.logAppendTime, other.logAppendTime)
        && Objects.equals(this.snowflakeConnectorPushTime, other.snowflakeConnectorPushTime)
        && Objects.equals(this.headers, other.headers);
  }

  /** Hash over the same ten fields, in the same order, that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(
        this.offset,
        this.topic,
        this.partition,
        this.key,
        this.schemaId,
        this.keySchemaId,
        this.createTime,
        this.logAppendTime,
        this.snowflakeConnectorPushTime,
        this.headers);
  }

  /** Renders all fields; {@code topic} and {@code key} are wrapped in single quotes. */
  @Override
  public String toString() {
    return "MetadataRecord{offset="
        + offset
        + ", topic='"
        + topic
        + "', partition="
        + partition
        + ", key='"
        + key
        + "', schemaId="
        + schemaId
        + ", keySchemaId="
        + keySchemaId
        + ", createTime="
        + createTime
        + ", logAppendTime="
        + logAppendTime
        + ", snowflakeConnectorPushTime="
        + snowflakeConnectorPushTime
        + ", headers="
        + headers
        + "}";
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/PrimitiveJsonRecord.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.snowflake.kafka.connector.Utils;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Objects;
import org.assertj.core.api.Assertions;

/**
 * Test value object holding the eight primitive fields of the JSON fixtures. It also provides the
 * helper that reads a row's metadata column into a {@link MetadataRecord}.
 */
public class PrimitiveJsonRecord {

  /** Expected value used by the tests for the primitive (and nested) record. */
  public static final PrimitiveJsonRecord primitiveJsonRecordValueExample =
      new PrimitiveJsonRecord(8L, 16L, 32L, 64L, "dogs are the best", 0.5, 0.25, true);

  /** Mapper that ignores JSON properties the target class does not declare. */
  private static final ObjectMapper MAPPER =
      new ObjectMapper().configure(FAIL_ON_UNKNOWN_PROPERTIES, false);

  private final Long idInt8;

  private final Long idInt16;

  private final Long idInt32;

  private final Long idInt64;

  private final String description;

  private final Double ratingFloat32;

  private final Double ratingFloat64;

  private final Boolean approval;

  /** Creates a record; also used by Jackson, which binds each argument to its JSON property. */
  @JsonCreator
  public PrimitiveJsonRecord(
      @JsonProperty("id_int8") Long idInt8,
      @JsonProperty("id_int16") Long idInt16,
      @JsonProperty("id_int32") Long idInt32,
      @JsonProperty("id_int64") Long idInt64,
      @JsonProperty("description") String description,
      @JsonProperty("rating_float32") Double ratingFloat32,
      @JsonProperty("rating_float64") Double ratingFloat64,
      @JsonProperty("approval") Boolean approval) {
    this.idInt8 = idInt8;
    this.idInt16 = idInt16;
    this.idInt32 = idInt32;
    this.idInt64 = idInt64;
    this.description = description;
    this.ratingFloat32 = ratingFloat32;
    this.ratingFloat64 = ratingFloat64;
    this.approval = approval;
  }

  /**
   * Parses the {@code Utils.TABLE_COLUMN_METADATA} column of the result set's current row into a
   * {@link MetadataRecord}. The cursor is not advanced.
   *
   * @param resultSet result set positioned on the row to read
   * @return the parsed metadata; the trailing {@code return null} is only there to satisfy the
   *     compiler after the failing assertion
   */
  public static MetadataRecord fromMetadataSingleRow(ResultSet resultSet) {
    try {
      String jsonString = resultSet.getString(Utils.TABLE_COLUMN_METADATA);
      return MAPPER.readValue(jsonString, MetadataRecord.class);
    } catch (SQLException | IOException e) {
      // Pass the exception as the cause so the test failure keeps the original stack trace.
      Assertions.fail("Couldn't map ResultSet to MetadataRecord: " + e.getMessage(), e);
    }
    return null;
  }

  public Long getIdInt8() {
    return idInt8;
  }

  public Long getIdInt16() {
    return idInt16;
  }

  public Long getIdInt32() {
    return idInt32;
  }

  public Long getIdInt64() {
    return idInt64;
  }

  public String getDescription() {
    return description;
  }

  public Double getRatingFloat32() {
    return ratingFloat32;
  }

  public Double getRatingFloat64() {
    return ratingFloat64;
  }

  public Boolean isApproval() {
    return approval;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;
    PrimitiveJsonRecord that = (PrimitiveJsonRecord) o;
    return Objects.equals(idInt8, that.idInt8)
        && Objects.equals(idInt16, that.idInt16)
        && Objects.equals(idInt32, that.idInt32)
        && Objects.equals(idInt64, that.idInt64)
        && Objects.equals(description, that.description)
        && Objects.equals(ratingFloat32, that.ratingFloat32)
        && Objects.equals(ratingFloat64, that.ratingFloat64)
        && Objects.equals(approval, that.approval);
  }

  @Override
  public int hashCode() {
    return Objects.hash(
        idInt8, idInt16, idInt32, idInt64, description, ratingFloat32, ratingFloat64, approval);
  }

  @Override
  public String toString() {
    return "PrimitiveJsonRecord{"
        + "idInt8="
        + idInt8
        + ", idInt16="
        + idInt16
        + ", idInt32="
        + idInt32
        + ", idInt64="
        + idInt64
        + ", description='"
        + description
        + '\''
        + ", ratingFloat32="
        + ratingFloat32
        + ", ratingFloat64="
        + ratingFloat64
        + ", approval="
        + approval
        + '}';
  }
}


================================================
FILE: src/test/java/com/snowflake/kafka/connector/streaming/iceberg/sql/RecordWithMetadata.java
================================================
package com.snowflake.kafka.connector.streaming.iceberg.sql;

/**
 * Immutable holder pairing a record of type {@code T} with its {@link MetadataRecord}.
 *
 * <p>Instances are created through {@link #of(MetadataRecord, Object)}; the constructor is
 * private.
 *
 * @param <T> type of the wrapped record
 */
public class RecordWithMetadata<T> {
  private final MetadataRecord metadata;
  private final T record;

  private RecordWithMetadata(MetadataRecord recordMetadata, T wrappedRecord) {
    this.metadata = recordMetadata;
    this.record = wrappedRecord;
  }

  /**
   * Static factory for a metadata/record pair.
   *
   * @param metadata metadata to associate with the record
   * @param record the record itself
   * @param <T> type of the wrapped record
   * @return a new pair holding both arguments as given
   */
  public static <T> RecordWithMetadata<T> of(MetadataRecord metadata, T record) {
    final RecordWithMetadata<T> pair = new RecordWithMetadata<>(metadata, record);
    return pair;
  }

  /** @return the wrapped record passed at creation */
  public T getRecord() {
    return this.record;
  }

  /** @return the metadata passed at creation */
  public MetadataRecord getMetadata() {
    return this.metadata;
  }
}


================================================
FILE: src/test/resources/com/snowflake/kafka/connector/complexJsonPayload.json
================================================
{
  "record_content": {
    "id_int8": 8,
    "id_int16": 16,
    "id_int32": 32,
    "id_int64": 64,
    "description": "dogs are the best",
    "rating_float32": 0.5,
    "rating_float64": 0.25,
    "approval": true,
    "array1": [
      1,
      2,
      3
    ],
    "array2": [
      "a",
      "b",
      "c"
    ],
    "array3": [
      true
    ],
    "array4": [
      1,
      4
    ],
    "array5": [
      [
        7,
        8,
        9
      ],
      [
        10,
        11,
        12
      ]
    ],
    "nestedRecord": {
      "id_int8": 8,
      "id_int16": 16,
      "id_int32": 32,
      "id_int64": 64,
      "description": "dogs are the best",
      "rating_float32": 0.5,
      "rating_float64": 0.25,
      "approval": true
    },
    "nestedRecord2": {
      "id_int8": 8,
      "id_int16": 16,
      "id_int32": 32,
      "id_int64": 64,
      "description": "dogs are the best",
      "rating_float32": 0.5,
      "rating_float64": 0.25,
      "approval": true
    }
  }
}


================================================
FILE: src/test/resources/com/snowflake/kafka/connector/complexJsonWithSchema.json
================================================
{
  "schema": {
    "type": "struct",
    "fields": [
      {
        "field": "record_content",
        "type": "struct",
        "fields": [
          {
            "field": "id_int8",
            "type": "int8"
          },
          {
            "field": "id_int16",
            "type": "int16"
          },
          {
            "field": "id_int32",
            "type": "int32"
          },
          {
            "field": "id_int64",
            "type": "int64"
          },
          {
            "field": "description",
            "type": "string"
          },
          {
            "field": "rating_float32",
            "type": "float"
          },
          {
            "field": "rating_float64",
            "type": "double"
          },
          {
            "field": "approval",
            "type": "boolean"
          },
          {
            "field": "array1",
            "type": "array",
            "items": {
              "type": "int32"
            }
          },
          {
            "field": "array2",
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          {
            "field": "array3",
            "type": "array",
            "items": {
              "type": "boolean"
            }
          },
          {
            "field": "array4",
            "type": "array",
            "items": {
              "type": "int32"
            },
            "optional": true
          },
          {
            "field": "array5",
            "type": "array",
            "items": {
              "type": "array",
              "items": {
                "type": "int32"
              }
            }
          },
          {
            "field": "nestedRecord",
            "type": "struct",
            "fields": [
              {
                "field": "id_int8",
                "type": "int8"
              },
              {
                "field": "id_int16",
                "type": "int16"
              },
              {
                "field": "id_int32",
                "type": "int32"
              },
              {
                "field": "id_int64",
                "type": "int64"
              },
              {
                "field": "description",
                "type": "string"
              },
              {
                "field": "rating_float32",
                "type": "float"
              },
              {
                "field": "rating_float64",
                "type": "double"
              },
              {
                "field": "approval",
                "type": "boolean"
              }
            ],
            "optional": true,
            "name": "sf.kc.test"
          },
          {
            "field": "nestedRecord2",
            "type": "struct",
            "fields": [
              {
                "field": "id_int8",
                "type": "int8"
              },
              {
                "field": "id_int16",
                "type": "int16"
              },
              {
                "field": "id_int32",
                "type": "int32"
              },
              {
                "field": "id_int64",
                "type": "int64"
              },
              {
                "field": "description",
                "type": "string"
              },
              {
                "field": "rating_float32",
                "type": "float"
              },
              {
                "field": "rating_float64",
                "type": "double"
              },
              {
                "field": "approval",
                "type": "boolean"
              }
            ],
            "optional": true,
            "name": "sf.kc.test"
          }
        ]
      }
    ],
    "optional": false,
    "name": "sf.kc.test"
  },
  "payload": {
    "record_content": {
      "id_int8": 8,
      "id_int16": 16,
      "id_int32": 32,
      "id_int64": 64,
      "description": "dogs are the best",
      "rating_float32": 0.5,
      "rating_float64": 0.25,
      "approval": true,
      "array1": [
        1,
        2,
        3
      ],
      "array2": [
        "a",
        "b",
        "c"
      ],
      "array3": [
        true
      ],
      "array4": [
        1,
        4
      ],
      "array5": [
        [
          7,
          8,
          9
        ],
        [
          10,
          11,
          12
        ]
      ],
      "nestedRecord": {
        "id_int8": 8,
        "id_int16": 16,
        "id_int32": 32,
        "id_int64": 64,
        "description": "dogs are the best",
        "rating_float32": 0.5,
        "rating_float64": 0.25,
        "approval": true
      },
      "nestedRecord2": {
        "id_int8": 8,
        "id_int16": 16,
        "id_int32": 32,
        "id_int64": 64,
        "description": "dogs are the best",
        "rating_float32": 0.5,
        "rating_float64": 0.25,
        "approval": true
      }
    }
  }
}


================================================
FILE: src/test/resources/log4j.properties
================================================
log4j.rootLogger=INFO, STDOUT, file
log4j.logger.deng=INFO
log4j.appender.STDOUT=org.apache.log4j.ConsoleAppender
log4j.appender.STDOUT.layout=org.apache.log4j.PatternLayout
log4j.appender.STDOUT.layout.ConversionPattern=%d{dd-MM-yyyy HH:mm:ss} %t %-5p %m %c{1}:%L%n
log4j.appender.file=org.apache.log4j.RollingFileAppender
log4j.appender.file.File=sf.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
# Pattern: date with format, thread name, log severity, message, simple class name ({1}) with line number, and platform-dependent "\n"
# https://logging.apache.org/log4j/1.2/apidocs/org/apache/log4j/PatternLayout.html
# Example: 26-04-2021 16:42:16 main DEBUG (<custom message>) SnowflakeInternalStage:95
log4j.appender.file.layout.ConversionPattern=%d{dd-MM-yyyy HH:mm:ss} %t %-5p %m %c{1}:%L%n
log4j.logger.com.snowflake.kafka.connector=TRACE
# Avoid httpClient flooding the log
log4j.logger.net.snowflake.client.jdbc.internal.apache.http.wire=WARN
log4j.logger.net.snowflake.client.jdbc.internal.apache.http.headers=WARN

================================================
FILE: src/test/resources/squid.conf
================================================
acl SSL_ports port 443
acl Safe_ports port 80          # http
acl Safe_ports port 21          # ftp
acl Safe_ports port 443         # https
acl Safe_ports port 70          # gopher
acl Safe_ports port 210         # wais
acl Safe_ports port 1025-65535  # unregistered ports
acl Safe_ports port 280         # http-mgmt
acl Safe_ports port 488         # gss-http
acl Safe_ports port 591         # filemaker
acl Safe_ports port 777         # multiling http
acl CONNECT method CONNECT

http_access deny !Safe_ports
http_access deny CONNECT !SSL_ports

http_port 3128
coredump_dir /var/spool/squid

refresh_pattern ^ftp:           1440    20%     10080
refresh_pattern ^gopher:        1440    0%      1440
refresh_pattern -i (/cgi-bin/|\?) 0     0%      0
refresh_pattern (Release|Packages(.gz)*)$      0       20%     2880
refresh_pattern .               0       20%     4320

auth_param basic program /usr/lib/squid/basic_ncsa_auth /etc/squid/passwords
auth_param basic realm proxy
acl authenticated proxy_auth REQUIRED
http_access allow authenticated

http_access allow localhost

ident_lookup_access deny all
http_access deny all

================================================
FILE: test/.gitignore
================================================
apache.tgz
apache_log
kafka_*
confluent-*
rest_request_generated/


================================================
FILE: test/E2E_TEST_PLAN.md
================================================
# E2E Test Plan: Kafka Connector v4 (SSv2)

## Table of Contents

- [1. Overview](#1-overview)
- [2. Test Dimensions](#2-test-dimensions)
- [3. Test Categories](#3-test-categories)
  - [3.1 Data Ingestion](#31-data-ingestion)
  - [3.2 Error Handling](#32-error-handling)
  - [3.3 Schema Evolution](#33-schema-evolution)
  - [3.4 RECORD_CONTENT Mode](#34-record_content-mode)
  - [3.5 Connector Lifecycle & Resilience](#35-connector-lifecycle--resilience)
  - [3.6 Default Pipe Features](#36-default-pipe-features)
  - [3.7 Load & Stress](#37-load--stress)
- [4. Data Type Compatibility Strategy](#4-data-type-compatibility-strategy)

### Status Legend


| Icon | Meaning |
|------|---------|
| 🟢 | Done -- test exists and passes |
| 🟡 | Known divergence -- test documents behavioral difference between v3 and v4, or has a known gap |
| 🔴 | Missing -- test must be written |

### Priority (for 🔴 items)

| Tier | Meaning | Criteria |
|------|---------|----------|
| **P0** | GA blocker | Data correctness risk, no existing coverage, explicit FR requirement |
| **P1** | Should have for GA | Important for launch or fast-follow, partial coverage exists |
| **P2** | Deferred post-GA | Lower risk, high implementation cost, or substantial existing coverage |

---

## 1. Overview

### Background

Kafka Connector v4 replaces v3's dual ingestion engines (file-based Snowpipe + SSv1 Streaming) with SSv2 exclusively. The GA strategy requires **functional parity** with v3 in compatibility mode, plus a new high-throughput mode.

### Connector Operating Modes

| Mode | Config | Validation | Schema Evolution | Error Handling | Target Use Case |
|------|--------|-----------|-----------------|----------------|-----------------|
| **Compatibility** | `snowflake.validation=client_side` | Client-side | Client-side `ALTER TABLE` | Sync DLQ / Sync Abort | v3 migration, parity |
| **High-Throughput** | `snowflake.validation=server_side` (default) | None (server) | Server-side (table `ENABLE_SCHEMA_EVOLUTION`) | Async Error Tables | Max throughput (10 GB/s target) |

### PRD Functional Requirements

| FR | Name | Scope |
|----|------|-------|
| FR1 | Client-Side DLQ (`errors.tolerance=all`) -- includes data type validation parity | Compatibility mode |
| FR2 | Client-Side Abort (`errors.tolerance=none`) -- includes data type validation parity | Compatibility mode |
| FR3 | Validation Toggle (`snowflake.validation`) | Mode switch |
| FR4 | Legacy Schema Toggle (`snowflake.enable.schematization`) | Both modes |
| FR5 | Default Pipes Only (`MATCH_BY_COLUMN_NAME`) | Both modes |
| FR6 | Schema Evolution | Both modes (different paths) |
| FR7 | Default Pipe Improvements (Identity, Defaults, Clustering) | Both modes |
| FR8 | Performance & Stability Baselines | High-throughput mode |
| FR9 | v3/v4 DLQ Parity | Compatibility mode -- requires DLQ error messages are byte-for-byte identical between v3 and v4 |
| FR10 | Telemetry & Usage Tracking | Both modes |
| FR11 | Pre-Flight Safety Check | High-throughput mode |

### When Dual Testing is Required

Running every test in dual mode (v3 + v4) doubles CI time. Dual is justified only when **v3 and v4 can produce different results** for the same input. There are two root causes of behavioral divergence:

1. **Data type handling differences between SDKs**: SSv1 and SSv2 may serialize, validate, or store values differently for the same Snowflake type. Known example: SSv1 parses JSON-like strings into native JSON objects in VARIANT columns, while SSv2 stores them as string literals. Similar differences may exist for BINARY encoding, TIMESTAMP precision, or other types. See [Section 4: Data Type Compatibility Strategy](#4-data-type-compatibility-strategy) for the full analysis.

2. **Client-side validation lifecycle**: SSv1 always validates (built into the SDK, cannot be disabled). V4 has a separate `RowValidator` (copied from SSv1's `DataValidationUtil`) that can be toggled. This affects DLQ routing, abort behavior, and schema evolution triggering. Additionally, v3 requires `schematization=true` for schema evolution; v4 does not.

---

## 2. Test Dimensions

Every test scenario can be classified across these independent dimensions:

### Data Format

Only formats used in the **Snowpipe Streaming** ingestion path are in scope. Legacy Snowpipe-only converters (`SnowflakeJsonConverter`, `SnowflakeAvroConverter`) are excluded -- we are migrating `SNOWPIPE_STREAMING` mode, not file-based `SNOWPIPE`.

| Format | Key Converter | Value Converter | Schema Registry | Platform |
|--------|--------------|-----------------|-----------------|----------|
| **JSON (native)** | StringConverter | JsonConverter | No | Any |
| **Avro SR** | StringConverter | AvroConverter (Confluent) | Yes | Confluent |
| **Avro SR (keys+values)** | AvroConverter | AvroConverter | Yes | Confluent |
| **Protobuf SR** | StringConverter | ProtobufConverter (Confluent) | Yes | Confluent |
| **Protobuf (native)** | StringConverter | Custom (raw bytes) | No | Any |
| **String (raw)** | StringConverter | StringConverter | No | Any |
| **Bytes (raw)** | ByteArrayConverter | ByteArrayConverter | No | Any |

### Architecture

| Value | Config | Behavior |
|-------|--------|----------|
| `server_side` (default) | `snowflake.validation=server_side` | Server-side only, Error Tables |
| `client_side` | `snowflake.validation=client_side` | Client-side validation, DLQ, abort |

### Schematization Mode

| Value | Config | Table Layout |
|-------|--------|-------------|
| `on` (default in v4) | `snowflake.enable.schematization=true` | Flat columns + `RECORD_METADATA` |
| `off` | `snowflake.enable.schematization=false` | `RECORD_CONTENT` + `RECORD_METADATA` (VARIANT) |

### Platform

| Platform | Schema Registry | Notes |
|----------|----------------|-------|
| Apache Kafka | Embedded (limited) | No Confluent SR converters |
| Confluent Platform | Full SR support | Required for Avro SR, Protobuf SR tests |

---

## 3. Test Categories

### 3.1 Data Ingestion

Basic data lands correctly in Snowflake for each format. This is the foundation -- every other category builds on it.

#### 3.1.1 JSON (Compatibility Mode)

| Status | Test | Version | Rationale | File |
|:------:|------|---------|-----------|------|
| 🟢 | String keys + JSON values | dual | Asserts data values in VARIANT header column (confirmed v3/v4 difference) | `test_string_json.py` |
| 🟢 | JSON keys + JSON values | v4 | Row count + metadata only; no data value assertions | `test_json_json.py` |
| 🟢 | JSON without schema + ReplaceField SMT | v4 | SMT runs in KC framework, not SDK-dependent | `test_native_string_json_without_schema.py` |
| 🟢 | Complex SMT chain (ValueToKey + ExtractField + ReplaceField) | v4 | Same -- KC framework SMT processing | `test_native_complex_smt.py` |
| 🟢 | Nullable values after ExtractField SMT | v4 | SMT + tombstone handling, KC framework level | `test_nullable_values_after_smt.py` |
| 🟢 | Snowpipe Streaming multi-partition (3p x 1000) | v4 | Row count + offset uniqueness check only | `test_snowpipe_streaming_string_json.py` |
| 🟢 | Multiple topics -> one table (3 topics x 3 partitions) | v4 | Row count + topic distribution check only | `test_multiple_topic_to_one_table_snowpipe_streaming.py` |
| 🟢 | Tombstone handling (`behavior.on.null.values=IGNORE`) | v4 | v4-only; dual coverage pending. Asserts data values in VARIANT header column. | `test_snowpipe_streaming_string_json_ignore_tombstone.py` |
| 🔴 | Large blob ingestion (20 MiB JSON) | v4 | **P2.** Row count check; tests SDK buffer limits, not validation. v3 equivalent: `TestLargeBlobSnowpipe` | -- |

#### 3.1.2 Avro (Compatibility Mode)

These tests were originally ported from v3 with identical assertions. v4 was run against those assertions on Confluent 7.8.0 (2026-03-31) and all passed — confirming v4 produces identical results. v3 itself cannot run in the current infrastructure due to the SR classloader conflict, but parity is indirectly verified through the captured reference assertions.

| Status | Test | Version | Rationale | File |
|:------:|------|---------|-----------|------|
| 🟢 | String keys + Avro SR values | v4 | Assertions capture v3 reference behavior (ported from v3). v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict. | `test_string_avrosr.py` |
| 🟢 | Avro SR keys + Avro SR values (NaN, Inf) | v4 | Same -- v4 parity confirmed 2026-03-31. | `test_avrosr_avrosr.py` |
| 🟢 | Snowpipe Streaming + Avro SR (3p x 1000) | v4 | Same -- v4 parity confirmed 2026-03-31. | `test_snowpipe_streaming_string_avro_sr.py` |

#### 3.1.3 Protobuf (Compatibility Mode)

| Status | Test | Version | Rationale | File |
|:------:|------|---------|-----------|------|
| 🟢 | Confluent Protobuf SR (nested types, special floats) | v4 | Assertions capture v3 reference behavior (ported from v3). v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict. | `test_confluent_protobuf_protobuf.py` |
| 🟢 | Native Protobuf (raw bytes, no SR) | v4 | Protobuf deserialization is converter-level, not SDK | `test_native_string_protobuf.py` |

#### 3.1.4 Schema & Type Mapping (Compatibility Mode)

The existing `test_schema_mapping.py` is the beginning of type compatibility testing but has significant gaps. It will be **subsumed by the comprehensive `test_type_compatibility.py`** proposed in [Section 4](#4-data-type-compatibility-strategy). The new test file extends coverage to all Snowflake types, adds negative cases, and runs in dual mode.

| Status | Test | Version | Rationale | File |
|:------:|------|---------|-----------|------|
| 🟢 | Type mapping (JSON): 10 types, positive only | v4 | Superseded by `test_type_compatibility.py` for comprehensive dual-mode coverage. | `test_schema_mapping.py` |
| 🟢 | Unsupported converter rejection | v4 | Converter rejection is KC framework level | `test_schema_not_supported_converter.py` |

#### 3.1.5 Table Creation

Auto table creation requires the connector to infer column types from the incoming data schema. Table creation itself is converter-independent — testing with a single converter (Avro SR, which provides an explicit schema) is sufficient.

> **Note on v3 reference capture**: The v3-first reference capture technique (used for SR tests in PR #1398) is not feasible here because these tests require Confluent Schema Registry, which triggers the v3 classloader conflict. However, these tests only assert table schema and row counts — not data values — so parity risk is low. The table creation DDL is generated by the same converter code in both versions.

| Status | Test | Version | Rationale | Format | File |
|:------:|------|---------|-----------|--------|------|
| 🟢 | Auto table creation from Avro SR schema | v4 | v4-only; v3 blocked by SR classloader. Asserts table schema and row counts only. | Avro SR | `test_auto_table_creation.py` |
| 🟢 | Auto table creation with topic2table mapping | v4 | v4-only; v3 blocked by SR classloader. Asserts table schema and row counts only. | Avro SR | `test_auto_table_creation_topic2table.py` |

#### 3.1.6 High-Throughput Mode Ingestion

These tests run with `snowflake.validation=server_side` (the default), i.e. without the client-side `RowValidator`.

| Status | Test | Version | Format | Notes |
|:------:|------|---------|--------|-------|
| 🔴 | Valid JSON records land correctly; verify toggle default is `server_side` when config omitted | v4 | JSON | **P0.** FR3 -- verify data arrives without client-side RowValidator + default toggle behavior |
| 🔴 | Valid Avro SR records land correctly | v4 | Avro SR | **P1.** FR3 |

#### 3.1.7 Iceberg Tables

> **V3 scope note**: V3 (3.2.x) has iceberg support via `snowflake.streaming.iceberg.enabled=true`
> but it was experimental and used custom connector-side code (`IcebergInitService`,
> `IcebergTableStreamingRecordMapper`, `IcebergSchemaEvolutionService`) that was removed in v4.
> V4 delegates iceberg entirely to SSv2 which handles it transparently.  The `v4_config_to_v3`
> migration does not inject `snowflake.streaming.iceberg.enabled=true`, so running these tests
> against v3 would silently write to regular (non-iceberg) tables rather than fail loudly.
> All iceberg tests are therefore v4-only.
>
> **External volume prerequisite**: tests require an AWS external volume named
> `kafka_push_e2e_volume_aws` (override with env var `ICEBERG_EXTERNAL_VOLUME`).

| Status | Test | Version | Rationale | Format | Cloud | File |
|:------:|------|---------|-----------|--------|-------|------|
| 🟢 | Iceberg JSON ingestion (2x2: validation x schematization) | v4 | schema=off: VARIANT bag-of-bits; schema=on: mixed VARIANT+typed table (BIGINT/DOUBLE/TEXT pre-declared, no SE needed); all 4 combos pass | JSON | AWS | `iceberg/test_iceberg_json.py::test_iceberg_json_ingestion` |
| 🟢 | Iceberg Avro ingestion (2x2: validation x schematization) | v4 | Same matrix as JSON but with Avro SR + AvroConverter; verifies typed columns and RECORD_METADATA | Avro SR | AWS | `iceberg/test_iceberg_avro.py::test_iceberg_avro_ingestion` |
| 🟢 | Iceberg SE JSON — add column (client-side) | v4 | Connector issues `ALTER ICEBERG TABLE ADD COLUMN` when RowValidator detects new columns; table starts with CITY+RECORD_METADATA, wave 1 adds AGE, wave 2 adds COUNTRY | JSON | AWS | `iceberg/test_iceberg_se_json.py::test_iceberg_se_add_column` |
| 🟢 | Iceberg SE JSON — multi-wave (client-side) | v4 | Three-wave evolution: city-only → city+age → city+age+country; verifies NULL backfill for pre-existing rows | JSON | AWS | `iceberg/test_iceberg_se_json.py::test_iceberg_se_multi_wave` |
| 🟡 | Iceberg SE JSON — server-side (xfail -- known limitation) | v4 | `ENABLE_SCHEMA_EVOLUTION=TRUE` + `validation=server_side` (HT mode) silently discards typed column additions on ICEBERG_VERSION=3 tables; client-side SE via `ALTER ICEBERG TABLE ADD COLUMN` works correctly; remove xfail once Snowflake server-side SE supports typed columns on iceberg | JSON | AWS | `iceberg/test_iceberg_se_json.py::test_iceberg_se_json_server_side` |
| 🟢 | Iceberg SE Avro — add column (client-side) | v4 | Same as JSON SE but with Avro SR; verifies column additions from evolving Avro schemas | Avro SR | AWS | `iceberg/test_iceberg_se_avro.py::test_iceberg_se_avro_add_column` |

#### 3.1.8 Pre-Flight Check (FR11)

| Status | Test | Scenario | Notes |
|:------:|------|----------|-------|
| 🟢 | No Error Table configured -> startup warning logged, connector runs | `snowflake.validation=server_side`, no Error Table | `test_error_table_without_error_logging` |
| 🟢 | Error Table configured -> startup succeeds, errors captured | `snowflake.validation=server_side`, Error Table present | `test_error_table_with_error_logging` |

#### 3.1.9 Case Sensitivity

| Status | Test | Version | Rationale | File |
|:------:|------|---------|-----------|------|
| 🟢 | Case-sensitive table name handling | dual | Verifies table name case sensitivity across v3/v4 | `compatibility/test_compatibility_case_sensitivity.py::test_compatibility_case_sensitivity_table_name` |
| 🟢 | Case-sensitive ingestion column names | dual | Verifies column name case handling across v3/v4 | `compatibility/test_compatibility_case_sensitivity.py::test_compatibility_case_sensitivity_ingestion_columns` |
| 🟢 | Case sensitivity in schema evolution | dual | Verifies SE handles case-sensitive column names | `compatibility/test_compatibility_case_sensitivity.py::test_case_sensitivity_schema_evolution` |

#### 3.1.10 Migration

| Status | Test | Version | Rationale | File |
|:------:|------|---------|-----------|------|
| 🟢 | v3→v4 migration without duplicates | dual | Verifies seamless migration path | `compatibility/test_migration.py::test_migration_without_duplicates` |
| 🟢 | v3→v4 migration with possible duplicates | dual | Verifies migration handles at-least-once delivery | `compatibility/test_migration.py::test_migration_with_possible_duplicates` |

---

### 3.2 Error Handling

Error handling is the highest-risk area for v3/v4 parity. In v3, SSv1 always validates and errors are deterministic. In v4, the `RowValidator` (copied from SSv1's `DataValidationUtil`) is a separate layer that can be toggled. **All compatibility-mode error handling tests must be dual** because they directly exercise client-side validation.

#### 3.2.1 Dead Letter Queue -- `errors.tolerance=all` (FR1, Compatibility Mode)

| Status | Test | Version | Format | Error Type | File |
|:------:|------|---------|--------|-----------|------|
| 🟢 | Invalid JSON -> DLQ | v4 | JSON | Deserialization | `test_snowpipe_streaming_string_json_dlq.py` -- v4-only; dual conversion pending |
| 🔴 | Schema mapping error -> DLQ | v4 | JSON | Type mismatch | `test_snowpipe_streaming_schema_mapping_dlq.py` -- `@pytest.mark.skip`; broken, not divergent |
| 🔴 | DLQ Kafka headers preserved (v3/v4 byte-for-byte comparison) | dual | JSON | Any | **P0.** FR9: DLQ error messages must be identical between v3 and v4 |
| 🔴 | DLQ with Avro data | dual | Avro SR | Deserialization | **P2.** FR1 -- v3 parity blocked by SR classloader. DLQ routing is format-independent (KC framework level); format-specific differences unlikely. |
| 🔴 | DLQ with Protobuf data | dual | Protobuf SR | Deserialization | **P2.** FR1 -- same reasoning as Avro DLQ. |
| 🔴 | DLQ with multi-partition topics | dual | JSON | Mixed | **P1.** FR1 -- only one test (`test_snowpipe_streaming_string_json_ignore_tombstone.py`) currently exercises multi-partition. |

#### 3.2.2 Abort -- `errors.tolerance=none` (FR2, Compatibility Mode)

| Status | Test | Version | Format | Error Type | Notes |
|:------:|------|---------|--------|-----------|-------|
| 🔴 | Deserialization error -> task FAILED | dual | JSON | Bad payload | **P1.** FR2. Verify v4 aborts identically to v3 on bad payload. Abort mechanism already verified by `ingest_one_type_abort` fixture; gap is v3/v4 parity for this error type. |
| 🔴 | Schema mismatch -> task FAILED | dual | JSON | Type mismatch | **P1.** FR2. Verify v4 aborts identically to v3 on type mismatch. Same — mechanism works, parity not yet verified. |

#### 3.2.3 Error Table Routing (High-Throughput Mode)

When `snowflake.validation=server_side`, invalid records route to SSv2 Error Tables instead of DLQ.

| Status | Test | Version | Format | Notes |
|:------:|------|---------|--------|-------|
| 🟢 | Invalid records -> SSv2 Error Table (not DLQ) | v4 | JSON | `test_error_table_with_error_logging`, `test_error_table_accounting[v4-ht]` |
| 🟢 | Error Table + value validation (VARCHAR overflow) and schema mismatch (missing NOT NULL column) | v4 | JSON | `test_error_table_schema_mismatch` |
| 🟢 | Compat routes to DLQ while HT routes to Error Table (same bad record, both modes) | v4 | JSON | `test_error_table_vs_dlq_routing` |

---

### 3.3 Schema Evolution

Schema evolution has two code paths:

- **Compatibility mode** (`snowflake.validation=client_side`): Client-side `ALTER TABLE ADD COLUMN` / `ALTER TABLE DROP NOT NULL`. The connector's `RowValidator` detects structural mismatches (extra columns, missing NOT NULL) and the `SnowflakeSchemaEvolutionService` issues DDL.
- **High-throughput mode** (`snowflake.validation=server_side`): Records go directly to SSv2 SDK. Schema evolution depends on the Snowflake table's `ENABLE_SCHEMA_EVOLUTION = TRUE` property -- the server handles it.

#### 3.3.1 Client-Side Schema Evolution (Compatibility Mode)

**Analysis notes:**
- All `se_*.json` config templates set `snowflake.validation=client_side` but do NOT explicitly set `snowflake.enable.schematization`. The v4 default is `true`, so schematization is implicitly on.
- `test_schema_evolution_streaming.py` uses `snowpipe_streaming_schema_evolution.json` which also does not set validation or schematization explicitly (relying on defaults).
- **Overlap detected**: `test_se_nonnullable_json` and `test_schema_evolution_drop_not_null` test the same scenario (NOT NULL column dropped by SE). `test_se_auto_table_creation_json` and `test_schema_evolution_add_columns` partially overlap (new columns added via SE). These should be deduplicated when the SE test branches are merged.
- **Config_variants gap**: `evo=True, schema=False` combos are all skipped with a TODO. `evo=False, schema=True, valid=False` returns early with no assertions.

Tests are dual when they exercise the client-side validation path (structural error detection triggers SE). Tests that only check row counts after SE can be v4-only.

| Status | Test | Version | Rationale | Format | File |
|:------:|------|---------|-----------|--------|------|
| 🟢 | Add columns (JSON, `{city, age}`) | dual | SE triggers via RowValidator structural error detection | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_add_columns` |
| 🟢 | Multi-wave evolution (wave 1 -> wave 2) | dual | Same path -- structural error triggers ADD COLUMN | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_multi_wave` |
| 🟢 | Happy path (schema matches table) | v4 | No SE triggered, no validation-dependent behavior | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_happy_path` |
| 🟢 | Drop NOT NULL constraint | dual | SE triggers via RowValidator null-in-NOT-NULL detection | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_drop_not_null` |
| 🟢 | Disabled mid-stream (toggle SE off) | v4 | Tests DDL privilege, not validation path | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_disabled_mid_stream` |
| 🟢 | Config matrix (8 combos: `evo x schematization x validation`) | dual | Core validation/SE interaction test. Has internal `pytest.skip()` for certain v3/v4 combos. `evo=True, schema=False` combos skipped with TODO. | JSON | `test_schema_evolution_streaming.py::test_schema_evolution_config_variants` |
| 🟢 | Avro SR with 2 topics, different schemas | v4 | v3 can't auto-create tables for Avro SR with topic2table.map; pre-created tables cause pipe invalidation on ALTER TABLE | Avro SR | `schema_evolution/test_se_avro_sr.py` |
| 🟢 | Auto table creation + SE (JSON, 2 topics) | dual | SE + auto-create triggers via structural error | JSON | `schema_evolution/test_se_auto_table_creation_json.py` |
| 🟢 | Auto table creation + SE (Avro SR, 2 topics) | v4 | Auto table creation is v4-only; v3 requires pre-existing tables | Avro SR | `schema_evolution/test_se_auto_table_creation_avro_sr.py` |
| 🟢 | Non-nullable columns + SE | dual | SE triggers via null-in-NOT-NULL path | JSON | `schema_evolution/test_se_nonnullable_json.py` |
| 🟢 | Tombstone handling during SE | dual | Asserts data values with SE | JSON | `schema_evolution/test_se_json_ignore_tombstone.py` |
| 🟢 | Random batch sizes (flush timing) | dual | Tests timing-sensitive SE path | JSON | `schema_evolution/test_se_random_row_count.py` |
| 🟢 | Nullable values after SMT + SE | dual | SE structural error path | JSON + SMT | `schema_evolution/test_se_nullable_values_after_smt.py` |

#### 3.3.2 Server-Side Schema Evolution (High-Throughput Mode)

When `snowflake.validation=server_side`, the connector does not perform client-side validation or DDL. Records go directly to the SSv2 SDK's `channel.appendRow()`. Schema evolution depends entirely on the Snowflake table property `ENABLE_SCHEMA_EVOLUTION = TRUE`.

Note: The connector source has no `MATCH_BY_COLUMN_NAME` or FDN-specific logic. "Server-side SE" means the Snowflake service handles schema mismatches for tables with `ENABLE_SCHEMA_EVOLUTION = TRUE`.

The `test_schema_evolution_config_variants` test already covers `evo=True, schema=True, valid=False` for v4 (server-side SE with schematization on). However, important gaps remain:

| Status | Test | Version | Format | Notes | Suggested File |
|:------:|------|---------|--------|-------|----------------|
| 🟢 | Server-side SE: new columns added (validation off, SE on) | v4 | JSON | Minimal coverage; config_variants covers this combo | `test_schema_evolution_ht.py` |
| 🔴 | Server-side SE: NOT NULL dropped + schematization off (parametrized) | v4 | JSON | **P1.** Two scenarios in one parametrized test: NOT NULL drop and schematization=off. config_variants skips the latter (TODO). | `test_schema_evolution_ht.py` |
| 🔴 | Server-side SE with Avro SR | v4 | Avro SR | **P2.** FR6. Avro provides explicit schema; server-side SE may behave differently than JSON. | `test_schema_evolution_ht.py` |
| 🔴 | Concurrent SE from multiple partitions | v4 | JSON | **P1.** Race condition in ALTER TABLE from multiple tasks. Cannot be caught by unit tests. | `test_schema_evolution_ht.py` |

---

### 3.4 RECORD_CONTENT Mode

`snowflake.enable.schematization=false` -- data lands in `RECORD_CONTENT` + `RECORD_METADATA` VARIANT columns (FR4).

`RECORD_CONTENT` is a VARIANT column. Validation mode (`snowflake.validation`) is irrelevant here — the entire payload goes into VARIANT with no type checking. The `snowflake.validation` config was removed from these test templates.

V3 parity was verified by running JSON, String, and ByteArray tests in dual mode (v3 + v4) on Confluent 7.8.0 (2026-03-31) — both versions produce identical results. Tests are now v4-only. Note that v3's own E2E tests (`SnowflakeSinkTaskForStreamingIT.java`) had **no RECORD_CONTENT value assertions** — they only checked row counts and RECORD_METADATA key presence (`offset`, `partition`). Our v4 tests are net-new coverage: field-level content verification, base64 encoding for bytes, and double-encoding edge case handling.

| Status | Test | Version | Rationale | Format | File |
|:------:|------|---------|-----------|--------|------|
| 🟢 | RECORD_CONTENT JSON (StringConverter key, JsonConverter value) | v4 | v3 parity confirmed (dual run 2026-03-31). Assertions capture v3 reference behavior. | JSON (native) | `test_snowpipe_streaming_legacy_string_json.py` |
| 🟢 | RECORD_CONTENT StringConverter (raw string payload) | v4 | v3 parity confirmed (dual run 2026-03-31). Assertions capture v3 reference behavior. | String | `test_snowpipe_streaming_legacy_string_converter.py` |
| 🟢 | RECORD_CONTENT ByteArrayConverter (base64 payload) | v4 | v3 parity confirmed (dual run 2026-03-31). Assertions capture v3 reference behavior. | Bytes | `test_snowpipe_streaming_legacy_byte_array_converter.py` |
| 🟢 | RECORD_CONTENT + Avro SR | v4 | v4 confirmed 2026-03-31. v3 parity cannot be verified: v3's bundled SR classes clash with Confluent 7.8.0 platform SR classes (ServiceConfigurationError). Assertions reflect expected Avro deserialization behavior. | Avro SR | `test_snowpipe_streaming_legacy_avro_sr.py` |
| 🔴 | RECORD_CONTENT + SMT (nullable values, ExtractField) | v4 | **P2.** Data values in VARIANT + SMT interaction. v3 equivalent: `TestSnowpipeStreamingNullableValuesAfterSmt` | JSON + SMT | -- |

---

### 3.5 Connector Lifecycle & Resilience

All tests send data in phases, performing disruptive operations between sends. These are v4-only: lifecycle operations (pause/resume/restart/delete) are Kafka Connect framework behavior, not SDK-dependent. The connector's interaction with the KC REST API is identical regardless of SSv1 vs SSv2.

> **Note on ingestion pattern**: Existing lifecycle tests send a batch, perform the disruptive operation, then send another batch. This "phase-based" approach may not sufficiently exercise interleaving — if all data lands within a single flush cycle, the disruption happens in a quiet window. New resilience tests (channel invalidation, backend errors, network partitions) should use a **continuous ingestion** pattern: a background producer sends records throughout the test while disruptions occur, ensuring the connector handles mid-flight interruptions.

#### 3.5.1 Lifecycle Tests (existing)

| Status | Test | Operation Sequence | Version | File |
|:------:|------|-------------------|---------|------|
| 🟢 | Restart (task + connector) | send -> restart -> send -> restart -> send | v4 | `test_kc_restart.py` |
| 🟢 | Delete -> Create (new connector, same name) | send -> delete -> create -> send | v4 | `test_kc_delete_create.py` |
| 🟢 | Delete -> Create + Chaos | send -> delete -> create (with failures) -> send | v4 | `test_kc_delete_create_chaos.py` |
| 🟢 | Delete -> Resume (new connector, inherits offsets) | send -> delete -> resume -> send | v4 | `test_kc_delete_resume.py` |
| 🟢 | Delete -> Resume + Chaos | send -> delete -> resume (with failures) -> send | v4 | `test_kc_delete_resume_chaos.py` |
| 🟢 | Pause -> Create (new connector while paused) | send -> pause -> create -> send | v4 | `test_kc_pause_create.py` |
| 🟢 | Pause -> Create + Chaos | send -> pause -> create (with failures) -> send | v4 | `test_kc_pause_create_chaos.py` |
| 🟢 | Pause -> Resume (same connector) | send -> pause -> resume -> send | v4 | `test_kc_pause_resume.py` |
| 🟢 | Pause -> Resume + Chaos | send -> pause -> resume (with failures) -> send | v4 | `test_kc_pause_resume_chaos.py` |
| 🟢 | Recreate (multiple delete/create cycles) | send -> delete -> recreate -> send x2 | v4 | `test_kc_recreate.py` |
| 🟢 | Recreate + Chaos | multiple cycles with failures | v4 | `test_kc_recreate_chaos.py` |

#### 3.5.2 CREATE OR REPLACE TABLE Recovery

`CREATE OR REPLACE TABLE` mid-stream causes v4 to silently lose data. v3 recovers because SSv1's `isClosed()` detects pipe invalidation. v4's SSv2 SDK does not surface the invalidation — `isClosed()` returns `false` and `appendRow()` succeeds (buffers locally), so the existing recovery path never triggers. **Root cause under investigation.**

| Status | Test | Version | Notes | File |
|:------:|------|---------|-------|------|
| 🔴 | Table replacement recovery (single topic, SE re-evolve) | v4 (xfail) | **P1.** Requires connector fix. Currently v3-only. | `schema_evolution/test_se_replace_table.py` |
| 🔴 | Table replacement recovery (multi-topic, SE re-evolve) | v4 (xfail) | **P1.** Same issue. Currently v3-only. | `schema_evolution/test_se_multi_topic_replace_table.py` |

#### 3.5.3 Fault Injection & Recovery (missing)

These tests should use continuous ingestion (background producer) to exercise mid-flight fault handling.

| Status | Test | Fault Type | Version | Notes |
|:------:|------|-----------|---------|-------|
| 🔴 | Channel invalidation recovery | Server-side channel drop; client must detect and re-open | v4 | **P2.** Verify no data loss after channel re-open under continuous load. Requires SSv2 server-side channel drop simulation -- hard to reproduce in Docker. |
| 🔴 | Transient server errors (5xx + 429) | Simulated 5xx errors and 429 throttling during ingestion | v4 | **P2.** Verify backoff/retry and eventual recovery. PR #1386 implemented offset-based backoff for 429; unit tests cover retry logic. E2E requires mock proxy. |
| 🔴 | Network partition tolerance | Temporary connectivity loss between KC worker and Snowflake | v4 | **P2.** Verify connector recovers after partition heals, no duplicate/lost records. Requires Docker network manipulation during continuous ingestion -- hard to make deterministic in CI. |

---

### 3.6 Default Pipe Features

FR5 (Default Pipes only) + FR7 (Default Pipe Improvements). Must be tested in both compatibility and high-throughput modes.

| Status | Test | Feature | Mode | Version | Suggested File |
|:------:|------|---------|------|---------|----------------|
| 🟢 | Auto-Increment (Identity) columns | FR7 | Compatibility | v4 | `test_default_pipe_features.py` |
| 🟢 | Auto-Increment (Identity) columns | FR7 | High-Throughput | v4 | `test_default_pipe_features.py` |
| 🟢 | Default timestamp properties | FR7 | Compatibility | v4 | `test_default_pipe_features.py` |
| 🟢 | Default timestamp properties | FR7 | High-Throughput | v4 | `test_default_pipe_features.py` |
| 🔴 | Pre-clustered tables | FR7 | Compatibility | v4 | `test_default_pipe_features.py` |
| 🔴 | Pre-clustered tables | FR7 | High-Throughput | v4 | `test_default_pipe_features.py` |

---

### 3.7 Load & Stress

> **Scope**: These are CI-level smoke/pressure tests that run in pre-commit. They verify the connector handles moderate load without failures but are not intended to represent production-scale benchmarking. Dedicated load and benchmarking tests exist separately for validating throughput at scale (e.g., 10 GB/s target for high-throughput mode).

| Status | Test | Scale | Version | File |
|:------:|------|-------|---------|------|
| 🟢 | Pressure: 200 topics x 12 partitions x 10K records (24M total) | High | v4 | `pressure/test_pressure_init.py` |
| 🟢 | Pressure + Restart: 10 topics x 3 partitions x 200K records with chaos ops | High | v4 | `pressure/test_pressure_restart.py` |

---

## 4. Data Type Compatibility

**Does v4 compatibility mode handle every Snowflake data type the same way v3 does?**

V4 client-side validation (`RowValidator` + `DataValidationUtil`, code copied from SSv1 SDK) runs before the SSv2 SDK. Server-side mode bypasses client validation entirely. Divergences occur when SSv2 handles a value differently than SSv1 did, and client-side normalization doesn't compensate.

Tests: `test_type_compatibility.py` (JSON, dual mode). Each test covers both positive cases (valid values land correctly) and negative cases (invalid values are routed to the DLQ).

| Target Data Type | v3 | v4 Client | v4 Server | Notes |
|---|:---:|:---:|:---:|---|
| NUMBER | 🟢 | 🟢 | 🟢 | |
| FLOAT | 🟢 | 🟢 | 🟢 | |
| VARCHAR | 🟢 | 🟢 | 🟢 | |
| BINARY (hex String input) | 🟢 | 🟢 | 🟡 | Server-side may interpret hex as base64, producing incorrect bytes |
| BOOLEAN | 🟢 | 🟢 | 🟢 | |
| BOOLEAN (Integer 0/1 input) | 🟢 | 🟢 | 🟡 | Server-side rejects Integer boolean values; rows not ingested |
| DATE | 🟢 | 🟢 | 🟢 | |
| TIME | 🟢 | 🟢 | 🟢 | |
| TIMESTAMP_NTZ | 🟢 | 🟢 | 🟢 | |
| TIMESTAMP_NTZ (Integer epoch) | 🟢 | 🟢 | 🟡 | Server-side shifts stored value by default timezone offset (~8h) |
| TIMESTAMP_LTZ | 🟢 | 🟢 | 🟢 | |
| TIMESTAMP_TZ | 🟢 | 🟢 | 🟢 | |
| VARIANT | 🟢 | 🟢 | 🟢 | |
| VARIANT (JSON String input) | 🟢 | 🟢 | 🟢 | |
| VARIANT (bare String input) | 🟢 | 🟢 | 🟡 | Server-side accepts invalid JSON scalars; client-side correctly rejects to DLQ |
| OBJECT | 🟢 | 🟢 | 🟢 | |
| ARRAY | 🟢 | 🟢 | 🟢 | |
| ARRAY (JSON String input) | 🟢 | 🟢 | 🟡 | Server-side wraps string as single-element array instead of parsing |
| NULL | 🟢 | 🟢 | 🟢 | |
| NULL (VARIANT column) | 🟢 | 🟡 | 🟡 | Stored as text `'null'` instead of SQL NULL |
| Cross-type mismatch | 🟢 | 🟢 | 🟢 | |
| GEOGRAPHY, GEOMETRY | 🟢 | 🟢 | 🟢 | Unsupported in Streaming; rejected in all modes |
| VECTOR | 🟡 | 🟢 | 🟢 | New in v4. Not supported in v3. |
| Structured OBJECT/ARRAY | 🟡 | 🟢 | 🟢 | New in v4. Not supported in v3. |
| Collated VARCHAR | 🔴 | 🔴 | 🔴 | Not tested. Unit-level coverage only. |

### Avro-Specific Type Mapping

Tests: `test_type_compatibility_avro.py` (Avro SR, v4-compat + v4-ht). V3 parity testing is blocked by the SR classloader conflict.

Avro provides typed values (native int, float, boolean, bytes, logical types) unlike schemaless JSON. The AvroConverter produces Kafka Connect Structs with schemas, testing a different pipeline path than JSON.

| Avro Type | Target Column | v4 Client | v4 Server | Notes |
|---|:---:|:---:|:---:|---|
| `int` | NUMBER | 🟢 | 🟢 | 32-bit typed integer |
| `long` | NUMBER | 🟢 | 🟢 | 64-bit typed integer |
| `float` | FLOAT | 🟢 | 🟢 | 32-bit; incl. NaN, Inf, -Inf as native floats |
| `double` | FLOAT | 🟢 | 🟢 | 64-bit; incl. NaN, Inf, -Inf |
| `string` | VARCHAR | 🟢 | 🟢 | |
| `boolean` | BOOLEAN | 🟢 | 🟢 | Native bool (no 0/1 coercion path) |
| `bytes` | BINARY | 🟢 | 🟢 | Raw bytes; RowValidator unwraps ByteBuffer to byte[] |
| `date` logical | DATE | 🟢 | 🟢 | Days-from-epoch via Avro logical type |
| `timestamp-millis` logical | TIMESTAMP_NTZ | 🟢 | 🟢 | Millis-from-epoch via logical type |
| `array` | ARRAY | 🟢 | 🟢 | Native Avro array |
| `map` | VARIANT | 🟢 | 🟢 | Avro map → VARIANT |
| null unions | various | 🟢 | 🟢 | Nullable union handling |
| `bytes` → VARCHAR | divergence | 🟢 | 🟡 | v4-compat rejects byte[]; v4-ht coerces to base64 |
| `bytes` → NUMBER | error | 🟢 | 🟢 | Cross-type: byte[] rejected (Avro-specific) |
| `float` NaN/Inf → NUMBER | error | 🟢 | 🟢 | Cross-type: native float NaN (Avro-specific) |
| `map`/`array` → BOOLEAN | error | 🟢 | 🟢 | Cross-type: typed complex → primitive |


================================================
FILE: test/README.md
================================================
# End-to-End Tests for Snowflake Kafka Connector

E2E tests spin up a Kafka cluster in Docker, send records, and verify they appear in Snowflake.

CI workflows: **`end-to-end.yaml`** (E2E) and **`end-to-end-stress.yml`** (stress tests) in `.github/workflows/`.

## Prerequisites

- Docker (with Docker Compose v2)
- Snowflake credentials (`profile.json`)
- Built connector plugin (run `build_runtime_jar.sh` first)

## Quick Start

```bash
# 1. Build the connector (from project root)
export SNOWFLAKE_CREDENTIAL_FILE=/path/to/profile.json
./test/build_runtime_jar.sh . package confluent   # or 'apache'

# 2. Run tests
cd test
./run_tests.sh --platform=confluent --platform-version=7.8.0
```

### Usage

```bash
./run_tests.sh --platform=<confluent|apache> --platform-version=<version> [options]

# Confluent examples:
./run_tests.sh --platform=confluent --platform-version=7.8.0
./run_tests.sh --platform=confluent --platform-version=6.2.15
./run_tests.sh --platform=confluent --platform-version=7.8.0 --tests=test_string_json

# Apache Kafka examples:
./run_tests.sh --platform=apache --platform-version=3.7.2
./run_tests.sh --platform=apache --platform-version=2.8.2 --java-version=11

# Options:
./run_tests.sh --platform=confluent --platform-version=7.8.0 -- -m pressure  # Stress tests
./run_tests.sh --platform=apache --platform-version=3.7.2 --keep             # Keep containers after tests
./run_tests.sh --platform=confluent --platform-version=7.8.0 --rebuild       # Force rebuild images
./run_tests.sh --platform=confluent --platform-version=7.8.0 --logs          # Show logs on failure
./run_tests.sh --platform=apache --platform-version=3.7.2 --cloud=AWS        # Target specific Snowflake cloud
```

### Supported Versions

**Confluent Platform:**
- `6.2.x` (e.g., 6.2.15)
- `7.x` (e.g., 7.6.0, 7.8.2)

**Apache Kafka:** Any version available as an official tarball (e.g., 2.8.2, 3.7.2)

## Architecture

The test environment uses layered Docker Compose files in `docker/`:

- `docker-compose.base.yml` -- test-runner container (shared by all platforms)
- `docker-compose.confluent.yml` -- Confluent Platform (Zookeeper, Kafka, Schema Registry, Kafka Connect as separate containers)
- `docker-compose.apache.yml` -- Apache Kafka (single container with embedded services)
- `docker-compose.amd64.yml` -- forces linux/amd64 emulation (Confluent 6.2.x on ARM)

### Confluent Platform

```
┌────────────────────────────────────────────────────────────┐
│  zookeeper   │    kafka     │schema-registry│kafka-connect │
│   :2181      │   :9092      │    :8081      │    :8083     │
├──────────────┴──────────────┴───────────────┴──────────────┤
│                      test-runner                           │
│              (Python + protobuf + tests)                   │
└────────────────────────────────────────────────────────────┘
```

### Apache Kafka

```
┌────────────────────────────────────────────────────────────┐
│                        kafka                               │
│    (Zookeeper + Kafka + Connect + Schema Registry)         │
│     :2181       :9092   :8083     :8081                    │
├────────────────────────────────────────────────────────────┤
│                      test-runner                           │
│              (Python + protobuf + tests)                   │
└────────────────────────────────────────────────────────────┘
```

### How E2E Tests Work

There are two test infrastructures that run side by side:

**Pytest (primary)** -- New tests live in `tests/` as standard pytest modules.
Fixtures in `conftest.py` handle connector lifecycle, table creation, and cleanup
(including Kafka topic deletion). The runner script passes connection addresses and
platform version as CLI options; see `conftest.py` for the full list.

**Legacy infra** -- Older tests live in `test_suit/` as classes with
`send`/`verify`/`clean` methods. They are orchestrated by `test_verify.py` via
`test_executor.py`. Connector config templates in `rest_request_template/` have a
one-to-one correspondence with these classes (e.g., `travis_correct_json_json.json`
maps to `test_suit/test_json_json.py`). The driver replaces placeholder values
(e.g., `SNOWFLAKE_TEST_TOPIC`, `CONFLUENT_SCHEMA_REGISTRY`) with runtime values.

Both share the same `KafkaDriver` (`lib/driver.py`) and connector config templates.
`run_tests.sh` passes all pytest CLI options explicitly when launching the
test-runner container.

## Stress Tests

Stress tests use the same Docker infrastructure but with `-m pressure` passed to pytest:

```bash
./run_tests.sh --platform=confluent --platform-version=7.6.0 -- -m pressure
```

CI stress (`.github/workflows/end-to-end-stress.yml`) sets `CONNECT_OFFSET_FLUSH_INTERVAL_MS=10000` on the `run-e2e-tests` step so Kafka Connect uses a **10 second** `offset.flush.interval.ms` (less frequent `preCommit` than the default 1 s E2E setting). For local pressure runs, export the same variable before `./run_tests.sh` if you want that behavior.

When `-m pressure` is set, pytest selects only the pressure-marked tests:

1. **test_pressure_restart** (`tests/pressure/test_pressure_restart.py`) -- Creates 10 topics with 3 partitions each and sends 200,000 records per partition. During verification, the connector is periodically restarted, paused, resumed, and deleted/recreated to test resilience under load.

2. **test_pressure_init** (`tests/pressure/test_pressure_init.py`) -- Creates 200 topics with 12 partitions each and sends 10,000 records per partition (2,400 partitions, 24M records total). Sends are parallelized across 10 threads.

Both tests verify that the exact expected row count appears in Snowflake.

## Debugging

### View logs
```bash
docker logs -f test-kafka-connect
docker logs -f test-kafka
```

### Check connector status
```bash
curl http://localhost:8083/connectors
curl http://localhost:8083/connectors/<name>/status
```

### Manual cleanup
```bash
cd test/docker
docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml down -v --remove-orphans
```

## Directory Structure

```
test/
  tests/                      Pytest test modules (new tests go here)
  conftest.py                 Pytest fixtures and CLI options
  pyproject.toml              Pytest configuration
  lib/                        Shared helpers (KafkaDriver, config, crypto)
  rest_request_template/      Connector config templates (one per test case)
  test_suit/                  Legacy E2E test classes (send/verify/clean)
  test_data/                  Protobuf schema and generated code
  docker/                     Docker Compose files, Dockerfiles, and test runner scripts
  apache_properties/          Kafka/Zookeeper/Connect config (used by Apache Docker image)
  build_runtime_jar.sh        Builds connector JAR/ZIP
  test_verify.py              Legacy E2E test entry point
  test_suites.py              Legacy test suite registry
  test_selector.py            Legacy test filtering logic
  test_executor.py            Legacy test execution engine
```


================================================
FILE: test/__init__.py
================================================


================================================
FILE: test/apache_properties/connect-distributed.properties
================================================
##
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##

# This file contains some of the configurations for the Kafka Connect distributed worker. This file is intended
# to be used with the examples, and some settings may differ from those used in a production system, especially
# the `bootstrap.servers` and those specifying replication factors.

# A list of host/port pairs to use for establishing the initial connection to the Kafka cluster.
bootstrap.servers=localhost:9092

# Default is 300000 ms (5 minutes).
# Maximum allowed delay between two consecutive poll() calls. If poll() is not invoked within this time, a rebalance can occur.
max.poll.interval.ms=360000

# default is 500 records
max.poll.records=100

# unique name for the cluster, used in forming the Connect cluster group. Note that this must not conflict with consumer group IDs
group.id=connect-cluster

# The converters specify the format of data in Kafka and how to translate it into Connect data. Every Connect user will
# need to configure these based on the format they want their data in when loaded from or stored into Kafka
key.converter=org.apache.kafka.connect.json.JsonConverter
value.converter=org.apache.kafka.connect.json.JsonConverter
# Converter-specific settings can be passed in by prefixing the Converter's setting with the converter we want to apply
# it to
key.converter.schemas.enable=true
value.converter.schemas.enable=true

# Topic to use for storing offsets. This topic should have many partitions and be replicated and compacted.
# Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create
# the topic before starting Kafka Connect if a specific topic configuration is needed.
# Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value.
# Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able
# to run this example on a single-broker cluster and so here we instead set the replication factor to 1.
offset.storage.topic=connect-offsets
offset.storage.replication.factor=1
#offset.storage.partitions=25

# Topic to use for storing connector and task configurations; note that this should be a single partition, highly replicated,
# and compacted topic. Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create
# the topic before starting Kafka Connect if a specific topic configuration is needed.
# Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value.
# Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able
# to run this example on a single-broker cluster and so here we instead set the replication factor to 1.
config.storage.topic=connect-configs
config.storage.replication.factor=1

# Topic to use for storing statuses. This topic can have multiple partitions and should be replicated and compacted.
# Kafka Connect will attempt to create the topic automatically when needed, but you can always manually create
# the topic before starting Kafka Connect if a specific topic configuration is needed.
# Most users will want to use the built-in default replication factor of 3 or in some cases even specify a larger value.
# Since this means there must be at least as many brokers as the maximum replication factor used, we'd like to be able
# to run this example on a single-broker cluster and so here we instead set the replication factor to 1.
status.storage.topic=connect-status
status.storage.replication.factor=1
#status.storage.partitions=5

# Flush much faster than normal, which is useful for testing/debugging
offset.flush.interval.ms=1000

# These are provided to inform the user about the presence of the REST host and port configs 
# Hostname & Port for the REST API to listen on. If this is set, it will bind to the interface used to listen to requests.
#rest.host.name=
rest.port=8083

# The Hostname & Port that will be given out to other workers to connect to i.e. URLs that are routable from other servers.
#rest.advertised.host.name=
#rest.advertised.port=

# Set to a list of filesystem paths separated by commas (,) to enable class loading isolation for plugins
# (connectors, converters, transformations). The list should consist of top level directories that include 
# any combination of: 
# a) directories immediately containing jars with plugins and their dependencies
# b) uber-jars with plugins and their dependencies
# c) directories immediately containing the package directory structure of classes of plugins and their dependencies
# Examples: 
# plugin.path=/usr/local/share/java,/usr/local/share/kafka/plugins,/opt/connectors,
plugin.path=/usr/local/share/kafka/plugins

config.providers=file
config.providers.file.class=org.apache.kafka.common.config.provider.FileConfigProvider

# Allow connectors to override consumer/producer configs (e.g. consumer.override.auto.offset.reset)
connector.client.config.override.policy=All


================================================
FILE: test/apache_properties/file-secrets.txt
================================================
PASSPHRASE=test

================================================
FILE: test/apache_properties/kraft-server.properties
================================================
# KRaft mode server configuration for Apache Kafka 4.x+
# Combined broker + controller on a single node (no ZooKeeper)

############################# KRaft Settings #############################

process.roles=broker,controller
node.id=1
controller.quorum.voters=1@localhost:9093
controller.listener.names=CONTROLLER
listener.security.protocol.map=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT

############################# Socket Server Settings #############################

listeners=PLAINTEXT://:9092,CONTROLLER://:9093
inter.broker.listener.name=PLAINTEXT

num.network.threads=10
num.io.threads=8

socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600

############################# Message Size Settings #############################

# 30 MiB - matches ZK-based server.properties
message.max.bytes=31457280
replica.fetch.max.bytes=31457280

############################# Log Basics #############################

log.dirs=/tmp/kraft-combined-logs
num.partitions=1
num.recovery.threads.per.data.dir=1

############################# Internal Topic Settings  #############################

offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1

############################# Log Retention Policy #############################

log.retention.hours=168
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000

############################# Group Coordinator Settings #############################

group.initial.rebalance.delay.ms=0


================================================
FILE: test/apache_properties/schema-registry.properties
================================================
#
# Copyright 2018 Confluent Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# The address the socket server listens on.
#   FORMAT:
#     listeners = listener_name://host_name:port
#   EXAMPLE:
#     listeners = PLAINTEXT://your.host.name:9092
listeners=http://0.0.0.0:8081

# Zookeeper connection string for the Zookeeper cluster used by your Kafka cluster
# (see zookeeper docs for details).
# This is a comma-separated list of host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
#kafkastore.connection.url=localhost:2181

# Alternatively, Schema Registry can now operate without Zookeeper, handling all coordination via
# Kafka brokers. Use this setting to specify the bootstrap servers for your Kafka cluster and it
# will be used both for selecting the master schema registry instance and for storing the data for
# registered schemas.
# (Note that you cannot mix the two modes; use this mode only on new deployments or by shutting down
# all instances, switching to the new configuration, and then starting the schema registry
# instances again.)
kafkastore.bootstrap.servers=PLAINTEXT://localhost:9092

# The name of the topic to store schemas in
kafkastore.topic=_schemas

# If true, API requests that fail will include extra debugging information, including stack traces
debug=false


================================================
FILE: test/apache_properties/server.properties
================================================
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# see kafka.server.KafkaConfig for additional details and defaults

############################# Server Basics #############################

# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0

############################# Socket Server Settings #############################

# The address the socket server listens on. It will get the value returned from 
# java.net.InetAddress.getCanonicalHostName() if not configured.
#   FORMAT:
#     listeners = listener_name://host_name:port
#   EXAMPLE:
#     listeners = PLAINTEXT://your.host.name:9092
# Uncomment the following line to run tests on local Mac
#listeners=PLAINTEXT://localhost:9092

# Hostname and port the broker will advertise to producers and consumers. If not set, 
# it uses the value for "listeners" if configured.  Otherwise, it will use the value
# returned from java.net.InetAddress.getCanonicalHostName().
#advertised.listeners=PLAINTEXT://your.host.name:9092

# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL

# The number of threads that the server uses for receiving requests from the network and sending responses to the network
num.network.threads=10

# The number of threads that the server uses for processing requests, which may include disk I/O
num.io.threads=8

# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400

# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400

# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600

############################# Message Size Settings #############################

# Increasing max message size to test large messages with Kafka Connector.
# 30 MiB
message.max.bytes=31457280
replica.fetch.max.bytes=31457280

############################# Log Basics #############################

# A comma separated list of directories under which to store log files
log.dirs=/tmp/kafka-logs

# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions=1

# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
# This value is recommended to be increased for installations with data dirs located in RAID array.
num.recovery.threads.per.data.dir=1

############################# Internal Topic Settings  #############################
# The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state"
# For anything other than development testing, a value greater than 1 (such as 3) is recommended to ensure availability.
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1

############################# Log Flush Policy #############################

# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
#    1. Durability: Unflushed data may be lost if you are not using replication.
#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.

# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000

# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000

############################# Log Retention Policy #############################

# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
# from the end of the log.

# The minimum age of a log file to be eligible for deletion due to age
log.retention.hours=168

# A size-based retention policy for logs. Segments are pruned from the log unless the remaining
# segments drop below log.retention.bytes. Functions independently of log.retention.hours.
#log.retention.bytes=1073741824

# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=1073741824

# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.retention.check.interval.ms=300000

############################# Zookeeper #############################

# Zookeeper connection string (see zookeeper docs for details).
# This is a comma-separated list of host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
zookeeper.connect=localhost:2181

# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=18000


############################# Group Coordinator Settings #############################

# The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance.
# The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms.
# The default value for this is 3 seconds.
# We override this to 0 here as it makes for a better out-of-the-box experience for development and testing.
# However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup.
group.initial.rebalance.delay.ms=0


================================================
FILE: test/apache_properties/zookeeper.properties
================================================
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
# 
#    http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# the directory where the snapshot is stored.
dataDir=/tmp/zookeeper
# the port at which the clients will connect
clientPort=2181
# disable the per-ip limit on the number of connections since this is a non-production config
maxClientCnxns=0
# Disable the adminserver by default to avoid port conflicts.
# Set the port to something non-conflicting if choosing to enable this
admin.enableServer=false
# admin.serverPort=8080


================================================
FILE: test/build_image.sh
================================================
#!/bin/bash

# exit on error
set -e

# error printing function
#   $1 - message to write to stderr before aborting with exit status 1
function error_exit() {
    # Quote the message so embedded whitespace and glob characters are
    # printed verbatim instead of being word-split / expanded by the shell.
    printf '%s\n' "$1" >&2
    exit 1
}

# check argument number is 1 or 2 or 3
if [ $# -gt 3 ] || [ $# -lt 1 ]; then
    error_exit "Usage: ./build_image.sh <version> [<path to snowflake repo>] [verify/package/none] .  Aborting."
fi

KAFKA_CONNECT_TAG=$1
SNOWFLAKE_CONNECTOR_PATH=$2
# Default build method is verify
BUILD_METHOD="${3:-verify}"

# check if connector path is set or checkout from github master
if [[ -z "${SNOWFLAKE_CONNECTOR_PATH}" ]]; then
    # Always re-pull code from github, no one should develop under the test_script folder
    echo -e "\n=== path to snowflake connector repo is not set, clone snowflake-kafka-connector from github and build ==="
    SNOWFLAKE_CONNECTOR_PATH="./snowflake-kafka-connector"
    echo -e "\n=== $SNOWFLAKE_CONNECTOR_PATH will be force deleted ==="
    rm -rf -- "$SNOWFLAKE_CONNECTOR_PATH"
    mkdir "$SNOWFLAKE_CONNECTOR_PATH"
    git clone https://github.com/snowflakedb/snowflake-kafka-connector "$SNOWFLAKE_CONNECTOR_PATH"
fi

# check if the provided snowflake connector folder exist
# (quoted so that a path containing spaces is tested as a single word)
if [ ! -d "$SNOWFLAKE_CONNECTOR_PATH" ]; then
    error_exit "Provided path to snowflake connector repo $SNOWFLAKE_CONNECTOR_PATH does not exist.  Aborting."
fi

# require the environment variable for credentials
if [[ -z "${SNOWFLAKE_CREDENTIAL_FILE}" ]]; then
    error_exit "Require environment variable SNOWFLAKE_CREDENTIAL_FILE but it's not set.  Aborting."
fi

if [ ! -f "$SNOWFLAKE_CREDENTIAL_FILE" ]; then
    error_exit "Provided SNOWFLAKE_CREDENTIAL_FILE $SNOWFLAKE_CREDENTIAL_FILE does not exist.  Aborting."
fi

# check required commands
for required_cmd in docker minikube mvn; do
    command -v "$required_cmd" >/dev/null 2>&1 || error_exit "Require $required_cmd but it's not installed.  Aborting."
done

# match all versions of built SF connector (including release candidates like rc1)
SNOWFLAKE_PLUGIN_NAME_REGEX="snowflake-kafka-connector-[0-9]*\.[0-9]*\.[0-9]*(-rc[0-9]+)?\.jar$"
SNOWFLAKE_PLUGIN_PATH="$SNOWFLAKE_CONNECTOR_PATH/target"

SNOWFLAKE_DOCKER_IMAGE="snowflakedb/kc-dev-build"
SNOWFLAKE_TAG="dev"
KAFKA_CONNECT_DOCKER_IMAGE="confluentinc/cp-kafka-connect"
KAFKA_CONNECT_PLUGIN_PATH="/usr/share/confluent-hub-components"
KAFKA_CONNECT_PLUGIN_PATH_5_0_0="/usr/share/java"

DEV_CONTAINER_NAME="snow-dev-build"

# bind minikube to local docker image repo
if ! minikube status; then
    echo -e "\n=== minikube not running, try to start ==="
    minikube config set memory 8192
    minikube config set cpus 4
    minikube config set disk-size 20000MB
    minikube start
fi
eval "$(minikube docker-env)"

# copy credential to SNOWFLAKE_CONNECTOR_PATH (best effort).
# Both paths are quoted: the credential file was validated above with a quoted
# test, so an unquoted copy would silently fail for a path containing spaces.
cp -rf "$SNOWFLAKE_CREDENTIAL_FILE" "$SNOWFLAKE_CONNECTOR_PATH" || true

# build and test the local repo
pushd "$SNOWFLAKE_CONNECTOR_PATH"
case "$BUILD_METHOD" in
    verify)
        mvn clean
        mvn verify -Dgpg.skip=true
        ;;
    package)
        mvn clean
        mvn package -Dgpg.skip=true
        ;;
    none)
        echo -e "\n=== skip building, please make sure built connector exist ==="
        ;;
    *)
        error_exit "Usage: ./build_image.sh <version> [<path to snowflake repo>] [verify/package/none] . Unknown build method $BUILD_METHOD.  Aborting."
        ;;
esac
popd

# get built connector jar name
# only match the first line
SNOWFLAKE_PLUGIN_NAME=$(ls "$SNOWFLAKE_PLUGIN_PATH" | grep -E "$SNOWFLAKE_PLUGIN_NAME_REGEX" | head -n 1)
echo -e "\n=== built connector name: $SNOWFLAKE_PLUGIN_NAME ==="

# Without a jar name the docker cp below would copy the whole target directory
# into the image, so stop here with a clear message instead.
if [[ -z "${SNOWFLAKE_PLUGIN_NAME}" ]]; then
    error_exit "No built connector jar found in $SNOWFLAKE_PLUGIN_PATH.  Aborting."
fi

# download Kafka connect docker image
echo -e "\n=== pull image from $KAFKA_CONNECT_DOCKER_IMAGE:$KAFKA_CONNECT_TAG ==="
docker pull "$KAFKA_CONNECT_DOCKER_IMAGE:$KAFKA_CONNECT_TAG"

# clean up
# Run docker rm directly (best effort). Wrapping it in $(...) would try to
# execute its output (the removed container's name) as a command.
echo -e "\n=== try to delete container $DEV_CONTAINER_NAME if it exist ==="
docker rm "$DEV_CONTAINER_NAME" >/dev/null 2>&1 || true

# copy built jar file to kafka connect image
echo -e "\n=== create docker container ==="
docker create --name "$DEV_CONTAINER_NAME" "$KAFKA_CONNECT_DOCKER_IMAGE:$KAFKA_CONNECT_TAG"

# Try the current plugin directory first, then fall back to the 5.0.0 location.
echo -e "\n=== copy built snowflake plugin into container ==="
docker cp "$SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME" "$DEV_CONTAINER_NAME:$KAFKA_CONNECT_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME" || \
docker cp "$SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME" "$DEV_CONTAINER_NAME:$KAFKA_CONNECT_PLUGIN_PATH_5_0_0/$SNOWFLAKE_PLUGIN_NAME"

echo -e "\n=== commit the modified container to snowflake image ==="
docker commit "$DEV_CONTAINER_NAME" "$SNOWFLAKE_DOCKER_IMAGE:$SNOWFLAKE_TAG"

# no need to push to docker hub since k8s can use local image
# push the image to our docker hub
# echo -e "\n=== push snowflake image to docker hub ==="
# docker push $SNOWFLAKE_DOCKER_IMAGE:$SNOWFLAKE_TAG

# clean up
echo -e "\n=== delete container $DEV_CONTAINER_NAME ==="
docker rm "$DEV_CONTAINER_NAME"

# copy the jar to plugin path for apache kafka
APACHE_KAFKA_CONNECT_PLUGIN_PATH="/usr/local/share/kafka/plugins"
mkdir -m 777 -p "$APACHE_KAFKA_CONNECT_PLUGIN_PATH" || \
sudo mkdir -m 777 -p "$APACHE_KAFKA_CONNECT_PLUGIN_PATH"
# The copy stays best effort, but the outcome is reported truthfully.
if cp "$SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME" "$APACHE_KAFKA_CONNECT_PLUGIN_PATH"; then
    echo -e "\n=== copied connector to $APACHE_KAFKA_CONNECT_PLUGIN_PATH ==="
else
    echo >&2 "WARNING: failed to copy connector to $APACHE_KAFKA_CONNECT_PLUGIN_PATH"
fi

================================================
FILE: test/build_runtime_jar.sh
================================================
#!/bin/bash

# exit on error
set -e
set -x

# error printing function
#   $1 - message to write to stderr before aborting with exit status 1
function error_exit() {
    # Quote the message so embedded whitespace, newlines and glob characters
    # are printed verbatim instead of being word-split / expanded by the shell.
    printf '%s\n' "$1" >&2
    exit 1
}

# check argument number is between 1 and 4
if [ $# -gt 4 ] || [ $# -lt 1 ]; then
    error_exit "Usage: ./build_runtime_jar.sh [<path to snowflake repo>] [verify/package/none] [apache/confluent] [AWS/AZURE/GCP].
    Default values are: verify, apache, AWS. Exiting script"
fi

SNOWFLAKE_CONNECTOR_PATH=$1
# Default build method is verify
BUILD_METHOD="${2:-verify}"
BUILD_FOR_RUNTIME=$3
# Some of the integration tests use cloud vendor specific resources; default is AWS
BUILD_FOR_CLOUD="${4:-AWS}"

if [[ $BUILD_FOR_RUNTIME == "confluent" ]]; then
    POM_FILE_NAME="pom_confluent.xml"
else
    # Default build target is for Apache
    BUILD_FOR_RUNTIME="apache"
    POM_FILE_NAME="pom.xml"
fi

# check if connector path is set or checkout from github master
if [[ -z "${SNOWFLAKE_CONNECTOR_PATH}" ]]; then
    # Always re-pull code from github, no one should develop under the test_script folder
    echo -e "\n=== path to snowflake connector repo is not set, clone snowflake-kafka-connector from github and build ==="
    SNOWFLAKE_CONNECTOR_PATH="./snowflake-kafka-connector"
    echo -e "\n=== $SNOWFLAKE_CONNECTOR_PATH will be force deleted ==="
    rm -rf -- "$SNOWFLAKE_CONNECTOR_PATH"
    mkdir "$SNOWFLAKE_CONNECTOR_PATH"
    git clone https://github.com/snowflakedb/snowflake-kafka-connector "$SNOWFLAKE_CONNECTOR_PATH"
fi

# check if the provided snowflake connector folder exist
# (quoted so that a path containing spaces is tested as a single word)
if [ ! -d "$SNOWFLAKE_CONNECTOR_PATH" ]; then
    error_exit "Provided path to snowflake connector repo $SNOWFLAKE_CONNECTOR_PATH does not exist.  Aborting."
fi

# require the environment variable for credentials
if [[ -z "${SNOWFLAKE_CREDENTIAL_FILE}" ]]; then
    error_exit "Require environment variable SNOWFLAKE_CREDENTIAL_FILE but it's not set.  Aborting."
fi

if [ ! -f "$SNOWFLAKE_CREDENTIAL_FILE" ]; then
    error_exit "Provided SNOWFLAKE_CREDENTIAL_FILE $SNOWFLAKE_CREDENTIAL_FILE does not exist.  Aborting."
fi

# check required commands
command -v mvn >/dev/null 2>&1 || error_exit "Require mvn but it's not installed.  Aborting."

# match all versions of built SF connector (including release candidates like rc1)
SNOWFLAKE_PLUGIN_NAME_REGEX="snowflake-kafka-connector-[0-9]*\.[0-9]*\.[0-9]*(-rc[0-9]+)?\.jar$"
SNOWFLAKE_PLUGIN_PATH="$SNOWFLAKE_CONNECTOR_PATH/target"

KAFKA_CONNECT_PLUGIN_PATH="/usr/local/share/kafka/plugins"

# copy credential to SNOWFLAKE_CONNECTOR_PATH (best effort).
# Both paths are quoted: the credential file was validated above with a quoted
# test, so an unquoted copy would silently fail for a path containing spaces.
cp -rf "$SNOWFLAKE_CREDENTIAL_FILE" "$SNOWFLAKE_CONNECTOR_PATH" || true

echo "Building Jar for Runtime: $BUILD_FOR_RUNTIME"

# build and test the local repo
pushd "$SNOWFLAKE_CONNECTOR_PATH"
case "$BUILD_METHOD" in
    verify)
        # clean the target directory first, using the pom selected for this runtime
        mvn -f "$POM_FILE_NAME" clean

        # skip Iceberg tests outside of AWS: pick the matching maven profile
        if [[ $BUILD_FOR_CLOUD == "AWS" ]]; then
            echo "Running integration tests against AWS cloud"
            MAVEN_PROFILE="aws"
        else
            echo "Running integration tests against non-AWS cloud"
            MAVEN_PROFILE="non-aws"
        fi
        mvn -f "$POM_FILE_NAME" verify -Dgpg.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120 -P "$MAVEN_PROFILE"
        ;;
    package)
        # clean the target directory first, using the pom selected for this runtime
        mvn -f "$POM_FILE_NAME" clean
        # mvn package with pom_confluent runs the kafka-connect-maven-plugin which creates a zip file
        # More information: https://docs.confluent.io/platform/current/connect/kafka-connect-maven-plugin/site/plugin-info.html
        mvn -f "$POM_FILE_NAME" package -Dgpg.skip=true -DskipTests -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.httpconnectionManager.ttlSeconds=120
        ;;
    none)
        echo -e "\n=== skip building, please make sure built connector exist ==="
        ;;
    *)
        error_exit "Usage: ./build_runtime_jar.sh [<path to snowflake repo>] [verify/package/none] [apache/confluent] [AWS/AZURE/GCP] . Unknown build method $BUILD_METHOD.  Aborting."
        ;;
esac
popd

# get built connector jar name
# only match the first line
SNOWFLAKE_PLUGIN_NAME=$(ls "$SNOWFLAKE_PLUGIN_PATH" | grep -E "$SNOWFLAKE_PLUGIN_NAME_REGEX" | head -n 1)
echo -e "\nbuilt connector name: $SNOWFLAKE_PLUGIN_NAME"
if [[ -z "${SNOWFLAKE_PLUGIN_NAME}" ]]; then
    # The jar copies below are best effort, so only warn; they will report their own failure.
    echo >&2 "WARNING: no built connector jar found in $SNOWFLAKE_PLUGIN_PATH"
fi

mkdir -m 777 -p "$KAFKA_CONNECT_PLUGIN_PATH" || \
sudo mkdir -m 777 -p "$KAFKA_CONNECT_PLUGIN_PATH"

if [[ $BUILD_FOR_RUNTIME == "confluent" ]]; then
    # For confluent, copy the zip file and unzip it later
    echo "For confluent RUNTIME: Copying Kafka Connect Maven Generated Zip file to a temporary location"
    # The glob must stay outside the quotes so the shell expands it.
    cp "$SNOWFLAKE_PLUGIN_PATH"/components/packages/snowflakeinc-snowflake-kafka-connector-*.zip /tmp/sf-kafka-connect-plugin.zip
    ls /tmp/sf-kafka-connect-plugin*
else
    # Apache Kafka
    # Only copy built connector to plugin path (best effort, outcome reported truthfully)
    if cp "$SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME" "$KAFKA_CONNECT_PLUGIN_PATH"; then
        echo -e "copied SF Plugin Connector to $KAFKA_CONNECT_PLUGIN_PATH"
    else
        echo >&2 "WARNING: failed to copy SF Plugin Connector to $KAFKA_CONNECT_PLUGIN_PATH"
    fi
fi

KAFKA_CONNECT_DOCKER_JAR_PATH="$SNOWFLAKE_CONNECTOR_PATH/docker-setup/snowflake-kafka-docker/jars"
mkdir -m 777 -p "$KAFKA_CONNECT_DOCKER_JAR_PATH"
if cp "$SNOWFLAKE_PLUGIN_PATH/$SNOWFLAKE_PLUGIN_NAME" "$KAFKA_CONNECT_DOCKER_JAR_PATH"; then
    echo -e "copied connector to $KAFKA_CONNECT_DOCKER_JAR_PATH for docker"
else
    echo >&2 "WARNING: failed to copy connector to $KAFKA_CONNECT_DOCKER_JAR_PATH for docker"
fi

================================================
FILE: test/conftest.py
================================================
import logging
import os
import sys
import time
from typing import Dict, List

import pytest
from _pytest.reports import TestReport

from lib.config_migration import v4_config_to_v3
from lib.driver import KafkaDriver
from lib.fixtures.session import (  # noqa: F401 — re-exported for pytest discovery
    sensor_pb2,
    credentials_unsalted,
    session_name_salt,
    test_schema,
    credentials,
    driver,
)
from lib.fixtures.connector import (  # noqa: F401
    create_topics,
    create_connector,
    create_custom_connector,
)
from lib.fixtures.table import (  # noqa: F401
    create_table,
    snowflake_table,
    create_iceberg_table,
    iceberg_external_volume,
)
from lib.fixtures.function import connector_version, name_salt  # noqa: F401

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Custom CLI options
# ---------------------------------------------------------------------------


def pytest_addoption(parser):
    """Add the ``kafka-e2e`` option group to the pytest command line.

    Each option's default is read from an environment variable, so a
    container whose compose file already exports those variables can run
    the suite without passing any of the flags explicitly.
    """
    e2e_group = parser.getgroup("kafka-e2e", "Kafka connector end-to-end test options")
    env = os.environ.get

    # (flag, keyword arguments) pairs, registered in this order.
    option_specs = [
        (
            "--kafka-address",
            {
                "default": env("KAFKA_BOOTSTRAP_SERVERS"),
                "help": "Kafka bootstrap server address (env: KAFKA_BOOTSTRAP_SERVERS)",
            },
        ),
        (
            "--schema-registry-address",
            {
                "default": env("SCHEMA_REGISTRY_URL", ""),
                "help": "Schema registry URL (env: SCHEMA_REGISTRY_URL)",
            },
        ),
        (
            "--kafka-connect-address",
            {
                "default": env("KAFKA_CONNECT_ADDRESS"),
                "help": "Kafka Connect REST address (env: KAFKA_CONNECT_ADDRESS)",
            },
        ),
        (
            "--platform",
            {
                "choices": ["confluent", "apache"],
                "default": env("KAFKA_PLATFORM"),
                "help": "Kafka platform: 'confluent' or 'apache' (env: KAFKA_PLATFORM)",
            },
        ),
        (
            "--platform-version",
            {
                "default": env("KAFKA_PLATFORM_VERSION"),
                "help": "Kafka / Confluent platform version (env: KAFKA_PLATFORM_VERSION)",
            },
        ),
        (
            "--name-salt",
            {
                "default": env("TEST_NAME_SALT"),
                "help": "Unique salt appended to connector and topic names (env: TEST_NAME_SALT, auto-generated if omitted)",
            },
        ),
        # currently unused, all tests run on all clouds
        (
            "--cloud",
            {
                "choices": ["AWS", "GCP", "AZURE"],
                "default": env("SF_CLOUD_PLATFORM"),
                "help": "Snowflake cloud platform: AWS, GCP, or AZURE (env: SF_CLOUD_PLATFORM)",
            },
        ),
        (
            "--enable-ssl",
            {
                "action": "store_true",
                # Any of "true" / "1" / "yes" (case-insensitive) switches SSL on.
                "default": env("ENABLE_SSL", "").lower() in ("true", "1", "yes"),
                "help": "Enable SSL for Kafka connections (env: ENABLE_SSL)",
            },
        ),
    ]

    for flag, option_kwargs in option_specs:
        e2e_group.addoption(flag, **option_kwargs)


# Options that must resolve to a value (via CLI flag or environment variable).
# Maps each CLI flag to the environment variable named in the "missing
# configuration" error raised by pytest_configure.
_REQUIRED_OPTIONS = {
    "--kafka-address": "KAFKA_BOOTSTRAP_SERVERS",
    "--kafka-connect-address": "KAFKA_CONNECT_ADDRESS",
    "--platform": "KAFKA_PLATFORM",
    "--platform-version": "KAFKA_PLATFORM_VERSION",
}


def pytest_configure(config):
    """Validate the run configuration and register the suite's custom markers.

    Raises:
        pytest.UsageError: if ``--name-salt`` is an empty string, or if any
            option listed in ``_REQUIRED_OPTIONS`` resolved to ``None``.
    """
    # An empty salt silently resolves to the unsalted schema name, which is
    # shared with Java integration tests.  Dropping it would break those runs.
    if config.getoption("--name-salt") == "":
        raise pytest.UsageError(
            "--name-salt / TEST_NAME_SALT must not be empty "
            "(omit it entirely to auto-generate, or provide a non-empty value)"
        )

    marker_descriptions = (
        "schema_evolution: schema evolution e2e tests (FR6)",
        "compatibility: v3/v4 dual-version compatibility tests",
        "correctness: connector correctness tests (schema mapping, DLQ, multi-topic)",
        "confluent_only: requires Confluent platform (schema registry)",
        "pressure: load / stress tests",
        "iceberg: iceberg table tests (requires ICEBERG_EXTERNAL_VOLUME)",
    )
    for description in marker_descriptions:
        config.addinivalue_line("markers", description)

    # Validate required options (set via CLI or env var)
    absent = [
        f"  {flag}  (or env {env_name})"
        for flag, env_name in _REQUIRED_OPTIONS.items()
        if config.getoption(flag) is None
    ]
    if absent:
        raise pytest.UsageError(
            "Missing required configuration:\n" + "\n".join(absent)
        )


def pytest_collection_modifyitems(config, items):
    """Mark ``confluent_only`` tests as skipped unless ``--platform`` is ``confluent``."""
    if config.getoption("--platform") != "confluent":
        skip_marker = pytest.mark.skip(
            reason="requires Confluent platform (schema registry)"
        )
        confluent_only_items = (
            candidate for candidate in items if "confluent_only" in candidate.keywords
        )
        for test_item in confluent_only_items:
            test_item.add_marker(skip_marker)


@pytest.fixture()
def create_connector_from_file(
    driver: KafkaDriver,  # noqa: F811
    name_salt: str,  # noqa: F811
    connector_version: str,  # noqa: F811
):
    """DEPRECATED

    Factory fixture: call to register a connector for the current version.

    All connectors created during the test are torn down automatically.

    Args:
        v4_config_file: Config template for the v4 connector.
        v3_config_file: Optional separate config template for v3. When omitted,
            v4_config_file is auto-migrated via v4_config_to_v3.
    """
    created = []

    def _create(
        v4_config_filename: str, *, config_overrides: Dict[str, str] = None
    ) -> dict:
        def try_convert_and_apply_overrides(config: Dict[str, str]) -> Dict[str, str]:
            match connector_version:
                case "v3":
                    logger.info(f"Will transform {v4_config_filename} to KC v3 config")
                    config = v4_config_to_v3(config)
                case "v4":
                    pass
            if config_overrides:
                config.update(config_overrides)
            return config

        rest_request = driver.createConnector(
            name_salt=name_salt,
            rest_request_template_filename=v4_config_filename,
            config_transform=try_convert_and_apply_overrides,
        )
        created.append(rest_request["name"])
        return rest_request

    try:
        yield _create
    finally:
        for connector_name in reversed(created):
            driver.closeConnector(connector_name)


@pytest.fixture(scope="session")
def wait_for_rows(driver: KafkaDriver):  # noqa: F811 — pytest fixture injection, not a true redefinition
    """Returns a polling helper that waits until a Snowflake table reaches the expected row count.

    The helper signature is::

        _wait(table_name, expected, *, timeout=None, interval=None,
              at_least=False, connector_name=None, max_consecutive_failures=6)

    * It returns the row count once it equals ``expected`` (or exceeds it
      when ``at_least`` is true).
    * It raises ``AssertionError`` when the count exceeds ``expected`` and
      ``at_least`` is false, or when ``timeout`` seconds elapse first.
    * When ``connector_name`` is provided, each poll iteration also asks the
      driver for failed tasks of that connector.  If failed tasks are seen on
      ``max_consecutive_failures`` consecutive polls the helper raises
      ``AssertionError`` with the task traces instead of waiting for the full
      timeout; a poll without failed tasks resets the counter.

    ``timeout`` / ``interval`` default to the environment variables
    ``E2E_WAIT_TIMEOUT`` (300) and ``E2E_WAIT_INTERVAL`` (5) only when they
    are left as ``None``; an explicit ``0`` is honoured.
    """
    default_timeout = int(os.environ.get("E2E_WAIT_TIMEOUT", "300"))
    default_interval = int(os.environ.get("E2E_WAIT_INTERVAL", "5"))

    def _wait(
        table_name: str,
        expected: int,
        *,
        timeout: int | None = None,
        interval: int | None = None,
        at_least: bool = False,
        connector_name: str | None = None,
        max_consecutive_failures: int = 6,
    ):
        # Compare against None rather than truthiness so that an explicit 0
        # (e.g. "check once, do not wait") is not replaced by the default.
        if timeout is None:
            timeout = default_timeout
        if interval is None:
            interval = default_interval
        deadline = time.monotonic() + timeout
        consecutive_failures = 0
        while True:
            count = driver.select_number_of_records(table_name)
            # A None count is treated as "no answer yet" and simply retried.
            if count is not None:
                if count == expected or (at_least and count > expected):
                    return count
                if count > expected:
                    raise AssertionError(
                        f"Found more than {expected} rows in {table_name} (got {count})"
                    )
            if time.monotonic() >= deadline:
                raise AssertionError(
                    f"Timed out waiting for {expected} rows in {table_name} "
                    f"(got {count} after {timeout}s)"
                )
            if connector_name is not None:
                if failed := driver.get_failed_tasks(connector_name):
                    consecutive_failures += 1
                    if consecutive_failures >= max_consecutive_failures:
                        traces = "\n".join(
                            f"  task {t['id']}: {t.get('trace', 'no trace')}"
                            for t in failed
                        )
                        raise AssertionError(
                            f"Connector {connector_name} has FAILED tasks while "
                            f"waiting for {expected} rows in {table_name} "
                            f"(got {count}):\n{traces}"
                        )
                    logger.warning(
                        f"Connector {connector_name} has failed tasks "
                        f"({consecutive_failures}/{max_consecutive_failures}), "
                        f"waiting for recovery..."
                    )
                else:
                    consecutive_failures = 0
            logger.info(
                f"Waiting for {'at least ' if at_least else ''}{expected} rows "
                f"in {table_name} (currently {count}), retrying in {interval}s..."
            )
            time.sleep(interval)

    return _wait


# ---------------------------------------------------------------------------
# GitHub Actions step summary (failures only)
# ---------------------------------------------------------------------------

# Failed "call"-phase reports, appended by pytest_runtest_makereport and read
# by pytest_sessionfinish when writing the GitHub step summary.
_github_summary_failures: List[TestReport] = []


@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item):
    """Record failing call-phase reports so they can be summarised at session end."""
    hook_outcome = yield
    test_report = hook_outcome.get_result()
    # Only the "call" phase is recorded, and only when it failed.
    failed_during_call = test_report.when == "call" and test_report.failed
    if failed_during_call and test_report.longrepr:
        _github_summary_failures.append(test_report)


def _python_error_annotation(report: TestReport) -> None:
    """Emit a ::error workflow command to stderr for GitHub annotations.

    The command has the form ``::error file=...,title=...,line=...::message``.
    Property values and the message are percent-escaped the way GitHub
    workflow commands require, so a multi-line failure message or a test id
    containing ``,`` / ``:`` cannot truncate or corrupt the annotation.

    Args:
        report: Failed test report; ``report.location`` supplies the file,
            line and title, ``report.longrepr`` supplies the message.
    """

    def _escape_data(value: str) -> str:
        # '%' must be escaped first so the escapes added below survive.
        return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")

    def _escape_property(value: str) -> str:
        # Properties additionally reserve ':' and ',' as separators.
        return _escape_data(value).replace(":", "%3A").replace(",", "%2C")

    filename, line, domain = report.location
    file_value = _escape_property(f"test/{filename}")
    title_value = _escape_property(str(domain))
    parts = [f"file={file_value}", f"title={title_value}"]
    if line is not None:
        # The reported line is shifted by one before being emitted.
        parts.append(f"line={line + 1}")
    opts = ",".join(parts)
    # longrepr can be a ReprExceptionInfo (has .reprcrash.message) or a plain
    # string (e.g. for xpass-strict failures).
    longrepr = report.longrepr
    if hasattr(longrepr, "reprcrash") and longrepr.reprcrash is not None:
        message = longrepr.reprcrash.message
    else:
        message = str(longrepr).split("\n", 1)[0]
    print(f"::error {opts}::{_escape_data(str(message))}", file=sys.stderr)


def pytest_sessionfinish(session, exitstatus):
    """Publish the collected failures when running under GitHub Actions.

    Nothing happens unless GITHUB_STEP_SUMMARY is set, at least one failure was
    collected, and the session exit status is non-zero. Otherwise every failure
    is emitted as an error annotation and then appended, as a fenced block under
    its node id, to the step summary file. Failure to write the summary file is
    logged at debug level and otherwise ignored.
    """
    step_summary = os.environ.get("GITHUB_STEP_SUMMARY")
    if not step_summary:
        return
    if not _github_summary_failures:
        return
    if exitstatus == 0:
        return

    for failed in _github_summary_failures:
        _python_error_annotation(failed)

    try:
        with open(step_summary, "a", encoding="utf-8") as out:
            emit = out.write
            emit("\n## Python test failures\n\n")
            for failed in _github_summary_failures:
                emit(f"### {failed.nodeid}\n\n")
                emit("```\n")
                emit(failed.longreprtext)
                emit("\n```\n\n")
    except OSError:
        logger.debug("Could not write to GITHUB_STEP_SUMMARY", exc_info=True)


================================================
FILE: test/connect-log4j.properties
================================================
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Sample Log4j properties file for connect, used only for testing
# This can work as an example for SF KC users.

log4j.rootLogger=INFO, stdout, connectAppender

# Send the logs to the console.

log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout

# Send the logs to a file, rolling the file at the top of every hour (local time). For example, the `File` option
# specifies the location of the log files (e.g. ${kafka.logs.dir}/connect.log), and at each hour boundary the file is
# closed and copied in the same directory but with a filename that ends in the `DatePattern` option.

log4j.appender.connectAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.connectAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.connectAppender.File=${kafka.logs.dir}/connect.log
log4j.appender.connectAppender.layout=org.apache.log4j.PatternLayout

# The `%X{connector.context}` parameter in the layout includes connector-specific and task-specific information
# in the log message, where appropriate. This makes it easier to identify those log messages that apply to a
# specific connector. Simply add this parameter to the log layout configuration below to include the contextual information.
connect.log.pattern=[%d] %p %X{connector.context}%m (%c:%L)%n

log4j.appender.stdout.layout.ConversionPattern=${connect.log.pattern}
log4j.appender.connectAppender.layout.ConversionPattern=${connect.log.pattern}

log4j.logger.org.apache.zookeeper=ERROR
log4j.logger.org.reflections=ERROR

log4j.logger.com.snowflake.kafka.connector=DEBUG

# DEBUG produces tons of logs - use carefully
log4j.logger.net.snowflake=INFO


================================================
FILE: test/docker/.gitignore
================================================
# Environment files with credentials
.env

# Docker build artifacts
*.log


================================================
FILE: test/docker/Dockerfile.apache-kafka
================================================
# Dockerfile that mirrors run_test_apache.sh setup
# Downloads official Apache Kafka tarball and runs it the same way
# Supports both ZooKeeper mode (<=3.x) and KRaft mode (4.x+)
# No CMD or ENTRYPOINT is defined here; the command must be supplied at run time.

ARG JAVA_VERSION=11
FROM eclipse-temurin:${JAVA_VERSION}-jdk

ARG KAFKA_VERSION=2.8.2
ARG SCALA_VERSION=2.12

# Install dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    netcat-openbsd \
    wget \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /opt

# Download and extract Apache Kafka
# -f makes curl exit non-zero on an HTTP error (e.g. an unknown KAFKA_VERSION)
# instead of saving the error page as kafka.tgz; -S keeps the error visible
# despite -s.
RUN curl -fsSL "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" -o kafka.tgz && \
    tar xzf kafka.tgz && \
    mv kafka_${SCALA_VERSION}-${KAFKA_VERSION} kafka && \
    rm kafka.tgz

# Install FIPS jars
RUN wget -q -P /opt/kafka/libs https://repo1.maven.org/maven2/org/bouncycastle/bcpkix-fips/2.1.8/bcpkix-fips-2.1.8.jar && \
    wget -q -P /opt/kafka/libs https://repo1.maven.org/maven2/org/bouncycastle/bc-fips/2.1.0/bc-fips-2.1.0.jar

# Create plugin directory (same path as in connect-distributed.properties)
RUN mkdir -p /usr/local/share/kafka/plugins

# Copy config files (both ZK and KRaft configs; start script picks the right one)
COPY apache_properties/zookeeper.properties /opt/kafka/config/zookeeper.properties
COPY apache_properties/server.properties /opt/kafka/config/server.properties
COPY apache_properties/kraft-server.properties /opt/kafka/config/kraft-server.properties
COPY apache_properties/connect-distributed.properties /opt/kafka/config/connect-distributed.properties
COPY connect-log4j.properties /opt/kafka/config/connect-log4j.properties

WORKDIR /opt/kafka

# Expose ports: Zookeeper(2181), Kafka(9092), Controller(9093), Kafka Connect(8083)
EXPOSE 2181 9092 9093 8083


================================================
FILE: test/docker/Dockerfile.builder
================================================
# Builder image for compiling protobuf dependencies
# Build artifacts are created during image build (cached) and can be copied out

FROM maven:3.9-eclipse-temurin-11

# Install protoc 3.21.x (compatible with protobuf-java 3.21.12)
ARG PROTOC_VERSION=21.12
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    unzip \
    git \
    python3 \
    && rm -rf /var/lib/apt/lists/*

# Download and install protoc
# The release archive is named per CPU architecture: arm64 maps to "aarch_64",
# everything else is treated as x86_64.
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page under the .zip name; -S keeps the error visible despite -s.
RUN ARCH=$(dpkg --print-architecture) && \
    if [ "$ARCH" = "arm64" ]; then PROTOC_ARCH="aarch_64"; else PROTOC_ARCH="x86_64"; fi && \
    curl -fsSLO "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" && \
    unzip -q "protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" -d /usr/local && \
    rm "protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" && \
    chmod +x /usr/local/bin/protoc

WORKDIR /build

# Clone and build BlueApron protobuf converter
ARG CONVERTER_VERSION=3.1.0
RUN mkdir -p /output && \
    git clone -q https://github.com/blueapron/kafka-connect-protobuf-converter /build/converter && \
    cd /build/converter && \
    git checkout -q tags/v${CONVERTER_VERSION} && \
    mvn clean package -q -DskipTests && \
    cp target/kafka-connect-protobuf-converter-*-jar-with-dependencies.jar /output/

# Copy protobuf source and compile to Java
# Build context is test/ directory
COPY test_data/sensor.proto /build/test_data/
COPY test_data/protobuf/pom.xml /build/test_data/protobuf/
RUN mkdir -p /build/test_data/protobuf/src/main/java && \
    protoc --proto_path=/build/test_data --java_out=/build/test_data/protobuf/src/main/java sensor.proto

# Build protobuf test data JAR
RUN cd /build/test_data/protobuf && \
    mvn clean package -q -DskipTests && \
    mkdir -p /output && \
    cp target/kafka-test-protobuf-*-jar-with-dependencies.jar /output/

# Output directory contains the built JARs
# Copy them out with: docker cp $(docker create protobuf-builder):/output/. ./target/
WORKDIR /output


================================================
FILE: test/docker/Dockerfile.test-runner
================================================
# Test runner container with Python and all test dependencies
FROM python:3.13-slim

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    unzip \
    netcat-openbsd \
    librdkafka-dev \
    jq \
    && rm -rf /var/lib/apt/lists/*

# Install protoc 25.1 from binary (matching CI)
# Only bin/protoc and the bundled include/ files are extracted from the archive.
ARG PROTOC_VERSION=25.1
RUN ARCH=$(dpkg --print-architecture) && \
    if [ "$ARCH" = "arm64" ]; then PROTOC_ARCH="aarch_64"; else PROTOC_ARCH="x86_64"; fi && \
    curl -fsSL -o protoc.zip \
      "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-${PROTOC_ARCH}.zip" && \
    unzip -o protoc.zip -d /usr/local bin/protoc 'include/*' && \
    rm protoc.zip && \
    chmod +x /usr/local/bin/protoc

# Verify protoc version
RUN protoc --version

# Upgrade pip first (no cache, like the install below, to keep the layer small)
RUN pip install --no-cache-dir --upgrade pip

# Install Python dependencies
RUN pip install --no-cache-dir \
    requests \
    certifi \
    "confluent-kafka[avro,json,protobuf]==2.13.0" \
    avro-python3 \
    kafka-python-ng \
    "snowflake-connector-python==4.2.0" \
    pytest

# Create app directory
WORKDIR /app

CMD ["bash"]


================================================
FILE: test/docker/docker-compose.amd64.yml
================================================
# Platform override for Confluent 6.2.x
# These container images are only available for linux/amd64, not linux/arm64.
# This file is automatically used when running with Confluent 6.2.x on ARM Macs.

services:
  zookeeper:
    platform: linux/amd64

  kafka:
    platform: linux/amd64

  schema-registry:
    platform: linux/amd64

  kafka-connect:
    platform: linux/amd64


================================================
FILE: test/docker/docker-compose.apache.yml
================================================
# Apache Kafka services
# Supports both ZooKeeper mode (<=3.x) and KRaft mode (4.x+)
# Usage: docker compose -f docker-compose.base.yml -f docker-compose.apache.yml up

services:
  # Single container running Kafka + Kafka Connect
  # ZooKeeper mode: also runs ZK inside the container
  # KRaft mode (KRAFT_MODE=true): combined broker+controller, no ZK
  kafka:
    image: ghcr.io/snowflakedb/snowflake-kafka-connector/apache-kafka:${KAFKA_VERSION:-2.8.2}-java${JAVA_VERSION:-11}
    build:
      context: ..
      dockerfile: docker/Dockerfile.apache-kafka
      args:
        KAFKA_VERSION: ${KAFKA_VERSION:-2.8.2}
        JAVA_VERSION: ${JAVA_VERSION:-11}
        SCALA_VERSION: ${SCALA_VERSION:-2.12}
    hostname: kafka
    extra_hosts:
      - "host.docker.internal:host-gateway"
    environment:
      KAFKA_HEAP_OPTS: "-Xms512m -Xmx2g"
      KRAFT_MODE: ${KRAFT_MODE:-false}
      CONNECT_OFFSET_FLUSH_INTERVAL_MS: ${CONNECT_OFFSET_FLUSH_INTERVAL_MS:-1000}
    volumes:
      - ${CONNECTOR_PLUGIN_PATH:-/tmp/sf-kafka-connect-plugin}:/usr/local/share/kafka/plugins/snowflake-connector
      - ${V3_PLUGIN_PATH:-/tmp/sf-kafka-connect-v3}:/usr/local/share/kafka/plugins/snowflake-connector-v3
      - ${EXTRA_JARS_PATH:-/tmp/kafka-connect-extra-jars}:/usr/local/share/kafka/plugins/protobuf-converter
      - ./scripts/start-apache-kafka.sh:/opt/start.sh:ro
    command: ["/bin/bash", "/opt/start.sh"]
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:8083/connectors || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 30
      start_period: 60s

  # Override base test-runner dependencies and connection settings
  test-runner:
    depends_on:
      kafka:
        condition: service_healthy
    environment:
      KAFKA_BOOTSTRAP_SERVERS: kafka:9092
      KAFKA_CONNECT_ADDRESS: kafka:8083
      KAFKA_CONNECT_HOST: kafka
      SCHEMA_REGISTRY_URL: ""


================================================
FILE: test/docker/docker-compose.base.yml
================================================
# Base services shared by all platforms
# Usage: docker compose -f docker-compose.base.yml -f docker-compose.<platform>.yml up

services:
  # Test runner - Python tests
  test-runner:
    build:
      context: .
      dockerfile: Dockerfile.test-runner
    environment:
      SNOWFLAKE_CREDENTIAL_FILE: /credentials/profile.json
      KAFKA_PLATFORM:
      KAFKA_PLATFORM_VERSION:
      TEST_NAME_SALT:
      SF_CLOUD_PLATFORM:
      ENABLE_SSL: ${ENABLE_SSL:-false}
      SNOWPIPE_STREAMING_URL: ${SNOWPIPE_STREAMING_URL:-}
    volumes:
      - ${SNOWFLAKE_CREDENTIAL_FILE:?SNOWFLAKE_CREDENTIAL_FILE is required}:/credentials/profile.json:ro
      - ../test_suit:/app/test_suit:ro
      - ../rest_request_template:/app/rest_request_template:ro
      - ../test_data:/app/test_data
      - ../__init__.py:/app/__init__.py:ro
      - ../lib:/app/lib:ro
      - ../conftest.py:/app/conftest.py:ro
      - ../pyproject.toml:/app/pyproject.toml:ro
      - ../tests:/app/tests:ro
    working_dir: /app

networks:
  default:


================================================
FILE: test/docker/docker-compose.confluent-kraft.yml
================================================
# KRaft mode override for Confluent 8.x+
# Usage: docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml -f docker-compose.confluent-kraft.yml up
#
# Layers KRaft-specific Kafka broker config on top of the base Confluent compose
# file. Only the differences are specified here; shared services (kafka-connect,
# schema-registry, test-runner) are inherited to avoid config drift.

services:
  # Confluent 8.x dropped cp-zookeeper. Replace the inherited zookeeper service
  # with a tiny stub that satisfies kafka's depends_on from the base file.
  zookeeper:
    image: busybox:1.37
    command: ["sleep", "infinity"]
    healthcheck:
      test: ["CMD-SHELL", "true"]
      interval: 1s
      retries: 1

  kafka:
    environment:
      KAFKA_NODE_ID: 1
      KAFKA_PROCESS_ROLES: broker,controller
      KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka:29093
      KAFKA_LISTENERS: PLAINTEXT://kafka:29092,CONTROLLER://kafka:29093,PLAINTEXT_HOST://0.0.0.0:9092
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_LOG_DIRS: /tmp/kraft-combined-logs
      CLUSTER_ID: MkU3OEVBNTcwNTJENDM2Qk


================================================
FILE: test/docker/docker-compose.confluent.yml
================================================
# Confluent Platform services
# Usage: docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml up

services:
  zookeeper:
    image: confluentinc/cp-zookeeper:${CONFLUENT_VERSION:-7.8.0}
    hostname: zookeeper
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000
    healthcheck:
      test: ["CMD", "nc", "-z", "localhost", "2181"]
      interval: 5s
      timeout: 10s
      retries: 10

  kafka:
    image: confluentinc/cp-kafka:${CONFLUENT_VERSION:-7.8.0}
    hostname: kafka
    depends_on:
      zookeeper:
        condition: service_healthy
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
    healthcheck:
      test: ["CMD", "kafka-broker-api-versions", "--bootstrap-server", "localhost:9092"]
      interval: 5s
      timeout: 10s
      retries: 10

  kafka-connect:
    image: confluentinc/cp-kafka-connect:${CONFLUENT_VERSION:-7.8.0}
    hostname: kafka-connect
    extra_hosts:
      - "host.docker.internal:host-gateway"
    depends_on:
      kafka:
        condition: service_healthy
      schema-registry:
        condition: service_healthy
    environment:
      CONNECT_BOOTSTRAP_SERVERS: kafka:29092
      CONNECT_REST_ADVERTISED_HOST_NAME: kafka-connect
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: test-connect-group
      CONNECT_CONFIG_STORAGE_TOPIC: connect-configs
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_STATUS_STORAGE_TOPIC: connect-status
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_OFFSET_FLUSH_INTERVAL_MS: ${CONNECT_OFFSET_FLUSH_INTERVAL_MS:-1000}
      CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
      CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
      CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: "false"
      CONNECT_PLUGIN_PATH: /usr/share/java,/usr/share/confluent-hub-components,/opt/kafka-connect/plugins
      CONNECT_LOG4J_ROOT_LOGLEVEL: INFO
      CONNECT_LOG4J_LOGGERS: com.snowflake=INFO
      CONNECT_LOG4J_APPENDER_STDOUT_LAYOUT_CONVERSIONPATTERN: "[%d] %p %X{connector.context}%m (%c:%L)%n"
      KAFKA_HEAP_OPTS: "-Xms512m -Xmx6g"
      KAFKA_OPTS: "${KAFKA_OPTS:-}"
      SS_LOG_LEVEL: warn
    volumes:
      - ${CONNECTOR_PLUGIN_PATH:-/tmp/sf-kafka-connect-plugin}:/opt/kafka-connect/plugins/snowflake-connector
      - ${V3_PLUGIN_PATH:-/tmp/sf-kafka-connect-v3}:/opt/kafka-connect/plugins/snowflake-connector-v3
      - ${EXTRA_JARS_PATH:-/tmp/kafka-connect-extra-jars}:/opt/kafka-connect/plugins/protobuf-converter
      - ${JOLOKIA_JAR_PATH:-/dev/null}:/opt/jolokia/jolokia-agent.jar:ro
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8083/connectors"]
      interval: 5s
      timeout: 10s
      retries: 20

  # Schema Registry for Confluent
  schema-registry:
    image: confluentinc/cp-schema-registry:${CONFLUENT_VERSION:-7.8.0}
    hostname: schema-registry
    depends_on:
      kafka:
        condition: service_healthy
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka:29092
      SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8081/subjects"]
      interval: 5s
      timeout: 10s
      retries: 10

  # Override base test-runner dependencies
  test-runner:
    depends_on:
      kafka-connect:
        condition: service_healthy
      schema-registry:
        condition: service_healthy
    environment:
      KAFKA_BOOTSTRAP_SERVERS: kafka:29092
      KAFKA_CONNECT_ADDRESS: kafka-connect:8083
      KAFKA_CONNECT_HOST: kafka-connect
      SCHEMA_REGISTRY_URL: http://schema-registry:8081


================================================
FILE: test/docker/docker-compose.profile-apache.yml
================================================
# Profiling overlay for Apache Kafka (single-container mode).
# Layers JFR, GC logging, JMX, and async-profiler support onto the kafka service.
#
# Usage:
#   docker compose -f docker-compose.base.yml -f docker-compose.apache.yml \
#                  -f docker-compose.profile-apache.yml up
#
# Or via run_tests.sh:
#   ./run_tests.sh --platform=apache --platform-version=3.7.0 --profile [--keep] -- ...
#
# Note: KAFKA_HEAP_OPTS applies to all JVMs in the container (ZK, broker, Connect).
# The 2g heap is shared across ZK + broker + Connect, which may be tight for
# large-scale profiling workloads. Use the Confluent platform for heavy tests.
# JFR and GC logs capture the Connect worker process specifically because
# connect-distributed.sh is the last process started and inherits these flags.

services:
  kafka:
    cap_add:
      - SYS_PTRACE
    ports:
      - "127.0.0.1:9999:9999"
    environment:
      KAFKA_HEAP_OPTS: >-
        -Xms512m -Xmx2g
        -XX:StartFlightRecording=name=profile,filename=/tmp/profile/kc-profile.jfr,settings=profile,maxsize=500m,dumponexit=true
        -XX:FlightRecorderOptions=stackdepth=256
        -Xlog:gc*:file=/tmp/profile/gc.log:time,uptime,level,tags:filecount=5,filesize=50m
        -XX:+HeapDumpOnOutOfMemoryError
        -XX:HeapDumpPath=/tmp/profile/heapdump.hprof
        -Dcom.sun.management.jmxremote
        -Dcom.sun.management.jmxremote.port=9999
        -Dcom.sun.management.jmxremote.rmi.port=9999
        -Dcom.sun.management.jmxremote.authenticate=false
        -Dcom.sun.management.jmxremote.ssl=false
        -Djava.rmi.server.hostname=localhost
    tmpfs:
      - /tmp/profile:uid=1000,gid=1000
    volumes:
      - ${ASYNC_PROFILER_PATH:-/dev/null}:/opt/async-profiler:ro


================================================
FILE: test/docker/docker-compose.profile-confluent.yml
================================================
# Profiling overlay for Confluent Platform.
# Layers JFR, GC logging, JMX, and async-profiler support onto kafka-connect.
#
# Usage:
#   docker compose -f docker-compose.base.yml -f docker-compose.confluent.yml \
#                  -f docker-compose.profile-confluent.yml up
#
# Or via run_tests.sh:
#   ./run_tests.sh --platform=confluent --platform-version=7.8.0 --profile [--keep] -- ...
#
# Collecting results:
#   - JFR recording:  test/scripts/profile_connect.sh jfr-dump
#   - GC logs:        test/scripts/profile_connect.sh collect ./results
#   - Heap dumps:     test/scripts/profile_connect.sh heap-dump
#   - Flame graphs:   test/scripts/profile_connect.sh async-cpu 60
#
# Analysis:
#   - JFR:  jfr summary kc-profile.jfr  OR  open in JDK Mission Control
#   - GC:   https://gceasy.io
#   - Heap: Eclipse MAT

services:
  kafka-connect:
    # SYS_PTRACE is added for the profiling tooling layered on by this overlay.
    cap_add:
      - SYS_PTRACE
    ports:
      # The JMX endpoint configured below has authentication and SSL disabled,
      # so publish it on the host's loopback interface only. This also matches
      # java.rmi.server.hostname=localhost.
      - "127.0.0.1:9999:9999"
    environment:
      KAFKA_HEAP_OPTS: >-
        -Xms512m -Xmx6g
        -XX:StartFlightRecording=name=profile,filename=/tmp/profile/kc-profile.jfr,settings=profile,maxsize=500m,dumponexit=true
        -XX:FlightRecorderOptions=stackdepth=256
        -Xlog:gc*:file=/tmp/profile/gc.log:time,uptime,level,tags:filecount=5,filesize=50m
        -XX:+HeapDumpOnOutOfMemoryError
        -XX:HeapDumpPath=/tmp/profile/heapdump.hprof
        -Dcom.sun.management.jmxremote
        -Dcom.sun.management.jmxremote.port=9999
        -Dcom.sun.management.jmxremote.rmi.port=9999
        -Dcom.sun.management.jmxremote.authenticate=false
        -Dcom.sun.management.jmxremote.ssl=false
        -Djava.rmi.server.hostname=localhost
    # JFR recordings, GC logs and heap dumps are all written under /tmp/profile.
    tmpfs:
      - /tmp/profile:uid=1000,gid=1000
    volumes:
      - ${ASYNC_PROFILER_PATH:-/dev/null}:/opt/async-profiler:ro


================================================
FILE: test/docker/scripts/start-apache-kafka.sh
================================================
#!/bin/bash
# Startup script for Apache Kafka in Docker.
# Supports two modes controlled by the KRAFT_MODE env var:
#   KRAFT_MODE=true  -> KRaft (Kafka 4.x+): combined broker+controller, no ZooKeeper
#   KRAFT_MODE=false -> ZooKeeper mode (Kafka <=3.x): ZK + broker + Connect
#
# Every service is started in the background with its output redirected to
# $LOG_DIR. The script then tails those logs and blocks until it is signalled.

set -e

KAFKA_HOME=/opt/kafka
LOG_DIR=/var/log/kafka
CONNECT_DISTRIBUTED_CONFIG="$KAFKA_HOME/config/connect-distributed.properties"

# PIDs of the background services; empty until the service has been launched.
ZOOKEEPER_PID=""
KAFKA_PID=""
KC_PID=""

# Stop whatever has been started so far and exit cleanly.
# Installed before anything is launched so that a stop request arriving during
# the (potentially minutes-long) startup phase is honoured as well.
shutdown() {
    # Unquoted on purpose: PIDs that are still empty must expand to nothing.
    # "|| true" keeps "set -e" from aborting before the clean exit when a
    # process is already gone.
    kill $KC_PID $KAFKA_PID $ZOOKEEPER_PID 2>/dev/null || true
    exit 0
}
trap shutdown SIGTERM SIGINT

# wait_for_port NAME PORT ATTEMPTS
# Poll localhost:PORT once per second, at most ATTEMPTS times. A timeout is
# reported on stderr but is not fatal: startup continues as before.
wait_for_port() {
    local name="$1" port="$2" attempts="$3"
    local i
    echo "Waiting for ${name}..."
    for ((i = 0; i < attempts; i++)); do
        if nc -z localhost "$port" 2>/dev/null; then
            echo "${name} is ready"
            return 0
        fi
        sleep 1
    done
    echo "WARNING: ${name} did not open port ${port} after ${attempts} attempts; continuing anyway" >&2
    return 0
}

# Poll the Kafka Connect REST API every 2 seconds, at most 60 times.
# -f makes an HTTP error response count as "not ready yet".
wait_for_connect() {
    local i
    echo "Waiting for Kafka Connect..."
    for ((i = 0; i < 60; i++)); do
        if curl -sf http://localhost:8083/connectors > /dev/null 2>&1; then
            echo "Kafka Connect is ready"
            return 0
        fi
        sleep 2
    done
    echo "WARNING: Kafka Connect REST API did not respond after 60 attempts; continuing anyway" >&2
    return 0
}

# Launch the Connect worker in the background and wait for its REST API.
start_connect() {
    echo "=== Starting Kafka Connect ==="
    "$KAFKA_HOME/bin/connect-distributed.sh" "$CONNECT_DISTRIBUTED_CONFIG" > "$LOG_DIR/kc.log" 2>&1 &
    KC_PID=$!
    wait_for_connect
}

mkdir -p "$LOG_DIR"

echo "Java version:"
java -version

# CONNECT_OFFSET_FLUSH_INTERVAL_MS, when set, overrides offset.flush.interval.ms in connect-distributed.properties.
# The property is replaced in place when present and appended otherwise, so the
# override always takes effect.
if [ -n "${CONNECT_OFFSET_FLUSH_INTERVAL_MS:-}" ]; then
  echo "Setting offset.flush.interval.ms=${CONNECT_OFFSET_FLUSH_INTERVAL_MS} in connect-distributed.properties"
  if grep -q '^offset\.flush\.interval\.ms=' "$CONNECT_DISTRIBUTED_CONFIG"; then
    sed -i "s/^offset\\.flush\\.interval\\.ms=.*/offset.flush.interval.ms=${CONNECT_OFFSET_FLUSH_INTERVAL_MS}/" \
      "$CONNECT_DISTRIBUTED_CONFIG"
  else
    printf '\noffset.flush.interval.ms=%s\n' "$CONNECT_OFFSET_FLUSH_INTERVAL_MS" >> "$CONNECT_DISTRIBUTED_CONFIG"
  fi
fi

if [ "${KRAFT_MODE:-false}" = "true" ]; then
    #######################################################################
    # KRaft mode (Kafka 4.x+)
    #######################################################################
    echo "=== KRaft mode ==="
    rm -rf /tmp/kraft-combined-logs 2>/dev/null || true

    CLUSTER_ID=$("$KAFKA_HOME/bin/kafka-storage.sh" random-uuid)
    echo "Generated cluster ID: $CLUSTER_ID"

    echo "=== Formatting storage ==="
    "$KAFKA_HOME/bin/kafka-storage.sh" format \
        -t "$CLUSTER_ID" \
        -c "$KAFKA_HOME/config/kraft-server.properties"

    echo "=== Starting Kafka (KRaft combined broker+controller) ==="
    "$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/kraft-server.properties" > "$LOG_DIR/kafka.log" 2>&1 &
    KAFKA_PID=$!
    wait_for_port "Kafka broker" 9092 30

    # Extra settle time after the broker port opens, before starting Connect.
    sleep 5

    start_connect

    echo "=== All services started (KRaft) ==="
    echo "Kafka PID: $KAFKA_PID"
    echo "Kafka Connect PID: $KC_PID"
else
    #######################################################################
    # ZooKeeper mode (Kafka <=3.x)
    #######################################################################
    echo "=== ZooKeeper mode ==="
    rm -rf /tmp/kafka-logs /tmp/zookeeper 2>/dev/null || true

    echo "=== Starting Zookeeper ==="
    "$KAFKA_HOME/bin/zookeeper-server-start.sh" "$KAFKA_HOME/config/zookeeper.properties" > "$LOG_DIR/zookeeper.log" 2>&1 &
    ZOOKEEPER_PID=$!
    wait_for_port "Zookeeper" 2181 30

    echo "=== Starting Kafka ==="
    "$KAFKA_HOME/bin/kafka-server-start.sh" "$KAFKA_HOME/config/server.properties" > "$LOG_DIR/kafka.log" 2>&1 &
    KAFKA_PID=$!
    wait_for_port "Kafka" 9092 30

    # Extra settle time after the broker port opens, before starting Connect.
    sleep 5

    start_connect

    echo "=== All services started (ZooKeeper) ==="
    echo "Zookeeper PID: $ZOOKEEPER_PID"
    echo "Kafka PID: $KAFKA_PID"
    echo "Kafka Connect PID: $KC_PID"
fi

# Stream every service log to the container's stdout and block until signalled.
tail -f "$LOG_DIR"/*.log &
wait


================================================
FILE: test/download_v3_jar.sh
================================================
#!/bin/bash
#
# Downloads the KC v3 connector JAR from Maven Central.
# Skips download if a non-empty JAR already exists at the target path.
#
# The transfer goes to a temporary file next to the target and is renamed into
# place only after curl succeeds, so an interrupted download can never be
# mistaken for a cached JAR on the next run.
#
# Output: the target directory on stdout; progress messages go to stderr.
#
# Usage:
#   ./download_v3_jar.sh [target_dir]
#
# Default target: /tmp/sf-kafka-connect-v3
#

set -e

V3_VERSION="3.5.3"
JAR_NAME="snowflake-kafka-connector-${V3_VERSION}.jar"
MAVEN_URL="https://repo1.maven.org/maven2/com/snowflake/snowflake-kafka-connector/${V3_VERSION}/${JAR_NAME}"

TARGET_DIR="${1:-/tmp/sf-kafka-connect-v3}"
TARGET_JAR="${TARGET_DIR}/${JAR_NAME}"

# -s (exists and is non-empty) rather than -f, so a zero-byte leftover is
# downloaded again instead of being reused.
if [ -s "$TARGET_JAR" ]; then
    echo "KC v3 JAR already exists: $TARGET_JAR (skipping download)" >&2
    echo "$TARGET_DIR"
    exit 0
fi

mkdir -p "$TARGET_DIR"

# Per-process temporary name in the same directory, so the final mv is a rename
# on the same filesystem. The EXIT trap removes it if the download fails.
PARTIAL_JAR="${TARGET_JAR}.part.$$"
trap 'rm -f "$PARTIAL_JAR"' EXIT

echo "Downloading KC v3 JAR (${V3_VERSION}) from Maven Central..." >&2
curl -fSL -o "$PARTIAL_JAR" "$MAVEN_URL"
mv -f "$PARTIAL_JAR" "$TARGET_JAR"
echo "Downloaded: $TARGET_JAR" >&2

echo "$TARGET_DIR"


================================================
FILE: test/lib/__init__.py
================================================


================================================
FILE: test/lib/config.py
================================================
from dataclasses import asdict, dataclass
import json
import logging
from pathlib import Path
import re

from lib.crypto import parse_private_key


@dataclass
class Profile:
    """Represents the SNOWFLAKE_CREDENTIAL_FILE (profile.json) content.

    Fields are consumed in two places in end-to-end tests:
    - Snowflake Python connector, used locally for validating data
    - Kafka Connect connector config

    The latter is filled from test-specific templates and sent to Kafka Connect.

    Every field defaults to ``None``, so a profile may omit any of them.
    """

    protocol: str = None
    host: str = None
    port: int = None

    account: str = None
    user: str = None
    role: str = None
    private_key: str = None

    database: str = None
    schema: str = None
    warehouse: str = None

    # Unused in end-to-end tests
    password: str = None
    encrypted_private_key: str = None
    private_key_passphrase: str = None
    oauth_client_id: str = None
    oauth_client_secret: str = None
    oauth_refresh_token: str = None
    oauth_token_endpoint: str = None
    des_rsa_key: str = None

    @staticmethod
    def load(path: Path) -> "Profile":
        """Read the JSON file at ``path`` and build a Profile from its keys.

        The file is decoded as UTF-8 regardless of the locale. Each top-level
        key is passed as a keyword argument, so a key that is not a Profile
        field raises ``TypeError``.
        """
        with open(path, encoding="utf-8") as f:
            return Profile(**json.load(f))

    def get_or_infer_account(self) -> str:
        """Return ``account`` if set, otherwise derive it from ``host``.

        The account is the text in front of ``.snowflakecomputing`` in the host
        (after the last ``/``, if any).

        Raises:
            ValueError: if ``account`` is unset and ``host`` is missing or does
                not contain ``snowflakecomputing``.
        """
        if self.account is not None:
            return self.account

        if not self.host:
            raise ValueError(
                "Cannot infer account: profile.json defines neither 'account' nor 'host'"
            )

        suffix = ".snowflakecomputing"
        reg = "[^/]*snowflakecomputing"  # find the account name
        account = re.findall(reg, self.host)
        if not account:
            # Without a match there is nothing to slice; fail with a message
            # that names the offending value instead of a bare IndexError.
            raise ValueError(
                f"Cannot infer account from 'host' {self.host!r} in profile.json, "
                "expecting account.snowflakecomputing.com:443"
            )
        # A well-formed match holds at least one account character plus the suffix.
        if len(account) != 1 or len(account[0]) < len(suffix) + 1:
            logging.warning(
                "Format error in 'host' field at profile.json, expecting account.snowflakecomputing.com:443"
            )
        return account[0][: -len(suffix)]

    def make_url(self) -> str:
        """Build ``[protocol://]host[:port]``, leaving out the unset parts."""
        protocol_prefix = f"{self.protocol}://" if self.protocol else ""
        port_suffix = f":{self.port}" if self.port else ""
        return protocol_prefix + self.host + port_suffix


@dataclass
class SnowflakeConnectorConfig:
    """Connection settings for the Snowflake Python Connector, which the test driver uses to run verification queries."""

    protocol: str
    host: str
    port: int

    account: str
    user: str
    private_key: bytes

    database: str
    schema: str
    warehouse: str

    @staticmethod
    def from_profile(profile: Profile) -> "SnowflakeConnectorConfig":
        """Derive the connector settings from a loaded profile.

        The host keeps only the text before its first ``:``, the account is
        taken or inferred via the profile, and the private key is converted
        with ``parse_private_key``.
        """
        hostname = profile.host.partition(":")[0]
        account_name = profile.get_or_infer_account()
        key_bytes = parse_private_key(profile.private_key)
        return SnowflakeConnectorConfig(
            profile.protocol,
            hostname,
            profile.port,
            account_name,
            profile.user,
            key_bytes,
            profile.database,
            profile.schema,
            profile.warehouse,
        )

    def to_dict(self) -> dict:
        """Return the fields as a dict, leaving out those that are ``None``."""
        populated = {}
        for field_name, field_value in asdict(self).items():
            if field_value is None:
                continue
            populated[field_name] = field_value
        return populated


================================================
FILE: test/lib/config_migration.py
================================================
"""Config migration between KC v3 and v4 connector configurations."""

import copy
from typing import Dict

# Fully-qualified connector class names written into "connector.class".
V4_CONNECTOR_CLASS = "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector"
V3_CONNECTOR_CLASS = "com.snowflake.kafka.connector.SnowflakeSinkConnector"

# Baseline config for a v3 connector. The upper-case values are placeholders,
# not usable settings; presumably they are substituted with real profile values
# before the config is submitted (TODO confirm against the caller).
V3_CONFIG_TEMPLATE = {
    "connector.class": V3_CONNECTOR_CLASS,
    "snowflake.ingestion.method": "SNOWPIPE_STREAMING",
    # placeholders for templating
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    # make tests fast
    "buffer.flush.time": "1",
    "snowflake.streaming.max.client.lag": "1",
}

# Baseline config for a v4 connector. It uses the same placeholders as the v3
# template but sets no ingestion method and no flush/lag overrides.
V4_CONFIG_TEMPLATE = {
    "connector.class": V4_CONNECTOR_CLASS,
    # placeholders for templating
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    # no need to validate compatibility when creating a v4 connector directly
    "snowflake.streaming.validate.compatibility.with.classic": "false",
}


def v4_config_to_v3(config: Dict[str, str]) -> Dict[str, str]:
    """Build the v3 equivalent of a v4 connector config.

    The input mapping is deep-copied and never modified.

    :param config: v4 connector configuration.
    :return: a new dict holding the v3 configuration.
    """
    migration_flag_key = (
        "snowflake.streaming.classic.offset.migration.include.connector.name"
    )
    channel_flag_key = "snowflake.streaming.channel.name.include.connector.name"

    converted = copy.deepcopy(config)
    converted["connector.class"] = V3_CONNECTOR_CLASS
    converted["snowflake.ingestion.method"] = "SNOWPIPE_STREAMING"

    # Values applied only when the caller did not specify them.
    # v3 defaults to schematization off while v4 defaults to on, so v4's
    # default is made explicit here to keep the behaviour unchanged.
    fallbacks = (
        ("snowflake.enable.schematization", "true"),
        ("buffer.flush.time", "1"),
        ("snowflake.streaming.max.client.lag", "1"),
    )
    for option, fallback in fallbacks:
        converted.setdefault(option, fallback)

    # Translate v4's include-connector-name flag back to the v3 channel
    # naming flag, without overriding an explicit v3 value.
    if migration_flag_key in converted:
        flag_value = converted.pop(migration_flag_key)
        if flag_value is not None:
            converted.setdefault(channel_flag_key, flag_value)

    # Strip v4-only settings.
    converted.pop("snowflake.streaming.classic.offset.migration", None)
    return converted


def v3_config_to_v4(config: Dict[str, str]) -> Dict[str, str]:
    """Build the v4 equivalent of a v3 connector config.

    The input mapping is deep-copied and never modified.

    :param config: v3 connector configuration.
    :return: a new dict holding the v4 configuration.
    """
    channel_flag_key = "snowflake.streaming.channel.name.include.connector.name"
    migration_flag_key = (
        "snowflake.streaming.classic.offset.migration.include.connector.name"
    )

    converted = copy.deepcopy(config)
    converted["connector.class"] = V4_CONNECTOR_CLASS

    # === Deprecated settings ===
    converted.pop("snowflake.ingestion.method", None)

    # === New defaults ===
    # v4 defaults to schematization on; v3 defaults to off.
    # Preserve v3's default by setting it explicitly for v4 when unspecified.
    converted.setdefault("snowflake.enable.schematization", "false")

    # === Compatibility settings and offset migration ===
    # These are always overwritten. v4 migrates committed offsets from SSv1
    # channels; default to best_effort.
    forced_settings = (
        ("snowflake.validation", "client_side"),
        ("snowflake.compatibility.enable.column.identifier.normalization", "true"),
        (
            "snowflake.compatibility.enable.autogenerated.table.name.sanitization",
            "true",
        ),
        ("snowflake.streaming.classic.offset.migration", "best_effort"),
    )
    for option, forced_value in forced_settings:
        converted[option] = forced_value

    # If v3 used connector-name-prefixed channel naming, carry it over so
    # the SSv1 offset migration lookup uses the matching channel name format.
    carried_flag = converted.pop(channel_flag_key, None)
    if carried_flag is None:
        carried_flag = "false"
    # IMPORTANT NOTE: In previous versions of the connector, the behavior was not controlled by this setting and exhibited the implicit effective value:
    # - v2.0.0–v2.0.1: always false; no streaming channel name prefix.
    # - v2.1.0–v2.1.1: always true (hardcoded connector name prefix, no config knob).
    # - v2.1.2–v2.5.0: always false; v2.1.2 reverted to V1 naming with automatic migration.
    # - v3.0.0–v3.3.1: always false; the setting did not exist.
    # - v3.4.0–v3.5.3: configurable (default false); the setting was introduced here.
    converted[migration_flag_key] = carried_flag

    return converted


================================================
FILE: test/lib/crypto.py
================================================
import re
import textwrap
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization


def normalize_private_key(private_key: str, is_encrypted) -> bytes:
    """Re-wrap a private key string as canonical PEM text and return it as bytes.

    Any existing header/footer lines and all whitespace are removed, the
    remaining payload is split into 64-character lines, and a fresh header
    and footer are added.

    :param private_key: key text, with or without PEM header/footer lines.
    :param is_encrypted: truthy to emit ``ENCRYPTED PRIVATE KEY`` markers,
        falsy to emit plain ``PRIVATE KEY`` markers.
    :return: the PEM document encoded to bytes.
    """
    # Remove header, footer, and line breaks.
    without_markers = re.sub("-+[A-Za-z ]+-+", "", private_key)
    payload = re.sub("\\s", "", without_markers)

    label = "ENCRYPTED PRIVATE KEY" if is_encrypted else "PRIVATE KEY"

    # Group in lines of 64 characters, between header and footer.
    pem_lines = [f"-----BEGIN {label}-----"]
    pem_lines.extend(textwrap.wrap(payload, 64))
    pem_lines.append(f"-----END {label}-----")
    return "\n".join(pem_lines).encode()


def parse_private_key(private_key_str: str, password_str: str | None = None) -> bytes:
    """Load a PEM private key and return it as unencrypted PKCS#8 DER bytes.

    :param private_key_str: key text; it is normalized to PEM before loading.
    :param password_str: passphrase for an encrypted key. ``None`` or an
        empty string means the key is treated as unencrypted.
    :return: the DER-encoded, unencrypted PKCS#8 private key.
    """
    if password_str:
        passphrase: bytes | None = password_str.encode("ascii")
    else:
        passphrase = None

    # The PEM markers depend on whether a passphrase was supplied.
    pem_document: bytes = normalize_private_key(
        private_key_str, passphrase is not None
    )

    loaded_key = serialization.load_pem_private_key(
        pem_document, password=passphrase, backend=default_backend()
    )

    der_options = {
        "encoding": serialization.Encoding.DER,
        "format": serialization.PrivateFormat.PKCS8,
        "encryption_algorithm": serialization.NoEncryption(),
    }
    return loaded_key.private_bytes(**der_options)


================================================
FILE: test/lib/driver.py
================================================
import json
import logging
import os
import time
from typing import Callable, Dict
import uuid
from pathlib import Path
from urllib.parse import urlparse

import requests
import snowflake.connector
from confluent_kafka import (
    Consumer,
    ConsumerGroupTopicPartitions,
    KafkaError,
    OFFSET_BEGINNING,
    Producer,
    TopicPartition,
)
from confluent_kafka.admin import AdminClient, ConfigResource, NewPartitions, NewTopic
from confluent_kafka.avro import AvroProducer

from lib.config import Profile, SnowflakeConnectorConfig


def quote_name(name: str) -> str:
    """Wrap *name* in double quotes, doubling any double quote it contains."""
    escaped = name.replace('"', '""')
    return f'"{escaped}"'


class Error(Exception):
    """Base class for test exceptions"""

    pass


class ResetAndRetry(Error):
    """Raised when we want to reset the retry count"""

    def __init__(self, msg=""):
        # Forward the message to Exception so str(exc), exc.args and
        # tracebacks carry it; ``msg`` is kept for callers that read it.
        super().__init__(msg)
        self.msg = msg


class RetryableError(Error):
    """Raised when we can retry"""

    def __init__(self, msg=""):
        # Forward the message to Exception so str(exc), exc.args and
        # tracebacks carry it; ``msg`` is kept for callers that read it.
        super().__init__(msg)
        self.msg = msg


class NonRetryableError(Error):
    """Raised when we cannot retry"""

    def __init__(self, msg=""):
        # Forward the message to Exception so str(exc), exc.args and
        # tracebacks carry it; ``msg`` is kept for callers that read it.
        super().__init__(msg)
        self.msg = msg


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


class KafkaDriver:
    def __init__(
        self,
        kafkaAddress: str,
        schemaRegistryAddress: str,
        kafkaConnectAddress: str,
        credentials: Profile,
        testVersion: str,
        enableSSL: bool,
    ):
        """Create the Kafka clients and the Snowflake connection used by tests.

        :param kafkaAddress: value used for ``bootstrap.servers``.
        :param schemaRegistryAddress: value used for ``schema.registry.url``
            by the lazily created Avro producer.
        :param kafkaConnectAddress: host[:port] used to build Kafka Connect
            REST URLs.
        :param credentials: profile used to open the Snowflake connection
            and to fill connector config placeholders.
        :param testVersion: value returned by ``get_kafka_version``.
        :param enableSSL: when true, clients use SASL_SSL with the fixed
            test CA file and client credentials below.
        """
        self.testVersion = testVersion
        self.credentials = credentials

        self.TEST_DATA_FOLDER = Path("test_data")
        self.httpHeader = {
            "Content-type": "application/json",
            "Accept": "application/json",
        }

        self.SEND_INTERVAL = 0.01  # send a record every 10 ms
        self.VERIFY_INTERVAL = 10  # verify every 10 secs
        # Max verification retries; with VERIFY_INTERVAL = 10 s this allows
        # up to 60 * 10 s = 10 min of waiting in verifyWithRetry.
        self.MAX_RETRY = 60
        self.MAX_FLUSH_BUFFER_SIZE = (
            5000  # flush the producer after every 5000 produced records
        )

        self.kafkaConnectAddress = kafkaConnectAddress
        self.schemaRegistryAddress = schemaRegistryAddress
        self.kafkaAddress = kafkaAddress

        if enableSSL:
            logger.info("=== Enable SSL ===")
            self.client_config = {
                "bootstrap.servers": kafkaAddress,
                "security.protocol": "SASL_SSL",
                "ssl.ca.location": "./crts/ca-cert",
                "sasl.mechanism": "PLAIN",
                "sasl.username": "client",
                "sasl.password": "client-secret",
            }
        else:
            self.client_config = {
                "bootstrap.servers": kafkaAddress,
                "broker.address.family": "v4",
            }

        self.adminClient = AdminClient(self.client_config)

        # Producer and consumer configs start as copies of the shared client
        # config so their extra keys do not leak into each other.
        producer_config = self.client_config.copy()
        # Setting max request size to 30 MiB to support large blob tests.
        producer_config["message.max.bytes"] = 31457280  # 30 MiB
        self.producer = Producer(producer_config)

        consumer_config = self.client_config.copy()
        # A random group id gives every driver instance its own consumer group.
        consumer_config["group.id"] = f"my-group-{uuid.uuid4()}"
        consumer_config["auto.offset.reset"] = "earliest"
        self.consumer = Consumer(consumer_config)

        self._avro_producer_config = producer_config.copy()
        self._avro_producer_config["schema.registry.url"] = schemaRegistryAddress
        # Lazy-init: Apache platform has no schema registry, so we can't
        # create the AvroProducer eagerly.
        self._avroProducer = None

        # Opens the Snowflake connection immediately, using only the
        # non-None fields of the derived connector config.
        snowflake_connector_config = SnowflakeConnectorConfig.from_profile(credentials)
        self.snowflake_conn = snowflake.connector.connect(
            **snowflake_connector_config.to_dict()
        )

    @property
    def avroProducer(self):
        if self._avroProducer is None:
            self._avroProducer = AvroProducer(self._avro_producer_config)
        return self._avroProducer

    def msgSendInterval(self):
        # sleep self.SEND_INTERVAL before send the second message
        time.sleep(self.SEND_INTERVAL)

    def startConnectorWaitTime(self):
        """Block for a fixed 10 seconds."""
        # NOTE(review): presumably gives a newly started connector time to
        # come up; confirm against callers.
        time.sleep(10)

    def verifyWaitTime(self):
        """Log the upcoming pause, then sleep ``self.VERIFY_INTERVAL`` seconds."""
        # sleep before verifying result in SF DB
        wait_seconds = self.VERIFY_INTERVAL
        logger.info(
            f"=== Sleep {wait_seconds} secs before verify result in Snowflake DB ==="
        )
        time.sleep(wait_seconds)

    def verifyWithRetry(self, func, retry_round, configFileName):
        """Call ``func(retry_round)`` until it succeeds or retries run out.

        :param func: verification callable; invoked with ``retry_round`` as
            its only argument. Returning normally counts as success.
        :param retry_round: value passed through to ``func`` unchanged.
        :param configFileName: label used only in log messages.
        :raises NonRetryableError: re-raised when ``func`` raises it, and
            raised (with an empty message) once ``self.MAX_RETRY`` retryable
            failures have accumulated.
        :raises snowflake.connector.errors.ProgrammingError: re-raised when
            its ``errno`` is not 2003.
        """
        retryNum = 0
        while retryNum < self.MAX_RETRY:
            try:
                func(retry_round)
                break
            except ResetAndRetry:
                # Start counting from scratch; note there is no sleep on
                # this path, so the next attempt happens immediately.
                retryNum = 0
                logger.info(f"=== Reset retry count and retry {configFileName} ===")
            except RetryableError as e:
                retryNum += 1
                logger.warning(f"=== Failed {configFileName}, retryable. {e.msg} ===")
                self.verifyWaitTime()
            except NonRetryableError as e:
                logger.error(
                    f"=== Non retryable error for {configFileName} raised ===\n{e.msg}"
                )
                raise e
            except snowflake.connector.errors.ProgrammingError as e:
                logger.error(f"Error in VerifyWithRetry for {configFileName}: {e}")
                # errno 2003 is treated as "table not created yet" and is
                # retried; any other programming error is fatal.
                if e.errno == 2003:
                    retryNum += 1
                    logger.warning(
                        f"=== Failed, table not created for {configFileName} ==="
                    )
                    self.verifyWaitTime()
                else:
                    raise
        # A successful call breaks out with retryNum < MAX_RETRY, so equality
        # here means every attempt failed.
        if retryNum == self.MAX_RETRY:
            logger.error(f"=== Max retry exceeded for {configFileName} ===")
            raise NonRetryableError()

    def createTopics(self, topicName, partitionNum=1, replicationNum=1):
        """Create a topic and wait for the admin request to complete.

        The admin client returns one future per requested topic. Each future
        is awaited so the topic creation has finished when this method
        returns, and so a failure is logged instead of being silently
        dropped. Failures are logged rather than raised, in line with
        ``deleteTopic`` and ``createPartitions``.

        :param topicName: name of the topic to create.
        :param partitionNum: number of partitions.
        :param replicationNum: replication factor.
        """
        pending = self.adminClient.create_topics(
            [NewTopic(topicName, partitionNum, replicationNum)]
        )
        for topic, f in pending.items():
            try:
                f.result()  # The result itself is None
                logger.info(f"Topic creation successful: {topic}")
            except Exception as e:
                logger.error(f"Failed to create topic {topic}: {e}")

    def deleteTopic(self, topicName):
        """Delete a topic and wait for the request to finish.

        Failures are logged, not raised.

        :param topicName: name of the topic to delete.
        """
        pending = self.adminClient.delete_topics([topicName])
        for deleted_name, future in pending.items():
            try:
                future.result()  # The result itself is None
            except Exception as exc:
                logger.error(f"Failed to delete topic {topicName}: {exc}")
            else:
                logger.info(f"Topic deletion successful: {deleted_name}")

    def describeTopic(self, topicName):
        """Fetch a topic's configuration and log every entry.

        Failures are logged, not raised.

        :param topicName: name of the topic to describe.
        """
        resource = ConfigResource(restype=ConfigResource.Type.TOPIC, name=topicName)
        pending = self.adminClient.describe_configs(resources=[resource])
        for future in pending.values():
            try:
                topic_config = future.result()
                logger.info(f"Topic {topicName} config is as follows:")
                for key, value in topic_config.items():
                    logger.info(f"{key}: {value}")
            except Exception as exc:
                logger.error(f"Failed to describe topic {topicName}: {exc}")

    def createPartitions(self, topicName, new_total_partitions):
        """Change a topic's partition count to ``new_total_partitions`` and wait.

        Failures are logged, not raised.

        :param topicName: name of the topic to change.
        :param new_total_partitions: total partition count after the change.
        """
        request = NewPartitions(topicName, new_total_partitions)
        pending = self.adminClient.create_partitions(new_partitions=[request])
        for resized_topic, future in pending.items():
            try:
                future.result()  # The result itself is None
            except Exception as exc:
                logger.error(f"Failed to create topic partitions {resized_topic}: {exc}")
            else:
                logger.info(f"Topic {resized_topic} partitions created")

    def sendBytesData(self, topic, value, key=None, partition=0, headers=None):
        if not key:
            for i, v in enumerate(value):
                self.producer.produce(
                    topic, value=v, partition=partition, headers=headers or []
                )
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value, strict=True)):
                self.producer.produce(
                    topic, value=v, key=k, partition=partition, headers=headers or []
                )
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        self.producer.flush()

    def sendAvroSRData(
        self,
        topic,
        value,
        value_schema,
        key=None,
        key_schema="",
        partition=0,
        headers=None,
    ):
        if not key:
            for i, v in enumerate(value):
                self.avroProducer.produce(
                    topic=topic,
                    value=v,
                    value_schema=value_schema,
                    partition=partition,
                    headers=headers or [],
                )
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        else:
            for i, (k, v) in enumerate(zip(key, value, strict=True)):
                self.avroProducer.produce(
                    topic=topic,
                    value=v,
                    value_schema=value_schema,
                    key=k,
                    key_schema=key_schema,
                    partition=partition,
                    headers=headers or [],
                )
                if (i + 1) % self.MAX_FLUSH_BUFFER_SIZE == 0:
                    self.producer.flush()
        self.avroProducer.flush()

    def consume_messages_dlq(self, config, partition_no, target_dlq_offset_number):
        """

        :param config: Connector config
        :param partition_no: partition no to search for target offset
        :param target_dlq_offset_number: Target offset number to find which stops finding any more offsets in DLQ
        :return: count of offsets
        """
        dlq_topic_name = config["config"]["errors.deadletterqueue.topic.name"]
        return self.consume_messages(
            dlq_topic_name, partition_no, target_dlq_offset_number
        )

    def _wait_for_topic(self, topic_name: str, timeout: float = 120) -> None:
        """Poll broker metadata until topic_name appears.

        DLQ topics are auto-created by Kafka Connect on the first error
        record, so they may not exist when consume_messages is called.
        """
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            metadata = self.adminClient.list_topics(timeout=5)
            if topic_name in metadata.topics:
                return
            logger.debug(
                f"Topic {topic_name!r} not yet visible in broker metadata, waiting..."
            )
            time.sleep(2)
        raise TimeoutError(
            f"Topic {topic_name!r} did not appear in broker metadata within {timeout}s"
        )

    def consume_messages(self, topic_name, partition_no, target_offset):
        """
        Read one partition of a topic from the beginning and count the messages seen.

        Polling stops as soon as a message on ``partition_no`` has an offset
        of at least ``target_offset``, or after 120 seconds, whichever comes
        first.

        Uses assign() instead of subscribe() to bypass the async consumer-group
        rebalance.  With subscribe(), if the topic doesn't exist at the time of
        the call (e.g. a DLQ topic auto-created by Kafka Connect), the broker
        returns an empty partition assignment and the polling window can expire
        before any messages are consumed.  assign() with OFFSET_BEGINNING is
        synchronous and works even for newly-created topics.

        :param topic_name: name of topic
        :param partition_no: partition to read and to check the target offset on
        :param target_offset: Stops function when this offset is reached on ``partition_no``
        :return: Count of messages consumed
        """
        self._wait_for_topic(topic_name)
        start_position = TopicPartition(topic_name, partition_no, OFFSET_BEGINNING)
        self.consumer.assign([start_position])

        consumed = 0
        began_at = time.time()
        try:
            while time.time() - began_at < 120:
                record = self.consumer.poll(10.0)  # Time out in seconds
                if record is None:
                    continue
                if record.error():
                    # Error events are logged and do not count as messages.
                    if record.error().code() == KafkaError._PARTITION_EOF:
                        logger.info("Reached end of partition")
                    else:
                        logger.error(
                            f"Error while consuming message: {record.error()}"
                        )
                    continue

                consumed += 1
                logger.debug(
                    f"Received message: key={record.key()}, value={record.value()}, partition={record.partition()}, offset={record.offset()}"
                )
                on_target_partition = record.partition() == partition_no
                if on_target_partition and record.offset() >= target_offset:
                    logger.info(
                        f"Reached target offset of {target_offset} for Topic:{topic_name}"
                    )
                    break
            else:
                # Reached only when the time budget ran out, not on break.
                logger.warning(
                    f"Couldn't find target_offset:{target_offset} in topic:{topic_name} in 120 Seconds"
                )
        except KafkaError as e:
            logger.error(f"Kafka error: {e}")

        return consumed

    # returns kafka or confluent version
    def get_kafka_version(self):
        return self.testVersion

    def cleanTableStagePipe(self, topic: str):
        """Drop the table named after ``topic`` and its SSv2 streaming pipe.

        Both statements use ``IF EXISTS``, so missing objects are not an error.

        :param topic: topic name, which is also the table name.
        """
        # SSv2 streaming pipe (current naming convention: tableName-STREAMING)
        streaming_pipe = f"{topic}-STREAMING"
        steps = (
            (
                f"=== Drop table {topic} ===",
                f"DROP TABLE IF EXISTS {quote_name(topic)}",
            ),
            (
                f"=== Drop SSv2 pipe {streaming_pipe} ===",
                f"DROP PIPE IF EXISTS {quote_name(streaming_pipe)}",
            ),
        )
        for banner, statement in steps:
            logger.info(banner)
            self.snowflake_conn.cursor().execute(statement)

        logger.info("=== Done ===")

    def create_table(self, table_name: str):
        """Create ``table_name`` with one ``RECORD_METADATA VARIANT`` column if it is missing.

        :param table_name: unquoted table name; it is quoted before use.
        """
        logger.info(f"=== Creating table {table_name} ===")
        cursor = self.snowflake_conn.cursor()
        statement = f"CREATE TABLE IF NOT EXISTS {quote_name(table_name)} (RECORD_METADATA VARIANT)"
        cursor.execute(statement)

    def drop_table(self, table_name: str):
        """Drop ``table_name`` if it exists.

        :param table_name: unquoted table name; it is quoted before use.
        """
        logger.info(f"=== Dropping table {table_name} ===")
        cursor = self.snowflake_conn.cursor()
        statement = f"DROP TABLE IF EXISTS {quote_name(table_name)}"
        cursor.execute(statement)

    def select_number_of_records(self, table_name: str) -> str | None:
        try:
            return (
                self.snowflake_conn.cursor()
                .execute(f"SELECT count(*) FROM {quote_name(table_name)}")
                .fetchone()[0]
            )
        except snowflake.connector.errors.ProgrammingError as e:
            if "does not exist or not authorized" in e.msg:
                return None
            raise

    def get_connector_status(self, connector_name: str) -> dict | None:
        """Fetch connector and task states from the Kafka Connect REST API.

        Issues GET /connectors/{name}/status and returns the parsed JSON, e.g.:
        {
          "name": "...",
          "connector": {"state": "RUNNING", ...},
          "tasks": [{"id": 0, "state": "RUNNING", ...}, ...]
        }
        Returns None if the response is not OK or the request fails; both
        cases are logged at debug level.
        """
        status_url = (
            f"http://{self.kafkaConnectAddress}/connectors/{connector_name}/status"
        )
        try:
            response = requests.get(status_url, timeout=10)
            if not response.ok:
                logger.debug(
                    f"GET {status_url} returned {response.status_code}: {response.text[:200]}"
                )
                return None
            return response.json()
        except Exception as exc:
            logger.debug(f"Failed to query connector status: {exc}")
        return None

    def wait_for_connector_running(
        self, connector_name: str, timeout: int = 60, interval: int = 3
    ):
        """Poll until the connector and all its tasks report RUNNING state.

        Raises TimeoutError if the connector does not reach RUNNING within
        *timeout* seconds.
        """
        deadline = time.monotonic() + timeout
        while True:
            status = self.get_connector_status(connector_name)
            if status is not None:
                connector_state = status.get("connector", {}).get("state")
                tasks = status.get("tasks", [])
                if (
                    connector_state == "RUNNING"
                    and tasks
                    and all(t.get("state") == "RUNNING" for t in tasks)
                ):
                    logger.info(
                        f"Connector {connector_name} is RUNNING with "
                        f"{len(tasks)} task(s)"
                    )
                    return
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Connector {connector_name} did not reach RUNNING state "
                    f"within {timeout}s (last status: {status})"
                )
            time.sleep(interval)

    def get_failed_tasks(self, connector_name: str) -> list:
        """Return list of FAILED tasks with their traces, or empty list."""
        status = self.get_connector_status(connector_name)
        if status is None:
            return []
        return [t for t in status.get("tasks", []) if t.get("state") == "FAILED"]

    def get_consumer_group_offset(
        self, connector_name: str, topic: str, partition: int = 0
    ) -> int | None:
        """Look up the offset committed by a connector's consumer group.

        The group is ``connect-<connector_name>``. Only the first entry of
        the response is inspected.

        :return: the ``offset`` of that entry, or None when the entry carries
            an error (which is logged) or the response is empty.
        """
        # NOTE(review): a partition with no committed offset may come back as
        # a negative sentinel offset rather than None; confirm with callers.
        group_id = f"connect-{connector_name}"
        lookup = ConsumerGroupTopicPartitions(
            group_id, [TopicPartition(topic, partition)]
        )
        pending = self.adminClient.list_consumer_group_offsets([lookup])
        group_result = pending[group_id].result()
        for entry in group_result.topic_partitions:
            if not entry.error:
                return entry.offset
            logger.error(
                f"Error querying offset for {group_id}/{topic}[{partition}]: "
                f"{entry.error}"
            )
            return None
        return None

    def restartConnector(self, connectorName):
        """POST to ``/connectors/{name}/restart`` on Kafka Connect and log the response."""
        connectors_root = f"http://{self.kafkaConnectAddress}/connectors"
        response = requests.post(
            f"{connectors_root}/{connectorName}/restart", headers=self.httpHeader
        )
        logger.info(f"{response} restart connector")

    def restartConnectorAndTasks(self, connectorName):
        """POST a restart with ``includeTasks=true&onlyFailed=false`` and log the response."""
        connectors_root = f"http://{self.kafkaConnectAddress}/connectors"
        restart_query = "includeTasks=true&onlyFailed=false"
        response = requests.post(
            f"{connectors_root}/{connectorName}/restart?{restart_query}",
            headers=self.httpHeader,
        )
        logger.info(f"{response} restart connector and all tasks")

    def pauseConnector(self, connectorName):
        """PUT to ``/connectors/{name}/pause`` on Kafka Connect and log the response."""
        connectors_root = f"http://{self.kafkaConnectAddress}/connectors"
        response = requests.put(
            f"{connectors_root}/{connectorName}/pause", headers=self.httpHeader
        )
        logger.info(f"{response} pause connector")

    def resumeConnector(self, connectorName):
        """PUT to ``/connectors/{name}/resume`` on Kafka Connect and log the response."""
        connectors_root = f"http://{self.kafkaConnectAddress}/connectors"
        response = requests.put(
            f"{connectors_root}/{connectorName}/resume", headers=self.httpHeader
        )
        logger.info(f"{response} resume connector")

    def deleteConnector(self, connectorName):
        """DELETE ``/connectors/{name}`` on Kafka Connect and log the response."""
        connectors_root = f"http://{self.kafkaConnectAddress}/connectors"
        response = requests.delete(
            f"{connectors_root}/{connectorName}", headers=self.httpHeader
        )
        logger.info(f"{response} delete connector")

    def closeConnector(self, connector_name: str, *, wait_timeout: int | None = None):
        """Delete a connector.
        If `wait_timeout` is provided, also wait for it to fully disappear.

        The Kafka Connect DELETE endpoint returns immediately, but the worker
        shuts down the task's consumer asynchronously.  We poll until a GET
        returns 404 so the caller can safely assume no consumer is running.

        :param connector_name: name of the connector to delete.
        :param wait_timeout: seconds to wait for the connector to disappear;
            ``None`` skips the wait.
        :return: without ``wait_timeout``, whether the DELETE response was OK.
            With ``wait_timeout``, True once a GET returns 404 and False if
            the deadline passes first.
        """
        base_url = f"http://{self.kafkaConnectAddress}/connectors/{connector_name}"
        logger.info(f"=== Delete connector {connector_name} ===")
        response = requests.delete(base_url, timeout=10)
        if response.ok:
            logger.info(f"Delete response code: {response.status_code}")
        else:
            logger.error(
                f"Failed to delete connector {connector_name}: {response.text}"
            )

        if wait_timeout is None:
            return response.ok

        # Polling happens even when the DELETE failed: a 404 on GET is the
        # only condition that counts as "gone".
        deadline = time.monotonic() + wait_timeout
        while time.monotonic() < deadline:
            try:
                status_code = requests.get(base_url, timeout=5).status_code
            except requests.exceptions.RequestException as exc:
                logger.debug(
                    f"Transient error polling connector {connector_name}: {exc}"
                )
                time.sleep(1)
                continue
            if status_code == 404:
                logger.info(f"Connector {connector_name} fully removed")
                return True
            logger.debug(
                f"Connector {connector_name} still present (status {status_code}), "
                f"waiting..."
            )
            time.sleep(1)
        # Use the module logger, like every other message in this class,
        # rather than the root logger.
        logger.error(
            f"Connector {connector_name} did not disappear within {wait_timeout}s"
        )
        return False

    # Type alias for a flat connector configuration: setting name -> value.
    Config = Dict[str, str]

    def createConnector(
        self,
        name_salt: str,
        *,
        # Either pass those:
        unsalted_name: str = None,
        config_template: Config = None,
        # Or those (deprecated):
        rest_request_template_filename: str = None,
        config_transform: Callable[[Config], Config] = None,
    ):
        """Creates the connector either with:
        - an unsalted name and a config template
        - a REST request template filename and an optional transform

        The connector name is the unsalted name followed by ``name_salt``.
        Placeholder tokens in the template are replaced with values from the
        credentials profile and this driver. Any existing connector of the
        same name is deleted first, then the request is POSTed to Kafka
        Connect, with one retry after 10 seconds if the first POST fails.

        Raises RuntimeError if both POST attempts fail.

        Returns the generated config."""

        match rest_request_template_filename:
            case None:
                assert unsalted_name is not None
                assert config_template is not None
                assert config_transform is None
                rest_request_template = {
                    "name": "SNOWFLAKE_CONNECTOR_NAME",
                    "config": config_template,
                }
            case _:
                assert unsalted_name is None
                assert config_template is None
                rest_request_template_path = (
                    Path("rest_request_template") / rest_request_template_filename
                )
                logger.info(
                    f"=== Generating connector REST request from {rest_request_template_path} ==="
                )
                # The connector's unsalted name is the file name up to its first dot.
                unsalted_name = rest_request_template_filename.split(".")[0]
                with rest_request_template_path.open() as f:
                    rest_request_template = json.load(f)

        snowflake_connector_name = unsalted_name + name_salt
        logger.info(f"=== Creating connector: {snowflake_connector_name} ===")
        logger.info(
            f"Config template: {json.dumps(rest_request_template['config'], indent=4)}"
        )

        # The test topic shares the connector's salted name.
        snowflake_topic_name = snowflake_connector_name

        def replace_values(obj, replacements):
            """Recursively traverse a parsed JSON object, applying substring replacements to string values."""
            if isinstance(obj, dict):
                return {k: replace_values(v, replacements) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [replace_values(item, replacements) for item in obj]
            elif isinstance(obj, str):
                for old, new in replacements.items():
                    obj = obj.replace(old, new)
                return obj
            else:
                return obj

        # Replacements are applied in dict order as plain substring
        # substitutions, and only to values, never to dict keys.
        rest_request = replace_values(
            rest_request_template,
            {
                "SNOWFLAKE_HOST": self.credentials.make_url(),
                "SNOWFLAKE_DATABASE": self.credentials.database,
                "SNOWFLAKE_SCHEMA": self.credentials.schema,
                "SNOWFLAKE_USER": self.credentials.user,
                "SNOWFLAKE_ROLE": self.credentials.role,
                "SNOWFLAKE_PRIVATE_KEY": self.credentials.private_key,
                "CONFLUENT_SCHEMA_REGISTRY": self.schemaRegistryAddress,
                "SNOWFLAKE_TEST_TOPIC": snowflake_topic_name,
                "SNOWFLAKE_CONNECTOR_NAME": snowflake_connector_name,
                "_NAME_SALT": name_salt,
            },
        )

        # The transform sees the config after placeholder substitution.
        if config_transform is not None:
            rest_request["config"] = config_transform(rest_request["config"])

        # Allow the Snowpipe Streaming SDK's URL to be overridden for testing
        # against a local Snowflake deployment.
        if snowpipe_streaming_url := os.environ.get("SNOWPIPE_STREAMING_URL"):
            logger.info(
                f"Overriding Snowpipe Streaming SDK URL to {snowpipe_streaming_url}"
            )
            parsed = urlparse(snowpipe_streaming_url)
            # NOTE(review): parsed.port is None when the URL has no explicit
            # port, which yields the literal entry "port:None" - confirm
            # whether the connector tolerates that.
            extra_overrides = [
                f"scheme:{parsed.scheme}",
                f"host:{parsed.hostname}",
                f"port:{parsed.port}",
            ]
            override_key = "snowflake.streaming.client.provider.override.map"
            # Append to any override map already present in the config.
            match rest_request["config"].get(override_key):
                case None | "":
                    overrides = extra_overrides
                case _ as existing_overrides:
                    overrides = [existing_overrides] + extra_overrides
            rest_request["config"][override_key] = ",".join(overrides)

        # Delete any connector left over under the same name. This also acts
        # as a readiness probe: it is retried until Kafka Connect answers
        # with 200, 201 or 404.
        MAX_RETRY = 9
        retry = 0
        delete_url = (
            f"http://{self.kafkaConnectAddress}/connectors/{snowflake_connector_name}"
        )
        post_url = f"http://{self.kafkaConnectAddress}/connectors"
        while retry < MAX_RETRY:
            try:
                logger.info(f"Delete request: {delete_url}")
                code = requests.delete(delete_url, timeout=10).status_code
                logger.info(f"Delete request returned: {code}")
                if code in (200, 201, 404):
                    break
            except Exception as e:
                logger.error(f"An exception occurred: {e}")
            logger.info(
                "=== sleep for 3 secs to wait for kafka connect to accept connection ==="
            )
            time.sleep(3)
            retry += 1
        # Exhausting the delete retries is logged but does not abort; the
        # POST below is attempted regardless.
        if retry == MAX_RETRY:
            logger.error(f"Kafka Delete request not successful: {delete_url}")

        # NOTE(review): the POST and GET calls below pass no timeout, unlike
        # the DELETE above.
        logger.info(f"Post HTTP request to Create Connector: {post_url}")
        r = requests.post(post_url, json=rest_request, headers=self.httpHeader)
        logger.info(
            f"Connector Name:{snowflake_connector_name} POST Response:{r.status_code}"
        )
        if not r.ok:
            logger.error(
                f"Failed creating connector {snowflake_connector_name}: "
                f"{r.status_code} {r.reason}, {r.text}"
            )
            # Single retry after a fixed 10-second pause.
            time.sleep(10)
            logger.info(
                f"Retrying POST request for connector:{snowflake_connector_name}"
            )
            r = requests.post(post_url, json=rest_request, headers=self.httpHeader)
            logger.info(
                f"Connector Name:{snowflake_connector_name} POST Response:{r.status_code}"
            )
            if not r.ok:
                raise RuntimeError(
                    f"Failed to create connector:{snowflake_connector_name}"
                )
        # Log the current connector list for diagnostics only.
        getConnectorResponse = requests.get(post_url)
        logger.info(
            f"Get Connectors status:{getConnectorResponse.status_code}, response:{getConnectorResponse.content}"
        )

        return rest_request


================================================
FILE: test/lib/fixtures/__init__.py
================================================


================================================
FILE: test/lib/fixtures/connector.py
================================================
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
import logging
from typing import Dict, List
import pytest
from lib.driver import KafkaDriver
from lib.config_migration import v3_config_to_v4, v4_config_to_v3

# Module-level logger used by the topic and connector fixtures in this file.
logger = logging.getLogger(__name__)


@pytest.fixture
def create_topics(driver: KafkaDriver, name_salt):
    """Factory fixture for creating several salted topics (and tables) in parallel.

    Yields a callable ``_create(topics, *, num_partitions=1,
    replication_factor=1, with_tables=True)``.  For every entry in ``topics``
    it appends ``name_salt`` to the name, calls ``driver.createTopics`` and,
    when ``with_tables`` is true, ``driver.create_table`` with the same salted
    name.  The salted names are returned in input order.

    An exception raised while creating a topic or table is re-raised to the
    caller once all workers have finished, instead of being dropped together
    with the worker's future.

    Teardown deletes every topic and drops every table that was recorded as
    created; tables are dropped even when a topic deletion fails.
    """

    created_topics: List[str] = []
    created_tables: List[str] = []

    def _create_one(topic, num_partitions, replication_factor, with_table):
        salted = f"{topic}{name_salt}"
        logger.info(f"Creating topic {salted}")
        driver.createTopics(salted, num_partitions, replication_factor)
        # Recorded only after the driver call returned, so teardown targets
        # just the objects whose creation call did not raise.
        created_topics.append(salted)
        if with_table:
            driver.create_table(salted)
            created_tables.append(salted)
        return salted

    def _create(
        topics: List[str], *, num_partitions=1, replication_factor=1, with_tables=True
    ):
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [
                executor.submit(
                    _create_one, t, num_partitions, replication_factor, with_tables
                )
                for t in topics
            ]
        # Leaving the ``with`` block has already waited for every worker;
        # result() is called only to surface the first stored exception.
        for future in futures:
            future.result()
        return [f"{t}{name_salt}" for t in topics]

    try:
        yield _create
    finally:
        with ThreadPoolExecutor(max_workers=10) as executor:
            try:
                for _ in executor.map(driver.deleteTopic, created_topics):
                    pass
            finally:
                # Still drop the tables when a topic deletion raised.
                for _ in executor.map(driver.drop_table, created_tables):
                    pass


@pytest.fixture()
def create_custom_connector(driver: KafkaDriver, name_salt: str):
    """Factory fixture that creates connectors and closes them at teardown.

    Yields ``_create(unsalted_name, config_template)``.  It forwards both
    arguments together with ``name_salt`` to ``driver.createConnector`` and
    wraps the ``"name"`` and ``"config"`` entries of the returned request in a
    ``Connector`` object.

    ``Connector.close(**kwargs)`` removes the connector from the teardown list
    and returns the result of ``driver.closeConnector``.  Connectors that are
    still registered when the test ends are closed in reverse creation order.
    """

    @dataclass
    class Connector:
        name: str
        config: Dict[str, str]

        def close(self, **kwargs):
            # Unregister first so teardown does not close this connector again.
            still_open.remove(self)
            return driver.closeConnector(self.name, **kwargs)

    still_open: List[Connector] = []

    def _create(
        unsalted_name: str,
        config_template: Dict[str, str],
    ) -> Connector:
        request_body = driver.createConnector(
            name_salt=name_salt,
            unsalted_name=unsalted_name,
            config_template=config_template,
        )
        new_connector = Connector(
            name=request_body["name"],
            config=request_body["config"],
        )
        still_open.append(new_connector)
        return new_connector

    try:
        yield _create
    finally:
        # Last created, first closed.
        for leftover in still_open[::-1]:
            driver.closeConnector(leftover.name)


@pytest.fixture
def create_connector(create_custom_connector, connector_version: str, request):
    """Factory fixture that creates a connector named after the current test.

    The yielded ``_create`` takes exactly one of ``v3_config`` / ``v4_config``
    (keyword-only, must be non-empty).  The supplied config is used as-is when
    it matches ``connector_version`` and is otherwise converted with
    ``v4_config_to_v3`` / ``v3_config_to_v4``.  The result is passed to
    ``create_custom_connector`` together with the test's original name.

    Raises ``ValueError`` when no version/config combination applies.
    """
    test_name = request.node.originalname

    def _create(
        *,
        v3_config: dict[str, str] = None,
        v4_config: dict[str, str] = None,
    ):
        assert v3_config or v4_config
        assert not (v3_config and v4_config)

        only_v3_given = v4_config is None
        only_v4_given = v3_config is None

        # Branch order mirrors the precedence of the version/config pairs.
        if connector_version == "v3" and only_v3_given:
            config = v3_config
        elif connector_version == "v3" and only_v4_given:
            config = v4_config_to_v3(v4_config)
        elif connector_version == "v4" and only_v3_given:
            config = v3_config_to_v4(v3_config)
        elif connector_version == "v4" and only_v4_given:
            config = v4_config
        else:
            raise ValueError(f"Invalid connector version: {connector_version}")
        return create_custom_connector(test_name, config)

    return _create


================================================
FILE: test/lib/fixtures/function.py
================================================
import pytest


@pytest.fixture(params=["v4", "v3"])
def connector_version(request):
    """Connector version the current test variant runs against.

    The fixture is parametrized with ``"v4"`` and ``"v3"``, so a test that
    requests it (directly or through another fixture) is collected once per
    version.
    """
    version = request.param
    return version


@pytest.fixture
def name_salt(session_name_salt, connector_version, request):  # noqa: F811
    """Per-test name salt derived from the session salt.

    The salt starts as ``session_name_salt`` and gets a ``_V3`` suffix when the
    connector version is ``"v3"``.  When the test has parametrize values other
    than ``connector_version``, a discriminator is appended so variants of the
    same test do not reuse the same topic and table names.

    The discriminator is the upper-cased first character of each extra value's
    string form, joined with ``_``.  Values that share a first character
    therefore produce the same suffix.
    """
    salt = session_name_salt
    if connector_version == "v3":
        salt = f"{session_name_salt}_V3"

    # Only parametrized test items carry a callspec.
    callspec = getattr(request.node, "callspec", None)
    if callspec is None:
        return salt

    initials = [
        str(value)[:1].upper()
        for key, value in callspec.params.items()
        if key != "connector_version"
    ]
    if not initials:
        return salt

    suffix = "_".join(initials)
    return f"{salt}_{suffix}"


================================================
FILE: test/lib/fixtures/session.py
================================================
import logging
import os
from pathlib import Path
import random
import string
import subprocess
import pytest
from lib.config import Profile, SnowflakeConnectorConfig
import snowflake

from lib.driver import KafkaDriver

# Module-level logger used by the session-scoped fixtures in this file.
logger = logging.getLogger(__name__)

# "test_data" directory under the third ancestor directory of this file;
# the sensor_pb2 fixture runs protoc with this directory as its cwd.
_PROTO_DIR = Path(__file__).parents[2] / "test_data"


@pytest.fixture(scope="session")
def sensor_pb2():
    """Run ``protoc`` on ``sensor.proto`` and return the generated module.

    The compiler is invoked inside ``_PROTO_DIR`` and writes its Python output
    there; a non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    protoc_command = ["protoc", "--python_out=.", "sensor.proto"]
    subprocess.run(protoc_command, cwd=_PROTO_DIR, check=True)

    # Imported only after protoc has run, because the module is generated above.
    import test_data.sensor_pb2 as generated_module

    return generated_module


@pytest.fixture(scope="session")
def credentials_unsalted():
    """Load the profile stored at ``$SNOWFLAKE_CREDENTIAL_FILE``.

    Raises ``KeyError`` when the environment variable is unset and fails an
    assertion when the path it names is not an existing file.
    """
    raw_path = os.environ["SNOWFLAKE_CREDENTIAL_FILE"]
    credential_path = Path(raw_path)
    if not credential_path.is_file():
        raise AssertionError(
            f"SNOWFLAKE_CREDENTIAL_FILE={credential_path} does not exist"
        )
    return Profile.load(credential_path)


@pytest.fixture(scope="session")
def session_name_salt(request):
    """Name salt shared by every test of the session.

    Uses the value of the ``--name-salt`` option when one was given; otherwise
    generates ``_`` followed by seven random upper-case letters or digits.
    """
    configured = request.config.getoption("--name-salt")
    if configured is not None:
        salt = configured
    else:
        alphabet = string.ascii_uppercase + string.digits
        random_part = "".join(random.choices(alphabet, k=7))
        salt = "_" + random_part
    logger.info(f"Using session name salt: {salt}")
    return salt


@pytest.fixture(scope="session")
def test_schema(credentials_unsalted, session_name_salt):
    """Create an isolated schema for this test session and drop it on teardown.

    The schema name is `<original_schema><session_name_salt>`; the fixture
    yields that salted name (without the database prefix).

    Each statement runs on its own short-lived connection that is always
    closed afterwards, so no connection stays open for the whole session.
    """
    original_schema = credentials_unsalted.schema
    salted_schema = f"{original_schema}{session_name_salt}"
    fqn = f"{credentials_unsalted.database}.{salted_schema}"

    conn_config = SnowflakeConnectorConfig.from_profile(credentials_unsalted)

    def _execute(statement: str) -> None:
        """Run one statement on a fresh connection and close the connection."""
        conn = snowflake.connector.connect(**conn_config.to_dict())
        try:
            conn.cursor().execute(statement)
        finally:
            conn.close()

    try:
        logger.info(f"Creating test schema: {fqn}")
        _execute(f"CREATE SCHEMA IF NOT EXISTS {fqn}")
        yield salted_schema
    finally:
        # Runs even when schema creation failed; IF EXISTS keeps that harmless.
        logger.info(f"Dropping test schema: {fqn}")
        _execute(f"DROP SCHEMA IF EXISTS {fqn} CASCADE")


@pytest.fixture(scope="session")
def credentials(credentials_unsalted, test_schema):
    """Credentials whose schema points at the salted test schema.

    The profile object returned by ``credentials_unsalted`` is modified in
    place: its ``schema`` attribute is overwritten with ``test_schema`` and the
    same object is returned.  The ``driver`` fixture is built from these
    credentials, so it sees the salted schema.
    """
    profile = credentials_unsalted
    profile.schema = test_schema
    return profile


@pytest.fixture(scope="session")
def driver(request, credentials):
    """Session-wide ``KafkaDriver`` built from command-line options.

    Addresses, platform version and the SSL flag are read from the pytest
    options; ``credentials`` is passed through unchanged.
    """
    option = request.config.getoption
    return KafkaDriver(
        kafkaAddress=option("--kafka-address"),
        schemaRegistryAddress=option("--schema-registry-address"),
        kafkaConnectAddress=option("--kafka-connect-address"),
        credentials=credentials,
        testVersion=option("--platform-version"),
        enableSSL=option("--enable-ssl"),
    )


================================================
FILE: test/lib/fixtures/table.py
================================================
import logging
import os
from typing import List

import pytest
from lib.driver import KafkaDriver, quote_name
from snowflake.connector import DictCursor

# Module-level logger used by the table fixtures and helpers in this file.
logger = logging.getLogger(__name__)

# Name of the external volume used for iceberg tables. Taken from the
# ICEBERG_EXTERNAL_VOLUME environment variable when set, otherwise the default below.
ICEBERG_EXTERNAL_VOLUME = os.environ.get(
    "ICEBERG_EXTERNAL_VOLUME", "kafka_push_e2e_volume_aws"
)


@pytest.fixture()
def snowflake_table(
    driver: KafkaDriver, name_salt: str, request: pytest.FixtureRequest
):
    """Yield the table name derived from the current test and drop it afterwards.

    The name is the test function's original name without a leading
    ``test_``, followed by ``name_salt``, all upper-cased.  The fixture does
    not create the table; it only calls ``driver.drop_table`` at teardown.
    """
    base_name = request.node.originalname.removeprefix("test_")
    table_name = f"{base_name}{name_salt}".upper()
    yield table_name
    driver.drop_table(table_name)


class Table:
    """Thin helper around one Snowflake table, addressed by name.

    Constructing the object does not touch Snowflake; the table is only
    created when :meth:`create` is called.
    """

    def __init__(self, driver: KafkaDriver, name: str):
        self.name = name
        self.driver = driver

    def create(self, columns: str):
        """Run ``CREATE OR REPLACE TABLE`` with ``columns`` appended verbatim."""
        cursor = self.driver.snowflake_conn.cursor()
        cursor.execute(f"CREATE OR REPLACE TABLE {quote_name(self.name)} {columns}")

    def select(self, projections: str, extra_clauses: str = ""):
        """Return all rows of a ``SELECT`` on this table, fetched via ``DictCursor``."""
        cursor = self.driver.snowflake_conn.cursor(DictCursor)
        query = f"SELECT {projections} FROM {quote_name(self.name)} {extra_clauses}"
        return cursor.execute(query).fetchall()

    def select_scalar(self, projection: str, extra_clauses: str = ""):
        """Return the first column of the first row of a ``SELECT`` on this table."""
        cursor = self.driver.snowflake_conn.cursor()
        query = f"SELECT {projection} FROM {quote_name(self.name)} {extra_clauses}"
        first_row = cursor.execute(query).fetchone()
        return first_row[0]

    def schema(self, *, as_dict: bool = False):
        """Return all rows of ``DESC TABLE``; uses ``DictCursor`` when ``as_dict``."""
        connection = self.driver.snowflake_conn
        if as_dict:
            cursor = connection.cursor(DictCursor)
        else:
            cursor = connection.cursor()
        return cursor.execute(f"DESC TABLE {quote_name(self.name)}").fetchall()

    def drop(self):
        """Drop the table through ``driver.drop_table``."""
        self.driver.drop_table(self.name)


class IcebergTable(Table):
    """:class:`Table` variant that issues ``CREATE/DROP ICEBERG TABLE`` DDL.

    ``columns`` is inserted verbatim after the table name, so it may carry
    table-level options after the column list, e.g.
    ``"(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE"``.

    ``EXTERNAL_VOLUME``, ``CATALOG``, ``BASE_LOCATION`` and
    ``ICEBERG_VERSION`` are added by :meth:`create`.
    """

    def create(self, columns: str):
        """Create or replace the iceberg table on ``ICEBERG_EXTERNAL_VOLUME``."""
        cursor = self.driver.snowflake_conn.cursor()
        clauses = [
            f"CREATE OR REPLACE ICEBERG TABLE {quote_name(self.name)}",
            f"{columns}",
            f"EXTERNAL_VOLUME = '{ICEBERG_EXTERNAL_VOLUME}'",
            "CATALOG = 'SNOWFLAKE'",
            f"BASE_LOCATION = '{self.name}'",
            "ICEBERG_VERSION = 3",
        ]
        # Joined with single spaces into one statement.
        cursor.execute(" ".join(clauses))

    def drop(self):
        """Drop the iceberg table if it exists."""
        statement = f"DROP ICEBERG TABLE IF EXISTS {quote_name(self.name)}"
        self.driver.snowflake_conn.cursor().execute(statement)


@pytest.fixture(scope="session")
def iceberg_external_volume(driver: KafkaDriver):
    """Probe once per session whether the iceberg external volume is usable.

    Runs ``DESC EXTERNAL VOLUME`` for ``ICEBERG_EXTERNAL_VOLUME``.  Returns the
    volume name when the statement yields rows.  When it yields nothing or
    raises, ``pytest.skip()`` is called, which skips every test that depends
    on this fixture.
    """
    volume = ICEBERG_EXTERNAL_VOLUME
    try:
        cursor = driver.snowflake_conn.cursor()
        rows = cursor.execute(f"DESC EXTERNAL VOLUME {volume}").fetchall()
    except Exception:
        # A failed probe is treated like a missing volume; details go to debug.
        logger.debug(
            "Failed to describe external volume %s",
            volume,
            exc_info=True,
        )
    else:
        if rows:
            logger.info("Iceberg external volume %s is available", volume)
            return volume

    pytest.skip(
        f"Iceberg external volume '{ICEBERG_EXTERNAL_VOLUME}' not found — "
        f"skipping iceberg tests (set ICEBERG_EXTERNAL_VOLUME env var to override)"
    )


@pytest.fixture()
def create_iceberg_table(
    driver: KafkaDriver,
    name_salt: str,
    request: pytest.FixtureRequest,
    iceberg_external_volume: str,
):
    """Factory fixture producing :class:`IcebergTable` objects in the test schema.

    The yielded ``_create(unsalted_name=None, *, columns, cleanup_topic=True)``
    names the table ``<unsalted_name or test name><name_salt>``, creates it
    with ``columns`` and returns the table object.  ``columns`` may include
    table-level options after the column list, e.g.
    ``"(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE"``.

    Teardown drops every created table and then deletes the Kafka topic with
    the same name for each table created with ``cleanup_topic=True``.
    """

    tables_to_drop: List[IcebergTable] = []
    topics_to_delete: List[str] = []

    def _create(
        unsalted_name: str = None, *, columns: str, cleanup_topic: bool = True
    ) -> IcebergTable:
        base_name = unsalted_name if unsalted_name else request.node.originalname
        new_table = IcebergTable(driver, base_name + name_salt)
        new_table.create(columns)
        tables_to_drop.append(new_table)
        if cleanup_topic:
            # The topic to delete carries the same name as the table.
            topics_to_delete.append(new_table.name)
        return new_table

    try:
        yield _create
    finally:
        for stale_table in tables_to_drop:
            stale_table.drop()
        for stale_topic in topics_to_delete:
            driver.deleteTopic(stale_topic)


@pytest.fixture()
def create_table(driver: KafkaDriver, name_salt: str, request: pytest.FixtureRequest):
    """Factory fixture producing :class:`Table` objects in the test schema.

    The yielded ``_create(unsalted_name=None, *, columns, cleanup_topic=True)``
    names the table ``<unsalted_name or test name><name_salt>``, creates it
    with ``columns`` and returns the table object.  ``columns`` may be followed
    by table options, e.g.
    ``"(col1 TYPE, col2 TYPE) ENABLE_SCHEMA_EVOLUTION = TRUE"``.

    Teardown drops every created table and then deletes the Kafka topic with
    the same name for each table created with ``cleanup_topic=True``.
    """

    # One entry per created table: (table, topic name to delete or None).
    tracked: List[tuple] = []

    def _create(
        unsalted_name: str = None, *, columns: str, cleanup_topic: bool = True
    ) -> Table:
        base_name = unsalted_name if unsalted_name else request.node.originalname
        new_table = Table(driver, base_name + name_salt)
        new_table.create(columns)
        tracked.append((new_table, new_table.name if cleanup_topic else None))
        return new_table

    try:
        yield _create
    finally:
        # All tables are dropped before any topic is deleted.
        for stale_table, _ in tracked:
            stale_table.drop()
        for _, stale_topic in tracked:
            if stale_topic is not None:
                driver.deleteTopic(stale_topic)


================================================
FILE: test/lib/matchers.py
================================================
"""Lightweight matcher objects for use with pytest's ``assert ==``.

These matchers implement ``__eq__`` so they can be dropped into dicts or
lists and compared with ``==``.  When a comparison fails, pytest's assertion
rewriting shows the matcher's ``__repr__`` in the diff, making it clear what
was expected.

Usage::

    from lib.matchers import ANY_INT, RegexMatch

    assert metadata == {
        "offset": 0,
        "CreateTime": ANY_INT,
        "topic": RegexMatch(r"my_topic_\\w+"),
    }
"""

import re


class AnyInstance:
    """Matcher that compares equal to any instance of the given type(s)."""

    def __init__(self, *expected_types):
        # Kept as a tuple so it can be handed straight to isinstance().
        self._types = expected_types

    def __eq__(self, other):
        is_expected_type = isinstance(other, self._types)
        return is_expected_type

    def __repr__(self):
        type_names = [cls.__name__ for cls in self._types]
        return "<any " + ", ".join(type_names) + ">"


class RegexMatch:
    """Matcher that compares equal to strings fully matching a regex pattern."""

    def __init__(self, pattern):
        self._pattern = pattern

    def __eq__(self, other):
        # Non-string values never match.
        if not isinstance(other, str):
            return False
        return re.fullmatch(self._pattern, other) is not None

    def __repr__(self):
        return "<regex {!r}>".format(self._pattern)


# Ready-made matchers for the two most common value types.
# Note: bool is a subclass of int, so ANY_INT also matches True and False.
ANY_INT = AnyInstance(int)
ANY_STR = AnyInstance(str)


================================================
FILE: test/lib/utils.py
================================================
from itertools import islice
import json
import logging
import threading
import time
from typing import Callable

from lib.driver import KafkaDriver


def wait_for(f: Callable[[], bool], *, timeout: int = 60, interval: int = 5) -> bool:
    """Poll ``f`` until it returns a truthy value or ``timeout`` seconds pass.

    ``f`` is always called at least once.  Between calls the function sleeps
    for ``interval`` seconds, so the total wait can exceed ``timeout`` by up
    to one interval.

    Returns ``True`` as soon as ``f`` succeeds, ``False`` once the deadline
    has passed after a failed call.
    """
    give_up_at = time.monotonic() + timeout
    while not f():
        if time.monotonic() > give_up_at:
            return False
        time.sleep(interval)
    return True


class RecordProducer:
    """Sends JSON records of the form ``{"number": "<n>"}`` to one Kafka topic.

    ``records_produced`` counts the records generated so far; numbering starts
    at 1 and continues across calls because all sends share one generator.
    """

    def __init__(self, driver: KafkaDriver, topic: str):
        self._driver = driver
        self._topic = topic
        self.records_produced = 0
        self._generator = self._make_generator()
        self._stop_event = threading.Event()
        self._thread = None

    def _make_generator(self):
        """Yield an endless stream of encoded records, bumping the counter first."""
        while True:
            self.records_produced += 1
            payload = {"number": str(self.records_produced)}
            yield json.dumps(payload).encode()

    def send(self, n: int):
        """Hand the next ``n`` records to ``driver.sendBytesData``."""
        batch = islice(self._generator, n)
        self._driver.sendBytesData(self._topic, batch, [], 0)

    def start_continuous(self, batch_size: int = 10, interval: float = 0.1):
        """Start a daemon thread that sends ``batch_size`` records every ``interval`` seconds."""
        self._stop_event.clear()

        def _produce():
            while True:
                if self._stop_event.is_set():
                    break
                self.send(batch_size)
                # Doubles as the pause between batches; returns early on stop.
                self._stop_event.wait(interval)

        worker = threading.Thread(target=_produce, daemon=True)
        self._thread = worker
        worker.start()
        logging.info(
            f"Started continuous producer (batch_size={batch_size}, interval={interval}s)"
        )

    def stop_continuous(self, timeout: float = 5):
        """Signal the background thread to stop and wait up to ``timeout`` seconds for it."""
        if self._thread is None:
            return
        self._stop_event.set()
        self._thread.join(timeout=timeout)
        self._thread = None
        logging.info(
            f"Stopped continuous producer (total: {self.records_produced})"
        )


================================================
FILE: test/pyproject.toml
================================================
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["."]
# Live log output on the terminal while tests run.
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
log_cli_date_format = "%Y-%m-%dT%H:%M:%S%z"
# Custom markers. Each marker name is registered once; "compatibility" was
# previously listed twice with two different descriptions.
markers = [
    "compatibility: v3/v4 dual-version and data-type compatibility tests across ingestion modes",
    "confluent_only: test requires Confluent platform (schema registry)",
    "iceberg: iceberg table tests (requires ICEBERG_EXTERNAL_VOLUME)",
    "pressure: long-running stress/pressure test",
    "schema_evolution: schema evolution e2e tests",
    "correctness: connector correctness tests (schema mapping, DLQ, multi-topic)",
]


================================================
FILE: test/rest_request_template/datagen_connector.json
================================================
{
  "name": "datagen-stocktrades_kc",
  "config": {
    "connector.class": "io.confluent.kafka.connect.datagen.DatagenConnector",
    "kafka.topic": "SNOWFLAKESINK_JP_KC",
    "quickstart": "Stock_Trades",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "max.interval": 10,
    "iterations": 100000,
    "tasks.max": "1"
  }
}


================================================
FILE: test/rest_request_template/datatype_ingestion.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "none",
    "errors.log.enable": true
  }
}


================================================
FILE: test/rest_request_template/iceberg_avro_aws.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.streaming.iceberg.enabled": true,
    "snowflake.streaming.enable.single.buffer": true,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/iceberg_json_aws.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.streaming.iceberg.enabled": true,
    "snowflake.streaming.enable.single.buffer": true,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/iceberg_schema_evolution_avro_aws.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.streaming.iceberg.enabled": true,
    "snowflake.streaming.enable.single.buffer": true,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/iceberg_schema_evolution_json_aws.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.streaming.iceberg.enabled": true,
    "snowflake.streaming.enable.single.buffer": true,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/nullable_values_after_smt.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "buffer.count.records": "100",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "behavior.on.null.values": "IGNORE",
    "transforms": "extractField",
    "transforms.extractField.type": "org.apache.kafka.connect.transforms.ExtractField$Value",
    "transforms.extractField.field": "optionalField",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_legacy_avro_sr.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "snowflake.enable.schematization": "false",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_legacy_byte_array_converter.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "snowflake.enable.schematization": "false",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.converters.ByteArrayConverter",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_legacy_string_converter.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "snowflake.enable.schematization": "false",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.storage.StringConverter",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_legacy_string_json.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "snowflake.enable.schematization": "false",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_schema_evolution.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_SCHEMA_EVOLUTION_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_schema_mapping_dlq.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_SCHEMA_MAPPING_DLQ_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/snowpipe_streaming_string_json_dlq.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_SNOWPIPE_STREAMING_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_delete_create.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_delete_create_chaos.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_delete_resume.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_delete_resume_chaos.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_pause_create.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_pause_create_chaos.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_pause_resume.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_pause_resume_chaos.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_recreate.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_recreate_chaos.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_resilience.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_kc_restart.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "3",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/test_snowpipe_streaming_string_json_ignore_tombstone.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "none",
    "errors.log.enable": true,
    "behavior.on.null.values": "IGNORE",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_auto_table_creation.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_auto_table_creation_topic2table.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC0,SNOWFLAKE_TEST_TOPIC1",
    "snowflake.topic2table.map": "SNOWFLAKE_TEST_TOPIC0:SNOWFLAKE_CONNECTOR_NAME,SNOWFLAKE_TEST_TOPIC1:SNOWFLAKE_CONNECTOR_NAME",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_avro_avro.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "key.converter": "org.apache.kafka.connect.serialization.AvroConverter",
    "value.converter": "org.apache.kafka.connect.serialization.AvroConverter",
    "value.converter.schemas.enable": "true",
    "key.converter.schemas.enable": "true",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_avrosr_avrosr.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "io.confluent.connect.avro.AvroConverter",
    "key.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_confluent_protobuf_protobuf.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "io.confluent.connect.protobuf.ProtobufConverter",
    "key.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "value.converter": "io.confluent.connect.protobuf.ProtobufConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "transforms": "add_record_content",
    "transforms.add_record_content.type": "org.apache.kafka.connect.transforms.HoistField$Value",
    "transforms.add_record_content.field": "RECORD_CONTENT",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_json_json.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "key.converter.schemas.enable": "false",
    "snowflake.metadata.createtime": "false",
    "snowflake.metadata.topic": false,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_multiple_topic_to_one_table_snowpipe_streaming.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC0,SNOWFLAKE_TEST_TOPIC1,SNOWFLAKE_TEST_TOPIC2",
    "snowflake.topic2table.map": "SNOWFLAKE_TEST_TOPIC0:SNOWFLAKE_CONNECTOR_NAME,SNOWFLAKE_TEST_TOPIC1:SNOWFLAKE_CONNECTOR_NAME,SNOWFLAKE_TEST_TOPIC2:SNOWFLAKE_CONNECTOR_NAME",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_native_complex_smt.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "transforms": "createKey,extractInt,dropFieldC2",
    "transforms.createKey.type": "org.apache.kafka.connect.transforms.ValueToKey",
    "transforms.createKey.fields": "c1",
    "transforms.extractInt.type": "org.apache.kafka.connect.transforms.ExtractField$Key",
    "transforms.extractInt.field": "c1",
    "transforms.dropFieldC2.type": "org.apache.kafka.connect.transforms.ReplaceField$Value",
    "transforms.dropFieldC2.exclude": "c2",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_native_string_json_without_schema.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "transforms": "dropFieldC2",
    "transforms.dropFieldC2.type": "org.apache.kafka.connect.transforms.ReplaceField$Value",
    "transforms.dropFieldC2.exclude": "c2",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_native_string_protobuf.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "com.blueapron.connect.protobuf.ProtobufConverter",
    "value.converter.protoClassName": "com.snowflake.kafka.test.protobuf.SensorReadingImpl$SensorReading",
    "transforms": "add_record_content",
    "transforms.add_record_content.type": "org.apache.kafka.connect.transforms.HoistField$Value",
    "transforms.add_record_content.field": "RECORD_CONTENT",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_schema_mapping.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_schema_not_supported_converter.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_snowpipe_streaming_string_avro_sr.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_snowpipe_streaming_string_json.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jmx": "true",
    "errors.tolerance": "all",
    "errors.log.enable": true,
    "errors.deadletterqueue.topic.name": "DLQ_TOPIC_NAME_SALT",
    "errors.deadletterqueue.topic.replication.factor": 1,
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_string_avro.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.serialization.AvroConverter",
    "value.converter.schemas.enable": "false",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_string_avrosr.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "io.confluent.connect.avro.AvroConverter",
    "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_string_json.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "snowflake.jdbc.map": "isInsecureMode : true, notYetExistingProp : true",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/rest_request_template/travis_correct_string_proxy.json
================================================
{
  "name": "SNOWFLAKE_CONNECTOR_NAME",
  "config": {
    "connector.class": "com.snowflake.kafka.connector.SnowflakeStreamingSinkConnector",
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "snowflake.url.name": "SNOWFLAKE_HOST",
    "snowflake.user.name": "SNOWFLAKE_USER",
    "snowflake.private.key": "SNOWFLAKE_PRIVATE_KEY",
    "snowflake.database.name": "SNOWFLAKE_DATABASE",
    "snowflake.schema.name": "SNOWFLAKE_SCHEMA",
    "snowflake.role.name": "SNOWFLAKE_ROLE",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "jvm.proxy.host": "localhost",
    "jvm.proxy.port": "3128",
    "jvm.proxy.username": "admin",
    "jvm.proxy.password": "test",
    "snowflake.jdbc.map": "isInsecureMode : true, notYetExistingProp : true",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
    "snowflake.streaming.validate.compatibility.with.classic": "false"
  }
}


================================================
FILE: test/run_tests.sh
================================================
#!/bin/bash
#
# Snowflake Kafka Connector - Docker-based E2E Tests
#
# Usage:
#   ./run_tests.sh --platform=<confluent|apache> --platform-version=<version> [options]
#
# Examples:
#   ./run_tests.sh --platform=apache --platform-version=2.8.2
#   ./run_tests.sh --platform=apache --platform-version=3.7.0
#   ./run_tests.sh --platform=apache --platform-version=4.0.0
#   ./run_tests.sh --platform=confluent --platform-version=7.8.0
#   ./run_tests.sh --platform=confluent --platform-version=8.0.0
#   ./run_tests.sh --platform=confluent --platform-version=7.8.0 -- tests/test_string_json.py
#
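# Prerequisites:
#   - Docker and Docker Compose (the script invokes "docker compose")
#   - python3 (used to generate the test name salt)
#   - SNOWFLAKE_CREDENTIAL_FILE environment variable set (not required when
#     LOCAL_PROXY_PORT is set; credentials are then fetched from the local proxy)
#   - Connector plugin built (run build_runtime_jar.sh first)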
#

# Abort on the first command that fails.
set -e

# Absolute paths of this script's directory, its docker/ subdirectory and the
# repository root (the parent of the script directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOCKER_DIR="$SCRIPT_DIR/docker"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Convert an arbitrary directory name into a valid Docker Compose project name.
# Compose documents that project names may contain only lowercase letters,
# digits, dashes and underscores and must start with a lowercase letter or a
# digit, so uppercase is folded, other characters become "_" and invalid
# leading characters are dropped. Names that are already valid pass through
# unchanged.
#   $1 - raw name (e.g. a directory basename)
# Prints the sanitized name; falls back to "kafka-connector-e2e" when nothing
# usable remains.
sanitize_compose_project_name() {
    local name
    name="$(printf '%s' "$1" \
        | tr '[:upper:]' '[:lower:]' \
        | sed -e 's/[^a-z0-9_-]/_/g' -e 's/^[^a-z0-9]*//')"
    printf '%s\n' "${name:-kafka-connector-e2e}"
}

# Unique Docker Compose project name per worktree, derived from the repo
# directory basename. Prevents collisions when multiple worktrees run tests
# concurrently (Docker Compose defaults to the parent directory name, which
# is always "docker" here).
COMPOSE_PROJECT_NAME="$(sanitize_compose_project_name "$(basename "$PROJECT_ROOT")")"
export COMPOSE_PROJECT_NAME

# ANSI color escape sequences used by the logging helpers (expanded by the
# helpers when they print).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Print "ERROR: <message>" in red on stderr and terminate the script with
# exit status 1.
#   $1 - message text (backslash escapes in it are interpreted)
error_exit() {
    local message="$1"
    printf '%b\n' "${RED}ERROR: ${message}${NC}" >&2
    exit 1
}

# Print "INFO: <message>" in green on stdout.
#   $1 - message text (backslash escapes in it are interpreted)
info() {
    local message="$1"
    printf '%b\n' "${GREEN}INFO: ${message}${NC}"
}

# Print "WARN: <message>" in yellow on stdout.
#   $1 - message text (backslash escapes in it are interpreted)
warn() {
    local message="$1"
    printf '%b\n' "${YELLOW}WARN: ${message}${NC}"
}

# Print the command-line help text on stdout and terminate the script with
# exit status 1. "$0" in the text expands to the name the script was invoked as.
usage() {
    cat <<EOF
Usage: $0 --platform=<confluent|apache> --platform-version=<version> [options]

Platform:
  --platform=PLATFORM           Platform: 'confluent' or 'apache' (default: confluent)
  --platform-version=VERSION    Kafka/Confluent platform version (default: 7.8.0)
                                Confluent: 6.2.x, 7.x, 8.x (KRaft)
                                Apache: 2.x, 3.x, 4.x (KRaft)

Options:
  --cloud=CLOUD        Snowflake cloud platform: AWS, GCP, or AZURE
  --java-version=VER   Java version for Apache Kafka (default: 11)
  --jmx                Enable JMX metrics scraping via Jolokia
  --profile            Enable JVM profiling (JFR, GC logs, JMX, async-profiler)
  --keep               Keep containers running after tests
  -i, --interactive    Start infra, then drop into a bash shell in the test-runner
  --rebuild            Force rebuild of images
  --logs-dir=DIR       Save service logs to a file in DIR on failure
  -h, --help           Show this help message
  -- ARGS              Pass remaining args directly to pytest

Environment:
  SNOWFLAKE_CREDENTIAL_FILE  Path to Snowflake credentials JSON (required unless LOCAL_PROXY_PORT is set)
  LOCAL_PROXY_PORT           Port of the proxy for the local Snowflake deployment
  CONNECT_OFFSET_FLUSH_INTERVAL_MS  Passed through to the Connect worker (see docker-compose; optional)

Examples:
  $0 --platform=confluent --platform-version=7.8.0
  $0 --platform=confluent --platform-version=8.0.0    # KRaft mode
  $0 --platform=apache --platform-version=2.8.2
  $0 --platform=apache --platform-version=4.0.0       # KRaft mode
  $0 --platform=confluent --platform-version=7.8.0 -- -k test_string_json
  $0 --platform=apache --platform-version=3.7.0 --keep -- -m pressure
  $0 --platform=confluent --platform-version=7.8.0 -i   # interactive shell
  $0 --platform=confluent --platform-version=7.8.0 --profile --keep -- -m pressure
  $0 --platform=confluent --platform-version=7.8.0 --logs-dir=/tmp/test-logs
EOF
    exit 1
}

# Option defaults; each may be overridden by the parser below.
PLATFORM="confluent"
PLATFORM_VERSION="7.8.0"
JAVA_VERSION="11"
JMX_ENABLED="false"
PROFILE_ENABLED="false"
KEEP_RUNNING="false"
INTERACTIVE="false"
FORCE_REBUILD="false"
LOGS_DIR=""
PASSTHROUGH_ARGS=()

# Consume the command line one word at a time. "--key=value" options keep the
# text after the first "=", flags flip their variable to "true", and a bare
# "--" hands every remaining word to PASSTHROUGH_ARGS and stops parsing.
# Anything unrecognized is a fatal error.
while (( $# > 0 )); do
    case "$1" in
        --platform=*)         PLATFORM="${1#*=}" ;;
        --platform-version=*) PLATFORM_VERSION="${1#*=}" ;;
        --cloud=*)            SF_CLOUD_PLATFORM="${1#*=}" ;;
        --java-version=*)     JAVA_VERSION="${1#*=}" ;;
        --jmx)                JMX_ENABLED="true" ;;
        --profile)            PROFILE_ENABLED="true" ;;
        --keep)               KEEP_RUNNING="true" ;;
        -i|--interactive)     INTERACTIVE="true" ;;
        --rebuild)            FORCE_REBUILD="true" ;;
        --logs-dir=*)         LOGS_DIR="${1#*=}" ;;
        -h|--help)            usage ;;          # usage exits the script
        --)
            shift
            PASSTHROUGH_ARGS=("$@")
            break
            ;;
        *)                    error_exit "Unknown option: $1" ;;
    esac
    # Every branch that reaches this point consumed exactly one word.
    shift
done

# Both settings have defaults, so these guards only trigger when an option was
# given with an empty value (e.g. "--platform=").
[ -n "$PLATFORM" ] || error_exit "Missing required argument: --platform=<confluent|apache>"
[ -n "$PLATFORM_VERSION" ] || error_exit "Missing required argument: --platform-version=<version>"

# Every stack starts from the base compose file; platform-specific files are
# layered on top of it below.
BASE_COMPOSE="-f docker-compose.base.yml"

SCALA_VERSION="2.12"
KRAFT_MODE="false"

case "$PLATFORM" in
    confluent)
        # Settings shared by all Confluent versions; the version switch below
        # appends extra compose files and adjusts the service list.
        CONFLUENT_VERSION="$PLATFORM_VERSION"
        KAFKA_VERSION=""
        KAFKA_CONNECT_ADDRESS="kafka-connect:8083"
        HEALTH_CHECK_SERVICE="kafka-connect"
        COMPOSE_FILES="$BASE_COMPOSE -f docker-compose.confluent.yml"
        START_SERVICES="zookeeper kafka schema-registry kafka-connect"

        case "$PLATFORM_VERSION" in
            6.2.*)
                info "Platform: Confluent $PLATFORM_VERSION"
                # 6.2.x containers are only available for linux/amd64
                COMPOSE_FILES="$COMPOSE_FILES -f docker-compose.amd64.yml"
                info "Note: Confluent 6.2.x requires linux/amd64 (using emulation on ARM)"
                ;;
            7.*)
                info "Platform: Confluent $PLATFORM_VERSION"
                ;;
            8.*)
                info "Platform: Confluent $PLATFORM_VERSION (KRaft mode)"
                COMPOSE_FILES="$COMPOSE_FILES -f docker-compose.confluent-kraft.yml"
                # No zookeeper service in the KRaft stack.
                START_SERVICES="kafka schema-registry kafka-connect"
                ;;
            *)
                error_exit "Unsupported Confluent version: $PLATFORM_VERSION (supported: 6.2.x, 7.x, 8.x)"
                ;;
        esac
        ;;
    apache)
        if [[ "$PLATFORM_VERSION" == 4.* ]]; then
            info "Platform: Apache Kafka $PLATFORM_VERSION (KRaft mode)"
            # 4.x overrides the Scala and Java versions, including any
            # --java-version given on the command line.
            SCALA_VERSION="2.13"
            KRAFT_MODE="true"
            JAVA_VERSION="17"
        else
            info "Platform: Apache Kafka $PLATFORM_VERSION (official tarball)"
        fi

        COMPOSE_FILES="$BASE_COMPOSE -f docker-compose.apache.yml"
        CONFLUENT_VERSION=""
        KAFKA_VERSION="$PLATFORM_VERSION"
        KAFKA_CONNECT_ADDRESS="kafka:8083"
        HEALTH_CHECK_SERVICE="kafka"
        START_SERVICES="kafka"
        ;;
    *)
        error_exit "Unknown platform: $PLATFORM (supported: confluent, apache)"
        ;;
esac

# With --profile, append the profiling overlay for the selected platform
# (one overlay file per platform, to avoid undefined service errors).
if [[ "$PROFILE_ENABLED" == "true" ]]; then
    COMPOSE_FILES+=" -f docker-compose.profile-${PLATFORM}.yml"
    info "Profiling enabled: JFR, GC logs, JMX (port 9999), heap dump on OOM"
    info "Use test/scripts/profile_connect.sh to interact with the profiler"
fi

# Check prerequisites
command -v docker >/dev/null 2>&1 || error_exit "Docker is not installed"
# Probe the Compose plugin by running it. "command -v docker compose" looks up
# "docker" and "compose" as two separate command names and succeeds as soon as
# "docker" exists, so it never detects a missing plugin. The standalone
# "docker-compose" binary is not accepted because this script only ever
# invokes "docker compose".
docker compose version >/dev/null 2>&1 || error_exit "Docker Compose is not installed (the 'docker compose' plugin is required)"

# Resolve the Snowflake credentials file: either use the file named by
# SNOWFLAKE_CREDENTIAL_FILE, or (when LOCAL_PROXY_PORT is set) download the
# credentials from the local proxy into a fresh temporary file.
if [ -z "${LOCAL_PROXY_PORT:-}" ]; then
    [ -n "$SNOWFLAKE_CREDENTIAL_FILE" ] || error_exit "SNOWFLAKE_CREDENTIAL_FILE environment variable is not set"
    [ -f "$SNOWFLAKE_CREDENTIAL_FILE" ] || error_exit "Credential file not found: $SNOWFLAKE_CREDENTIAL_FILE"

    # Rewrite the path as absolute (the script changes directory later on).
    SNOWFLAKE_CREDENTIAL_FILE="$(cd "$(dirname "$SNOWFLAKE_CREDENTIAL_FILE")" && pwd)/$(basename "$SNOWFLAKE_CREDENTIAL_FILE")"
else
    PROXY_CREDENTIAL_URL="http://localhost:${LOCAL_PROXY_PORT}/proxy/kafka-connector-profile"
    info "Fetching credentials from proxy: $PROXY_CREDENTIAL_URL"
    SNOWFLAKE_CREDENTIAL_FILE="$(mktemp /tmp/kafka-connector-test-snowflake-credentials-XXXXXX.json)"
    curl -sf "$PROXY_CREDENTIAL_URL" -o "$SNOWFLAKE_CREDENTIAL_FILE" || {
        # Do not leave the temporary file behind when the download fails.
        rm -f "$SNOWFLAKE_CREDENTIAL_FILE"
        error_exit "Failed to fetch credentials from $PROXY_CREDENTIAL_URL"
    }
    info "Credentials fetched to: $SNOWFLAKE_CREDENTIAL_FILE"
fi
info "Credentials: $SNOWFLAKE_CREDENTIAL_FILE"

# Stage the connector plugin in a freshly recreated directory. Where the
# plugin comes from depends on the platform.
PLUGIN_DIR="/tmp/sf-kafka-connect-plugin"
rm -rf "$PLUGIN_DIR"
mkdir -p "$PLUGIN_DIR"

case "$PLATFORM" in
    apache)
        # Apache: copy the first matching connector JAR from the plugin path.
        PLUGIN_JAR_PATH="/usr/local/share/kafka/plugins"
        PLUGIN_JAR=$(ls "$PLUGIN_JAR_PATH"/snowflake-kafka-connector-*.jar 2>/dev/null | head -n 1)
        [ -n "$PLUGIN_JAR" ] || error_exit "Connector plugin JAR not found at $PLUGIN_JAR_PATH/. Run './build_runtime_jar.sh . package apache' first."

        info "Using Apache connector JAR: $PLUGIN_JAR"
        cp "$PLUGIN_JAR" "$PLUGIN_DIR/"
        ;;
    confluent)
        # Confluent: unpack the plugin zip into the staging directory.
        PLUGIN_ZIP="/tmp/sf-kafka-connect-plugin.zip"
        [ -f "$PLUGIN_ZIP" ] || error_exit "Connector plugin zip not found at $PLUGIN_ZIP. Run './build_runtime_jar.sh . package confluent' first."

        info "Extracting Confluent connector zip: $PLUGIN_ZIP"
        unzip -q "$PLUGIN_ZIP" -d "$PLUGIN_DIR"
        ;;
esac

info "Plugin prepared in $PLUGIN_DIR"

# Build protobuf dependencies
EXTRA_JARS_DIR="/tmp/kafka-connect-extra-jars"
mkdir -p "$EXTRA_JARS_DIR"

# Build the "protobuf-builder" image from docker/Dockerfile.builder (build
# context: the parent of the docker directory) and copy the contents of the
# image's /output directory into $EXTRA_JARS_DIR.
# Side effects: changes the working directory to $DOCKER_DIR and sets the
# global CONTAINER_ID.
compile_protobuf_dependencies() {
    info "Building protobuf dependencies..."
    cd "$DOCKER_DIR"

    docker build -t protobuf-builder -f Dockerfile.builder ..

    info "Extracting JARs from image..."
    CONTAINER_ID=$(docker create protobuf-builder)
    # Under "set -e" a failing "docker cp" would abort the script before the
    # "docker rm" below, leaking the created container; remove it explicitly
    # on the failure path.
    if ! docker cp "$CONTAINER_ID:/output/." "$EXTRA_JARS_DIR/"; then
        docker rm "$CONTAINER_ID" > /dev/null 2>&1 || true
        error_exit "Failed to copy JARs out of the protobuf-builder container"
    fi
    docker rm "$CONTAINER_ID" > /dev/null

    info "Extra JARs prepared in $EXTRA_JARS_DIR:"
    ls -la "$EXTRA_JARS_DIR"
}

compile_protobuf_dependencies

# Obtain the KC v3 connector JAR for dual-version testing. The helper script's
# stdout is taken as the plugin directory and exported as V3_PLUGIN_PATH.
# NOTE(review): the original comment says the download is skipped when already
# cached; that logic lives in download_v3_jar.sh and is not visible here.
info "Preparing KC v3 connector JAR..."
V3_PLUGIN_DIR="$("$SCRIPT_DIR/download_v3_jar.sh")"
V3_PLUGIN_PATH="$V3_PLUGIN_DIR"
export V3_PLUGIN_PATH
info "v3 plugin path: $V3_PLUGIN_PATH"

if [ "$JMX_ENABLED" = "true" ]; then
    # Download Jolokia JMX agent for metrics scraping (cached in /tmp/jolokia)
    JOLOKIA_DIR="/tmp/jolokia"
    JOLOKIA_VERSION="2.5.1"
    JOLOKIA_JAR="$JOLOKIA_DIR/jolokia-agent.jar"
    mkdir -p "$JOLOKIA_DIR"
    # -s instead of -f: an empty file left by an earlier failed download is
    # not treated as a valid cached agent.
    if [ ! -s "$JOLOKIA_JAR" ]; then
        info "Downloading Jolokia JMX agent v${JOLOKIA_VERSION}..."
        # Download to a temporary name and move it into place only on success,
        # so an interrupted transfer can never be mistaken for a cached JAR.
        JOLOKIA_JAR_PARTIAL="$JOLOKIA_JAR.part"
        if ! curl -fsSL -o "$JOLOKIA_JAR_PARTIAL" \
            "https://repo1.maven.org/maven2/org/jolokia/jolokia-agent-jvm/${JOLOKIA_VERSION}/jolokia-agent-jvm-${JOLOKIA_VERSION}-javaagent.jar"; then
            rm -f "$JOLOKIA_JAR_PARTIAL"
            error_exit "Failed to download Jolokia JMX agent v${JOLOKIA_VERSION}"
        fi
        mv "$JOLOKIA_JAR_PARTIAL" "$JOLOKIA_JAR"
    fi
    export JOLOKIA_JAR_PATH="$JOLOKIA_JAR"
    # Agent path as seen inside the container; listens on port 8778 on all
    # interfaces.
    export KAFKA_OPTS="-javaagent:/opt/jolokia/jolokia-agent.jar=port=8778,host=0.0.0.0"
fi

# Random salt for test names: an underscore followed by seven characters drawn
# from A-Z and 0-9, generated with python3.
TEST_NAME_SALT="_$(python3 -c 'import random, string; print("".join(random.choices(string.ascii_uppercase + string.digits, k=7)))')"
info "Test name salt: $TEST_NAME_SALT"

# Variables handed to docker-compose through the environment.
# NOTE(review): SF_CLOUD_PLATFORM (set by --cloud) is not exported in this
# block - confirm it reaches the test-runner elsewhere.
export CONFLUENT_VERSION KAFKA_VERSION JAVA_VERSION SCALA_VERSION KRAFT_MODE
export SNOWFLAKE_CREDENTIAL_FILE
export CONNECTOR_PLUGIN_PATH="$PLUGIN_DIR"
export EXTRA_JARS_PATH="$EXTRA_JARS_DIR"

# Env vars consumed by pytest via conftest.py (inside the test-runner container)
export KAFKA_PLATFORM="$PLATFORM" KAFKA_PLATFORM_VERSION="$PLATFORM_VERSION"
export TEST_NAME_SALT

# With a local proxy, point Snowpipe Streaming at the proxy port on the Docker host.
if [[ -n "${LOCAL_PROXY_PORT:-}" ]]; then
    SNOWPIPE_STREAMING_URL="http://host.docker.internal:${LOCAL_PROXY_PORT}"
    export SNOWPIPE_STREAMING_URL
    info "Snowpipe Streaming URL: $SNOWPIPE_STREAMING_URL"
fi

cd "$DOCKER_DIR"

# Image build: a forced rebuild bypasses the Docker layer cache
case "$FORCE_REBUILD" in
    true) BUILD_ARGS="--no-cache" ;;
    *)    BUILD_ARGS="" ;;
esac

info "Building test runner image..."
# $COMPOSE_FILES and $BUILD_ARGS are intentionally unquoted so they word-split
docker compose $COMPOSE_FILES build $BUILD_ARGS test-runner

if [ "$PLATFORM" = "apache" ]; then
    APACHE_IMAGE="ghcr.io/snowflakedb/snowflake-kafka-connector/apache-kafka:${KAFKA_VERSION}-java${JAVA_VERSION}"
    # Build locally when a rebuild was requested or the prebuilt image cannot
    # be pulled; the pull is not attempted at all on a forced rebuild.
    if [ "$FORCE_REBUILD" = "true" ] || ! docker pull "$APACHE_IMAGE" < /dev/null 2>/dev/null; then
        info "Building Apache Kafka image..."
        docker compose $COMPOSE_FILES build $BUILD_ARGS kafka
    else
        info "Using prebuilt Apache Kafka image: $APACHE_IMAGE"
    fi
fi

# When profiling, force-remove stale containers from prior --keep runs.
# Bind mounts (plugin JARs) become stale if the host directory was recreated
# while a kept container still held the old mount inode.
if [ "$PROFILE_ENABLED" = "true" ]; then
    info "Cleaning stale containers for fresh profiling..."
    docker compose $COMPOSE_FILES down -v --remove-orphans 2>/dev/null || true
fi

# Start services
info "Starting services: $START_SERVICES"
docker compose $COMPOSE_FILES up -d $START_SERVICES

# Wait for services: poll every 5s until the health-check service reports
# healthy or TIMEOUT seconds have elapsed.
info "Waiting for services to be healthy..."
TIMEOUT=300
ELAPSED=0

while [ $ELAPSED -lt $TIMEOUT ]; do
    # Match the parenthesised status "(healthy)". A bare "healthy" is also a
    # substring of "(unhealthy)", which would let a failing container pass.
    if docker compose $COMPOSE_FILES ps $HEALTH_CHECK_SERVICE 2>/dev/null | grep -q "(healthy)"; then
        info "All services are healthy!"
        break
    fi
    sleep 5
    ELAPSED=$((ELAPSED + 5))
    echo -n "."
done
echo ""

# The loop only reaches TIMEOUT when it never hit the `break` above.
if [ $ELAPSED -ge $TIMEOUT ]; then
    error_exit "Services failed to become healthy within ${TIMEOUT}s"
fi

# Reset profiling to a clean slate (discard startup/warmup data from prior runs)
if [ "$PROFILE_ENABLED" = "true" ]; then
    PROFILE_CONTAINER=$(docker compose $COMPOSE_FILES ps -q $HEALTH_CHECK_SERVICE)
    if [ -n "$PROFILE_CONTAINER" ]; then
        info "Resetting JFR recording to clean slate..."
        # The in-container script exits non-zero when no JVM is found or when
        # JFR.start fails, so the success message is only printed when the
        # recording was really restarted. JFR.stop stays best-effort because
        # there may be no recording named "profile" yet.
        if docker exec "$PROFILE_CONTAINER" sh -c '
            rm -f /tmp/profile/kc-profile-*.jfr /tmp/profile/flamegraph-*.html 2>/dev/null
            PID=$(jcmd 2>/dev/null | grep -v jcmd | head -1 | awk "{print \$1}")
            [ -n "$PID" ] || exit 1
            jcmd "$PID" JFR.stop name=profile 2>/dev/null || true
            jcmd "$PID" JFR.start name=profile filename=/tmp/profile/kc-profile.jfr \
                settings=profile maxsize=500m dumponexit=true 2>/dev/null
        ' 2>/dev/null; then
            info "JFR recording restarted — clean slate for this test run"
        else
            # Non-fatal: the test run continues with whatever recording exists.
            warn "JFR reset failed — profiling data may include startup noise"
        fi
    fi
fi

# JMX metrics scraper state: output file for this run and the PID of the
# background scraper (empty while no scraper is running).
METRICS_FILE="/tmp/sf-metrics-${PLATFORM}-${PLATFORM_VERSION}-$(date +%Y%m%d-%H%M%S).jsonl"
METRICS_PID=""

# Launch scrape_metrics.sh in poll mode as a background job and remember its
# PID in METRICS_PID. Aborts the script when the scraper is missing or not
# executable.
start_metrics_scraper() {
    local scraper="$PROJECT_ROOT/test/scripts/scrape_metrics.sh"
    local -a scraper_args=(
        --poll
        --interval=10
        --output="$METRICS_FILE"
        --host=localhost
        --port=8778
    )

    [ -x "$scraper" ] || error_exit "Metrics scraper not found or not executable: $scraper"

    "$scraper" "${scraper_args[@]}" &
    METRICS_PID=$!
    # Detach the job from this shell's job table; failure to do so is harmless.
    disown "$METRICS_PID" 2>/dev/null || true
}

# Terminate the background scraper if one is recorded and still alive, then
# clear METRICS_PID. Safe to call repeatedly; always returns 0.
stop_metrics_scraper() {
    # Nothing recorded: nothing to stop.
    [ -n "$METRICS_PID" ] || return 0
    # Recorded process already gone: leave METRICS_PID untouched.
    kill -0 "$METRICS_PID" 2>/dev/null || return 0

    kill "$METRICS_PID" 2>/dev/null || true
    wait "$METRICS_PID" 2>/dev/null || true
    METRICS_PID=""
}

# EXIT handler: stop the metrics scraper, then tear the compose stack down
# unless the containers were requested to be kept (KEEP_RUNNING != "false").
cleanup() {
    stop_metrics_scraper

    if [ "$KEEP_RUNNING" != "false" ]; then
        warn "Keeping containers running (--keep specified)"
        echo "To stop: cd $DOCKER_DIR && docker compose $COMPOSE_FILES down -v"
        return 0
    fi

    info "Cleaning up containers..."
    # Best effort: a failed teardown must not change the script's outcome.
    docker compose $COMPOSE_FILES down -v --remove-orphans 2>/dev/null || true
}
trap cleanup EXIT

if [ "$JMX_ENABLED" = "true" ]; then
    # Short pause so Jolokia can initialize before the first scrape
    sleep 3
    start_metrics_scraper
    echo ""
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN}  JMX Metrics: ${METRICS_FILE}${NC}"
    echo -e "${GREEN}========================================${NC}"
    echo ""
fi

# Connection and platform details reach pytest through environment variables
# (docker-compose plus the exports earlier in this script), so -v is the only
# argument added here.
PYTEST_ARGS=(-v)

# Flags for `docker compose run`:
#   -t    only for interactive sessions
#   --rm  only when containers are not being kept, so that with --keep the
#         test-runner container stays around for debugging
RUN_FLAGS=(-i)
case "$INTERACTIVE" in
    true) RUN_FLAGS+=("-t") ;;
esac
case "$KEEP_RUNNING" in
    false) RUN_FLAGS+=("--rm") ;;
esac

# In GitHub Actions, mount the directory holding GITHUB_STEP_SUMMARY and point
# the variable at the mounted path so pytest can append failures to it.
if [[ -n "${GITHUB_STEP_SUMMARY:-}" ]]; then
    SUMMARY_DIR="$(dirname "$GITHUB_STEP_SUMMARY")"
    SUMMARY_FILE="$(basename "$GITHUB_STEP_SUMMARY")"
    RUN_FLAGS+=(-v "${SUMMARY_DIR}:/github_step_summary")
    RUN_FLAGS+=(-e "GITHUB_STEP_SUMMARY=/github_step_summary/${SUMMARY_FILE}")
fi

# Run tests (or drop into a shell with --interactive).
# errexit is disabled around the run so a failing test command does not abort
# the script before its exit code has been captured.
set +e
if [ "$INTERACTIVE" = "true" ]; then
    info "Starting interactive shell in test-runner (run pytest manually)..."
    docker compose $COMPOSE_FILES run "${RUN_FLAGS[@]}" test-runner bash
    TEST_EXIT_CODE=$?
else
    info "Running tests..."
    docker compose $COMPOSE_FILES run "${RUN_FLAGS[@]}" test-runner \
        pytest "${PYTEST_ARGS[@]}" "${PASSTHROUGH_ARGS[@]}"
    TEST_EXIT_CODE=$?
fi
set -e

# Stop the scraper before containers go away
stop_metrics_scraper

# Save logs on failure.
# Log capture is best effort: with errexit back on, a failing mkdir or
# `docker compose logs` would otherwise abort here, replacing the test exit
# code with an unrelated one and skipping the final summary.
if [ "$TEST_EXIT_CODE" -ne 0 ] && [ -n "$LOGS_DIR" ]; then
    if mkdir -p "$LOGS_DIR"; then
        LOG_FILE="$LOGS_DIR/${PLATFORM}-${PLATFORM_VERSION}-${HEALTH_CHECK_SERVICE}.log"
        warn "Tests failed. Saving service logs to $LOG_FILE..."
        docker compose $COMPOSE_FILES logs $HEALTH_CHECK_SERVICE > "$LOG_FILE" 2>&1 \
            || warn "Could not save service logs to $LOG_FILE"
    else
        warn "Could not create logs directory $LOGS_DIR; skipping log capture"
    fi
fi

# Final reporting: optional JMX and profiling banners, then the pass/fail
# banner. The script exits with the test run's exit code.
if [ "$JMX_ENABLED" = "true" ]; then
    # Print metrics summary
    # One JSONL line is written per scrape, so the line count is the number of
    # snapshots collected (0 when the file was never created).
    METRICS_LINES=0
    if [ -f "$METRICS_FILE" ]; then
        METRICS_LINES=$(wc -l < "$METRICS_FILE")
    fi

    echo ""
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN}  JMX Metrics: ${METRICS_FILE}${NC}"
    echo -e "${GREEN}  Snapshots collected: ${METRICS_LINES}${NC}"
    # Only suggest the analyzer when there is something to analyze; stderr is
    # silenced in case the count is not a plain integer.
    if [ "$METRICS_LINES" -gt 0 ] 2>/dev/null; then
        echo -e "${GREEN}  Analyze: ${PROJECT_ROOT}/test/scripts/analyze_metrics.sh ${METRICS_FILE}${NC}"
    fi
    echo -e "${GREEN}========================================${NC}"
fi

# Point the user at the helper script for retrieving profiling artifacts.
if [ "$PROFILE_ENABLED" = "true" ]; then
    echo ""
    echo -e "${GREEN}========================================${NC}"
    echo -e "${GREEN}  Profiling artifacts in container${NC}"
    echo -e "${GREEN}  Collect: $PROJECT_ROOT/test/scripts/profile_connect.sh collect [DIR]${NC}"
    echo -e "${GREEN}  Status:  $PROJECT_ROOT/test/scripts/profile_connect.sh status${NC}"
    echo -e "${GREEN}========================================${NC}"
fi

# Propagate the test command's exit code on failure.
if [ $TEST_EXIT_CODE -ne 0 ]; then
    echo -e "\n${RED}========================================${NC}"
    echo -e "${RED}  TESTS FAILED (exit code: $TEST_EXIT_CODE)${NC}"
    echo -e "${RED}========================================${NC}"
    exit $TEST_EXIT_CODE
fi

echo -e "\n${GREEN}========================================${NC}"
echo -e "${GREEN}  ALL TESTS PASSED${NC}"
echo -e "${GREEN}========================================${NC}"
exit 0


================================================
FILE: test/scripts/analyze_metrics.sh
================================================
#!/bin/bash
#
# Analyze scraped JMX metrics from a JSONL file produced by scrape_metrics.sh.
#
# Usage:
#   ./analyze_metrics.sh <file>              Aggregate summary (default)
#   ./analyze_metrics.sh <file> detail       Per-task breakdown
#   ./analyze_metrics.sh <file> lag          Per-channel offset lag
#
# If <file> is omitted, uses the most recent /tmp/sf-metrics-*.jsonl.

set -e

# Print a message on stderr and exit with status 1.
die() {
    echo "$1" >&2
    exit 1
}

FILE="${1}"
MODE="${2:-summary}"

# No file given: fall back to the newest scraper output in /tmp.
if [ -z "$FILE" ]; then
    FILE=$(ls -t /tmp/sf-metrics-*.jsonl 2>/dev/null | head -1)
    [ -n "$FILE" ] || die "No metrics file found. Provide a path or run scrape_metrics.sh first."
fi

[ -f "$FILE" ] || die "File not found: $FILE"

exec python3 - "$FILE" "$MODE" <<'PYEOF'
import json, sys, re
from collections import defaultdict

file_path = sys.argv[1]
mode = sys.argv[2]

# Load every JSONL line that carries a non-empty "metrics" map. Lines without
# one (for example the scraper's error records) are ignored.
snapshots = []
malformed_lines = 0
with open(file_path) as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        try:
            data = json.loads(line)
        except json.JSONDecodeError:
            # A scraper stopped mid-write can leave a truncated line; skip it
            # instead of failing the whole analysis.
            malformed_lines += 1
            continue
        if isinstance(data, dict) and data.get("metrics"):
            snapshots.append(data)

if malformed_lines:
    print(f"Warning: skipped {malformed_lines} malformed line(s) in {file_path}",
          file=sys.stderr)

if not snapshots:
    print("No metric snapshots with data found.")
    sys.exit(0)

# All reports are computed from the last snapshot; the first one is only used
# for the time range shown in the header.
last = snapshots[-1]
first = snapshots[0]
metrics = last["metrics"]

# ── helpers ──────────────────────────────────────────────────────────────────

def parse_mbean(key):
    """Return the ``k=v`` properties of an MBean key as a dict.

    Everything after the first ``:`` is split on ``,``; each piece is split
    on its first ``=``. A piece without ``=`` maps to an empty string, and a
    later duplicate key overwrites an earlier one.
    """
    _domain, _colon, property_list = key.partition(":")
    triples = (prop.partition("=") for prop in property_list.split(","))
    return {prop_name: prop_value for prop_name, _eq, prop_value in triples}

def fmt_dur(seconds):
    """Format a duration in seconds using the largest fitting unit.

    ``None`` and zero render as a dash placeholder; values below 1 ms are
    shown in microseconds, values below 1 s in milliseconds, the rest in
    seconds.
    """
    if seconds in (None, 0):
        return "     -"
    # (exclusive upper bound in seconds, multiplier, unit suffix)
    for upper_bound, multiplier, unit in ((0.001, 1e6, "us"), (1, 1e3, "ms")):
        if seconds < upper_bound:
            return f"{seconds*multiplier:6.1f}{unit}"
    return f"{seconds:6.2f}s "

def fmt_count(n):
    """Render a number with comma thousands separators."""
    return format(n, ",")

def fmt_rate(r):
    """Format a rate with precision that shrinks as the value grows.

    Below 1: three decimals. Below 1000: one decimal. Otherwise: no decimals,
    with thousands separators.
    """
    spec = ".3f" if r < 1 else ".1f" if r < 1000 else ",.0f"
    return format(r, spec)

def aggregate_timer(timer_name, timers_by_task):
    """Combine one timer across tasks.

    Returns ``(calls, mean, p50, p95, max)`` where mean/p50/p95 are averages
    of the per-task values weighted by each task's call count, and max is the
    largest per-task maximum. Tasks that lack the timer are ignored; tasks
    with a non-positive count contribute to ``calls`` only.
    """
    weighted_fields = ("Mean", "50thPercentile", "95thPercentile")
    weighted_sums = [0.0] * len(weighted_fields)
    calls = 0
    peak = 0.0

    # Tasks are visited in sorted order so the floating-point sums are
    # accumulated in a stable order.
    for _task, per_task in sorted(timers_by_task.items()):
        stats = per_task.get(timer_name)
        if not stats:
            continue
        n = stats.get("Count", 0)
        calls += n
        if n > 0:
            for idx, field in enumerate(weighted_fields):
                weighted_sums[idx] += stats.get(field, 0) * n
            peak = max(peak, stats.get("Max", 0))

    if calls > 0:
        weighted_sums = [total / calls for total in weighted_sums]

    mean, p50, p95 = weighted_sums
    return calls, mean, p50, p95, peak

def print_timer_row(label, timer_name, timers_by_task):
    """Print one table row: label, call count, then mean/p50/p95/max durations."""
    calls, *durations = aggregate_timer(timer_name, timers_by_task)
    duration_cells = "  ".join(fmt_dur(value) for value in durations)
    print(f"  {label:<24} {fmt_count(calls):>8}  {duration_cells}")

# ── classify metrics ─────────────────────────────────────────────────────────

# Per-task buckets: task id -> metric name -> attribute map
task_timers = defaultdict(dict)
task_counters = defaultdict(dict)
task_meters = defaultdict(dict)
task_gauges = defaultdict(dict)
# Per-channel buckets: channel name -> metric name -> attribute map
channel_gauges = defaultdict(dict)
channel_counters = defaultdict(dict)

# Both the old (latency) and new (duration) timer names are recognised
TIMER_NAMES = {
    "put-duration", "precommit-duration",
    "put-latency", "precommit-latency",
    "open-duration", "close-duration", "start-duration",
    "channel-open-duration", "sdk-client-create-duration",
    "precommit-offset-fetch-duration",
}
METER_NAMES = {"put-records"}
COUNTER_NAMES = {"open-count", "close-count", "precommit-partitions-skipped", "channel-open-count"}
GAUGE_NAMES = {"assigned-partitions", "sdk-client-count"}
OFFSET_NAMES = {"latest-consumer-offset", "persisted-in-snowflake-offset", "processed-offset"}
CHANNEL_COUNTER_NAMES = {"channel-recovery-count"}

# (recognised names, destination bucket) pairs, checked in order
_TASK_BUCKETS = (
    (TIMER_NAMES, task_timers),
    (METER_NAMES, task_meters),
    (COUNTER_NAMES, task_counters),
    (GAUGE_NAMES, task_gauges),
)
_CHANNEL_BUCKETS = (
    (OFFSET_NAMES, channel_gauges),
    (CHANNEL_COUNTER_NAMES, channel_counters),
)


def _file_metric(buckets, owner, metric_name, value):
    """Store value in the first bucket whose name set contains metric_name."""
    for known_names, bucket in buckets:
        if metric_name in known_names:
            bucket[owner][metric_name] = value
            return


for key, val in metrics.items():
    props = parse_mbean(key)
    task = props.get("task", "")
    if task.startswith("task-"):
        _file_metric(_TASK_BUCKETS, task, props.get("name", ""), val)
    elif props.get("category", "") == "offsets":
        _file_metric(_CHANNEL_BUCKETS, props.get("channel", ""), props.get("name", ""), val)

# A task counts if it appears in any of the per-task buckets
num_tasks = len(set(task_timers) | set(task_counters) | set(task_meters) | set(task_gauges))
num_channels = len(channel_gauges)
num_snapshots = len(snapshots)

# Use the new "duration" timer names when any task reports them, otherwise
# fall back to the old "latency" names
has_new_names = any("put-duration" in per_task for per_task in task_timers.values())
if has_new_names:
    PUT_TIMER, PRECOMMIT_TIMER = "put-duration", "precommit-duration"
else:
    PUT_TIMER, PRECOMMIT_TIMER = "put-latency", "precommit-latency"

# ── header ───────────────────────────────────────────────────────────────────

banner = "=" * 72
print(banner)
print("  Snowflake Kafka Connector - Metrics Analysis")
print(banner)
print(f"  File:       {file_path}")
print(f"  Snapshots:  {num_snapshots}  ({first['timestamp']} .. {last['timestamp']})")
print(f"  Tasks:      {num_tasks}")
print(f"  Channels:   {num_channels}")
print()

# ── summary mode (default) ───────────────────────────────────────────────────

# Aggregate report: every figure below is computed from the last snapshot's
# metrics, combined across all tasks / channels.
if mode == "summary":

    # --- method durations ---
    print("-" * 72)
    print("  Method Durations  (aggregated across all tasks, last snapshot)")
    print("-" * 72)
    # Column header shared by both duration tables
    hdr = f"  {'method':<24} {'calls':>8}  {'mean':>8}  {'p50':>8}  {'p95':>8}  {'max':>8}"
    print(hdr)

    print_timer_row("put()", PUT_TIMER, task_timers)
    print_timer_row("preCommit()", PRECOMMIT_TIMER, task_timers)
    print_timer_row("  offset fetch (SDK)", "precommit-offset-fetch-duration", task_timers)
    print()

    # --- lifecycle durations ---
    print("-" * 72)
    print("  Lifecycle Durations  (aggregated across all tasks)")
    print("-" * 72)
    print(hdr)

    print_timer_row("start()", "start-duration", task_timers)
    print_timer_row("open()", "open-duration", task_timers)
    print_timer_row("close()", "close-duration", task_timers)
    print_timer_row("channel open (SDK)", "channel-open-duration", task_timers)
    print_timer_row("SDK client create", "sdk-client-create-duration", task_timers)
    print()

    # --- throughput ---
    print("-" * 72)
    print("  Throughput")
    print("-" * 72)
    # Sum the "put-records" meter of every task: record counts and both rates
    # are added up, so the totals describe the connector as a whole.
    total_records = 0
    total_mean_rate = 0.0
    total_1m_rate = 0.0
    for task, meters in sorted(task_meters.items()):
        m = meters.get("put-records")
        if m:
            total_records += m.get("Count", 0)
            total_mean_rate += m.get("MeanRate", 0)
            total_1m_rate += m.get("OneMinuteRate", 0)

    print(f"  Total records ingested:  {fmt_count(total_records)}")
    print(f"  Mean rate:               {fmt_rate(total_mean_rate)} records/sec")
    print(f"  1-minute rate:           {fmt_rate(total_1m_rate)} records/sec")

    # Tasks without the counter contribute 0
    total_skipped = sum(
        c.get("precommit-partitions-skipped", {}).get("Count", 0)
        for c in task_counters.values()
    )
    print(f"  preCommit partitions skipped: {fmt_count(total_skipped)}")
    print()

    # --- lifecycle counts ---
    print("-" * 72)
    print("  Lifecycle Counts  (across all tasks)")
    print("-" * 72)
    # Counters are read from their "Count" attribute, gauges from "Value";
    # a metric missing for a task counts as 0.
    total_open = sum(c.get("open-count", {}).get("Count", 0) for c in task_counters.values())
    total_close = sum(c.get("close-count", {}).get("Count", 0) for c in task_counters.values())
    total_assigned = sum(g.get("assigned-partitions", {}).get("Value", 0) for g in task_gauges.values())
    total_channel_open = sum(c.get("channel-open-count", {}).get("Count", 0) for c in task_counters.values())
    total_sdk_clients = sum(g.get("sdk-client-count", {}).get("Value", 0) for g in task_gauges.values())

    print(f"  open() calls:            {fmt_count(total_open)}")
    print(f"  close() calls:           {fmt_count(total_close)}")
    print(f"  assigned partitions:     {fmt_count(total_assigned)}  (current)")
    print(f"  channel opens (total):   {fmt_count(total_channel_open)}")
    print(f"  SDK clients (current):   {fmt_count(total_sdk_clients)}")

    # Recovery counts are tracked per channel, not per task
    total_recovery = sum(
        c.get("channel-recovery-count", {}).get("Count", 0)
        for c in channel_counters.values()
    )
    print(f"  channel recoveries:      {fmt_count(total_recovery)}")
    print()

    # --- offset lag (computed from raw offsets) ---
    print("-" * 72)
    print("  Offset Lag Summary  (last snapshot)")
    print("-" * 72)
    # Lag per channel = consumer offset - persisted offset, clamped at 0.
    # Channels where either offset is missing or negative are left out.
    lags = []
    for ch, gauges in channel_gauges.items():
        consumer = gauges.get("latest-consumer-offset", {}).get("Value")
        persisted = gauges.get("persisted-in-snowflake-offset", {}).get("Value")
        if consumer is not None and persisted is not None and consumer >= 0 and persisted >= 0:
            lags.append(max(0, consumer - persisted))

    if lags:
        with_lag = sum(1 for l in lags if l > 0)
        print(f"  Channels:          {len(lags)}")
        print(f"  With lag > 0:      {with_lag}")
        print(f"  Max lag:           {max(lags)}")
        print(f"  Mean lag:          {sum(lags)/len(lags):.1f}")
    else:
        print("  (no offset metrics found)")
    print()

# ── detail mode ──────────────────────────────────────────────────────────────

# Per-task report: one header line per task followed by its timers, its
# record meter and, when non-zero, its skipped-partition counter.
elif mode == "detail":
    # Every task seen in any per-task bucket, ordered by the trailing number
    # in its id (ids without a trailing number sort as 0).
    all_tasks = sorted(
        set(list(task_timers.keys()) + list(task_counters.keys())
            + list(task_meters.keys()) + list(task_gauges.keys())),
        key=lambda t: int(re.search(r"(\d+)$", t).group(1)) if re.search(r"(\d+)$", t) else 0,
    )

    # (display label, timer metric name) in the order rows are printed
    DETAIL_TIMERS = [
        ("put()", PUT_TIMER),
        ("preCommit()", PRECOMMIT_TIMER),
        ("  offset fetch", "precommit-offset-fetch-duration"),
        ("open()", "open-duration"),
        ("close()", "close-duration"),
        ("start()", "start-duration"),
        ("channel open", "channel-open-duration"),
        ("SDK client create", "sdk-client-create-duration"),
    ]

    print("-" * 72)
    print("  Per-Task Breakdown  (last snapshot)")
    print("-" * 72)

    for task in all_tasks:
        # Missing metrics default to 0 so the header line is always complete
        assigned = task_gauges.get(task, {}).get("assigned-partitions", {}).get("Value", 0)
        opens = task_counters.get(task, {}).get("open-count", {}).get("Count", 0)
        closes = task_counters.get(task, {}).get("close-count", {}).get("Count", 0)
        ch_opens = task_counters.get(task, {}).get("channel-open-count", {}).get("Count", 0)
        sdk_clients = task_gauges.get(task, {}).get("sdk-client-count", {}).get("Value", 0)

        print(f"\n  {task}  (partitions={assigned}, opens={opens}, closes={closes},"
              f" ch_opens={ch_opens}, sdk_clients={sdk_clients})")

        for label, timer_name in DETAIL_TIMERS:
            t = task_timers.get(task, {}).get(timer_name)
            # Timers that are absent or were never invoked are not printed
            if not t or t.get("Count", 0) == 0:
                continue
            c = t.get("Count", 0)
            print(f"    {label:<20}  calls={fmt_count(c):>6}"
                  f"  mean={fmt_dur(t.get('Mean',0))}"
                  f"  p50={fmt_dur(t.get('50thPercentile',0))}"
                  f"  p95={fmt_dur(t.get('95thPercentile',0))}"
                  f"  max={fmt_dur(t.get('Max',0))}")

        m = task_meters.get(task, {}).get("put-records")
        if m:
            print(f"    {'records':<20}  total={fmt_count(m.get('Count',0)):>6}"
                  f"  mean_rate={fmt_rate(m.get('MeanRate',0))} rec/s"
                  f"  1m_rate={fmt_rate(m.get('OneMinuteRate',0))} rec/s")

        # Only mention skipped partitions when there were any
        skipped = task_counters.get(task, {}).get("precommit-partitions-skipped", {}).get("Count", 0)
        if skipped > 0:
            print(f"    precommit-skipped: {skipped}")

    print()

# ── lag mode ─────────────────────────────────────────────────────────────────

elif mode == "lag":
    print("-" * 72)
    print("  Offset Lag by Channel  (last snapshot)")
    print("-" * 72)

    if not channel_gauges:
        print("  (no channel metrics found)")
    else:
        rows = []
        for ch, gauges in sorted(channel_gauges.items()):
            processed = gauges.get("processed-offset", {}).get("Value", -1)
            consumer = gauges.get("latest-consumer-offset", {}).get("Value", -1)
            persisted = gauges.get("persisted-in-snowflake-offset", {}).get("Value", -1)
            lag = max(0, consumer - persisted) if consumer >= 0 and persisted >= 0 else 0
            recovery = channel_counters.get(ch, {}).get("channel-recovery-count", {}).get("Count", 0)

            m = re.search(r"_(\d+)$", ch)
            part_id = m.group(1) if m else "?"
            short = ch if len(ch) <= 40 else f"...{ch[-37:]}"
            rows.append((part_id, short, lag, processed, consumer, persisted, recovery))

        hdr = f"  {'channel':<42} {'lag':>5}  {'processed':>10}  {'consumer':>10}  {'persisted':>10}  {'recover':>7}"
        print(hdr)
        for part_id, short, lag, processed, consumer, persisted, recovery in sorted(rows, key=lambda r: int(r[0]) if r[0].isdigit() else 0):
            flag = " *" if lag > 0 else "  "
            rec_flag = f"  {recovery:>7}" if recovery > 0 else f"  {recovery:>7}"
            print(f"  {short:<42} {lag:>5}{flag}"
                  f"  {processed:>10}  {consumer:>10}  {persisted:>10}{rec_flag}")

    print()

else:
    print(f"Unknown mode: {mode}", file=sys.stderr)
    print("Usage: analyze_metrics.sh <file> [summary|detail|lag]", file=sys.stderr)
    sys.exit(1)
PYEOF


================================================
FILE: test/scripts/profile_connect.sh
================================================
#!/bin/bash
#
# Profile the Kafka Connect worker running in Docker.
#
# Wraps JFR and async-profiler commands against the Kafka Connect container.
# Requires the profiling overlay (docker-compose.profile-confluent.yml or
# docker-compose.profile-apache.yml) to be active.
#
# Usage:
#   ./profile_connect.sh <command> [options]
#
# Commands:
#   jfr-dump                    Dump the continuous JFR recording to a file
#   jfr-stop                    Stop JFR and dump final recording
#   heap-dump                   Take a heap dump (hprof)
#   thread-dump                 Print thread dump to stdout
#   async-cpu [DURATION]        CPU flame graph via async-profiler (default: 60s)
#   async-alloc [DURATION]      Allocation flame graph via async-profiler (default: 60s)
#   async-wall [DURATION]       Wall-clock flame graph via async-profiler (default: 60s)
#   collect [OUTPUT_DIR]        Collect all profiling artifacts from the container
#   status                      Show JFR recording status and JVM info
#
# Examples:
#   ./profile_connect.sh status
#   ./profile_connect.sh async-cpu 30
#   ./profile_connect.sh jfr-dump
#   ./profile_connect.sh heap-dump
#   ./profile_connect.sh collect ./profiling-results
#
# Prerequisites:
#   - Containers running with one of the profiling overlays named at the top of this file
#   - For async-* commands: async-profiler mounted via ASYNC_PROFILER_PATH env var

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOCKER_DIR="$SCRIPT_DIR/../docker"

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Print a red "ERROR:" message on stderr and terminate with status 1.
error_exit() {
    echo -e "${RED}ERROR: $1${NC}" >&2
    exit 1
}

# Print a green "INFO:" message on stdout.
info() {
    echo -e "${GREEN}INFO: $1${NC}"
}

# Print a yellow "WARN:" message on stdout.
warn() {
    echo -e "${YELLOW}WARN: $1${NC}"
}

# Detect which container is running Kafka Connect
# Print the name of the running container that hosts Kafka Connect.
#
# Containers are matched by name against "<project dir name>.*<suffix>",
# trying the Confluent service name (kafka-connect) before the Apache one
# (kafka). Exits with an error when neither matches.
detect_container() {
    local project_root project_name suffix container
    project_root="$(cd "$SCRIPT_DIR/../.." && pwd)"
    project_name="$(basename "$project_root")"

    for suffix in "kafka-connect" "kafka"; do
        container=$(docker ps --filter "name=${project_name}.*${suffix}" --format '{{.Names}}' | head -1)
        if [ -n "$container" ]; then
            echo "$container"
            return 0
        fi
    done

    error_exit "No Kafka Connect container found. Is the test environment running?"
}

# Find the Kafka Connect JVM PID inside the container
# Print the PID of the Kafka Connect JVM inside the given container.
#
# $1: container name. The PID is taken from the first `jcmd` listing line that
# mentions the Connect distributed main class; when none does, the first
# listed JVM other than jcmd itself is used. Exits with an error when no JVM
# is found.
find_kc_pid() {
    local container="$1"
    local pid

    # Preferred match: the Kafka Connect main class
    pid=$(docker exec "$container" jcmd 2>/dev/null \
        | grep -i "ConnectDistributed\|connect-distributed" \
        | awk '{print $1}' | head -1)

    # Fallback: any java process
    [ -n "$pid" ] || pid=$(docker exec "$container" jcmd 2>/dev/null \
        | grep -v "^$\|jcmd" | head -1 | awk '{print $1}')

    [ -n "$pid" ] || error_exit "Cannot find Kafka Connect JVM PID in container $container"

    echo "$pid"
}

# First argument selects the command (default: help); the remaining arguments
# are that command's options. `shift` fails when no argument was given, which
# must not trip errexit.
COMMAND="${1:-help}"
shift || true

case "$COMMAND" in
    status)
        # Show JFR recordings, VM info and heap usage of the Connect JVM
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        info "Container: $CONTAINER"
        info "Kafka Connect PID: $PID"
        echo ""
        echo "=== JFR Recordings ==="
        docker exec "$CONTAINER" jcmd "$PID" JFR.check 2>/dev/null || echo "(no JFR recordings)"
        echo ""
        echo "=== VM Info ==="
        docker exec "$CONTAINER" jcmd "$PID" VM.info 2>/dev/null | head -20
        echo ""
        echo "=== Heap Usage ==="
        docker exec "$CONTAINER" jcmd "$PID" GC.heap_info 2>/dev/null || true
        ;;

    jfr-dump)
        # Dump the recording named "profile" to a timestamped file; the
        # recording keeps running
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        OUTFILE="/tmp/profile/kc-profile-$(date +%Y%m%d-%H%M%S).jfr"
        info "Dumping JFR recording to $OUTFILE..."
        docker exec "$CONTAINER" jcmd "$PID" JFR.dump name=profile filename="$OUTFILE"
        info "Done. Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    jfr-stop)
        # Stop the recording named "profile" and write its final contents
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        OUTFILE="/tmp/profile/kc-profile-final.jfr"
        info "Stopping JFR recording..."
        docker exec "$CONTAINER" jcmd "$PID" JFR.stop name=profile filename="$OUTFILE"
        info "Final recording at $OUTFILE"
        info "Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    heap-dump)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        OUTFILE="/tmp/profile/heap-$(date +%Y%m%d-%H%M%S).hprof"
        info "Taking heap dump (this may pause the JVM briefly)..."
        docker exec "$CONTAINER" jcmd "$PID" GC.heap_dump "$OUTFILE"
        info "Heap dump at $OUTFILE"
        info "Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    thread-dump)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        docker exec "$CONTAINER" jcmd "$PID" Thread.print
        ;;

    async-cpu|async-alloc|async-wall)
        CONTAINER=$(detect_container)
        PID=$(find_kc_pid "$CONTAINER")
        DURATION="${1:-60}"
        # Event name is the command without its "async-" prefix
        EVENT="${COMMAND#async-}"

        # Locate async-profiler (prefer /opt mount, fall back to /tmp copy)
        ASPROF=""
        for candidate in /opt/async-profiler/bin/asprof /tmp/async-profiler/bin/asprof; do
            if docker exec "$CONTAINER" test -f "$candidate" 2>/dev/null; then
                ASPROF="$candidate"
                break
            fi
        done
        if [ -z "$ASPROF" ]; then
            error_exit "async-profiler not found in container. Set ASYNC_PROFILER_PATH or docker cp it to /tmp/async-profiler/."
        fi

        # Use itimer for cpu profiling — perf_event_open is typically restricted
        # in containers even with SYS_PTRACE. itimer uses SIGPROF instead.
        if [ "$EVENT" = "cpu" ]; then
            EVENT="itimer"
        fi

        # The file name keeps the user-facing event name (cpu, not itimer)
        OUTFILE="/tmp/profile/flamegraph-${COMMAND#async-}-$(date +%Y%m%d-%H%M%S).html"
        info "Profiling $EVENT for ${DURATION}s (PID $PID)..."
        docker exec "$CONTAINER" "$ASPROF" \
            -d "$DURATION" -f "$OUTFILE" -e "$EVENT" "$PID"
        info "Flame graph at $OUTFILE"
        info "Retrieve with: docker cp $CONTAINER:$OUTFILE ."
        ;;

    collect)
        CONTAINER=$(detect_container)
        OUTPUT_DIR="${1:-./profiling-results-$(date +%Y%m%d-%H%M%S)}"
        mkdir -p "$OUTPUT_DIR"

        info "Collecting profiling artifacts from $CONTAINER into $OUTPUT_DIR/"

        # Dump JFR before collecting (best effort: there may be no recording)
        PID=$(find_kc_pid "$CONTAINER")
        docker exec "$CONTAINER" jcmd "$PID" JFR.dump name=profile \
            filename="/tmp/profile/kc-profile-collected.jfr" 2>/dev/null || true

        # Copy via tar pipe — docker cp cannot read from tmpfs mounts
        if ! docker exec "$CONTAINER" test -d /tmp/profile 2>/dev/null; then
            warn "/tmp/profile does not exist in container. Was --profile enabled?"
        elif [ "$(docker exec "$CONTAINER" find /tmp/profile -maxdepth 1 -type f | wc -l)" -eq 0 ]; then
            warn "/tmp/profile is empty — no profiling artifacts to collect."
        else
            docker exec "$CONTAINER" tar cf - -C /tmp/profile . \
                | tar xf - -C "$OUTPUT_DIR/"
        fi

        info "Collected artifacts:"
        ls -lh "$OUTPUT_DIR/"

        # Print analysis hints
        echo ""
        echo "=== Analysis ==="
        echo "  JFR:    jfr summary $OUTPUT_DIR/kc-profile-collected.jfr"
        echo "          jfr print --events jdk.ExecutionSample $OUTPUT_DIR/*.jfr | head -100"
        echo "          jfr print --events jdk.ObjectAllocationSample $OUTPUT_DIR/*.jfr | head -100"
        echo "  GC:     Upload $OUTPUT_DIR/gc.log to https://gceasy.io"
        echo "  Heap:   Open $OUTPUT_DIR/*.hprof in Eclipse MAT"
        echo "  Flames: Open $OUTPUT_DIR/flamegraph-*.html in a browser"
        ;;

    help|--help|-h)
        # Print the script's leading comment block as the usage text: skip the
        # shebang, strip the "# " prefix and stop at the first line that is not
        # a comment. A fixed `head -30` cut off the end of the header, which is
        # longer than 30 lines.
        awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
        ;;

    *)
        error_exit "Unknown command: $COMMAND. Run '$0 help' for usage."
        ;;
esac


================================================
FILE: test/scripts/scrape_metrics.sh
================================================
#!/bin/bash
#
# Scrape Snowflake Kafka Connector JMX metrics via Jolokia.
#
# Usage:
#   ./scrape_metrics.sh [options]
#
# Modes:
#   --once               Single snapshot to stdout (default)
#   --poll               Continuous scraping to a JSONL file
#   --interval=SECONDS   Poll interval (default: 10)
#   --output=FILE        Output file for --poll mode (default: /tmp/sf-metrics.jsonl)
#   --host=HOST          Jolokia host (default: kafka-connect)
#   --port=PORT          Jolokia port (default: 8778)
#   --pretty             Pretty-print JSON output (--once mode only)
#
# Examples:
#   ./scrape_metrics.sh --once --pretty
#   ./scrape_metrics.sh --poll --interval=5 --output=/tmp/metrics.jsonl
#   ./scrape_metrics.sh --once --host=localhost

set -e

# Defaults; the host can also be preset through KAFKA_CONNECT_HOST
HOST="${KAFKA_CONNECT_HOST:-kafka-connect}"
PORT="8778"
MODE="once"
INTERVAL=10
OUTPUT="/tmp/sf-metrics.jsonl"
PRETTY="false"

# Each option is a single word, so exactly one argument is consumed per pass
while [ "$#" -gt 0 ]; do
    case "$1" in
        --once|--poll) MODE="${1#--}" ;;
        --interval=*)  INTERVAL="${1#*=}" ;;
        --output=*)    OUTPUT="${1#*=}" ;;
        --host=*)      HOST="${1#*=}" ;;
        --port=*)      PORT="${1#*=}" ;;
        --pretty)      PRETTY="true" ;;
        -h|--help)
            # Usage text is the comment header at the top of this script
            head -20 "$0" | grep "^#" | sed 's/^# \?//'
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
    shift
done

SF_DOMAIN="snowflake.kafka.connector"
JOLOKIA_URL="http://${HOST}:${PORT}/jolokia"

# Read every MBean in $SF_DOMAIN from Jolokia and print one compact JSON
# object {"timestamp": <UTC time>, "metrics": <the response's "value" map>}.
#
# When the request fails, prints a JSON object with "timestamp" and "error"
# instead and returns 1.
fetch_snapshot() {
    local timestamp raw
    timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # One request covers all connector MBeans
    if ! raw=$(curl -sf "${JOLOKIA_URL}/read/${SF_DOMAIN}:*" 2>/dev/null); then
        echo "{\"timestamp\":\"${timestamp}\",\"error\":\"Cannot reach Jolokia at ${JOLOKIA_URL}\"}"
        return 1
    fi

    # Keep only the value map and stamp it with the scrape time
    jq -c --arg ts "$timestamp" '{timestamp: $ts, metrics: .value}' <<< "$raw"
}

# Run the selected mode.
#
# fetch_snapshot prints an error document and returns non-zero when Jolokia
# cannot be reached. With `set -e` active, that status must be handled
# explicitly: otherwise --once exits before the error document is printed and
# --poll dies on the first failed scrape.
case $MODE in
    once)
        fetch_status=0
        result=$(fetch_snapshot) || fetch_status=$?
        if [ "$PRETTY" = "true" ]; then
            echo "$result" | jq .
        else
            echo "$result"
        fi
        # Still report the failure to the caller through the exit status.
        exit "$fetch_status"
        ;;
    poll)
        echo "Scraping ${SF_DOMAIN} from ${JOLOKIA_URL} every ${INTERVAL}s → ${OUTPUT}" >&2
        mkdir -p "$(dirname "$OUTPUT")"
        while true; do
            # A failed scrape is recorded as an error line in the output file;
            # keep polling so a slow-starting or restarting endpoint does not
            # end the collection.
            fetch_snapshot >> "$OUTPUT" || true
            sleep "$INTERVAL"
        done
        ;;
esac


================================================
FILE: test/test_data/.gitignore
================================================
*_pb2.py
protobuf/src
protobuf/target


================================================
FILE: test/test_data/protobuf/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the com.snowflake:kafka-test-protobuf artifact. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.snowflake</groupId>
    <artifactId>kafka-test-protobuf</artifactId>
    <version>1.0.0</version>

    <properties>
        <!-- Used below as both the compiler source and target level -->
        <java.version>1.8</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Protobuf Java runtime; bundled into the assembled JAR below -->
        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
            <version>3.25.5</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for the Java level set in the java.version property -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
            <!-- During the package phase, also build a single JAR that
                 includes the dependencies (jar-with-dependencies) -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>


</project>

================================================
FILE: test/test_data/sensor.proto
================================================
syntax = "proto3";

package com.snowflake.kafka.test.protobuf;
option java_outer_classname = "SensorReadingImpl";

// Sensor reading message whose fields cover a spread of proto3 field kinds:
// a nested message, 64-bit and 32-bit integers (plain, zig-zag "sint" and
// unsigned variants), float/double, bytes, and a repeated scalar.
// NOTE(review): lives under test/test_data, so presumably a fixture for the
// protobuf converter tests — confirm against the tests that load it.
message SensorReading {
   // Nested message: a string device identifier plus an enabled flag.
   message Device {
      string deviceID = 1;
      bool enabled = 2;
   }

   Device device = 1;
   // NOTE(review): looks like an epoch timestamp; the unit is not stated here.
   int64 dateTime = 2;
   double reading = 3;

   // One field per additional numeric scalar type.
   float float_val = 4;
   int32 int32_val = 5;
   sint32 sint32_val = 6;
   sint64 sint64_val = 7;
   uint32 uint32_val = 8;

   // Raw bytes, a repeated double, and an unsigned 64-bit integer.
   bytes bytes_val = 9;
   repeated double double_array_val = 10;
   uint64 uint64_val = 11;

}

================================================
FILE: test/tests/__init__.py
================================================


================================================
FILE: test/tests/compatibility/__init__.py
================================================


================================================
FILE: test/tests/compatibility/conftest.py
================================================
"""Shared fixtures for data-type ingestion tests.

Provides two infrastructure patterns:
  1. Single-table batch connector (``results`` fixture) — module-scoped, creates
     one table + one topic + one connector per ingestion mode.  All test cases
     are defined as data in test_type_compatibility.py, sent in one batch,
     queried once, then asserted.  Used by test_type_compatibility.py.
  2. Per-test connector (``typed_table`` + ``ingest_one_type_abort``) — function-
     scoped, one connector per test. Used by test_unsupported_types.py for types
     that crash streaming channels.
"""

import datetime
import json
import logging
import math
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal
from uuid import uuid4

import pytest
from confluent_kafka import Consumer as KafkaConsumer, KafkaError

from lib.config_migration import v4_config_to_v3
from lib.driver import quote_name

logger = logging.getLogger(__name__)

TEMPLATE_DIR = Path("rest_request_template")
BASE_TEMPLATE = "datatype_ingestion.json"


# ---------------------------------------------------------------------------
# Sentinel for unset expected_value
# ---------------------------------------------------------------------------


class _Unset:
    """Type of the ``UNSET`` sentinel; its repr is the string ``UNSET``."""

    def __repr__(self):
        return "UNSET"


# Module-wide sentinel instance.  A dedicated object (rather than None) lets
# callers tell "nothing supplied" apart from an explicit None.
UNSET = _Unset()


# ---------------------------------------------------------------------------
# Test case definition
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class Case:
    """A single test vector: send ``value`` to column ``col``, expect outcome.

    The optional ``group`` tag controls which test function owns this case.
    Tests filter on group to avoid name-based set lookups.
    """

    # Case identifier; also written to the record's ID / TEST_CASE fields by
    # the ``results`` fixture and used as the lookup key for rows and DLQ ids.
    name: str
    # Name of the table column (and JSON key) that receives ``value``.
    col: str
    # Payload placed under ``col`` in the produced JSON record.
    value: Any
    # Expected outcome: the row lands in the table, or the record is rejected.
    expect: Literal["ingested", "error"]
    # Value expected in the table; when left as UNSET, ``value`` is used.
    expected_value: Any = UNSET
    # Absolute tolerance for a pytest.approx comparison; None disables it.
    approx: float | None = None
    # Optional tag used by ``cases_where`` to select or exclude cases.
    group: str | None = None


def cases_where(*, col=None, expect=None, group=None, exclude_groups=None):
    """Select entries of CASES (from test_type_compatibility).

    Every keyword that is not None adds one condition; a case is kept only
    if it satisfies all of them:

      * ``col`` — ``case.col`` equals it
      * ``expect`` — ``case.expect`` equals it
      * ``group`` — ``case.group`` equals it
      * ``exclude_groups`` — ``case.group`` is not contained in it

    With no condition at all the CASES list itself is returned.
    """
    from .test_type_compatibility import CASES

    conditions = []
    if col is not None:
        conditions.append(lambda c: c.col == col)
    if expect is not None:
        conditions.append(lambda c: c.expect == expect)
    if group is not None:
        conditions.append(lambda c: c.group == group)
    if exclude_groups is not None:
        conditions.append(lambda c: c.group not in exclude_groups)

    if not conditions:
        return CASES
    # all() short-circuits, so conditions are checked in the order above.
    return [c for c in CASES if all(check(c) for check in conditions)]


# ---------------------------------------------------------------------------
# Results dataclass
# ---------------------------------------------------------------------------


# Map DDL base types to comparison categories
_COMPARE_CATEGORIES = {
    "FLOAT": "float",
    "VARIANT": "json",
    "OBJECT": "json",
    "ARRAY": "json",
    "TIMESTAMP_LTZ": "timestamp_ltz",
    "TIMESTAMP_TZ": "timestamp_tz",
}


def _ddl_category(col: str, columns: dict) -> str:
    """Derive comparison category from a column's DDL type."""
    ddl = columns.get(col, "")
    base = ddl.split("(")[0].strip().upper()
    return _COMPARE_CATEGORIES.get(base, "exact")


@dataclass(frozen=True)
class Results:
    """Outcome of sending all CASES through one connector instance."""

    rows: dict
    dlq_ids: frozenset
    mode: str
    total_sent: int
    columns: dict  # {col_name: ddl_type} — used for comparison dispatch
    error_table_rows: tuple = ()  # populated for v4-ht only

    @property
    def total_ingested(self):
        return len(self.rows)

    @property
    def total_dlq(self):
        return len(self.dlq_ids)

    @property
    def total_missing(self):
        return self.total_sent - self.total_ingested - self.total_dlq

    def assert_ingested(self, case):
        """Assert that ``case`` landed in the table with the correct value."""
        assert case.name in self.rows, (
            f"[{case.name}] expected in table but not found "
            f"(mode={self.mode}, in_dlq={case.name in self.dlq_ids})"
        )
        actual = self.rows[case.name].get(case.col)

        if not isinstance(case.expected_value, _Unset):
            expected = case.expected_value
        else:
            expected = case.value

        if expected is None:
            assert actual is None, f"[{case.name}] expected NULL, got {actual!r}"
            return

        if case.approx is not None:
            assert float(actual) == pytest.approx(float(expected), abs=case.approx), (
                f"[{case.name}] approx mismatch: {actual!r} != {expected!r} ± {case.approx}"
            )
            return

        category = _ddl_category(case.col, self.columns)
        match category:
            case "float":
                self._compare_float(case.name, actual, expected)
            case "json":
                self._compare_json(case.name, actual, expected)
            case "timestamp_ltz":
                assert isinstance(actual, datetime.datetime), (
                    f"[{case.name}] expected datetime, got {type(actual).__name__}: {actual!r}"
                )
                assert actual.replace(tzinfo=None) == expected, (
                    f"[{case.name}] LTZ mismatch (tz-stripped): {actual!r} != {expected!r}"
                )
            case "timestamp_tz":
                assert isinstance(actual, datetime.datetime), (
                    f"[{case.name}] expected datetime, got {type(actual).__name__}: {actual!r}"
                )
                assert actual.tzinfo is not None, (
                    f"[{case.name}] expected tz-aware datetime, got naive: {actual!r}"
                )
            case _:
                assert actual == expected, (
                    f"[{case.name}] value mismatch: {actual!r} != {expected!r}"
                )

    def assert_error(self, case):
        """Assert that ``case`` did NOT land in the table (and hit DLQ if applicable)."""
        assert case.name not in self.rows, (
            f"[{case.name}] expected NOT in table but found: "
            f"{self.rows[case.name].get(case.col)!r} (mode={self.mode})"
        )
        # v4-ht has no DLQ — errors silently drop records server-side
        if self.mode != "v4-ht":
            assert case.name in self.dlq_ids, (
                f"[{case.name}] expected in DLQ but not found (mode={self.mode})"
            )

    @staticmethod
    def _compare_float(name, actual, expected):
        if isinstance(expected, str):
            exp_f = float(expected)
            if math.isnan(exp_f):
                assert math.isnan(float(actual)), (
                    f"[{name}] expected NaN, got {actual!r}"
                )
            else:
                assert float(actual) == exp_f, (
                    f"[{name}] expected {exp_f}, got {actual!r}"
                )
        else:
            assert actual == pytest.approx(expected, rel=1e-6), (
                f"[{name}] float mismatch: {actual!r} != {expected!r}"
            )

    @staticmethod
    def _compare_json(name, actual, expected):
        def _try_parse(val):
            if isinstance(val, str):
                try:
                    return json.loads(val)
                except (json.JSONDecodeError, TypeError):
                    return val
            return val

        parsed = _try_parse(actual)
        exp = _try_parse(expected) if isinstance(expected, str) else expected
        # SSv1 sometimes double-encodes JSON strings — try one more parse
        if isinstance(parsed, str) and not isinstance(exp, str):
            parsed = _try_parse(parsed)
        assert parsed == exp, (
            f"[{name}] JSON mismatch: {parsed!r} != {exp!r} (raw: {actual!r})"
        )


# ---------------------------------------------------------------------------
# Fixtures — shared
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module", params=["v3", "v4-compat", "v4-ht"])
def ingestion_mode(request):
    """Module-scoped parametrized fixture yielding the current ingestion mode.

    Returns the active parameter, one of ``"v3"``, ``"v4-compat"`` or
    ``"v4-ht"``; dependent fixtures and tests run once per value.
    """
    return request.param


@pytest.fixture(scope="module")
def mode_salt(session_name_salt, ingestion_mode):
    """Session salt extended with a per-mode suffix.

    ``_v3`` is appended for v3, ``_ht`` for v4-ht and nothing for v4-compat.
    An unknown mode raises ``KeyError``.
    """
    suffix_by_mode = {"v3": "_v3", "v4-compat": "", "v4-ht": "_ht"}
    mode_suffix = suffix_by_mode[ingestion_mode]
    return f"{session_name_salt}{mode_suffix}"


# ---------------------------------------------------------------------------
# Connector config builder
# ---------------------------------------------------------------------------


def _build_mode_config(ingestion_mode, *, dlq_topic=None):
    """Build a connector config from the base template for ``ingestion_mode``.

    Passing ``dlq_topic`` switches errors.tolerance to "all" and wires up
    dead-letter-queue routing to that topic.  Without it the template's
    tolerance is left untouched ("none"), so the connector task aborts on the
    first validation error — handy for tests that expect a fast failure
    (e.g. unsupported types).

    Returns:
        dict: the "config" section of the template with the overrides applied
        (for v3, the result of ``v4_config_to_v3``).
    """
    template_path = TEMPLATE_DIR / BASE_TEMPLATE
    template = json.loads(template_path.read_text())
    config = dict(template["config"])

    # Schematization for all modes — required for JSON key → column mapping
    config["snowflake.enable.schematization"] = "true"

    if dlq_topic:
        config.update(
            {
                "errors.tolerance": "all",
                "errors.deadletterqueue.topic.name": dlq_topic,
                "errors.deadletterqueue.topic.replication.factor": "1",
                "errors.deadletterqueue.context.headers.enable": "true",
            }
        )

    if ingestion_mode == "v3":
        return v4_config_to_v3(config)

    if ingestion_mode == "v4-compat":
        config.update(
            {
                "snowflake.validation": "client_side",
                "snowflake.compatibility.enable.column.identifier.normalization": "true",
                "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
                "snowflake.streaming.classic.offset.migration": "best_effort",
                "snowflake.streaming.classic.offset.migration.include.connector.name": "false",
            }
        )
    elif ingestion_mode == "v4-ht":
        config.update(
            {
                "snowflake.validation": "server_side",
                "snowflake.streaming.validate.compatibility.with.classic": "false",
            }
        )

    # Any other mode gets the config with only the common settings applied.
    return config


# ---------------------------------------------------------------------------
# Fixture — single-table batch connector (test_type_compatibility.py)
# ---------------------------------------------------------------------------


def _wait_for_stable_row_count(
    driver, sf_table, connector_name, *, stable_secs=15, timeout=120, poll_interval=5
):
    """Block until the table's row count stops changing, a task fails, or ``timeout`` elapses."""
    deadline = time.monotonic() + timeout
    last_count = 0
    stable_since = None

    while time.monotonic() < deadline:
        count = driver.select_number_of_records(sf_table) or 0
        if count != last_count:
            last_count = count
            stable_since = time.monotonic()
        elif stable_since is not None and count > 0:
            if (time.monotonic() - stable_since) >= stable_secs:
                logger.info(
                    "Row count stabilized at %d for %ds, proceeding",
                    count,
                    stable_secs,
                )
                break
        if failed := driver.get_failed_tasks(connector_name):
            logger.warning(
                "Connector task failed: %s", failed[0].get("trace", "")[:200]
            )
            break
        time.sleep(poll_interval)
    else:
        if last_count == 0:
            logger.warning(
                "Stabilization timed out with 0 rows — connector may not be ingesting"
            )


def _read_dlq_case_ids(
    bootstrap, dlq_topic, *, timeout=20, poll_timeout=1.0, max_empty_polls=3
):
    """Consume ``dlq_topic`` from the beginning and return the set of ``ID`` values found."""
    dlq_ids = set()
    consumer = KafkaConsumer(
        {
            "bootstrap.servers": bootstrap,
            "group.id": f"dlq-reader-{uuid4().hex[:8]}",
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
        }
    )
    try:
        consumer.subscribe([dlq_topic])

        deadline = time.monotonic() + timeout
        empty_polls = 0
        while (remaining := deadline - time.monotonic()) > 0:
            # Keep each poll short: polling for the whole remaining time would
            # use up the deadline on the first empty poll and the
            # consecutive-empty-poll exit below could never trigger.
            msg = consumer.poll(min(poll_timeout, remaining))
            if msg is None:
                empty_polls += 1
                # After partition assignment, 3 consecutive empty polls → done
                if empty_polls >= max_empty_polls and dlq_ids:
                    break
                continue
            empty_polls = 0
            if msg.error():
                if msg.error().code() != KafkaError._PARTITION_EOF:
                    logger.warning("DLQ consumer error: %s", msg.error())
                continue
            raw = msg.value()
            try:
                body = json.loads(raw)
                if "ID" in body:
                    dlq_ids.add(body["ID"])
                else:
                    logger.warning("DLQ message missing ID field: %s", raw[:200])
            except (ValueError, TypeError):
                # ValueError covers JSONDecodeError and undecodable bytes;
                # TypeError covers a null value or a non-container JSON body.
                logger.warning(
                    "Could not parse DLQ message body: %s", (raw or b"")[:200]
                )
    finally:
        consumer.close()
    return dlq_ids


def _fetch_error_table_rows(driver, quoted_table):
    """Return the rows of ``ERROR_TABLE(<table>)`` as dicts; log and return what was read on failure."""
    error_rows = []
    try:
        et_cursor = driver.snowflake_conn.cursor()
        et_cursor.execute(f"SELECT * FROM ERROR_TABLE({quoted_table})")
        et_col_names = [desc[0] for desc in et_cursor.description]
        for row in et_cursor.fetchall():
            error_rows.append(dict(zip(et_col_names, row)))
        et_cursor.close()
    except Exception as e:
        logger.warning("Could not query error table: %s", e)
    return error_rows


@pytest.fixture(scope="module")
def results(driver, mode_salt, ingestion_mode):
    """Single-table batch connector for type compatibility tests.

    Creates one table with all typed columns, sends every CASES entry in a
    single batch, waits for ingested rows, queries them, reads the DLQ, and
    yields a frozen Results object for assertion.

    The connector and both topics are torn down when the module finishes and
    also when any setup step after their creation raises.

    Why this doesn't reuse wait_for_rows / create_table / create_custom_connector:
      - wait_for_rows waits for an exact row count, but v4 modes reject varying
        subsets of test cases so the final count is unknown.  We use a
        stabilization loop (count stops changing for N seconds) instead.
      - create_table is function-scoped; this fixture is module-scoped (one
        connector per mode, shared across all test functions).  The mode-aware
        table name casing (v3 uppercases, v4 preserves case) also isn't handled
        by the existing fixture.
      - create_custom_connector has the same scope mismatch and provides no
        benefit over calling driver.createConnector directly (we already handle
        cleanup in the finally block).
    """
    from .test_type_compatibility import COLUMNS, CASES

    bootstrap = driver.kafkaAddress
    table_name = f"dt_compat{mode_salt}"
    dlq_topic = f"dlq_dt_compat{mode_salt}"

    # v3 (SnowflakeSinkConnector) uppercases topic→table internally.
    # v4-compat with autogenerated_table_name_sanitization=true also uppercases.
    # v4-ht (SnowflakeStreamingSinkConnector) preserves topic case for the table.
    # We must create and query the Snowflake table with the case the connector
    # will actually use, otherwise we get a case-sensitive table mismatch.
    sf_table = table_name if ingestion_mode == "v4-ht" else table_name.upper()
    quoted_table = quote_name(sf_table)

    # Consistent timezone for timestamp tests
    driver.snowflake_conn.cursor().execute("ALTER SESSION SET TIMEZONE = 'UTC'")

    # Create single table from COLUMNS spec.
    col_defs = ", ".join(f"{name} {ddl}" for name, ddl in COLUMNS.items())
    error_logging = " ERROR_LOGGING = TRUE" if ingestion_mode == "v4-ht" else ""
    driver.snowflake_conn.cursor().execute(
        f"CREATE OR REPLACE TABLE {quoted_table} ({col_defs}){error_logging}"
    )
    # v4 connector requires the table property for schema evolution (not the
    # connector config).  Without this, any structural mismatch routes to DLQ.
    driver.snowflake_conn.cursor().execute(
        f"ALTER TABLE {quoted_table} SET ENABLE_SCHEMA_EVOLUTION = TRUE"
    )

    # Stays None until the connector is registered, so teardown knows whether
    # there is a connector to close.
    connector_name = None
    try:
        # Create topics
        driver.createTopics(table_name, partitionNum=1, replicationNum=1)
        driver.createTopics(dlq_topic, partitionNum=1, replicationNum=1)

        # Register connector via driver.createConnector
        config = _build_mode_config(ingestion_mode, dlq_topic=dlq_topic)
        rest_request = driver.createConnector(
            name_salt=mode_salt,
            unsalted_name="dt_compat",
            config_template=config,
        )
        connector_name = rest_request["name"]
        driver.startConnectorWaitTime()

        # Build and send all records in one batch
        records = []
        keys = []
        for i, case in enumerate(CASES):
            record = {"ID": case.name, "TEST_CASE": case.name}
            record[case.col] = case.value
            records.append(json.dumps(record).encode())
            keys.append(json.dumps({"number": str(i)}).encode())

        driver.sendBytesData(table_name, records, keys)

        # Wait until row count stabilizes.  We cannot wait for an exact count
        # because v4-compat and v4-ht reject some "ingested" cases due to known
        # divergences (binary, boolean coercion, int-epoch, etc.).
        _wait_for_stable_row_count(driver, sf_table, connector_name)

        # Query all rows
        cursor = driver.snowflake_conn.cursor()
        cursor.execute(
            f'SELECT * FROM {quoted_table} ORDER BY RECORD_METADATA:"offset"::int'
        )
        col_names = [desc[0] for desc in cursor.description]
        raw_rows = cursor.fetchall()

        row_lookup = {}
        for row in raw_rows:
            row_dict = dict(zip(col_names, row))
            row_id = row_dict.get("ID")
            if row_id:
                row_lookup[row_id] = row_dict

        # Read DLQ — parse message body JSON to extract case ID
        dlq_ids = _read_dlq_case_ids(bootstrap, dlq_topic)

        # Query error table for v4-ht mode
        error_table_rows = []
        if ingestion_mode == "v4-ht":
            error_table_rows = _fetch_error_table_rows(driver, quoted_table)

        logger.info(
            "Results for mode=%s: %d rows, %d DLQ, %d error_table, %d sent",
            ingestion_mode,
            len(row_lookup),
            len(dlq_ids),
            len(error_table_rows),
            len(CASES),
        )

        yield Results(
            rows=row_lookup,
            dlq_ids=frozenset(dlq_ids),
            mode=ingestion_mode,
            total_sent=len(CASES),
            columns=COLUMNS,
            error_table_rows=tuple(error_table_rows),
        )
    finally:
        try:
            if connector_name is not None:
                driver.closeConnector(connector_name)
        finally:
            # Topic deletion is best-effort; a failure must not mask the
            # outcome of the tests, but it is worth a log line.
            for topic in (table_name, dlq_topic):
                try:
                    driver.deleteTopic(topic)
                except Exception as e:
                    logger.warning("Could not delete topic %s: %s", topic, e)


# ---------------------------------------------------------------------------
# Fixtures — per-test connector (test_unsupported_types.py)
#
# These stay separate because unsupported types (GEOGRAPHY, GEOMETRY, VECTOR,
# structured OBJECT/ARRAY) crash streaming channels and cannot share a batch
# with well-behaved types.
# ---------------------------------------------------------------------------


@pytest.fixture
def typed_table(driver, mode_salt, ingestion_mode):
    """Factory fixture: build a Snowflake table and a Kafka topic for one test.

    The yielded callable takes ``(test_id, col_ddl)``, creates a table with a
    ``VALUE_COL`` of the given DDL type plus ``RECORD_METADATA``, enables
    schema evolution on it, creates a single-partition topic and returns the
    topic name.  Every topic created this way is deleted (best-effort) at
    teardown.
    """
    topics_to_delete = []

    def _create(test_id, col_ddl):
        topic = f"{test_id}{mode_salt}"
        # v4-ht uses the topic name as-is for the table; other modes upper-case it.
        if ingestion_mode == "v4-ht":
            sf_table = topic
        else:
            sf_table = topic.upper()
        quoted = quote_name(sf_table)

        create_sql = (
            f"CREATE OR REPLACE TABLE {quoted} "
            f"(VALUE_COL {col_ddl}, RECORD_METADATA VARIANT)"
        )
        evolve_sql = f"ALTER TABLE {quoted} SET ENABLE_SCHEMA_EVOLUTION = TRUE"
        for statement in (create_sql, evolve_sql):
            driver.snowflake_conn.cursor().execute(statement)

        driver.createTopics(topic, partitionNum=1, replicationNum=1)
        topics_to_delete.append(topic)
        return topic

    try:
        yield _create
    finally:
        for topic in topics_to_delete:
            try:
                driver.deleteTopic(topic)
            except Exception:
                pass


@pytest.fixture
def ingest_one_type_abort(driver, mode_salt, ingestion_mode, typed_table):
    """Factory fixture: run one connector in abort mode against one column type.

    The yielded callable takes ``(test_id, col_ddl, values, *, timeout=60)``.
    It creates the table and topic through ``typed_table``, registers a
    connector built without a DLQ topic, produces one record per value, and
    polls until a connector task fails, the table holds at least
    ``len(values)`` rows, or ``timeout`` seconds pass.  It returns an
    IngestResult with the ingested ``VALUE_COL`` values and the failed task's
    trace, if any.  All connectors registered this way are closed at teardown,
    newest first.
    """

    connectors_to_close = []

    def _run(test_id, col_ddl, values, *, timeout=60):
        topic = typed_table(test_id, col_ddl)
        if ingestion_mode == "v4-ht":
            sf_table = topic
        else:
            sf_table = topic.upper()

        # No dlq_topic: the template's errors.tolerance is kept, so a
        # validation error fails the task right away instead of being routed.
        connector_config = _build_mode_config(ingestion_mode)
        rest_request = driver.createConnector(
            name_salt=mode_salt,
            unsalted_name=test_id,
            config_template=connector_config,
        )
        connector_name = rest_request["name"]
        connectors_to_close.append(connector_name)
        driver.startConnectorWaitTime()

        payloads = [json.dumps({"VALUE_COL": v}).encode() for v in values]
        message_keys = [
            json.dumps({"number": str(i)}).encode() for i in range(len(values))
        ]
        driver.sendBytesData(topic, payloads, message_keys)

        connector_error = None
        give_up_at = time.monotonic() + timeout
        while time.monotonic() < give_up_at:
            failed_tasks = driver.get_failed_tasks(connector_name)
            if failed_tasks:
                connector_error = failed_tasks[0].get("trace", "no trace")
                logger.info(
                    "Connector error for %s: %.500s", test_id, connector_error
                )
                break
            row_count = driver.select_number_of_records(sf_table) or 0
            if row_count >= len(values):
                break
            time.sleep(2)

        select_sql = f'SELECT VALUE_COL FROM {quote_name(sf_table)} ORDER BY RECORD_METADATA:"offset"::int'
        cursor = driver.snowflake_conn.cursor()
        fetched = cursor.execute(select_sql).fetchall()

        return IngestResult(
            values=[row[0] for row in fetched],
            connector_error=connector_error,
        )

    try:
        yield _run
    finally:
        for connector_name in reversed(connectors_to_close):
            driver.closeConnector(connector_name)


@dataclass
class IngestResult:
    """Result of a per-test ingestion run (returned by ``ingest_one_type_abort``).

    NOTE(review): described as a legacy type originally tied to a
    ``standalone_ingest`` fixture that is not defined in this file — confirm
    whether test_unsupported_types.py still relies on the DLQ fields.
    """

    # VALUE_COL values read back from the table, ordered by record offset.
    values: list
    # Not set by ingest_one_type_abort; stays at the default there.
    dlq_count: int = 0
    # Not set by ingest_one_type_abort; stays an empty list there.
    dlq_errors: list = field(default_factory=list)
    # Trace of the first failed connector task, or None if no task failed.
    connector_error: str | None = None


================================================
FILE: test/tests/compatibility/test_compatibility_case_sensitivity.py
================================================
from dataclasses import dataclass
import json
from typing import Any, Optional

import pytest
from snowflake.connector import DictCursor

from lib.config_migration import V3_CONFIG_TEMPLATE
from lib.driver import KafkaDriver
from lib.fixtures.table import Table

pytestmark = pytest.mark.compatibility


@pytest.fixture
def case(connector_version: str):
    """Return a selector that picks a value by connector version.

    The selector is called as ``case(v3=..., v4=...)`` and returns the
    argument matching ``connector_version``; any other version raises
    ``ValueError``.
    """

    def _case(*, v3, v4):
        if connector_version == "v3":
            return v3
        if connector_version == "v4":
            return v4
        raise ValueError(f"Unsupported connector version: {connector_version}")

    return _case


def test_compatibility_case_sensitivity_table_name(
    driver: KafkaDriver,
    case,
    connector_version: str,
    create_connector,
    create_topics,
    name_salt: str,
    wait_for_rows,
):
    """Check that the connector derives the table name we expect per topic.

    This pins compatibility with KC v3, i.e. the behaviour with client-side
    validation enabled.  One record is sent per scenario and the test then
    verifies that a table with the expected (case-sensitive) name exists and
    contains that record.
    """

    @dataclass(frozen=True)
    class TableNameCase:
        case_name: str  # description
        unsalted_topic_name: str
        topic2table_value: Optional[str]
        expected_table_name: str

    # KC v3 does not support:
    # - quoted table names in topic2table.map
    # - arbitrary unicode characters in topic2table.map
    quoted_mapping_scenarios = case(
        v3=[],
        v4=[
            TableNameCase(
                "lower_e_mapped_quoted",
                "e_topic",
                f'"e{name_salt}"',
                f"e{name_salt}",
            ),
            TableNameCase(
                "upper_f_mapped_quoted",
                "f_topic",
                f'"F{name_salt}"',
                f"F{name_salt}",
            ),
            TableNameCase(
                "unicode_mapped_quoted",
                "g_topic",
                f'"❄️{name_salt}"',
                f"❄️{name_salt}",
            ),
        ],
    )

    scenarios = [
        # If no topic2table.map is provided, the table name is the same as the topic name.
        # NB the topic name is salted by the driver.
        TableNameCase("lower_a", "a", None, f"A{name_salt}"),
        TableNameCase("upper_b", "B", None, f"B{name_salt}"),
        TableNameCase("lower_c_mapped", "c_topic", f"c{name_salt}", f"C{name_salt}"),
        TableNameCase("upper_d_mapped", "D_topic", f"D{name_salt}", f"D{name_salt}"),
        *quoted_mapping_scenarios,
    ]

    topics = create_topics(
        [scenario.unsalted_topic_name for scenario in scenarios], with_tables=False
    )

    mapping_entries = [
        f"{scenario.unsalted_topic_name}{name_salt}:{scenario.topic2table_value}"
        for scenario in scenarios
        if scenario.topic2table_value is not None
    ]
    topic2table_map = ",".join(mapping_entries)

    if connector_version == "v3" and topic2table_map == "":
        # In KC v3, topic2table.map cannot be empty.
        topic2table_map = None

    candidate_config = {
        **V3_CONFIG_TEMPLATE,
        "topics": ",".join(topics),
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "true",
        "snowflake.topic2table.map": topic2table_map,
    }
    # Settings whose value is None are left out of the submitted config.
    connector = create_connector(
        v3_config={
            key: value for key, value in candidate_config.items() if value is not None
        }
    )
    driver.startConnectorWaitTime()

    for scenario in scenarios:
        message = json.dumps({"case_name": scenario.case_name}).encode("utf-8")
        driver.sendBytesData(
            f"{scenario.unsalted_topic_name}{name_salt}",
            [message],
        )

    for scenario in scenarios:
        expected_table = Table(driver, scenario.expected_table_name)
        wait_for_rows(expected_table.name, 1, connector_name=connector.name)

        listing = (
            driver.snowflake_conn.cursor(DictCursor).execute("show tables").fetchall()
        )
        existing_names = [entry["name"] for entry in listing]
        assert scenario.expected_table_name in existing_names

        # Make sure it's the correct one, i.e. has the data we sent it.
        assert expected_table.select_scalar("CASE_NAME") == scenario.case_name


def test_compatibility_case_sensitivity_ingestion_columns(
    driver: KafkaDriver,
    create_connector,
    create_topics,
    create_table,
    wait_for_rows,
):
    """Check how JSON keys of differing case map onto pre-created columns.

    One topic and one table (with quoted, case-sensitive column names) are
    created per case; a single record is sent to each topic and the resulting
    row is compared column by column against ``expected_values``.
    """

    @dataclass(frozen=True)
    class ColumnIngestionCase:
        case_name: str
        # Column names and their DDL types, aligned by position.
        column_names: list[str]
        column_types: list[str]
        # JSON record to send; values may be nested (see the variant case).
        payload: dict[str, Any]
        # Expected cell values, aligned by position with ``column_names``.
        expected_values: list[Any]

    test_cases = [
        ColumnIngestionCase(
            case_name="upper_A",
            column_names=["A"],
            column_types=["VARCHAR"],
            payload={"A": "upper A"},
            expected_values=["upper A"],
        ),
        ColumnIngestionCase(
            case_name="lower_b_into_upper_B",
            column_names=["B"],
            column_types=["VARCHAR"],
            payload={"b": "lower b into upper B"},
            expected_values=["lower b into upper B"],
        ),
        ColumnIngestionCase(
            case_name="lower_c_into_lower_c",
            column_names=["c"],
            column_types=["VARCHAR"],
            # KC v3 requires quotes to not uppercase the key.
            payload={'"c"': "lower c into lower c"},
            expected_values=["lower c into lower c"],
        ),
        ColumnIngestionCase(
            case_name="pair_D_d",
            column_names=["D", "d"],
            column_types=["VARCHAR", "VARCHAR"],
            payload={"D": "upper D", '"d"': "lower d"},
            expected_values=["upper D", "lower d"],
        ),
        ColumnIngestionCase(
            case_name="pair_E_f",
            column_names=["E", "f"],
            column_types=["VARCHAR", "VARCHAR"],
            payload={"E": "upper E", '"f"': "lower f"},
            expected_values=["upper E", "lower f"],
        ),
        ColumnIngestionCase(
            case_name="unicode",
            column_names=["❄️"],
            column_types=["VARCHAR"],
            payload={'"❄️"': "unicode ❄️"},
            expected_values=["unicode ❄️"],
        ),
        # We don't process keys beyond the first level.
        ColumnIngestionCase(
            case_name="variant",
            column_names=["V"],
            column_types=["VARIANT"],
            payload={"V": {"a": "b", "C": "D", '"e"': "❄️"}},
            expected_values=[{"a": "b", "C": "D", '"e"': "❄️"}],
        ),
    ]

    # Topics, tables and test_cases are kept in the same order; the loops
    # below rely on that positional alignment (enforced by zip(strict=True)).
    topics = create_topics(
        [test_case.case_name for test_case in test_cases],
        with_tables=False,
    )
    tables = [
        create_table(
            test_case.case_name.upper(),
            # Column list with every name double-quoted so its case is kept,
            # followed by the RECORD_METADATA column.
            columns=(
                "("
                + ", ".join(
                    f'"{column_name}" {column_type}'
                    for column_name, column_type in zip(
                        test_case.column_names, test_case.column_types, strict=True
                    )
                )
                + ', "RECORD_METADATA" VARIANT)'
            ),
            cleanup_topic=False,
        )
        for test_case in test_cases
    ]

    connector = create_connector(
        v3_config={
            **V3_CONFIG_TEMPLATE,
            "topics": ",".join(topics),
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "snowflake.enable.schematization": "true",
        }
    )
    driver.startConnectorWaitTime()

    # One record per topic.
    for topic, test_case in zip(topics, test_cases, strict=True):
        driver.sendBytesData(topic, [json.dumps(test_case.payload).encode("utf-8")])

    for test_case, table in zip(test_cases, tables, strict=True):
        wait_for_rows(table.name, 1, connector_name=connector.name)

        actual_row = table.select("*")[0]
        for column_name, expected_value, column_type in zip(
            test_case.column_names,
            test_case.expected_values,
            test_case.column_types,
            strict=True,
        ):
            # VARIANT cells are parsed from JSON text before comparing.
            if column_type == "VARIANT":
                actual_value = json.loads(actual_row[column_name])
            else:
                actual_value = actual_row[column_name]
            assert actual_value == expected_value, (
                f"{test_case.case_name}.{column_name}: "
                f"expected {expected_value}, got {actual_value}"
            )


def test_case_sensitivity_schema_evolution(
    driver: KafkaDriver,
    create_connector,
    create_topics,
    create_table,
    wait_for_rows,
):
    """Check which columns schema evolution creates for differently-cased JSON keys.

    Every scenario gets its own topic and a table that starts out with only
    ``RECORD_METADATA`` and has ``ENABLE_SCHEMA_EVOLUTION = TRUE``. A single
    JSON record is produced per topic. Afterwards each table must contain
    exactly the expected columns (plus ``RECORD_METADATA``), and its first row
    must hold the expected value in each of them.
    """

    @dataclass(frozen=True)
    class SchemaEvolutionCase:
        # Passed to create_topics and, upper-cased, to create_table.
        case_name: str
        # JSON object sent to the topic.
        payload: dict[str, str]
        # Column name expected in the table -> value expected in the row.
        expected_values: dict[str, str]

    # Positional order: case_name, payload, expected_values.
    scenarios = [
        SchemaEvolutionCase("upper_A", {"A": "upper A"}, {"A": "upper A"}),
        SchemaEvolutionCase(
            "lower_b_into_upper_B",
            {"b": "lower b into upper B"},
            {"B": "lower b into upper B"},
        ),
        SchemaEvolutionCase("quoted_c", {'"c"': "quoted c"}, {"c": "quoted c"}),
        SchemaEvolutionCase(
            "pair_D_d",
            {"D": "upper D", '"d"': "lower d"},
            {"D": "upper D", "d": "lower d"},
        ),
        SchemaEvolutionCase(
            "pair_E_f",
            {"E": "upper E", '"f"': "lower f"},
            {"E": "upper E", "f": "lower f"},
        ),
        # Funny enough, KC v3 is able to ingest an unquoted unicode column
        # if it immediately follows a schema evolution,
        # whereas a regular ingestion would fail.
        SchemaEvolutionCase("unicode", {"❄️": "unicode"}, {"❄️": "unicode"}),
    ]

    topic_names = [scenario.case_name for scenario in scenarios]
    topics = create_topics(topic_names, with_tables=False)

    # Each table starts with the metadata column only; the remaining columns
    # are expected to appear through schema evolution.
    evolving_columns = '("RECORD_METADATA" VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE'
    tables = []
    for scenario in scenarios:
        tables.append(
            create_table(
                scenario.case_name.upper(),
                columns=evolving_columns,
                cleanup_topic=False,
            )
        )

    connector_config = dict(V3_CONFIG_TEMPLATE)
    connector_config.update(
        {
            "topics": ",".join(topics),
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            # KC v3 needs this connector setting to attempt schema evolution.
            # KC v4 ignores it and instead relies on the ENABLE_SCHEMA_EVOLUTION
            # table property set when the tables were created above.
            "snowflake.enable.schematization": "true",
        }
    )
    connector = create_connector(v3_config=connector_config)
    driver.startConnectorWaitTime()

    # One record per topic.
    for topic, scenario in zip(topics, scenarios, strict=True):
        encoded_payload = json.dumps(scenario.payload).encode("utf-8")
        driver.sendBytesData(topic, [encoded_payload])

    for scenario, table in zip(scenarios, tables, strict=True):
        wait_for_rows(table.name, 1, connector_name=connector.name)

        # First: the set of columns must match exactly.
        found_columns = {entry[0] for entry in table.schema()}
        wanted_columns = set(scenario.expected_values.keys()).union(
            {"RECORD_METADATA"}
        )
        assert found_columns == wanted_columns, (
            f"{scenario.case_name}: "
            f"expected {wanted_columns}, got {found_columns}"
        )

        # Second: the single ingested row must carry the expected values.
        first_row = table.select("*")[0]
        for column_name in scenario.expected_values:
            wanted_value = scenario.expected_values[column_name]
            found_value = first_row[column_name]
            assert found_value == wanted_value, (
                f"{scenario.case_name}.{column_name}: "
                f"expected {wanted_value}, got {found_value}"
            )


================================================
FILE: test/tests/compatibility/test_migration.py
================================================
"""
### Migration with duplicates but no gaps

During migration, KC v4 will inherit consumer group offsets from KC v3 if the following
conditions are met:

1. The new connector is given the same name as the old one.
    (They will belong to the same consumer group.)
2. At most `offsets.retention.minutes` has passed (defaults to 7 days).

Inheriting the consumer group offsets means that the new connector will start ingesting from
the last offset committed to *Kafka*. It's possible, especially under continuous load, that
Kafka will not be fully caught up to the last offset committed to Snowflake.

This will result in duplicate data being ingested, but no gaps.
It should be possible to deduplicate the data after ingestion using the RECORD_METADATA column.

### Migration with possible gaps

If the new connector has a different name, or too much time has passed, then depending on the
value of `auto.offset.reset`, KC v4 will start ingesting:
- for `earliest`: from the beginning of the partition
- for `latest`: only data produced to Kafka after the new connector was created
"""

import logging
import time
import pytest

from lib.config_migration import V3_CONFIG_TEMPLATE, v3_config_to_v4
from lib.driver import KafkaDriver
from lib.utils import RecordProducer, wait_for

pytestmark = pytest.mark.compatibility


# Don't parameterize on v3, we create both connector versions explicitly here.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_migration_without_ingestion(
    driver: KafkaDriver,
    name_salt,
    create_custom_connector,
    create_table,
    wait_for_rows,
):
    """Test migration when there are no in-flight data during switchover.

    Sequence:
      1. A v3 connector is created and a first batch of 10 records is produced
         and waited for in the table.
      2. The test waits until the Kafka consumer group offset has reached the
         number of records produced, then closes the v3 connector.
      3. A second batch of 10 records is produced while no connector is running.
      4. A v4 connector is created under the same name, using the v3 config
         converted by ``v3_config_to_v4``, and a third batch of 10 is produced.
      5. The test waits for the table to hold all records produced so far.
    """

    test_name = "test_migration_without_duplicates"

    table = create_table(
        test_name.upper(), columns='(record_metadata variant, "NUMBER" varchar)'
    )
    # The topic name is the test name followed by the name_salt fixture value.
    topic = f"{test_name}{name_salt}"

    producer = RecordProducer(driver, topic)

    # Shared base config: the v4 config further down is derived from this dict.
    v3_config_template = {
        **V3_CONFIG_TEMPLATE,
        "topics": topic,
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "true",
    }

    logging.info("Creating v3 connector and sending initial batch")
    v3_connector = create_custom_connector(test_name, v3_config_template)
    producer.send(10)
    logging.info(f"Produced 10 records (total: {producer.records_produced})")
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v3_connector.name,
    )

    logging.info(
        f"Waiting for Kafka consumer group offset to catch up to {producer.records_produced}"
    )
    # Rows being in Snowflake does not imply the offset is committed to Kafka
    # yet, so wait for the consumer group offset explicitly before shutting
    # down. `or 0` covers a falsy return value (presumably None when nothing
    # has been committed yet — confirm against KafkaDriver).
    assert wait_for(
        lambda: (
            (driver.get_consumer_group_offset(v3_connector.name, topic) or 0)
            >= producer.records_produced
        )
    ), f"Consumer group offset never reached {producer.records_produced}"
    logging.info(
        f"Consumer group offset: {driver.get_consumer_group_offset(v3_connector.name, topic)}"
    )

    logging.info("Closing v3 connector")
    assert v3_connector.close(wait_timeout=60)
    logging.info("Sending second batch while connector is down")
    producer.send(10)
    logging.info(f"Produced 10 records (total: {producer.records_produced})")

    logging.info("Creating v4 connector and sending third batch")
    v4_config_template = v3_config_to_v4(v3_config_template)
    # Same connector name as v3 (test_name): per the module docstring this is
    # what lets v4 inherit the consumer group offsets.
    v4_connector = create_custom_connector(test_name, v4_config_template)
    producer.send(10)
    logging.info(f"Produced 10 records (total: {producer.records_produced})")
    logging.info(
        f"Waiting for all {producer.records_produced} records to land in Snowflake"
    )
    # Final check: the table must reach the total number of records produced
    # across all three batches.
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v4_connector.name,
    )
    logging.info(
        f"All {producer.records_produced} records ingested — no gaps, no duplicates"
    )


# Don't parameterize on v3, we create both connector versions explicitly here.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("ssv1_offset_migration", ["skip", "strict"])
def test_migration_with_ingestion(
    driver: KafkaDriver,
    name_salt,
    create_custom_connector,
    create_table,
    wait_for_rows,
    ssv1_offset_migration,
):
    """Test migration when there are in-flight data during switchover.

    With ssv1_offset_migration=skip (default), KC v4 starts from the consumer group offset,
    which may lag behind the SSv1 committed offset, causing duplicates.

    With ssv1_offset_migration=strict, KC v4 reads the SSv1 committed offset and uses it
    as the starting point, so no duplicates should occur.

    Final assertions:
      - the number of distinct ``number`` values equals the records produced;
      - the number of distinct ``record_metadata:offset`` values equals the
        records produced;
      - ``strict``: total rows equal the records produced (no duplicates);
      - ``skip``: total rows exceed the records produced (duplicates expected).
    """

    # Mixed case on purpose to ensure case sensitivity is handled correctly.
    test_name = f"test_Migration_with_possible_duplicates_{ssv1_offset_migration}"
    warmup_records = 10

    table = create_table(
        test_name.upper(),
        columns='(record_metadata variant, "NUMBER" varchar)',
    )
    # The topic name is the test name followed by the name_salt fixture value.
    topic = f"{test_name}{name_salt}"

    producer = RecordProducer(driver, topic)

    # Shared base config: the v4 config further down is derived from this dict.
    v3_config_template = {
        **V3_CONFIG_TEMPLATE,
        "topics": topic,
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "true",
    }

    logging.info(f"Creating v3 connector and sending {warmup_records} warmup records")
    v3_connector = create_custom_connector(test_name, v3_config_template)
    producer.send(warmup_records)
    logging.info(
        f"Produced {warmup_records} records (total: {producer.records_produced})"
    )
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v3_connector.name,
    )

    logging.info("Starting continuous producer")
    producer.start_continuous()

    # Everything that runs while the continuous producer is active sits inside
    # the try block, so the finally clause stops the producer even when an
    # assertion or wait fails.
    try:
        logging.info("Waiting for v3 to ingest beyond the warmup batch")
        # A row count above the warmup size shows v3 has ingested records from
        # the continuous stream before it is shut down.
        assert wait_for(lambda: table.select_scalar("count(*)") > warmup_records), (
            f"v3 never ingested beyond {warmup_records} warmup records"
        )
        logging.info(f"v3 ingested {table.select_scalar('count(*)')} rows so far")

        logging.info("Closing v3 connector while data is still flowing")
        assert v3_connector.close(wait_timeout=60)

        logging.info(
            "Creating v4 connector (same name → inherits consumer group offsets)"
        )
        v4_config_template = {
            **v3_config_to_v4(v3_config_template),
            # The parametrized mode under test ("skip" or "strict").
            "snowflake.streaming.classic.offset.migration": ssv1_offset_migration,
            # Disable compatibility validation to allow not migrating offsets from SSv1.
            "snowflake.streaming.validate.compatibility.with.classic": "false",
        }
        v4_connector = create_custom_connector(test_name, v4_config_template)

        logging.info("Letting v4 catch up for 5s before snapshot")
        time.sleep(5)
        # Snapshot of the producer counter; the producer keeps running, so the
        # live counter continues to grow after this point.
        records_produced_so_far = producer.records_produced
        logging.info(
            f"Snapshot: {records_produced_so_far} records produced, "
            f"{table.select_scalar('count(*)')} rows in Snowflake"
        )
        # Wait for the row count to pass the snapshot; the log line below
        # treats this as evidence that v4 is ingesting.
        wait_for_rows(
            table_name=table.name,
            at_least=True,
            expected=records_produced_so_far + 1,
            connector_name=v4_connector.name,
        )
        logging.info(
            f"v4 is actively ingesting ({table.select_scalar('count(*)')} rows)"
        )

    finally:
        producer.stop_continuous()

    # Read after stop_continuous(), so this is treated as the final number of
    # records produced.
    expected = producer.records_produced
    logging.info(
        f"Waiting for all {expected} distinct records to land in Snowflake "
        f"(currently {table.select_scalar('count(distinct number)')} distinct, "
        f"{table.select_scalar('count(*)')} total)"
    )
    # Compare distinct values rather than the raw row count: in "skip" mode
    # duplicate rows are expected, so count(*) may legitimately exceed
    # `expected`.
    assert wait_for(
        lambda: table.select_scalar("count(distinct number)") == expected,
        timeout=120,
    ), (
        f"Expected {expected} distinct records, "
        f"got {table.select_scalar('count(distinct number)')} distinct / {table.select_scalar('count(*)')} total"
    )

    # NOTE(review): comparing distinct offsets with the record count assumes
    # every record has a unique offset, i.e. a single partition — confirm
    # against RecordProducer.
    distinct_offsets = table.select_scalar("count(distinct record_metadata:offset)")
    total_rows = table.select_scalar("count(*)")
    logging.info(
        f"Final: {expected} distinct records, {distinct_offsets} distinct offsets, "
        f"{total_rows} total rows (duplicates: {total_rows - expected})"
    )

    assert distinct_offsets == expected, (
        f"Expected {expected} distinct offsets, got {distinct_offsets}"
    )

    if ssv1_offset_migration == "strict":
        # strict: no record may have been ingested twice.
        assert total_rows == expected, (
            f"With strict mode, expected exactly {expected} rows (no duplicates), "
            f"but got {total_rows}"
        )
    else:
        # skip: the test requires at least one duplicate row to be present.
        assert total_rows > expected, (
            f"Expected duplicates (total > {expected}), but got {total_rows}"
        )


# Don't parameterize on v3, we create both connector versions explicitly here.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_migration_different_connector_name(
    driver: KafkaDriver,
    name_salt,
    create_custom_connector,
    create_table,
    wait_for_rows,
):
    """Prove that SYSTEM$MIGRATE_SSV1_CHANNEL_OFFSET migrates offsets server-side.

    Uses a *different* connector name for v4 so there is no consumer group inheritance.
    With auto.offset.reset=earliest, Kafka re-delivers all records from offset 0.
    With ssv1_offset_migration=skip, v4 would re-ingest everything → duplicates.
    With ssv1_offset_migration=strict, the system function writes the SSv1 offset
    to the SSv2 channel, so v4 skips already-committed records → no duplicates.

    IMPORTANT NOTE:
    This test only works because KC v3 did *not* append the connector name to the channel name.
    If it did, we'd be looking up the wrong channel name.

    Only the ``strict`` mode is exercised here. The test passes when both the
    total row count and the number of distinct ``record_metadata:offset``
    values equal the number of records produced (20 before the switchover plus
    10 after it).
    """

    test_name = "test_Migration_different_connector_name"

    table = create_table(
        test_name.upper(), columns='(record_metadata variant, "NUMBER" varchar)'
    )
    # The topic name is the test name followed by the name_salt fixture value.
    topic = f"{test_name}{name_salt}"

    producer = RecordProducer(driver, topic)

    # Shared base config: the v4 config further down is derived from this dict.
    v3_config_template = {
        **V3_CONFIG_TEMPLATE,
        "topics": topic,
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "true",
    }

    # Phase 1: v3 ingests records via SSv1
    logging.info("Creating v3 connector and sending initial batch")
    v3_connector = create_custom_connector(test_name, v3_config_template)
    producer.send(20)
    logging.info(f"Produced 20 records (total: {producer.records_produced})")
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v3_connector.name,
    )
    logging.info("Closing v3 connector")
    assert v3_connector.close(wait_timeout=60)

    # Row count after v3 has been closed; only used for the log line below.
    v3_rows = table.select_scalar("count(*)")
    logging.info(f"v3 ingested {v3_rows} rows, now closed")

    # Phase 2: v4 with a DIFFERENT connector name → no consumer group inheritance.
    # auto.offset.reset=earliest forces Kafka to re-deliver from offset 0.
    # The system function is the only mechanism that prevents re-ingestion.
    v4_name = f"{test_name}_v4"
    v4_config_template = {
        **v3_config_to_v4(v3_config_template),
        "snowflake.streaming.classic.offset.migration": "strict",
        "consumer.override.auto.offset.reset": "earliest",
    }
    logging.info(
        f"Creating v4 connector with different name ({v4_name}) and strict mode"
    )
    v4_connector = create_custom_connector(v4_name, v4_config_template)

    # Phase 3: Send more records and verify no duplicates
    producer.send(10)
    expected = producer.records_produced
    logging.info(f"Produced 10 more records (total: {expected})")

    wait_for_rows(
        table_name=table.name,
        expected=expected,
        connector_name=v4_connector.name,
    )

    total_rows = table.select_scalar("count(*)")
    distinct_offsets = table.select_scalar("count(distinct record_metadata:offset)")
    logging.info(
        f"Final: {expected} expected, {distinct_offsets} distinct offsets, "
        f"{total_rows} total rows"
    )

    # Both checks are needed: distinct offsets == expected rules out gaps,
    # total rows == expected rules out re-ingested (duplicate) rows.
    assert distinct_offsets == expected, (
        f"Expected {expected} distinct offsets, got {distinct_offsets}"
    )
    assert total_rows == expected, (
        f"System function migration should prevent duplicates: "
        f"expected {expected} rows, got {total_rows}"
    )


# Don't parameterize on v3, we create both connector versions explicitly here.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_migration_from_snowpipe(
    driver: KafkaDriver,
    name_salt,
    create_custom_connector,
    create_table,
    wait_for_rows,
):
    """Test migration from KC v3 file-based Snowpipe to KC v4 (SSv2).

    SNOW-3293138: Verifies that a clean switchover from file-based Snowpipe to
    Snowpipe Streaming produces no gaps and no duplicates when the consumer group
    offsets are inherited (same connector name).

    The table uses the non-schematized layout
    ``(record_metadata variant, record_content variant)``, so record numbers
    are read from ``record_content:number``. The test passes when both the
    total row count and the number of distinct ``record_content:number`` values
    equal the number of records produced.
    """

    test_name = "test_migration_from_snowpipe"
    warmup_records = 10

    table = create_table(
        test_name.upper(),
        columns="(record_metadata variant, record_content variant)",
    )
    # The topic name is the test name followed by the name_salt fixture value.
    topic = f"{test_name}{name_salt}"
    producer = RecordProducer(driver, topic)

    # File-based Snowpipe: schematization unsupported, buffer.flush.time >= 10.
    v3_config_template = {
        **V3_CONFIG_TEMPLATE,
        "topics": topic,
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "buffer.flush.time": "10",
    }
    # Switch the template to file-based Snowpipe and drop the max.client.lag
    # key (presumably streaming-only — confirm). pop() without a default raises
    # KeyError if V3_CONFIG_TEMPLATE does not define that key.
    v3_config_template["snowflake.ingestion.method"] = "SNOWPIPE"
    v3_config_template.pop("snowflake.streaming.max.client.lag")

    logging.info(
        f"Creating v3 Snowpipe connector and sending {warmup_records} warmup records"
    )
    v3_connector = create_custom_connector(test_name, v3_config_template)
    producer.send(warmup_records)
    logging.info(
        f"Produced {warmup_records} records (total: {producer.records_produced})"
    )
    wait_for_rows(
        table_name=table.name,
        expected=producer.records_produced,
        connector_name=v3_connector.name,
    )

    logging.info("Starting continuous producer")
    producer.start_continuous()

    # Everything that runs while the continuous producer is active sits inside
    # the try block, so the finally clause stops the producer even when an
    # assertion or wait fails.
    try:
        logging.info("Waiting for v3 to ingest beyond the warmup batch")
        assert wait_for(lambda: table.select_scalar("count(*)") > warmup_records), (
            f"v3 never ingested beyond {warmup_records} warmup records"
        )
        logging.info(f"v3 ingested {table.select_scalar('count(*)')} rows so far")

        logging.info("Closing v3 connector while data is still flowing")
        assert v3_connector.close(wait_timeout=60)

        # NOTE(review): test_migration_without_ingestion guards this call with
        # `or 0`, which suggests it can return a falsy value (presumably None).
        # Here the result is passed straight to wait_for_rows — confirm it
        # cannot be None once the connector has been closed.
        v3_kafka_offset = driver.get_consumer_group_offset(v3_connector.name, topic)
        logging.info(f"v3 consumer group offset after shutdown: {v3_kafka_offset}")

        # File-based Snowpipe ingests staged files asynchronously. If we start
        # v4 (SSv2) before Snowpipe finishes draining, SSv2 rows from newer
        # offsets can land before Snowpipe finishes loading older ones,
        # breaking end-to-end ordering.
        logging.info("Waiting for Snowpipe to finish ingesting staged files")
        # The committed consumer group offset is used as the minimum row count
        # to wait for; no connector_name is passed since v3 is already closed.
        wait_for_rows(table_name=table.name, expected=v3_kafka_offset, at_least=True)

        logging.info(
            "Creating v4 connector (same name → inherits consumer group offsets)"
        )
        v4_config_template = v3_config_to_v4(v3_config_template)
        v4_connector = create_custom_connector(test_name, v4_config_template)

        logging.info("Letting v4 catch up for 5s before snapshot")
        time.sleep(5)
        # Snapshot of the producer counter; the producer keeps running, so the
        # live counter continues to grow after this point.
        records_produced_so_far = producer.records_produced
        logging.info(
            f"Snapshot: {records_produced_so_far} records produced, "
            f"{table.select_scalar('count(*)')} rows in Snowflake"
        )
        # Wait for the row count to pass the snapshot; the log line below
        # treats this as evidence that v4 is ingesting.
        wait_for_rows(
            table_name=table.name,
            at_least=True,
            expected=records_produced_so_far + 1,
            connector_name=v4_connector.name,
        )
        logging.info(
            f"v4 is actively ingesting ({table.select_scalar('count(*)')} rows)"
        )

    finally:
        producer.stop_continuous()

    # Read after stop_continuous(), so this is treated as the final number of
    # records produced.
    expected = producer.records_produced
    logging.info(
        f"Waiting for all {expected} distinct records to land in Snowflake "
        f"(currently {table.select_scalar('count(distinct record_content:number)')} "
        f"distinct, {table.select_scalar('count(*)')} total)"
    )
    wait_for_rows(
        table_name=table.name,
        expected=expected,
        connector_name=v4_connector.name,
    )

    total_rows = table.select_scalar("count(*)")
    distinct_numbers = table.select_scalar("count(distinct record_content:number)")
    logging.info(
        f"Final: {expected} expected, {distinct_numbers} distinct, {total_rows} total"
    )
    # Distinct numbers == expected rules out gaps; total rows == expected
    # rules out duplicates.
    assert distinct_numbers == expected, (
        f"Expected {expected} distinct records, got {distinct_numbers}"
    )
    assert total_rows == expected, (
        f"Expected exactly {expected} rows (no duplicates), got {total_rows}"
    )


================================================
FILE: test/tests/compatibility/test_schematization_disabled.py
================================================
import json

from lib.config_migration import V3_CONFIG_TEMPLATE
from lib.fixtures.table import Table
from lib.driver import KafkaDriver


def test_compatibility_schematization_disabled_complex(
    driver: KafkaDriver, create_connector, create_topics, wait_for_rows
):
    """With schematization off, nested JSON is stored as VARIANT in RECORD_CONTENT.

    No table is created up front; the connector is expected to auto-create it:
    KCv3 with (RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT), and KCv4 with
    both columns as VARIANT when schematization=off.

    Three JSON documents are sent to partition 0 and then queried back by
    offset through VARIANT path expressions.

    Runs for both v3 and v4 to verify compatibility.
    """
    created_topics = create_topics(
        ["schematization_disabled_complex"], with_tables=False
    )
    topic = created_topics[0]

    connector_settings = {
        **V3_CONFIG_TEMPLATE,
        "topics": topic,
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "false",
    }
    connector = create_connector(v3_config=connector_settings)
    driver.startConnectorWaitTime()

    # The list index is the Kafka offset each document is later queried by.
    documents = [
        # 0: nested object with arrays
        {
            "user": {"name": "Alice", "scores": [1, 2, 3]},
            "tags": ["a", "b"],
            "count": 42,
        },
        # 1: deeply nested
        {"a": {"b": {"c": {"d": "deep"}}}},
        # 2: flat object
        {"city": "Hsinchu", "age": 30},
    ]
    values = [json.dumps(document).encode("utf-8") for document in documents]
    record_count = len(values)
    driver.sendBytesData(topic, values, [], partition=0)

    table = Table(driver, topic.upper())
    wait_for_rows(table.name, record_count, connector_name=connector.name)

    # RECORD_CONTENT must exist and be typed VARIANT.
    schema = table.schema(as_dict=True)
    content_column = next(
        column for column in schema if column["name"] == "RECORD_CONTENT"
    )
    assert content_column["type"] == "VARIANT"

    # Offset 0: nested object with arrays.
    nested_rows = table.select(
        """
        RECORD_CONTENT:user.name::string AS user_name,
        RECORD_CONTENT:user.scores[0]::number AS first_score,
        RECORD_CONTENT:tags[0]::string AS first_tag,
        RECORD_CONTENT:count::number AS cnt
    """,
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )
    assert nested_rows == [
        {
            "USER_NAME": "Alice",
            "FIRST_SCORE": 1,
            "FIRST_TAG": "a",
            "CNT": 42,
        }
    ]

    # Offset 1: deeply nested object.
    deep_rows = table.select(
        """
        RECORD_CONTENT:a.b.c.d::string AS val
    """,
        'WHERE RECORD_METADATA:"offset"::number = 1',
    )
    assert deep_rows == [{"VAL": "deep"}]

    # Offset 2: flat object.
    flat_rows = table.select(
        """
        RECORD_CONTENT:city::string AS city,
        RECORD_CONTENT:age::number AS age
    """,
        'WHERE RECORD_METADATA:"offset"::number = 2',
    )
    assert flat_rows == [{"CITY": "Hsinchu", "AGE": 30}]

    # No extra rows beyond the records that were sent.
    total_rows = table.select_scalar("count(*)")
    assert total_rows == record_count


def test_compatibility_schematization_disabled_primitive(
    driver: KafkaDriver, create_connector, create_topics, wait_for_rows
):
    """With schematization off, bare StringConverter payloads land as VARIANT.

    No table is created up front; the connector is expected to auto-create it.
    The point of the test is that primitive (non-JSON) payloads are stored in
    RECORD_CONTENT as VARIANT rather than being inferred as VARCHAR by schema
    evolution.

    Runs for both v3 and v4 to verify compatibility.
    """
    created_topics = create_topics(
        ["schematization_disabled_primitive"], with_tables=False
    )
    topic = created_topics[0]

    string_converter = "org.apache.kafka.connect.storage.StringConverter"
    connector_settings = {
        **V3_CONFIG_TEMPLATE,
        "topics": topic,
        "key.converter": string_converter,
        "value.converter": string_converter,
        "snowflake.enable.schematization": "false",
    }
    connector = create_connector(v3_config=connector_settings)
    driver.startConnectorWaitTime()

    # Raw payload bytes paired with the text expected back from Snowflake,
    # listed in the order they are sent.
    samples = [
        (b"hello world", "hello world"),
        (b"42", "42"),
        (b"true", "true"),
    ]
    values = [raw for raw, _ in samples]
    driver.sendBytesData(topic, values, [], partition=0)

    table = Table(driver, topic.upper())
    wait_for_rows(table.name, len(values), connector_name=connector.name)

    # RECORD_CONTENT must exist and be typed VARIANT.
    schema = table.schema(as_dict=True)
    content_column = next(
        column for column in schema if column["name"] == "RECORD_CONTENT"
    )
    assert content_column["type"] == "VARIANT"

    # Rows are ordered by Kafka offset, so they line up with the send order.
    rows = table.select(
        "RECORD_CONTENT::string AS content",
        'ORDER BY RECORD_METADATA:"offset"::number',
    )
    for position, (_, expected_text) in enumerate(samples):
        assert rows[position]["CONTENT"] == expected_text

    total_rows = table.select_scalar("count(*)")
    assert total_rows == len(values)


================================================
FILE: test/tests/compatibility/test_type_compatibility.py
================================================
"""Data-type ingestion compatibility tests — single-table architecture.

All test cases are defined as data in ``CASES`` (below).  One table, one
topic, and one connector per ingestion mode — all records are sent in a single
batch, queried once, and then asserted via the shared ``results`` fixture.

Assertions encode **v3 reference behavior**.  Tests that encounter known v4
divergences handle them inline and log a ``DIVERGENCE`` warning — grep for
that prefix to find all behavioral differences across modes.

Parameterized across three ingestion modes via the ``ingestion_mode`` fixture
(module-scoped):
  - v3:        SnowflakeSinkConnector with SNOWPIPE_STREAMING
  - v4-compat: SnowflakeStreamingSinkConnector with snowflake.validation=client_side
  - v4-ht:     SnowflakeStreamingSinkConnector with snowflake.validation=server_side
               (server-side validation only, no DLQ — errors silently drop records)

Type aliases (INT, STRING, DOUBLE, DECIMAL, CHAR, etc.) are not tested
separately — they resolve to the same storage type and code path in Snowflake.

Reference: https://docs.snowflake.com/en/sql-reference/intro-summary-data-types
"""

import datetime
import json
import logging
from enum import Enum

import pytest

from .conftest import UNSET, Case, cases_where

logger = logging.getLogger(__name__)

pytestmark = pytest.mark.compatibility

# ---------------------------------------------------------------------------
# Expect enum — makes the CASES table scannable at a glance
# ---------------------------------------------------------------------------


class Expect(str, Enum):
    """Expected outcome of sending one test record.

    Mixing in ``str`` makes every member compare equal to its plain string
    value, so checks written as ``case.expect == "ingested"`` hold whether the
    case carries an ``Expect`` member or the raw string.
    """

    # The record is expected to be ingested.
    OK = "ingested"
    # The record is expected to be rejected.
    ERR = "error"


# Short aliases that keep each row of the CASES table compact.
OK, ERR = Expect.OK, Expect.ERR


# ---------------------------------------------------------------------------
# Table column definitions  (col_name → DDL type)
# ---------------------------------------------------------------------------

# Ordered mapping of column name → DDL type for the single shared test table.
COLUMNS = dict(
    # Bookkeeping columns
    ID="VARCHAR NOT NULL",
    TEST_CASE="VARCHAR",
    # Numeric
    COL_NUMBER="NUMBER",
    COL_NUMSCALE="NUMBER(10,2)",
    COL_FLOAT="FLOAT",
    # String and binary
    COL_VARCHAR="VARCHAR",
    COL_VARCHAR10="VARCHAR(10)",
    COL_BINARY="BINARY",
    # Logical
    COL_BOOLEAN="BOOLEAN",
    # Date and time
    COL_DATE="DATE",
    COL_TIME="TIME",
    COL_TS_NTZ="TIMESTAMP_NTZ",
    COL_TS_LTZ="TIMESTAMP_LTZ",
    COL_TS_TZ="TIMESTAMP_TZ",
    # Semi-structured
    COL_VARIANT="VARIANT",
    COL_OBJECT="OBJECT",
    COL_ARRAY="ARRAY",
    # Metadata column
    RECORD_METADATA="VARIANT",
)


# ---------------------------------------------------------------------------
# Full test case specification
# ---------------------------------------------------------------------------
# Each Case populates only ID, TEST_CASE, and ONE typed column; the rest are
# NULL.  Schematization handles sparse records.
#
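# The ``group`` tag controls which test function owns each case:
#   None               → owned by the per-column test (test_number, test_float, ...)
#   "float_special"    → test_float_special  (NaN/Inf need string-representation docs)
#   "bool_coercion"    → test_boolean_coercion (known v4-ht divergence)
#   "ts_epoch"         → test_timestamp_ntz_epoch (known v4-ht divergence)
#   "variant_bare_str" → test_variant_bare_string (bare string sent to VARIANT)
#   "variant_json_str" → test_variant_json_string (JSON text sent as a string to VARIANT)
#   "array_json_str"   → test_array_json_string (string values sent to ARRAY)
#   "xtype"            → test_cross_type_mismatch (values sent to wrong column)
#   "null"             → test_null (parametrized across all columns)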

# Each row reads Case(name, column, value, expected outcome), optionally followed
# by the keyword fields used below: expected_value, approx, group.
# NOTE(review): the precise meaning of each field is defined by ``Case`` in
# conftest — confirm there before relying on it.
CASES = [
    # ---- NUMBER(38,0) ----
    Case("num_int", "COL_NUMBER", 42, OK),
    Case("num_zero", "COL_NUMBER", 0, OK),
    Case("num_neg", "COL_NUMBER", -100, OK),
    Case("num_maxint", "COL_NUMBER", 2147483647, OK),
    Case("num_minint", "COL_NUMBER", -2147483648, OK),
    Case("num_bad_str", "COL_NUMBER", "not_a_number", ERR),
    Case("num_bad_abc", "COL_NUMBER", "abc", ERR),
    Case("num_bad_obj", "COL_NUMBER", {"obj": 1}, ERR),
    # ---- NUMBER(10,2) ----
    Case("nsc_decimal", "COL_NUMSCALE", 123.45, OK, approx=0.01),
    Case("nsc_neg", "COL_NUMSCALE", -0.01, OK, approx=0.01),
    Case("nsc_zero", "COL_NUMSCALE", 0.0, OK, approx=0.01),
    Case("nsc_max", "COL_NUMSCALE", 99999.99, OK, approx=0.01),
    Case("nsc_bad_text", "COL_NUMSCALE", "text", ERR),
    # ---- FLOAT ----
    Case("flt_pi", "COL_FLOAT", 3.14, OK),
    Case("flt_neg", "COL_FLOAT", -1.5, OK),
    Case("flt_zero", "COL_FLOAT", 0.0, OK),
    Case("flt_sci", "COL_FLOAT", 1.0e10, OK),
    Case("flt_bad_text", "COL_FLOAT", "text", ERR),
    Case("flt_bad_arr", "COL_FLOAT", [1, 2], ERR),
    # FLOAT special: NaN, Infinity, -Infinity (string representations)
    Case("flt_nan", "COL_FLOAT", "NaN", OK, group="float_special"),
    Case("flt_inf", "COL_FLOAT", "Infinity", OK, group="float_special"),
    Case("flt_ninf", "COL_FLOAT", "-Infinity", OK, group="float_special"),
    # ---- VARCHAR ----
    Case("vc_hello", "COL_VARCHAR", "hello world", OK),
    Case("vc_special", "COL_VARCHAR", "special chars: !@#$%^&*()", OK),
    Case("vc_long", "COL_VARCHAR", "a" * 1000, OK),
    # ---- VARCHAR(10) ----
    Case("vc10_short", "COL_VARCHAR10", "hello", OK),
    Case("vc10_exact", "COL_VARCHAR10", "0123456789", OK),
    Case("vc10_over", "COL_VARCHAR10", "a" * 20, ERR),
    # ---- BINARY ----
    Case(
        "bin_hello",
        "COL_BINARY",
        "48656C6C6F",
        OK,
        expected_value=bytes.fromhex("48656C6C6F"),
    ),
    Case(
        "bin_dead",
        "COL_BINARY",
        "DEADBEEF",
        OK,
        expected_value=bytes.fromhex("DEADBEEF"),
    ),
    Case("bin_zero", "COL_BINARY", "00", OK, expected_value=bytes.fromhex("00")),
    Case(
        "bin_long",
        "COL_BINARY",
        "FF" * 100,
        OK,
        expected_value=bytes.fromhex("FF" * 100),
    ),
    # ---- BOOLEAN ----
    Case("bool_true", "COL_BOOLEAN", True, OK),
    Case("bool_false", "COL_BOOLEAN", False, OK),
    Case("bool_bad_obj", "COL_BOOLEAN", {"key": "value"}, ERR),
    Case("bool_bad_arr", "COL_BOOLEAN", [1, 2, 3], ERR),
    Case("bool_bad_str", "COL_BOOLEAN", "random_string", ERR),
    # Boolean coercion: numeric 0/1 and string tokens.
    # v4-compat fix: RowValidator normalizes any valid input to Boolean.
    # v4-ht: RowValidator bypassed; Integer 0/1 reach SSv2 SDK directly and are dropped.
    Case(
        "bool_zero", "COL_BOOLEAN", 0, OK, expected_value=False, group="bool_coercion"
    ),
    Case("bool_one", "COL_BOOLEAN", 1, OK, expected_value=True, group="bool_coercion"),
    Case(
        "bool_str_true",
        "COL_BOOLEAN",
        "true",
        OK,
        expected_value=True,
        group="bool_coercion",
    ),
    Case(
        "bool_str_false",
        "COL_BOOLEAN",
        "false",
        OK,
        expected_value=False,
        group="bool_coercion",
    ),
    Case(
        "bool_str_yes",
        "COL_BOOLEAN",
        "yes",
        OK,
        expected_value=True,
        group="bool_coercion",
    ),
    Case(
        "bool_str_no",
        "COL_BOOLEAN",
        "no",
        OK,
        expected_value=False,
        group="bool_coercion",
    ),
    Case(
        "bool_str_on",
        "COL_BOOLEAN",
        "on",
        OK,
        expected_value=True,
        group="bool_coercion",
    ),
    Case(
        "bool_str_off",
        "COL_BOOLEAN",
        "off",
        OK,
        expected_value=False,
        group="bool_coercion",
    ),
    # ---- DATE ----
    Case(
        "date_normal",
        "COL_DATE",
        "2024-01-15",
        OK,
        expected_value=datetime.date(2024, 1, 15),
    ),
    Case(
        "date_epoch",
        "COL_DATE",
        "1970-01-01",
        OK,
        expected_value=datetime.date(1970, 1, 1),
    ),
    Case(
        "date_future",
        "COL_DATE",
        "2099-12-31",
        OK,
        expected_value=datetime.date(2099, 12, 31),
    ),
    Case("date_bad", "COL_DATE", "not_a_date", ERR),
    # ---- TIME ----
    Case(
        "time_normal",
        "COL_TIME",
        "13:45:30",
        OK,
        expected_value=datetime.time(13, 45, 30),
    ),
    Case(
        "time_midnight",
        "COL_TIME",
        "00:00:00",
        OK,
        expected_value=datetime.time(0, 0, 0),
    ),
    Case(
        "time_end", "COL_TIME", "23:59:59", OK, expected_value=datetime.time(23, 59, 59)
    ),
    Case("time_bad", "COL_TIME", "not_a_time", ERR),
    # ---- TIMESTAMP_NTZ ----
    Case(
        "tsntz_normal",
        "COL_TS_NTZ",
        "2024-01-15T13:45:30",
        OK,
        expected_value=datetime.datetime(2024, 1, 15, 13, 45, 30),
    ),
    Case(
        "tsntz_epoch",
        "COL_TS_NTZ",
        "1970-01-01T00:00:00",
        OK,
        expected_value=datetime.datetime(1970, 1, 1, 0, 0, 0),
    ),
    Case(
        "tsntz_future",
        "COL_TS_NTZ",
        "2099-12-31T23:59:59",
        OK,
        expected_value=datetime.datetime(2099, 12, 31, 23, 59, 59),
    ),
    Case("tsntz_bad", "COL_TS_NTZ", "not_a_timestamp", ERR),
    # Integer epoch → TIMESTAMP_NTZ
    # KNOWN DIVERGENCE: handled in test_timestamp_ntz_epoch, whose docstring
    # describes v4-ht storing a timezone-shifted value while v3 and v4-compat agree.
    # NOTE(review): an earlier note here said the v4 RowValidator rejects
    # java.lang.Long for TIMESTAMP_NTZ — confirm which description is current.
    Case(
        "tsntz_int_epoch",
        "COL_TS_NTZ",
        1705312800,
        OK,
        expected_value=datetime.datetime(2024, 1, 15, 10, 0, 0),
        group="ts_epoch",
    ),
    # ---- TIMESTAMP_LTZ ----
    Case(
        "tsltz_normal",
        "COL_TS_LTZ",
        "2024-01-15T13:45:30+00:00",
        OK,
        expected_value=datetime.datetime(2024, 1, 15, 13, 45, 30),
    ),
    Case(
        "tsltz_epoch",
        "COL_TS_LTZ",
        "1970-01-01T00:00:00+00:00",
        OK,
        expected_value=datetime.datetime(1970, 1, 1, 0, 0, 0),
    ),
    Case("tsltz_bad", "COL_TS_LTZ", "not_a_timestamp", ERR),
    # ---- TIMESTAMP_TZ ----
    Case("tstz_offset", "COL_TS_TZ", "2024-01-15T13:45:30+05:00", OK),
    Case("tstz_utc", "COL_TS_TZ", "1970-01-01T00:00:00+00:00", OK),
    Case("tstz_bad", "COL_TS_TZ", "not_a_timestamp", ERR),
    # ---- VARIANT (accepts any JSON type including primitives) ----
    Case("var_obj", "COL_VARIANT", {"key": "value", "number": 42}, OK),
    Case("var_arr", "COL_VARIANT", [1, 2, 3], OK),
    Case("var_nested", "COL_VARIANT", {"nested": [True, False, None]}, OK),
    Case("var_int", "COL_VARIANT", 42, OK),
    Case("var_float", "COL_VARIANT", 3.14, OK),
    Case("var_bool", "COL_VARIANT", True, OK),
    # Bare string (not valid JSON) → DLQ on v3/v4-compat; v4-ht ingests it
    # as a string VARIANT value (server-side accepts non-JSON scalars).
    Case("var_str", "COL_VARIANT", "hello", ERR, group="variant_bare_str"),
    # String containing valid JSON — probes SSv1/SSv2 parse divergence.
    # v3/v4-compat: SSv1/RowValidator parses string into native object {"a":1}.
    # v4-ht: SSv2 SDK stores the string as a JSON-quoted literal '"{\\"a\\":1}"'.
    Case(
        "var_json_str",
        "COL_VARIANT",
        '{"a":1}',
        OK,
        expected_value={"a": 1},
        group="variant_json_str",
    ),
    # JSON scalar strings to VARIANT — exercises the String→native re-parse path
    # for primitives (number, boolean, null).  All are valid JSON.
    Case(
        "var_json_num",
        "COL_VARIANT",
        "42",
        OK,
        expected_value=42,
        group="variant_json_str",
    ),
    Case(
        "var_json_bool",
        "COL_VARIANT",
        "true",
        OK,
        expected_value=True,
        group="variant_json_str",
    ),
    Case(
        "var_json_arr",
        "COL_VARIANT",
        "[1,2]",
        OK,
        expected_value=[1, 2],
        group="variant_json_str",
    ),
    # ---- OBJECT ----
    Case("obj_simple", "COL_OBJECT", {"key": "value"}, OK),
    Case("obj_nested", "COL_OBJECT", {"nested": {"a": 1, "b": 2}}, OK),
    Case("obj_with_arr", "COL_OBJECT", {"array_val": [1, 2, 3]}, OK),
    # JSON string that parses to an object
    Case("obj_str_json", "COL_OBJECT", '{"key":"value"}', OK),
    # Invalid JSON string → OBJECT: rejected in all modes
    Case("obj_bad_str", "COL_OBJECT", "not_json", ERR),
    # Valid JSON but not an object (array) → OBJECT: rejected in all modes
    Case("obj_str_arr", "COL_OBJECT", "[1,2,3]", ERR),
    # ---- ARRAY ----
    Case("arr_strings", "COL_ARRAY", ["a", "b", "c"], OK),
    Case("arr_numbers", "COL_ARRAY", [1, 2, 3], OK),
    Case("arr_objects", "COL_ARRAY", [{"key": "value"}, {"key": "value2"}], OK),
    # Invalid JSON string → ARRAY: v3/v4-compat reject (DLQ); v4-ht wraps as ["not_json"].
    Case("arr_bad_str", "COL_ARRAY", "not_json", ERR, group="array_json_str"),
    # JSON string sent to ARRAY: v3 (SSv1) parses it into [1,2,3],
    # v4 (SSv2) stores it as literal string element ["[1,2,3]"].
    Case("arr_str_json", "COL_ARRAY", "[1,2,3]", OK, group="array_json_str"),
    # Non-array JSON string: validateAndParseArray wraps into single-element array.
    Case(
        "arr_str_scalar",
        "COL_ARRAY",
        "42",
        OK,
        expected_value=[42],
        group="array_json_str",
    ),
    # ---- NULL handling (one per supported type) ----
    # KNOWN DIVERGENCE for VARIANT: v4 modes may store JSON null as the string
    # 'null' while v3 stores SQL NULL (test_null xfails on this for any non-v3 mode).
    Case("null_number", "COL_NUMBER", None, OK, group="null"),
    Case("null_float", "COL_FLOAT", None, OK, group="null"),
    Case("null_varchar", "COL_VARCHAR", None, OK, group="null"),
    Case("null_boolean", "COL_BOOLEAN", None, OK, group="null"),
    Case("null_date", "COL_DATE", None, OK, group="null"),
    Case("null_time", "COL_TIME", None, OK, group="null"),
    Case("null_ts_ntz", "COL_TS_NTZ", None, OK, group="null"),
    Case("null_ts_ltz", "COL_TS_LTZ", None, OK, group="null"),
    Case("null_ts_tz", "COL_TS_TZ", None, OK, group="null"),
    Case("null_variant", "COL_VARIANT", None, OK, group="null"),
    Case("null_object", "COL_OBJECT", None, OK, group="null"),
    Case("null_array", "COL_ARRAY", None, OK, group="null"),
    # ---- Cross-type mismatch ----
    Case("xtype_str_num_1", "COL_NUMBER", "hello", ERR, group="xtype"),
    Case("xtype_str_num_2", "COL_NUMBER", "world", ERR, group="xtype"),
    Case("xtype_num_bool_1", "COL_BOOLEAN", 42, ERR, group="xtype"),
    Case("xtype_num_bool_2", "COL_BOOLEAN", -1, ERR, group="xtype"),
    Case("xtype_num_bool_3", "COL_BOOLEAN", 999, ERR, group="xtype"),
    # Object coerced to JSON string in VARCHAR — accepted by all modes
    Case(
        "xtype_obj_str",
        "COL_VARCHAR",
        {"key": "value"},
        OK,
        expected_value='{"key":"value"}',
        group="xtype",
    ),
    # List coerced to JSON string in VARCHAR — same as Map (xtype_obj_str)
    Case(
        "xtype_list_str",
        "COL_VARCHAR",
        [1, 2, 3],
        OK,
        expected_value="[1,2,3]",
        group="xtype",
    ),
    # Map serialized to JSON exceeds VARCHAR(10) limit → rejected
    Case("xtype_map_vc10", "COL_VARCHAR10", {"key": "value"}, ERR, group="xtype"),
    Case("xtype_arr_num", "COL_NUMBER", [1, 2, 3], ERR, group="xtype"),
]

# Groups that have their own dedicated test functions.
# Per-column tests (test_number, test_float, ...) exclude these.
_SPECIAL_GROUPS = {
    "float_special",
    "bool_coercion",
    "ts_epoch",
    "xtype",
    "null",
    "variant_bare_str",
    "variant_json_str",
    "array_json_str",
}


# ---------------------------------------------------------------------------
# Divergence logging — grep for "DIVERGENCE" to find all behavioral diffs
# ---------------------------------------------------------------------------

# Fixed marker placed at the start of every divergence log line.
_DIVERGENCE_PREFIX = "DIVERGENCE"


def _log_divergence(mode, case_name, description):
    """Emit a warning describing a known behavioral difference from v3.

    Every message starts with the same marker, so the full list of
    divergences can be pulled out of a test run with:
        grep DIVERGENCE <test-output>

    Args:
        mode: ingestion mode the divergence was observed on.
        case_name: name of the test case concerned.
        description: free-text explanation of what differed.
    """
    # Arguments are handed to the logger unformatted so interpolation stays lazy.
    message_args = (_DIVERGENCE_PREFIX, mode, case_name, description)
    logger.warning("%s [%s] %s: %s", *message_args)


# ---------------------------------------------------------------------------
# Helper: assert all cases for a column, dispatching on expect
# ---------------------------------------------------------------------------


def _assert_all(results, cases):
    """Assert every case in the list using v3 reference expectations."""
    for c in cases:
        if c.expect == "ingested":
            results.assert_ingested(c)
        else:
            results.assert_error(c)


# ---------------------------------------------------------------------------
# Numeric data types
# ---------------------------------------------------------------------------


def test_number(results):
    """COL_NUMBER (NUMBER(38,0)): integers are ingested, non-numeric input is rejected."""
    number_cases = cases_where(col="COL_NUMBER", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, number_cases)


def test_number_with_scale(results):
    """COL_NUMSCALE (NUMBER(10,2)): decimals are ingested, a text value is rejected."""
    scaled_cases = cases_where(col="COL_NUMSCALE", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, scaled_cases)


def test_float(results):
    """COL_FLOAT: ordinary floating-point values are ingested, non-numeric input is rejected."""
    float_cases = cases_where(col="COL_FLOAT", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, float_cases)


def test_float_special(results):
    """COL_FLOAT special values: NaN, +Infinity and -Infinity.

    JSON (RFC 8259) has no literal for NaN or Infinity, so the cases carry
    them as strings; that is the form DataValidationUtil.validateAndParseReal
    handles (via Double.parseDouble).
    """
    special_cases = cases_where(col="COL_FLOAT", group="float_special")
    _assert_all(results, special_cases)


# ---------------------------------------------------------------------------
# String & binary data types
# ---------------------------------------------------------------------------


def test_varchar(results):
    """COL_VARCHAR: plain, special-character and long strings are ingested."""
    varchar_cases = cases_where(col="COL_VARCHAR", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, varchar_cases)


def test_varchar_length_limit(results):
    """COL_VARCHAR10 (VARCHAR(10)): strings at and beyond the declared length.

    Values that fit are expected to be ingested; the over-length value is
    expected to be rejected.  The point of the probe is to see whether v3 and
    v4 enforce the length constraint the same way.
    """
    limited_cases = cases_where(col="COL_VARCHAR10", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, limited_cases)


def test_binary(results):
    """COL_BINARY: hex-encoded strings decoded to bytes.

    v3 and v4-compat both decode the hex string correctly.  For v4-compat
    this is the SNOW-3256183 fix: the client-side RowValidator converts
    hex → byte[] before the row reaches the Ingest SDK, matching SSv1.

    KNOWN DIVERGENCE for v4-ht: with server-side validation the hex string
    goes straight to the SSv2 SDK, which reads it as base64 when
    ENABLE_SSV2_DEFAULT_BINARY_FORMAT_BASE64 is set and so stores garbled
    bytes.  On that mode the differences are logged and a failing assertion
    is converted into an xfail.
    """
    binary_cases = cases_where(col="COL_BINARY", exclude_groups=_SPECIAL_GROUPS)
    if results.mode in ("v3", "v4-compat"):
        _assert_all(results, binary_cases)
        return

    # v4-ht from here on: record diagnostics first, then assert and xfail.
    for case in binary_cases:
        if case.name not in results.rows:
            in_dlq = case.name in results.dlq_ids
            _log_divergence(
                results.mode,
                case.name,
                f"rejected (v3 ingests); in_dlq={in_dlq}",
            )
            continue

        actual = results.rows[case.name].get(case.col)
        if case.expected_value is UNSET:
            _log_divergence(
                results.mode,
                case.name,
                f"ingested (v3 also ingests, value={actual!r})",
            )
        elif actual != case.expected_value:
            _log_divergence(
                results.mode,
                case.name,
                f"ingested with wrong value: {actual!r} (expected {case.expected_value!r})",
            )

    try:
        _assert_all(results, binary_cases)
    except AssertionError as e:
        pytest.xfail(f"v4-ht SSv2 binary handling diverges from v3: {e}")


# ---------------------------------------------------------------------------
# Logical data type
# ---------------------------------------------------------------------------


def test_boolean(results):
    """COL_BOOLEAN: true/false are ingested; objects, arrays and arbitrary strings are rejected."""
    boolean_cases = cases_where(col="COL_BOOLEAN", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, boolean_cases)


def test_boolean_coercion(results):
    """BOOLEAN coercion: numeric 0/1 and string tokens.

    v3 and v4-compat both coerce Integer 0->False, 1->True.
    v4-compat fix: RowValidator normalizes any valid input to Boolean before
    passing it to the SSv2 SDK (which only accepts Boolean, not Integer/String).

    KNOWN DIVERGENCE for v4-ht: server-side validation bypasses RowValidator,
    so Integer 0/1 reach the SSv2 SDK directly and are silently dropped.
    String tokens ("true"/"false"/"yes"/"no"/"on"/"off") are hard-asserted on
    every mode.

    On v4-ht a DIVERGENCE line is logged only for numeric cases that actually
    fail the ingestion check, so the grep-based divergence report stays
    accurate if the behavior is ever fixed; any such failure turns the test
    into an xfail.
    """
    cases = cases_where(group="bool_coercion")

    # Split numeric inputs from string tokens.  ``bool`` is a subclass of
    # ``int`` in Python, so it is excluded explicitly: a True/False value is
    # not one of the numeric 0/1 inputs this test singles out.
    numeric_cases, token_cases = [], []
    for c in cases:
        is_numeric = isinstance(c.value, int) and not isinstance(c.value, bool)
        (numeric_cases if is_numeric else token_cases).append(c)

    # String boolean tokens work identically on all modes — always hard assert.
    _assert_all(results, token_cases)

    if results.mode in ("v3", "v4-compat"):
        # Both v3 (SSv1 coercion) and v4-compat (RowValidator normalization) ingest 0/1 correctly.
        for c in numeric_cases:
            results.assert_ingested(c)
        return

    # v4-ht: RowValidator is bypassed; SSv2 SDK silently drops Integer inputs for BOOLEAN.
    first_failure = None
    for c in numeric_cases:
        try:
            results.assert_ingested(c)
        except AssertionError as e:
            if c.name in results.rows:
                # The row landed but failed the check: report what was stored
                # rather than claiming it was dropped.
                actual = results.rows[c.name].get(c.col)
                description = (
                    f"v4-ht ingested numeric {c.value} for BOOLEAN with unexpected value {actual!r}"
                )
            else:
                in_dlq = c.name in results.dlq_ids
                description = f"v4-ht drops numeric {c.value} for BOOLEAN (SSv2 SDK rejects Integer); in_dlq={in_dlq}"
            _log_divergence(results.mode, c.name, description)
            if first_failure is None:
                first_failure = e

    if first_failure is not None:
        pytest.xfail(
            f"v4-ht drops numeric booleans (SSv2 SDK rejects Integer): {first_failure}"
        )


# ---------------------------------------------------------------------------
# Date & time data types
# ---------------------------------------------------------------------------


def test_date(results):
    """COL_DATE: ISO date strings are ingested, an unparseable string is rejected."""
    date_cases = cases_where(col="COL_DATE", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, date_cases)


def test_time(results):
    """COL_TIME: time-of-day strings are ingested, an unparseable string is rejected."""
    time_cases = cases_where(col="COL_TIME", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, time_cases)


def test_timestamp_ntz(results):
    """COL_TS_NTZ: ISO 8601 timestamp strings are ingested, an invalid string is rejected."""
    ntz_cases = cases_where(col="COL_TS_NTZ", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, ntz_cases)


def test_timestamp_ntz_epoch(results):
    """TIMESTAMP_NTZ with integer epoch.

    v3: SSv1 SDK converts epoch to UTC client-side via parseInstantGuessScale.
    v4-compat: RowValidator normalizes Integer epoch to ISO string (same as v3).
    KNOWN DIVERGENCE: v4-ht bypasses RowValidator; SSv2 SDK passes raw Integer to
    the Snowflake backend which interprets it using the channel's default timezone
    (America/Los_Angeles) instead of UTC, producing a -8h shifted timestamp.

    On v4-ht the observed difference is logged and a failing assertion is
    converted into an xfail.
    """
    [case] = cases_where(group="ts_epoch")
    if results.mode in ("v3", "v4-compat"):
        results.assert_ingested(case)
        return

    # v4-ht: log and xfail on expected timezone shift.
    if case.name in results.rows:
        actual = results.rows[case.name].get(case.col)
        # Identity check against the sentinel, the same way test_binary does it.
        # An isinstance() check against type(UNSET) would match every value if
        # UNSET is a bare object(), and would then compare against the raw
        # epoch integer instead of the expected datetime.
        expected = case.value if case.expected_value is UNSET else case.expected_value
        if actual != expected:
            _log_divergence(
                results.mode,
                case.name,
                f"epoch timestamp shifted: got {actual!r}, v3 expects {expected!r}",
            )
    else:
        in_dlq = case.name in results.dlq_ids
        _log_divergence(
            results.mode,
            case.name,
            f"v4-ht rejects Long for TIMESTAMP_NTZ; in_dlq={in_dlq}",
        )

    try:
        results.assert_ingested(case)
    except AssertionError as e:
        pytest.xfail(f"v4-ht: SSv2 backend uses channel TZ for integer epoch: {e}")


def test_timestamp_ltz(results):
    """COL_TS_LTZ: timestamps with an explicit UTC offset are ingested, an invalid string is rejected."""
    ltz_cases = cases_where(col="COL_TS_LTZ", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, ltz_cases)


def test_timestamp_tz(results):
    """COL_TS_TZ: timestamps carrying an explicit offset are ingested, an invalid string is rejected."""
    tz_cases = cases_where(col="COL_TS_TZ", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, tz_cases)


# ---------------------------------------------------------------------------
# Semi-structured data types
# ---------------------------------------------------------------------------


def test_variant(results):
    """COL_VARIANT: native JSON values — objects, arrays and primitives.

    Only cases outside the special groups are covered here.  Strings sent to
    VARIANT (bare strings and strings containing JSON text) have their own
    groups and are checked by test_variant_bare_string and
    test_variant_json_string.
    """
    variant_cases = cases_where(col="COL_VARIANT", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, variant_cases)


def test_object(results):
    """COL_OBJECT: JSON objects, plus strings that do or do not parse to an object."""
    object_cases = cases_where(col="COL_OBJECT", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, object_cases)


def test_array(results):
    """COL_ARRAY: native JSON arrays.

    String values sent to the ARRAY column belong to the ``array_json_str``
    group and are checked by test_array_json_string instead.
    """
    array_cases = cases_where(col="COL_ARRAY", exclude_groups=_SPECIAL_GROUPS)
    _assert_all(results, array_cases)


def test_variant_bare_string(results):
    """Bare string sent to VARIANT: rejected on v3/v4-compat, ingested on v4-ht.

    KNOWN DIVERGENCE: v3 and v4-compat treat a bare string as invalid JSON
    and route it to the DLQ.  v4-ht (server-side validation only) accepts it
    and stores it as a string VARIANT value.
    """
    [case] = cases_where(group="variant_bare_str")
    if results.mode != "v4-ht":
        results.assert_error(case)
        return

    # v4-ht: the server accepts the bare string, so the row must be present.
    assert case.name in results.rows, (
        f"[{case.name}] expected v4-ht to ingest bare string to VARIANT"
    )
    # Snowflake stores VARIANT strings JSON-quoted: "hello" → '"hello"'
    expected_json = json.dumps(case.value)
    actual = results.rows[case.name].get(case.col)
    assert actual == expected_json, (
        f"[{case.name}] value mismatch: {actual!r} != {expected_json!r}"
    )
    _log_divergence(
        results.mode, case.name, "bare string ingested as VARIANT (v3 DLQ's it)"
    )


def test_variant_json_string(results):
    """JSON text sent as a String to VARIANT.

    Covers object text ('{"a":1}'), scalar text ('42', 'true') and array text
    ('[1,2]').

    v3 and v4-compat: the String is normalized to a native Java object (Map,
    List, Integer, Boolean), so the value is stored as real JSON.

    KNOWN DIVERGENCE for v4-ht: server-side validation bypasses RowValidator;
    the SSv2 SDK receives the raw String and stores it as a JSON-quoted string
    literal.  Each such case is logged and the test ends as an xfail.
    """
    json_text_cases = cases_where(group="variant_json_str")

    if results.mode not in ("v3", "v4-compat"):
        # v4-ht: the row must exist, but its value may be a quoted literal
        # rather than a native object.
        stored_as_literal = []
        for case in json_text_cases:
            assert case.name in results.rows, (
                f"[{case.name}] expected row in table on {results.mode}"
            )
            try:
                results.assert_ingested(case)
            except AssertionError:
                actual = results.rows[case.name].get(case.col)
                _log_divergence(
                    results.mode,
                    case.name,
                    f"JSON string stored as quoted literal {actual!r} (v3 stores as {case.expected_value!r})",
                )
                stored_as_literal.append(case.name)

        if stored_as_literal:
            pytest.xfail(
                f"v4-ht stores JSON strings as quoted literals in VARIANT: {stored_as_literal}"
            )
        return

    # Reference modes: every case must be ingested with its expected value.
    for case in json_text_cases:
        results.assert_ingested(case)


def test_array_json_string(results):
    """String values sent to ARRAY: v3/v4-compat parse or reject, v4-ht wraps as literal element.

    Covers:
      - JSON array strings ('[1,2,3]') — v3/v4-compat parse to proper array
      - Non-array JSON scalars ('42') — v3/v4-compat wrap as single-element array
      - Invalid JSON strings ('not_json') — v3/v4-compat reject (DLQ)

    v3 and v4-compat: RowValidator normalizes String to a List so the SSv2 SDK
    stores it as a proper array.  Non-array scalars are wrapped into a
    single-element array (e.g. '42' → [42]).  Invalid JSON is rejected.

    KNOWN DIVERGENCE for v4-ht: server-side validation bypasses RowValidator; the SSv2 SDK
    wraps ANY String as a single-element array, including invalid JSON and valid JSON alike.
    Every divergence found is logged and the test then ends as an xfail.
    """
    cases = cases_where(group="array_json_str")
    if results.mode in ("v3", "v4-compat"):
        _assert_all(results, cases)
        return

    # v4-ht: SSv2 wraps all strings as single-element arrays (no rejection)
    divergences = []
    for c in cases:
        if c.expect == "error":
            # v3/v4-compat reject this, but v4-ht ingests it as ["<value>"].
            # When v4-ht rejects it as well there is nothing to report.
            if c.name in results.rows:
                actual = results.rows[c.name].get(c.col)
                parsed = json.loads(actual) if isinstance(actual, str) else actual
                _log_divergence(
                    results.mode,
                    c.name,
                    f"v4-ht ingested (v3 rejects): stored as {parsed!r}",
                )
                divergences.append(c.name)
            continue

        assert c.name in results.rows, (
            f"[{c.name}] expected row in table on {results.mode}"
        )
        try:
            results.assert_ingested(c)
        except AssertionError:
            actual = results.rows[c.name].get(c.col)
            parsed = json.loads(actual) if isinstance(actual, str) else actual
            # Pick the v3 reference value by sentinel identity, not truthiness:
            # ``expected_value or value`` would print the UNSET sentinel if it
            # is truthy and would discard legitimate falsy expectations such
            # as [] or 0.
            v3_value = c.value if c.expected_value is UNSET else c.expected_value
            _log_divergence(
                results.mode,
                c.name,
                f"JSON string stored as literal array element {parsed!r} (v3 stores {v3_value!r})",
            )
            divergences.append(c.name)

    if divergences:
        pytest.xfail(f"v4-ht array string handling diverges from v3: {divergences}")


# ---------------------------------------------------------------------------
# NULL handling
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "col",
    [
        "COL_NUMBER",
        "COL_FLOAT",
        "COL_VARCHAR",
        "COL_BOOLEAN",
        "COL_DATE",
        "COL_TIME",
        "COL_TS_NTZ",
        "COL_TS_LTZ",
        "COL_TS_TZ",
        "COL_VARIANT",
        "COL_OBJECT",
        "COL_ARRAY",
    ],
)
def test_null(results, col):
    """A null sent to each listed column must come back as SQL NULL.

    The row has to be present in the table and the column value has to be
    ``None``.

    KNOWN DIVERGENCE: on non-v3 modes a JSON null sent to VARIANT may be
    stored as the string 'null' instead of SQL NULL; that case is logged and
    reported as an xfail.
    """
    null_case = next(
        case for case in CASES if case.group == "null" and case.col == col
    )
    assert null_case.name in results.rows, (
        f"[{null_case.name}] expected in table but not found (mode={results.mode})"
    )
    actual = results.rows[null_case.name].get(null_case.col)

    # KNOWN DIVERGENCE: v4 stores VARIANT null as string 'null'
    is_v4_variant = col == "COL_VARIANT" and results.mode != "v3"
    if is_v4_variant and actual is not None:
        _log_divergence(
            results.mode, null_case.name, f"expected SQL NULL, got {actual!r}"
        )
        pytest.xfail(f"v4 stores VARIANT null as {actual!r} instead of SQL NULL")

    assert actual is None, (
        f"[{null_case.name}] expected NULL, got {actual!r} (mode={results.mode})"
    )


# ---------------------------------------------------------------------------
# Cross-type mismatch — DLQ behavior
# ---------------------------------------------------------------------------


def test_cross_type_mismatch(results):
    """Values sent to a column of an incompatible type.

    On v3 every case is asserted strictly against its expectation.  On the v4
    modes each departure from the v3 outcome is logged as a divergence and the
    test finishes as an xfail if any were found; cases that match v3 are still
    asserted.
    """
    xtype_cases = cases_where(group="xtype")
    if results.mode == "v3":
        _assert_all(results, xtype_cases)
        return

    # v4: collect the names of cases whose outcome differs from v3.
    diverged = []

    def _record(case, description):
        # Log the divergence and remember the case for the final xfail message.
        _log_divergence(results.mode, case.name, description)
        diverged.append(case.name)

    for case in xtype_cases:
        in_table = case.name in results.rows
        in_dlq = case.name in results.dlq_ids

        if case.expect == "ingested":
            if in_table:
                results.assert_ingested(case)
            else:
                _record(case, f"rejected (v3 ingests via coercion); in_dlq={in_dlq}")
            continue

        # From here on the v3 expectation is a rejection.
        if in_table:
            actual = results.rows[case.name].get(case.col)
            _record(case, f"ingested (v3 rejects): value={actual!r}")
        elif results.mode != "v4-ht" and not in_dlq:
            # The DLQ check is skipped for v4-ht.
            _record(case, "rejected without DLQ (silently dropped)")
        else:
            results.assert_error(case)

    if diverged:
        pytest.xfail(f"v4 cross-type handling diverges from v3 on: {diverged}")


# ---------------------------------------------------------------------------
# Error table accounting (v4-ht only)
# ---------------------------------------------------------------------------


def test_error_table_accounting(results):
    """v4-ht: verify error table captured rejected records."""
    if results.mode != "v4-ht":
        pytest.skip("Error table only applicable to v4-ht mode")

    expected_errors = sum(
        1 for c in CASES if c.expect == "error" and c.name not in results.rows
    )

    assert len(results.error_table_rows) >= expected_errors, (
        f"Expected at least {expected_errors} error table rows for v4-ht but found "
        f"{len(results.error_table_rows)} — errors may be silently dropped"
    )

    for row in results.error_table_rows:
        assert row.get("ERROR_CODE") is not None, (
            f"Error table row missing ERROR_CODE: {row}"
        )


================================================
FILE: test/tests/compatibility/test_type_compatibility_avro.py
================================================
"""Avro type compatibility tests across v4-compat and v4-ht ingestion modes.

Verifies that Avro-typed values (int, long, float, double, string, boolean,
bytes, date logical, timestamp-millis logical, array, map) are correctly
ingested into pre-created Snowflake typed columns via the AvroConverter pipeline.

Also tests Avro-specific cross-type mismatches (bytes->VARCHAR, float NaN->NUMBER,
etc.) that cannot be exercised through JSON.

v3 is excluded: Schema Registry classloader conflict prevents v3 from running
Avro tests (see E2E_TEST_PLAN.md Section 3.1.2).
"""

import datetime
import json
import logging
import math
import time

import pytest
from confluent_kafka import avro

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.driver import quote_name

from .conftest import Case, Results

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# pytest applies these markers to every test collected from this module.
pytestmark = [pytest.mark.confluent_only, pytest.mark.compatibility]

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Expected-outcome values stored in Case.expect: OK cases are checked with
# Results.assert_ingested, everything else must be absent from the table
# (see _assert_all).
OK = "ingested"
ERR = "error"

def _build_value_schema_json():
    """Return the JSON text of the Avro record schema used for every test record.

    The record has two required string fields (ID, TEST_CASE) followed by one
    nullable field per typed column. The XTYPE_* fields deliberately use Avro
    types that do not match their Snowflake column types, so cross-type errors
    can be exercised.
    """

    def nullable(name, avro_type):
        # Optional field: union of null and the given type, defaulting to null.
        return {"name": name, "type": ["null", avro_type], "default": None}

    string_array = {"type": "array", "items": "string"}
    string_map = {"type": "map", "values": "string"}

    fields = [
        {"name": "ID", "type": "string"},
        {"name": "TEST_CASE", "type": "string"},
        # Positive: Avro type matches Snowflake column type
        nullable("COL_INT", "int"),
        nullable("COL_BIGINT", "long"),
        nullable("COL_FLOAT", "float"),
        nullable("COL_DOUBLE", "double"),
        nullable("COL_VARCHAR", "string"),
        nullable("COL_BOOLEAN", "boolean"),
        nullable("COL_BINARY", "bytes"),
        nullable("COL_DATE", {"type": "int", "logicalType": "date"}),
        nullable("COL_TS_NTZ", {"type": "long", "logicalType": "timestamp-millis"}),
        nullable("COL_ARRAY", string_array),
        nullable("COL_VARIANT", string_map),
        # Cross-type: Avro type intentionally mismatches Snowflake column
        nullable("XTYPE_BYTES_TO_VARCHAR", "bytes"),
        nullable("XTYPE_BYTES_TO_NUM", "bytes"),
        nullable("XTYPE_FLOAT_NAN_TO_NUM", "float"),
        nullable("XTYPE_FLOAT_INF_TO_NUM", "float"),
        nullable("XTYPE_MAP_TO_BOOL", string_map),
        nullable("XTYPE_ARR_TO_BOOL", string_array),
    ]

    return json.dumps(
        {
            "type": "record",
            "name": "TypeTestRecord",
            "namespace": "com.snowflake.kafka.test",
            "fields": fields,
        }
    )


# Avro schema: single record with nullable unions for each typed column,
# plus XTYPE_* columns whose Avro types intentionally mismatch the Snowflake
# column types (for cross-type error testing).
VALUE_SCHEMA = avro.loads(_build_value_schema_json())

# Snowflake table DDL, as column name -> type clause.  Positive columns match
# Avro types; XTYPE_* columns have intentionally mismatched types.
# Dict order is the column order of the CREATE TABLE statement that the
# avro_results fixture builds from this mapping.
COLUMNS = {
    "ID": "VARCHAR NOT NULL",
    "TEST_CASE": "VARCHAR",
    # Positive
    "COL_INT": "NUMBER",
    "COL_BIGINT": "NUMBER",
    "COL_FLOAT": "FLOAT",
    "COL_DOUBLE": "FLOAT",
    "COL_VARCHAR": "VARCHAR",
    "COL_BOOLEAN": "BOOLEAN",
    "COL_BINARY": "BINARY",
    "COL_DATE": "DATE",
    "COL_TS_NTZ": "TIMESTAMP_NTZ",
    "COL_ARRAY": "ARRAY",
    "COL_VARIANT": "VARIANT",
    # Cross-type mismatch targets
    "XTYPE_BYTES_TO_VARCHAR": "VARCHAR",
    "XTYPE_BYTES_TO_NUM": "NUMBER",
    "XTYPE_FLOAT_NAN_TO_NUM": "NUMBER",
    "XTYPE_FLOAT_INF_TO_NUM": "NUMBER",
    "XTYPE_MAP_TO_BOOL": "BOOLEAN",
    "XTYPE_ARR_TO_BOOL": "BOOLEAN",
    # Not part of the Avro value schema; the fixture orders its SELECT by
    # RECORD_METADATA:"offset" (presumably filled in by the connector).
    "RECORD_METADATA": "VARIANT",
}

# Days from epoch for known dates
_DATE_2024_01_15 = (datetime.date(2024, 1, 15) - datetime.date(1970, 1, 1)).days
_DATE_EPOCH = 0

# Millis from epoch for known timestamps (UTC)
_TS_2024_01_15_10_00 = int(
    datetime.datetime(2024, 1, 15, 10, 0, 0, tzinfo=datetime.timezone.utc).timestamp()
    * 1000
)
_TS_EPOCH = 0


# ---------------------------------------------------------------------------
# Test cases
# ---------------------------------------------------------------------------

# One Kafka record is produced per entry, in list order; each record sets only
# the entry's target column and leaves every other nullable field null (see
# the avro_results fixture).
# Positional arguments, as used in this module: case name (also sent as the
# row ID), target column, value to send, expected outcome (OK / ERR).
# The keyword arguments (expected_value, approx, group) are interpreted by
# Case / Results in conftest, which is not shown here; `group` is also what
# _cases_where filters on to route cases to their dedicated tests.
CASES = [
    # ---- NUMBER (Avro int, 32-bit) ----
    Case("int_pos", "COL_INT", 42, OK, expected_value=42),
    Case("int_neg", "COL_INT", -100, OK, expected_value=-100),
    Case("int_zero", "COL_INT", 0, OK, expected_value=0),
    # 2**31 - 1, the largest 32-bit signed value
    Case("int_max", "COL_INT", 2147483647, OK, expected_value=2147483647),
    # ---- NUMBER (Avro long, 64-bit) ----
    Case("long_pos", "COL_BIGINT", 9999999999, OK, expected_value=9999999999),
    Case("long_neg", "COL_BIGINT", -9999999999, OK, expected_value=-9999999999),
    Case("long_zero", "COL_BIGINT", 0, OK, expected_value=0),
    # ---- FLOAT (Avro float, 32-bit) ----
    Case("float_pos", "COL_FLOAT", 3.14, OK, approx=0.01),
    Case("float_neg", "COL_FLOAT", -2.72, OK, approx=0.01),
    Case("float_nan", "COL_FLOAT", float("nan"), OK, group="float_special"),
    Case("float_inf", "COL_FLOAT", float("inf"), OK, group="float_special"),
    Case("float_neginf", "COL_FLOAT", float("-inf"), OK, group="float_special"),
    # ---- FLOAT (Avro double, 64-bit) ----
    Case("dbl_pos", "COL_DOUBLE", 3.14159265358979, OK, approx=1e-6),
    Case("dbl_neg", "COL_DOUBLE", -2.71828182845905, OK, approx=1e-6),
    Case("dbl_nan", "COL_DOUBLE", float("nan"), OK, group="float_special"),
    Case("dbl_inf", "COL_DOUBLE", float("inf"), OK, group="float_special"),
    Case("dbl_neginf", "COL_DOUBLE", float("-inf"), OK, group="float_special"),
    # ---- VARCHAR (Avro string) ----
    Case("str_normal", "COL_VARCHAR", "hello world", OK),
    Case("str_empty", "COL_VARCHAR", "", OK),
    Case("str_unicode", "COL_VARCHAR", "\u3053\u3093\u306b\u3061\u306f", OK),
    # ---- BOOLEAN (Avro boolean) ----
    Case("bool_true", "COL_BOOLEAN", True, OK),
    Case("bool_false", "COL_BOOLEAN", False, OK),
    # ---- BINARY (Avro bytes) ----
    Case("bin_normal", "COL_BINARY", b"\x01\x02\x03\x04", OK),
    Case("bin_empty", "COL_BINARY", b"", OK),
    # ---- DATE (Avro date logical type: days from epoch) ----
    Case(
        "date_normal",
        "COL_DATE",
        _DATE_2024_01_15,
        OK,
        expected_value=datetime.date(2024, 1, 15),
    ),
    Case(
        "date_epoch",
        "COL_DATE",
        _DATE_EPOCH,
        OK,
        expected_value=datetime.date(1970, 1, 1),
    ),
    # ---- TIMESTAMP_NTZ (Avro timestamp-millis: millis from epoch UTC) ----
    Case(
        "ts_normal",
        "COL_TS_NTZ",
        _TS_2024_01_15_10_00,
        OK,
        expected_value=datetime.datetime(2024, 1, 15, 10, 0, 0),
    ),
    Case(
        "ts_epoch",
        "COL_TS_NTZ",
        _TS_EPOCH,
        OK,
        expected_value=datetime.datetime(1970, 1, 1, 0, 0, 0),
    ),
    # ---- ARRAY (Avro array of strings) ----
    Case("arr_normal", "COL_ARRAY", ["hello", "world"], OK),
    Case("arr_empty", "COL_ARRAY", [], OK),
    # ---- VARIANT (Avro map string->string) ----
    Case("map_normal", "COL_VARIANT", {"key1": "value1", "key2": "value2"}, OK),
    Case("map_empty", "COL_VARIANT", {}, OK),
    # ---- NULL values ----
    # The columns listed here must stay in sync with the parametrize list of
    # test_null, which looks each one up by column and group "null".
    Case("null_int", "COL_INT", None, OK, group="null"),
    Case("null_varchar", "COL_VARCHAR", None, OK, group="null"),
    Case("null_boolean", "COL_BOOLEAN", None, OK, group="null"),
    Case("null_binary", "COL_BINARY", None, OK, group="null"),
    Case("null_date", "COL_DATE", None, OK, group="null"),
    # ---- Cross-type mismatch (Avro-specific, not covered by JSON tests) ----
    # bytes→VARCHAR: v4-compat rejects (RowValidator TEXT validation rejects byte[]),
    # v4-ht coerces to base64 (SDK accepts byte[] for VARCHAR).
    # Kept in its own group because the expected outcome depends on the mode;
    # test_cross_type_bytes_to_varchar handles it.
    Case(
        "xtype_bytes_varchar",
        "XTYPE_BYTES_TO_VARCHAR",
        b"\x01\x02",
        ERR,
        group="xtype_bytes_varchar",
    ),
    Case("xtype_bytes_num", "XTYPE_BYTES_TO_NUM", b"\x01\x02", ERR, group="xtype"),
    Case("xtype_nan_num", "XTYPE_FLOAT_NAN_TO_NUM", float("nan"), ERR, group="xtype"),
    Case("xtype_inf_num", "XTYPE_FLOAT_INF_TO_NUM", float("inf"), ERR, group="xtype"),
    Case("xtype_map_bool", "XTYPE_MAP_TO_BOOL", {"k": "v"}, ERR, group="xtype"),
    Case("xtype_arr_bool", "XTYPE_ARR_TO_BOOL", ["a"], ERR, group="xtype"),
]

# Groups with dedicated test functions (excluded from per-column tests).
# The per-column tests pass this set as `exclude_groups` to _cases_where, so
# a case tagged with one of these groups is only checked by its own test:
# test_float_special, test_null, test_cross_type_mismatch and
# test_cross_type_bytes_to_varchar respectively.
_SPECIAL_GROUPS = {
    "float_special",
    "null",
    "xtype",
    "xtype_bytes_varchar",
}


def _cases_where(*, col=None, expect=None, group=None, exclude_groups=None):
    """Filter CASES by column, outcome, and/or group."""
    result = CASES
    if col is not None:
        result = [c for c in result if c.col == col]
    if expect is not None:
        result = [c for c in result if c.expect == expect]
    if group is not None:
        result = [c for c in result if c.group == group]
    if exclude_groups is not None:
        result = [c for c in result if c.group not in exclude_groups]
    return result


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module", params=["v4-compat", "v4-ht"])
def avro_mode(request):
    """Ingestion mode under test; dependent fixtures and tests run once per param."""
    mode = request.param
    return mode


@pytest.fixture(scope="module")
def avro_mode_salt(session_name_salt, avro_mode):
    """Session salt extended with a per-mode suffix, so each mode gets its own names.

    Raises KeyError for a mode that has no suffix registered.
    """
    mode_suffixes = {"v4-compat": "_avro", "v4-ht": "_avro_ht"}
    return f"{session_name_salt}{mode_suffixes[avro_mode]}"


@pytest.fixture(scope="module")
def avro_results(driver, avro_mode_salt, avro_mode):
    """Single-table batch connector for Avro type compatibility tests.

    Creates one table with all typed columns, sends every CASES entry in a
    single Avro batch, waits for ingested rows, queries them, and yields a
    frozen Results object for assertion.
    """
    table_name = f"dt_avro{avro_mode_salt}"
    sf_table = table_name
    quoted_table = quote_name(sf_table)

    # Consistent timezone for timestamp tests
    driver.snowflake_conn.cursor().execute("ALTER SESSION SET TIMEZONE = 'UTC'")

    # Create table from COLUMNS spec
    col_defs = ", ".join(f"{name} {ddl}" for name, ddl in COLUMNS.items())
    error_logging = " ERROR_LOGGING = TRUE" if avro_mode == "v4-ht" else ""
    driver.snowflake_conn.cursor().execute(
        f"CREATE OR REPLACE TABLE {quoted_table} ({col_defs}){error_logging}"
    )
    driver.snowflake_conn.cursor().execute(
        f"ALTER TABLE {quoted_table} SET ENABLE_SCHEMA_EVOLUTION = TRUE"
    )

    # Create topic
    driver.createTopics(table_name, partitionNum=1, replicationNum=1)

    # Build connector config inline
    config = {
        **V4_CONFIG_TEMPLATE,
        "topics": "SNOWFLAKE_TEST_TOPIC",
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "io.confluent.connect.avro.AvroConverter",
        "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
        "snowflake.enable.schematization": "true",
        "errors.tolerance": "all",
        "errors.log.enable": "true",
    }
    match avro_mode:
        case "v4-compat":
            config["snowflake.validation"] = "client_side"
        case "v4-ht":
            config["snowflake.validation"] = "server_side"

    rest_request = driver.createConnector(
        name_salt=avro_mode_salt,
        unsalted_name="dt_avro",
        config_template=config,
    )
    connector_name = rest_request["name"]
    driver.startConnectorWaitTime()

    # Build and send all records as Avro
    records = []
    for case in CASES:
        record = {
            "ID": case.name,
            "TEST_CASE": case.name,
            # All nullable fields default to None
            "COL_INT": None,
            "COL_BIGINT": None,
            "COL_FLOAT": None,
            "COL_DOUBLE": None,
            "COL_VARCHAR": None,
            "COL_BOOLEAN": None,
            "COL_BINARY": None,
            "COL_DATE": None,
            "COL_TS_NTZ": None,
            "COL_ARRAY": None,
            "COL_VARIANT": None,
            "XTYPE_BYTES_TO_VARCHAR": None,
            "XTYPE_BYTES_TO_NUM": None,
            "XTYPE_FLOAT_NAN_TO_NUM": None,
            "XTYPE_FLOAT_INF_TO_NUM": None,
            "XTYPE_MAP_TO_BOOL": None,
            "XTYPE_ARR_TO_BOOL": None,
        }
        record[case.col] = case.value
        records.append(record)

    driver.sendAvroSRData(table_name, records, VALUE_SCHEMA)

    # Wait until row count stabilizes (same approach as JSON test).
    # Cannot predict exact count: error cases won't land in table.
    STABLE_SECS = 15
    deadline = time.monotonic() + 120
    last_count = 0
    stable_since = None

    while time.monotonic() < deadline:
        count = driver.select_number_of_records(sf_table) or 0
        if count != last_count:
            last_count = count
            stable_since = time.monotonic()
        elif stable_since and count > 0:
            if (time.monotonic() - stable_since) >= STABLE_SECS:
                logger.info(
                    "Row count stabilized at %d for %ds, proceeding",
                    count,
                    STABLE_SECS,
                )
                break
        if failed := driver.get_failed_tasks(connector_name):
            logger.warning(
                "Connector task failed: %s", failed[0].get("trace", "")[:200]
            )
            break
        time.sleep(5)
    else:
        if last_count == 0:
            logger.warning(
                "Stabilization timed out with 0 rows -- connector may not be ingesting"
            )

    # Query all rows
    cursor = driver.snowflake_conn.cursor()
    cursor.execute(
        f'SELECT * FROM {quoted_table} ORDER BY RECORD_METADATA:"offset"::int'
    )
    col_names = [desc[0] for desc in cursor.description]
    raw_rows = cursor.fetchall()

    row_lookup = {}
    for row in raw_rows:
        row_dict = dict(zip(col_names, row))
        row_id = row_dict.get("ID")
        if row_id:
            row_lookup[row_id] = row_dict

    # Query error table for v4-ht mode
    error_table_rows = []
    if avro_mode == "v4-ht":
        try:
            et_cursor = driver.snowflake_conn.cursor()
            et_cursor.execute(f"SELECT * FROM ERROR_TABLE({quoted_table})")
            et_col_names = [desc[0] for desc in et_cursor.description]
            for row in et_cursor.fetchall():
                error_table_rows.append(dict(zip(et_col_names, row)))
            et_cursor.close()
        except Exception as e:
            logger.warning("Could not query error table: %s", e)

    logger.info(
        "Avro results for mode=%s: %d rows, %d error_table, %d sent",
        avro_mode,
        len(row_lookup),
        len(error_table_rows),
        len(CASES),
    )

    result = Results(
        rows=row_lookup,
        dlq_ids=frozenset(),  # DLQ messages are Avro-encoded, can't parse case IDs
        mode=avro_mode,
        total_sent=len(CASES),
        columns=COLUMNS,
        error_table_rows=tuple(error_table_rows),
    )

    try:
        yield result
    finally:
        driver.closeConnector(connector_name)
        try:
            driver.deleteTopic(table_name)
        except Exception:
            pass


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _assert_all(results, cases):
    """Assert all cases in a list, dispatching to assert_ingested or assert_error."""
    for case in cases:
        if case.expect == OK:
            results.assert_ingested(case)
        else:
            _assert_error_no_dlq(results, case)


def _assert_error_no_dlq(results, case):
    """Assert case did NOT land in table. No DLQ check (Avro-encoded DLQ)."""
    assert case.name not in results.rows, (
        f"[{case.name}] expected NOT in table but found: "
        f"{results.rows[case.name].get(case.col)!r} (mode={results.mode})"
    )


# ---------------------------------------------------------------------------
# Tests — positive
# ---------------------------------------------------------------------------


def test_int(avro_results):
    """NUMBER from Avro int: 32-bit signed integers."""
    _assert_all(
        avro_results, _cases_where(col="COL_INT", exclude_groups=_SPECIAL_GROUPS)
    )


def test_long(avro_results):
    """NUMBER from Avro long: 64-bit signed integers."""
    _assert_all(
        avro_results, _cases_where(col="COL_BIGINT", exclude_groups=_SPECIAL_GROUPS)
    )


def test_float(avro_results):
    """FLOAT from Avro float: 32-bit values (excluding NaN/Inf specials)."""
    _assert_all(
        avro_results, _cases_where(col="COL_FLOAT", exclude_groups=_SPECIAL_GROUPS)
    )


def test_double(avro_results):
    """FLOAT from Avro double: 64-bit values (excluding NaN/Inf specials)."""
    _assert_all(
        avro_results, _cases_where(col="COL_DOUBLE", exclude_groups=_SPECIAL_GROUPS)
    )


def test_float_special(avro_results):
    """FLOAT NaN/Inf from native Avro float and double values.

    Unlike JSON where NaN/Inf are string representations, Avro sends native
    IEEE 754 NaN/Inf float values through the pipeline.

    Custom comparison: Results._compare_float handles string NaN ("NaN") but
    not float NaN (which is what Avro produces). We check presence + value
    directly instead of using assert_ingested.
    """
    for case in _cases_where(group="float_special"):
        assert case.name in avro_results.rows, (
            f"[{case.name}] expected in table but not found (mode={avro_results.mode})"
        )
        actual = avro_results.rows[case.name].get(case.col)
        sent = case.value
        if math.isnan(sent):
            assert actual is not None and math.isnan(float(actual)), (
                f"[{case.name}] expected NaN, got {actual!r}"
            )
        elif math.isinf(sent):
            actual_f = float(actual)
            assert math.isinf(actual_f) and (actual_f > 0) == (sent > 0), (
                f"[{case.name}] expected {'Inf' if sent > 0 else '-Inf'}, got {actual!r}"
            )


def test_string(avro_results):
    """VARCHAR from Avro string."""
    _assert_all(
        avro_results, _cases_where(col="COL_VARCHAR", exclude_groups=_SPECIAL_GROUPS)
    )


def test_boolean(avro_results):
    """BOOLEAN from Avro boolean: native true/false."""
    _assert_all(
        avro_results, _cases_where(col="COL_BOOLEAN", exclude_groups=_SPECIAL_GROUPS)
    )


def test_binary(avro_results):
    """BINARY from Avro bytes: raw byte arrays (not hex strings like JSON)."""
    _assert_all(
        avro_results, _cases_where(col="COL_BINARY", exclude_groups=_SPECIAL_GROUPS)
    )


def test_date(avro_results):
    """DATE from Avro date logical type (days from epoch)."""
    _assert_all(
        avro_results, _cases_where(col="COL_DATE", exclude_groups=_SPECIAL_GROUPS)
    )


def test_timestamp_ntz(avro_results):
    """TIMESTAMP_NTZ from Avro timestamp-millis logical type."""
    _assert_all(
        avro_results, _cases_where(col="COL_TS_NTZ", exclude_groups=_SPECIAL_GROUPS)
    )


def test_array(avro_results):
    """ARRAY from Avro array of strings."""
    _assert_all(
        avro_results, _cases_where(col="COL_ARRAY", exclude_groups=_SPECIAL_GROUPS)
    )


def test_variant(avro_results):
    """VARIANT from Avro map (string->string)."""
    _assert_all(
        avro_results, _cases_where(col="COL_VARIANT", exclude_groups=_SPECIAL_GROUPS)
    )


# ---------------------------------------------------------------------------
# Tests — null
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
    "col",
    ["COL_INT", "COL_VARCHAR", "COL_BOOLEAN", "COL_BINARY", "COL_DATE"],
)
def test_null(avro_results, col):
    """A null sent through an Avro nullable union must be stored as NULL.

    Looks up the "null"-group case for ``col``, requires it to be ingested,
    and then requires the column value in that row to be None.
    """
    null_case = next(filter(lambda c: c.col == col and c.group == "null", CASES))
    avro_results.assert_ingested(null_case)
    stored = avro_results.rows[null_case.name].get(col)
    assert stored is None, f"[{null_case.name}] expected NULL, got {stored!r}"


# ---------------------------------------------------------------------------
# Tests — cross-type mismatch (Avro-specific)
# ---------------------------------------------------------------------------


def test_cross_type_bytes_to_varchar(avro_results):
    """Avro bytes → VARCHAR: v4-compat rejects, v4-ht coerces to base64.

    RowValidator's TEXT validation rejects byte[], so v4-compat errors.
    The SSv2 SDK (v4-ht) accepts byte[] for VARCHAR and coerces to base64.
    """
    case = next(c for c in CASES if c.name == "xtype_bytes_varchar")
    if avro_results.mode == "v4-compat":
        _assert_error_no_dlq(avro_results, case)
    else:
        assert case.name in avro_results.rows, (
            f"[{case.name}] expected in table (v4-ht coercion) but not found"
        )
        actual = avro_results.rows[case.name].get(case.col)
        assert actual == "AQI=", f"[{case.name}] expected base64 'AQI=', got {actual!r}"


def test_cross_type_mismatch(avro_results):
    """Avro-specific cross-type errors not covered by JSON tests.

    These cases send Avro-typed values (bytes, native float NaN/Inf, map, array)
    to incompatible Snowflake column types. JSON tests can't produce these Java
    types (byte[], native float NaN, typed Avro map/array).
    """
    for case in _cases_where(group="xtype"):
        _assert_error_no_dlq(avro_results, case)


================================================
FILE: test/tests/compatibility/test_unsupported_types.py
================================================
"""Tests for data types unsupported (or partially supported) by the Kafka connector.

These types crash streaming channels or fail in ways that can't share a batch
connector with well-behaved types. Each test gets its own connector via the
``ingest_one_type_abort`` fixture (abort mode — errors.tolerance=none).

The connector task fails immediately on unsupported types for v3.
For v4 modes (v4-compat and v4-ht), the async SDK flush failure does not
propagate back to the KC task — the task stays RUNNING and 0 rows land.

Types tested:
  - GEOGRAPHY: GeoJSON data — not supported by Snowpipe Streaming
  - GEOMETRY: WKT data — not supported by Snowpipe Streaming
  - VECTOR: embedding arrays — v4 only, not supported by v3 classic SDK
  - Structured OBJECT/ARRAY: typed columns with parameters — v3 rejects,
    v4 accepts (known divergence)
"""

import pytest

# pytest applies the `compatibility` marker to every test in this module.
pytestmark = pytest.mark.compatibility


def _assert_connector_error(result, ingestion_mode, type_name, expected_fragments):
    """Assert the connector failed with an error matching at least one expected fragment.

    For v4 modes (v4-compat and v4-ht), the SDK async flush failure does not
    propagate back to the KC task — the task stays RUNNING and 0 rows land.
    For v3, the channel open is rejected synchronously and the task fails.
    """
    if ingestion_mode in ("v4-ht", "v4-compat"):
        assert len(result.values) == 0, (
            f"Expected no rows for {type_name} on {ingestion_mode}, got {len(result.values)}"
        )
        return

    assert result.connector_error is not None, (
        f"Expected connector task failure for {type_name} on {ingestion_mode}, "
        f"but connector succeeded with {len(result.values)} rows"
    )
    matched = any(f in result.connector_error for f in expected_fragments)
    assert matched, (
        f"Connector error for {type_name} on {ingestion_mode} did not match "
        f"any expected pattern {expected_fragments}.\n"
        f"Actual error (first 500 chars): {result.connector_error[:500]}"
    )


# Substrings that _assert_connector_error looks for in the connector error;
# finding any one of them is enough. Used by the GEOGRAPHY and GEOMETRY tests.
# Error patterns observed in connector traces for v3:
#   SFException "does not support columns of type" (channel open rejected by server)
# v4 modes never reach this assertion — they exit early with 0 rows.
_GEO_ERROR_FRAGMENTS = [
    "does not support columns of type",
    "TopicPartitionChannelInsertionException",
    "Failed to insert rows",
]

# v3 structured types / unsupported column types: channel open or schema setup failure
# Used by the VECTOR and structured OBJECT/ARRAY tests, on v3 only.
_CHANNEL_OPEN_ERROR_FRAGMENTS = [
    "does not support columns of type",
    "Open channel request failed",
    "Unknown data type for column",
]


# ---------------------------------------------------------------------------
# Geospatial types (unsupported by Snowpipe Streaming)
# ---------------------------------------------------------------------------


def test_dt_geography(ingest_one_type_abort, ingestion_mode):
    """GEOGRAPHY: GeoJSON point data — unsupported by Snowpipe Streaming."""
    result = ingest_one_type_abort(
        "dt_geography",
        "GEOGRAPHY",
        [
            '{"type":"Point","coordinates":[-122.35,37.55]}',
            '{"type":"Point","coordinates":[0,0]}',
        ],
    )
    _assert_connector_error(result, ingestion_mode, "GEOGRAPHY", _GEO_ERROR_FRAGMENTS)


def test_dt_geometry(ingest_one_type_abort, ingestion_mode):
    """GEOMETRY: WKT geometry data — unsupported by Snowpipe Streaming."""
    result = ingest_one_type_abort(
        "dt_geometry",
        "GEOMETRY",
        ["POINT(-122.35 37.55)", "POINT(0 0)"],
    )
    _assert_connector_error(result, ingestion_mode, "GEOMETRY", _GEO_ERROR_FRAGMENTS)


# ---------------------------------------------------------------------------
# VECTOR type (v4 only)
# ---------------------------------------------------------------------------


def test_dt_vector(ingest_one_type_abort, ingestion_mode):
    """VECTOR(FLOAT, 3): vector embeddings — not supported by v3 classic SDK."""
    result = ingest_one_type_abort(
        "dt_vector",
        "VECTOR(FLOAT, 3)",
        [[1.0, 2.0, 3.0], [0.0, 0.0, 0.0], [-1.5, 2.5, -3.5]],
    )
    if ingestion_mode == "v3":
        _assert_connector_error(
            result, ingestion_mode, "VECTOR", _CHANNEL_OPEN_ERROR_FRAGMENTS
        )
    else:
        assert len(result.values) == 3, (
            f"Expected 3 VECTOR rows, got {len(result.values)}; "
            f"error={result.connector_error}"
        )


# ---------------------------------------------------------------------------
# Structured OBJECT / ARRAY
#
# KNOWN DIVERGENCE: v3 ColumnSchema rejects OBJECT/ARRAY with typed parameters,
# but v4 accepts them.  v4's SSv2 handles structured types natively.
# ---------------------------------------------------------------------------


def test_dt_structured_object(ingest_one_type_abort, ingestion_mode):
    """Structured OBJECT(name VARCHAR, age NUMBER) — rejected by v3, accepted by v4."""
    result = ingest_one_type_abort(
        "dt_struct_obj",
        "OBJECT(name VARCHAR, age NUMBER)",
        [{"name": "Alice", "age": 30}],
    )
    if ingestion_mode == "v3":
        _assert_connector_error(
            result, ingestion_mode, "structured OBJECT", _CHANNEL_OPEN_ERROR_FRAGMENTS
        )
    else:
        # v4-compat and v4-ht accept structured OBJECT
        assert len(result.values) == 1, (
            f"Expected 1 row for structured OBJECT on {ingestion_mode}, "
            f"got {len(result.values)}; error={result.connector_error}"
        )


def test_dt_structured_array(ingest_one_type_abort, ingestion_mode):
    """Structured ARRAY(NUMBER) — rejected by v3, accepted by v4."""
    result = ingest_one_type_abort(
        "dt_struct_arr",
        "ARRAY(NUMBER)",
        [[1, 2, 3]],
    )
    if ingestion_mode == "v3":
        _assert_connector_error(
            result, ingestion_mode, "structured ARRAY", _CHANNEL_OPEN_ERROR_FRAGMENTS
        )
    else:
        # v4-compat and v4-ht accept structured ARRAY
        assert len(result.values) == 1, (
            f"Expected 1 row for structured ARRAY on {ingestion_mode}, "
            f"got {len(result.values)}; error={result.connector_error}"
        )


================================================
FILE: test/tests/high_performance/test_case_sensitivity.py
================================================
from dataclasses import dataclass
import json
from typing import Optional

import pytest
from snowflake.connector import DictCursor

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.driver import KafkaDriver
from lib.fixtures.table import Table


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize(
    "sanitize_autogenerated_table_names",
    [True, False],
    ids=["sanitized", "unsanitized"],
)
def test_high_performance_case_sensitivity_table_name(
    driver: KafkaDriver,
    create_connector,
    create_topics,
    name_salt,
    wait_for_rows,
    sanitize_autogenerated_table_names,
):
    """Assert table name derived by the connector matches our expectations.

    One topic is created per case (without tables).  Cases that define a
    ``topic2table_value`` contribute an entry to ``snowflake.topic2table.map``;
    the others rely on the topic name.  Every topic receives a single JSON
    record ``{"case_name": <case>}``, after which the test checks that a table
    with the expected (case-sensitive) name is listed by ``show tables`` and
    that its ``CASE_NAME`` value matches the record that was sent.

    The connector and the tables are removed in a ``finally`` block so they
    do not leak into the test schema when an assertion or a wait fails.
    """

    @dataclass(frozen=True)
    class TableNameCase:
        case_name: str  # description
        unsalted_topic_name: str
        topic2table_value: Optional[str]
        expected_table_name: str

    test_cases = [
        # Without topic2table.map, the connector passes the topic name as the table name.
        # With sanitization disabled, that name is passed as-is, i.e. not uppercased.
        TableNameCase(
            case_name="lower_a",
            unsalted_topic_name="a",
            topic2table_value=None,
            expected_table_name=f"A{name_salt}"
            if sanitize_autogenerated_table_names
            else f"a{name_salt}",
        ),
        TableNameCase(
            case_name="upper_b",
            unsalted_topic_name="B",
            topic2table_value=None,
            expected_table_name=f"B{name_salt}",
        ),
        TableNameCase(
            case_name="lower_c_mapped",
            unsalted_topic_name="c_topic",
            topic2table_value=f"c{name_salt}",
            expected_table_name=f"C{name_salt}",
        ),
        TableNameCase(
            case_name="upper_d_mapped",
            unsalted_topic_name="D_topic",
            topic2table_value=f"D{name_salt}",
            expected_table_name=f"D{name_salt}",
        ),
        TableNameCase(
            case_name="lower_e_mapped_quoted",
            unsalted_topic_name="e_topic",
            topic2table_value=f'"e{name_salt}"',
            expected_table_name=f"e{name_salt}",
        ),
        TableNameCase(
            case_name="upper_f_mapped_quoted",
            unsalted_topic_name="f_topic",
            topic2table_value=f'"F{name_salt}"',
            expected_table_name=f"F{name_salt}",
        ),
        TableNameCase(
            case_name="unicode_mapped_quoted",
            unsalted_topic_name="g_topic",
            topic2table_value=f'"❄️{name_salt}"',
            expected_table_name=f"❄️{name_salt}",
        ),
    ]

    topics = create_topics(
        [test_case.unsalted_topic_name for test_case in test_cases], with_tables=False
    )

    # Only cases with an explicit mapping appear in topic2table.map.
    topic2table_map = ",".join(
        f"{test_case.unsalted_topic_name}{name_salt}:{test_case.topic2table_value}"
        for test_case in test_cases
        if test_case.topic2table_value is not None
    )

    connector = create_connector(
        v4_config={
            key: value
            for key, value in {
                **V4_CONFIG_TEMPLATE,
                "topics": ",".join(topics),
                "snowflake.topic2table.map": topic2table_map,
                "key.converter": "org.apache.kafka.connect.storage.StringConverter",
                "value.converter": "org.apache.kafka.connect.json.JsonConverter",
                "value.converter.schemas.enable": "false",
                # high-performance defaults, but we also test with sanitized table names
                "snowflake.validation": "server_side",
                "snowflake.compatibility.enable.column.identifier.normalization": "false",
                "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true"
                if sanitize_autogenerated_table_names
                else "false",
            }.items()
            if value is not None
        }
    )

    # Everything after the connector exists runs under try/finally so that a
    # failing assertion or wait still triggers the cleanup below.
    try:
        driver.startConnectorWaitTime()

        for test_case in test_cases:
            driver.sendBytesData(
                f"{test_case.unsalted_topic_name}{name_salt}",
                [json.dumps({"case_name": test_case.case_name}).encode("utf-8")],
            )

        for test_case in test_cases:
            expected_table = Table(driver, test_case.expected_table_name)
            wait_for_rows(expected_table.name, 1, connector_name=connector.name)

            tables = (
                driver.snowflake_conn.cursor(DictCursor)
                .execute("show tables")
                .fetchall()
            )
            assert test_case.expected_table_name in [table["name"] for table in tables]

            # Make sure it's the correct one, i.e. has the data we sent it.
            assert expected_table.select_scalar("CASE_NAME") == test_case.case_name
    finally:
        # Cleanup - first remove the connector, then the tables.
        connector.close()
        for test_case in test_cases:
            Table(driver, test_case.expected_table_name).drop()


================================================
FILE: test/tests/iceberg/__init__.py
================================================
from lib.config_migration import V4_CONFIG_TEMPLATE


def json_connector_config(topic: str, schematization: bool, validation: bool) -> dict:
    """Assemble the v4 connector settings for JSON ingestion into an iceberg table.

    Args:
        topic: value stored under the ``topics`` key.
        schematization: rendered as ``"true"``/``"false"`` for
            ``snowflake.enable.schematization``; when true, column identifier
            normalization is switched on as well.
        validation: selects ``"client_side"`` (true) or ``"server_side"``
            (false) for ``snowflake.validation``.

    Returns:
        A new dict made of ``V4_CONFIG_TEMPLATE`` with the settings above
        layered on top.
    """
    validation_mode = "client_side" if validation else "server_side"
    overrides = {
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": str(schematization).lower(),
        "snowflake.validation": validation_mode,
        "topics": topic,
        "jmx": "true",
    }
    if schematization:
        # The JSON payloads use lowercase field names while the pre-declared
        # Snowflake columns are uppercase (ID, BODY_TEMPERATURE, etc.).
        # Normalization uppercases the field names so the row validator and
        # SSv2 can match them to those columns.
        overrides["snowflake.compatibility.enable.column.identifier.normalization"] = (
            "true"
        )
    return {**V4_CONFIG_TEMPLATE, **overrides}


================================================
FILE: test/tests/iceberg/test_iceberg_avro.py
================================================
"""E2E tests for Kafka Connector v4 iceberg Avro ingestion (via Schema Registry).

v4-only, confluent-only (requires Schema Registry for AvroConverter).

Tests the same schematization x validation matrix as the JSON iceberg tests,
but uses Avro-encoded records with Schema Registry.
"""

import json
import logging

import pytest
from confluent_kafka import avro

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.driver import KafkaDriver
from lib.matchers import ANY_INT

logger = logging.getLogger(__name__)

# Avro schema for record values: an int id, a double body_temperature and a
# string name.  Passed to driver.sendAvroSRData() by the test below.
VALUE_SCHEMA = avro.loads(
    """
{
    "type": "record",
    "name": "iceberg_avro_value",
    "fields": [
        {"name": "id", "type": "int"},
        {"name": "body_temperature", "type": "double"},
        {"name": "name", "type": "string"}
    ]
}
"""
)

# Avro schema for record keys: a record with a single int id field.
KEY_SCHEMA = avro.loads(
    """
{
    "type": "record",
    "name": "iceberg_avro_key",
    "fields": [
        {"name": "id", "type": "int"}
    ]
}
"""
)

# Number of key/value pairs produced per test run; also the row count the
# test waits for.
RECORD_COUNT = 100


def _avro_connector_config(topic: str, schematization: bool, validation: bool) -> dict:
    """Assemble the v4 connector settings for Avro (Schema Registry) ingestion.

    Both the key and the value use the Confluent ``AvroConverter``.
    ``schematization`` and ``validation`` are translated to
    ``snowflake.enable.schematization`` and ``snowflake.validation``; with
    schematization on, column identifier normalization is enabled too.

    Returns a new dict layered on top of ``V4_CONFIG_TEMPLATE``.
    """
    avro_converter = "io.confluent.connect.avro.AvroConverter"
    # NOTE(review): presumably a placeholder that is replaced with the real
    # registry URL before the config is submitted; confirm against the caller.
    registry_url = "CONFLUENT_SCHEMA_REGISTRY"

    settings = dict(V4_CONFIG_TEMPLATE)
    settings.update(
        {
            "tasks.max": "1",
            "key.converter": avro_converter,
            "key.converter.schema.registry.url": registry_url,
            "value.converter": avro_converter,
            "value.converter.schema.registry.url": registry_url,
            "snowflake.enable.schematization": str(schematization).lower(),
            "snowflake.validation": "client_side" if validation else "server_side",
            "topics": topic,
            "jmx": "true",
        }
    )
    if schematization:
        settings["snowflake.compatibility.enable.column.identifier.normalization"] = (
            "true"
        )
    return settings


@pytest.mark.iceberg
@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize(
    "schematization", [True, False], ids=["schema=on", "schema=off"]
)
@pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"])
def test_iceberg_avro_ingestion(
    driver: KafkaDriver,
    create_iceberg_table,
    create_topics,
    create_connector,
    wait_for_rows,
    validation: bool,
    schematization: bool,
):
    """Avro SR ingestion into an iceberg table, 2x2 matrix (validation x schematization).

    ``schema=off`` (bag-of-bits): the table is created with RECORD_METADATA
    VARIANT and RECORD_CONTENT VARIANT, and the Avro fields are read back out
    of RECORD_CONTENT.

    ``schema=on`` (typed columns): the table pre-declares ID, BODY_TEMPERATURE
    and NAME.  AvroConverter provides a Kafka Connect schema so the connector
    maps fields to the pre-declared columns directly.

    In both modes the row with the lowest offset is fetched and its payload
    fields plus offset/partition/topic metadata are compared with what was sent.
    """
    base_name = "iceberg_av_{}_{}".format(
        "compat" if validation else "ht",
        "s1" if schematization else "s0",
    )

    columns = (
        "(RECORD_METADATA VARIANT, ID BIGINT, BODY_TEMPERATURE DOUBLE, NAME TEXT)"
        if schematization
        else "(RECORD_METADATA VARIANT, RECORD_CONTENT VARIANT)"
    )

    table = create_iceberg_table(base_name, columns=columns, cleanup_topic=False)
    topic = create_topics([base_name], with_tables=False)[0]

    connector_config = _avro_connector_config(
        topic, schematization=schematization, validation=validation
    )
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    keys = []
    values = []
    for record_id in range(RECORD_COUNT):
        keys.append({"id": record_id})
        values.append({"id": record_id, "body_temperature": 36.6, "name": "Steve"})
    driver.sendAvroSRData(topic, values, VALUE_SCHEMA, keys, KEY_SCHEMA, partition=0)

    wait_for_rows(table.name, RECORD_COUNT)

    # Only the projection depends on the schematization mode; the ordering
    # clause and every assertion below are shared.
    first_by_offset = "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1"
    if schematization:
        projection = (
            '"ID", "BODY_TEMPERATURE", "NAME", '
            "PARSE_JSON(RECORD_METADATA):offset::NUMBER    AS OFFSET, "
            "PARSE_JSON(RECORD_METADATA):partition::NUMBER AS PARTITION, "
            "PARSE_JSON(RECORD_METADATA):topic::STRING     AS TOPIC"
        )
    else:
        projection = (
            "PARSE_JSON(RECORD_CONTENT):id::NUMBER            AS ID, "
            "PARSE_JSON(RECORD_CONTENT):body_temperature::FLOAT AS BODY_TEMPERATURE, "
            "PARSE_JSON(RECORD_CONTENT):name::STRING           AS NAME, "
            "PARSE_JSON(RECORD_METADATA):offset::NUMBER        AS OFFSET, "
            "PARSE_JSON(RECORD_METADATA):partition::NUMBER     AS PARTITION, "
            "PARSE_JSON(RECORD_METADATA):topic::STRING         AS TOPIC"
        )

    rows = table.select(projection, first_by_offset)
    assert rows, "Expected at least one row"
    row = rows[0]
    assert row["ID"] == 0, f"Expected id=0, got {row['ID']}"
    assert abs(float(row["BODY_TEMPERATURE"]) - 36.6) < 0.01, (
        f"Expected body_temperature≈36.6, got {row['BODY_TEMPERATURE']}"
    )
    assert row["NAME"] == "Steve", f"Expected name='Steve', got {row['NAME']}"
    assert row["OFFSET"] == 0, f"Expected offset=0, got {row['OFFSET']}"
    assert row["PARTITION"] == 0, f"Expected partition=0, got {row['PARTITION']}"
    assert row["TOPIC"] == topic, f"Expected topic={topic!r}, got {row['TOPIC']!r}"

    # Inspect the whole RECORD_METADATA document of the first record: offset,
    # partition, topic and the connector push time.
    # NOTE(review): the record key itself is not asserted here.
    meta_rows = table.select("PARSE_JSON(RECORD_METADATA) AS META", first_by_offset)
    metadata = json.loads(meta_rows[0]["META"])
    assert metadata["offset"] == 0
    assert metadata["partition"] == 0
    assert metadata["topic"] == topic
    assert metadata["SnowflakeConnectorPushTime"] == ANY_INT


================================================
FILE: test/tests/iceberg/test_iceberg_json.py
================================================
"""E2E tests for Kafka Connector v4 iceberg JSON ingestion.

These tests are v4-only. V3 is excluded because:
  - v3 requires ``snowflake.streaming.iceberg.enabled=true`` in the connector config
    which the config migration does not add (v3 iceberg was experimental)
  - v3 had custom iceberg code (IcebergInitService, IcebergTableStreamingRecordMapper)
    that has been removed in v4
  - v4 uses SSv2 which handles iceberg tables transparently

Prerequisites:
  - An AWS external volume named ``ICEBERG_EXTERNAL_VOLUME`` must exist in the test
    Snowflake account.  The default is ``kafka_push_e2e_volume_aws``.  Override
    with the environment variable ``ICEBERG_EXTERNAL_VOLUME``.
"""

import json
import logging

import pytest

from lib.driver import KafkaDriver
from tests.iceberg import json_connector_config

logger = logging.getLogger(__name__)

# Payload used for every record sent by test_iceberg_json_ingestion: three
# scalar fields plus one array and one nested object.
_SAMPLE_MESSAGE = {
    "id": 1,
    "body_temperature": 36.6,
    "name": "Steve",
    "approved_coffee_types": ["Espresso", "Doppio", "Ristretto", "Lungo"],
    "animals_possessed": {"dogs": True, "cats": False},
}
# Number of copies of _SAMPLE_MESSAGE produced per test run; also the row
# count the test waits for.
RECORD_COUNT = 100


@pytest.mark.iceberg
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize(
    "schematization", [True, False], ids=["schema=on", "schema=off"]
)
@pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"])
def test_iceberg_json_ingestion(
    driver: KafkaDriver,
    create_iceberg_table,
    create_topics,
    create_connector,
    wait_for_rows,
    validation: bool,
    schematization: bool,
):
    """JSON ingestion into an iceberg table, full 2x2 matrix (validation x schematization).

    Matrix axes:
      - validation (compat=true / ht=false): controls whether the client-side
        RowValidator runs.
      - schematization (on/off): controls how the connector maps records to columns.

    ``schema=off`` (bag-of-bits): the table has ``RECORD_METADATA VARIANT,
      RECORD_CONTENT VARIANT`` and the full JSON payload goes into
      RECORD_CONTENT.  Fields are read with
      ``PARSE_JSON(RECORD_CONTENT):field::TYPE`` because iceberg stores VARIANT
      as a string-encoded JSON literal.

    ``schema=on`` (typed columns): the table pre-declares every column of the
      sample message, scalars as typed columns (ID, BODY_TEMPERATURE, NAME) and
      complex fields as VARIANT (APPROVED_COFFEE_TYPES, ANIMALS_POSSESSED).
      RECORD_METADATA remains VARIANT, so no schema evolution is needed.  Typed
      columns are selected directly; RECORD_METADATA still goes through
      PARSE_JSON().
    """
    base_name = "iceberg_jv_{}_{}".format(
        "compat" if validation else "ht",
        "s1" if schematization else "s0",
    )

    if schematization:
        declared_columns = [
            "RECORD_METADATA VARIANT",
            "ID BIGINT",
            "BODY_TEMPERATURE DOUBLE",
            "NAME TEXT",
            "APPROVED_COFFEE_TYPES VARIANT",
            "ANIMALS_POSSESSED VARIANT",
        ]
    else:
        declared_columns = ["RECORD_METADATA VARIANT", "RECORD_CONTENT VARIANT"]
    columns = "(" + ", ".join(declared_columns) + ")"

    table = create_iceberg_table(base_name, columns=columns, cleanup_topic=False)
    topic = create_topics([base_name], with_tables=False)[0]

    connector_config = json_connector_config(
        topic, schematization=schematization, validation=validation
    )
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    # Every record carries the same payload.
    encoded_message = json.dumps(_SAMPLE_MESSAGE).encode("utf-8")
    records = [encoded_message] * RECORD_COUNT
    driver.sendBytesData(topic, records, partition=0)

    wait_for_rows(table.name, RECORD_COUNT)

    # The payload part of the projection and the "no rows" message depend on
    # the schematization mode; the metadata part and all checks are shared.
    if schematization:
        payload_projection = '"ID", "BODY_TEMPERATURE", "NAME", '
        no_rows_message = "Expected at least one row"
    else:
        payload_projection = (
            "PARSE_JSON(RECORD_CONTENT):id::NUMBER            AS ID, "
            "PARSE_JSON(RECORD_CONTENT):body_temperature::FLOAT AS BODY_TEMPERATURE, "
            "PARSE_JSON(RECORD_CONTENT):name::STRING           AS NAME, "
        )
        no_rows_message = "Expected at least one row in the iceberg table"
    metadata_projection = (
        "PARSE_JSON(RECORD_METADATA):offset::NUMBER        AS OFFSET, "
        "PARSE_JSON(RECORD_METADATA):partition::NUMBER     AS PARTITION, "
        "PARSE_JSON(RECORD_METADATA):topic::STRING         AS TOPIC, "
        "PARSE_JSON(RECORD_METADATA):SnowflakeConnectorPushTime::STRING AS PUSH_TIME"
    )

    rows = table.select(
        payload_projection + metadata_projection,
        "ORDER BY PARSE_JSON(RECORD_METADATA):offset::NUMBER LIMIT 1",
    )
    assert rows, no_rows_message
    row = rows[0]
    assert row["ID"] == 1, f"Expected id=1, got {row['ID']}"
    assert abs(float(row["BODY_TEMPERATURE"]) - 36.6) < 0.01, (
        f"Expected body_temperature≈36.6, got {row['BODY_TEMPERATURE']}"
    )
    assert row["NAME"] == "Steve", f"Expected name='Steve', got {row['NAME']}"
    assert row["OFFSET"] == 0, f"Expected offset=0, got {row['OFFSET']}"
    assert row["PARTITION"] == 0, f"Expected partition=0, got {row['PARTITION']}"
    assert row["TOPIC"] == topic, f"Expected topic={topic!r}, got {row['TOPIC']!r}"
    assert row["PUSH_TIME"] is not None, (
        "Expected SnowflakeConnectorPushTime to be set"
    )


================================================
FILE: test/tests/iceberg/test_iceberg_se_avro.py
================================================
"""Iceberg schema evolution E2E tests — Avro format (via Schema Registry).

Tests client-side SE with Avro-encoded records: the connector detects new
columns from the Avro schema and issues ``ALTER ICEBERG TABLE ADD COLUMN``.

v4-only, confluent-only.
"""

import logging

import pytest
from confluent_kafka import avro

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.driver import KafkaDriver

logger = logging.getLogger(__name__)

# Initial Avro value schema: a record with a string CITY and an int AGE.
WAVE1_SCHEMA = avro.loads(
    """
{
    "type": "record",
    "name": "iceberg_se_avro_record",
    "fields": [
        {"name": "CITY", "type": "string"},
        {"name": "AGE", "type": "int"}
    ]
}
"""
)

# Evolved Avro value schema: same record name and fields as WAVE1_SCHEMA plus
# a nullable string COUNTRY that defaults to null.
WAVE2_SCHEMA = avro.loads(
    """
{
    "type": "record",
    "name": "iceberg_se_avro_record",
    "fields": [
        {"name": "CITY", "type": "string"},
        {"name": "AGE", "type": "int"},
        {"name": "COUNTRY", "type": ["null", "string"], "default": null}
    ]
}
"""
)


def _avro_se_connector_config(topic: str) -> dict:
    """Assemble the v4 connector settings for the Avro schema-evolution test.

    Keys use ``StringConverter``, values use the Confluent ``AvroConverter``;
    schematization is enabled and validation is ``client_side``.  Returns a
    new dict layered on top of ``V4_CONFIG_TEMPLATE``.
    """
    settings = dict(V4_CONFIG_TEMPLATE)
    settings.update(
        {
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "io.confluent.connect.avro.AvroConverter",
            "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
            "snowflake.enable.schematization": "true",
            "snowflake.validation": "client_side",
            "topics": topic,
            "jmx": "true",
        }
    )
    return settings


@pytest.mark.iceberg
@pytest.mark.schema_evolution
@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_iceberg_se_avro_add_column(
    driver: KafkaDriver,
    create_iceberg_table,
    create_topics,
    create_connector,
    wait_for_rows,
):
    """Iceberg SE with Avro: connector adds columns from evolving Avro schemas.

    The table starts with RECORD_METADATA VARIANT + CITY TEXT.  Wave 1 sends
    100 Avro records shaped ``{CITY, AGE}`` and expects an AGE column to
    appear.  Wave 2 sends 50 records with the evolved ``{CITY, AGE, COUNTRY}``
    schema and expects a COUNTRY column, a populated COUNTRY on wave-2 rows and
    NULL COUNTRY on exactly the wave-1 rows.

    Avro has an explicit schema so the connector knows the exact type for each
    new column (unlike JSON where types are inferred from values).
    """
    base_name = "iceberg_se_avro"
    table = create_iceberg_table(
        base_name,
        columns="(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    created_topics = create_topics([base_name], with_tables=False)
    topic = created_topics[0]

    create_connector(v4_config=_avro_se_connector_config(topic))
    driver.startConnectorWaitTime()

    # Wave 1: original schema, AGE is new to the table.
    first_wave_size = 100
    first_wave = []
    for age in range(first_wave_size):
        first_wave.append({"CITY": "Hsinchu", "AGE": age})
    driver.sendAvroSRData(topic, first_wave, WAVE1_SCHEMA, partition=0)

    wait_for_rows(table.name, first_wave_size)

    cols = set()
    for column in table.schema():
        cols.add(column[0])
    assert "AGE" in cols, (
        f"Expected connector SE to add AGE column after wave 1, got: {cols}"
    )

    # Wave 2: evolved schema, COUNTRY is new to the table.
    second_wave_size = 50
    second_wave = []
    for offset in range(second_wave_size):
        second_wave.append({"CITY": "Taipei", "AGE": 100 + offset, "COUNTRY": "TW"})
    driver.sendAvroSRData(topic, second_wave, WAVE2_SCHEMA, partition=0)

    wait_for_rows(table.name, first_wave_size + second_wave_size)

    cols = set()
    for column in table.schema():
        cols.add(column[0])
    assert "COUNTRY" in cols, (
        f"Expected connector SE to add COUNTRY column after wave 2, got: {cols}"
    )

    rows = table.select('"CITY", "COUNTRY"', "WHERE \"CITY\" = 'Taipei' LIMIT 1")
    assert rows, "Expected at least one wave-2 row with CITY = 'Taipei'"
    taipei_row = rows[0]
    assert taipei_row["CITY"] == "Taipei"
    assert taipei_row["COUNTRY"] == "TW", (
        f"Expected COUNTRY='TW', got {taipei_row['COUNTRY']!r}"
    )

    # Rows written before COUNTRY existed must read back as NULL.
    count_rows = table.select("COUNT(*) AS CNT", 'WHERE "COUNTRY" IS NULL')
    null_country_count = count_rows[0]["CNT"]
    assert null_country_count == first_wave_size, (
        f"Expected {first_wave_size} rows with NULL COUNTRY, got {null_country_count}"
    )


================================================
FILE: test/tests/iceberg/test_iceberg_se_json.py
================================================
"""Iceberg schema evolution E2E tests — JSON format.

Tests client-side SE (RowValidator-driven ``ALTER ICEBERG TABLE ADD COLUMN``)
and documents the server-side SE limitation (xfail).

v4-only: v3 iceberg support was removed in v4.
"""

import json
import logging

import pytest

from lib.driver import KafkaDriver
from tests.iceberg import json_connector_config

logger = logging.getLogger(__name__)


@pytest.mark.iceberg
@pytest.mark.schema_evolution
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_iceberg_se_add_column(
    driver: KafkaDriver,
    create_iceberg_table,
    create_topics,
    create_connector,
    wait_for_rows,
):
    """Iceberg schema evolution: connector adds a new column mid-stream (client-side SE).

    The table starts with RECORD_METADATA VARIANT + CITY TEXT.  Wave 1 records
    carry ``{city, age}``: the connector's RowValidator detects AGE as new and
    issues ``ALTER ICEBERG TABLE ADD COLUMN``.  Wave 2 adds ``country``, so SE
    adds COUNTRY.

    The connector runs with ``validation=True`` (compat / client-side SE) so
    the RowValidator drives column additions.  Server-side SE
    (validation=false) does not support typed column additions on iceberg
    tables.
    """

    def _encode(payload: dict) -> bytes:
        # Records are sent as UTF-8 encoded JSON documents.
        return json.dumps(payload).encode("utf-8")

    base_name = "iceberg_se_addcol"
    table = create_iceberg_table(
        base_name,
        columns="(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    created_topics = create_topics([base_name], with_tables=False)
    topic = created_topics[0]

    connector_config = json_connector_config(
        topic, schematization=True, validation=True
    )
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    # Wave 1: AGE is new to the table.
    first_wave_size = 100
    first_wave = []
    for age in range(first_wave_size):
        first_wave.append(_encode({"city": "Hsinchu", "age": age}))
    driver.sendBytesData(topic, first_wave, partition=0)
    wait_for_rows(table.name, first_wave_size)

    cols = set()
    for column in table.schema():
        cols.add(column[0])
    assert "AGE" in cols, (
        f"Expected connector SE to add AGE column after wave 1, got: {cols}"
    )

    # Wave 2: COUNTRY is new to the table.
    second_wave_size = 50
    second_wave = []
    for offset in range(second_wave_size):
        second_wave.append(
            _encode({"city": "Taipei", "age": 100 + offset, "country": "TW"})
        )
    driver.sendBytesData(topic, second_wave, partition=0)
    wait_for_rows(table.name, first_wave_size + second_wave_size)

    rows = table.select('"CITY", "COUNTRY"', "WHERE \"CITY\" = 'Taipei' LIMIT 1")
    assert rows, "Expected at least one wave-2 row with CITY = 'Taipei'"
    taipei_row = rows[0]
    assert taipei_row["CITY"] == "Taipei"
    assert taipei_row["COUNTRY"] == "TW", (
        f"Expected COUNTRY='TW', got {taipei_row['COUNTRY']!r}"
    )

    # Rows written before COUNTRY existed must read back as NULL.
    count_rows = table.select("COUNT(*) AS CNT", 'WHERE "COUNTRY" IS NULL')
    null_country_count = count_rows[0]["CNT"]
    assert null_country_count == first_wave_size, (
        f"Expected {first_wave_size} rows with NULL COUNTRY, got {null_country_count}"
    )


@pytest.mark.iceberg
@pytest.mark.schema_evolution
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_iceberg_se_multi_wave(
    driver: KafkaDriver,
    create_iceberg_table,
    create_topics,
    create_connector,
    wait_for_rows,
):
    """Iceberg SE: connector adds two successive new columns across three waves.

    Waves (50 records each):
      1. ``{city}``: no SE needed.
      2. ``{city, age}``: connector SE adds AGE.
      3. ``{city, age, country}``: connector SE adds COUNTRY.

    Expected table content afterwards:
      - Wave-1 rows: AGE IS NULL, COUNTRY IS NULL
      - Wave-2 rows: AGE set, COUNTRY IS NULL
      - Wave-3 rows: AGE set, COUNTRY set
    """

    def _encode(payload: dict) -> bytes:
        # Records are sent as UTF-8 encoded JSON documents.
        return json.dumps(payload).encode("utf-8")

    base_name = "iceberg_se_multi"
    table = create_iceberg_table(
        base_name,
        columns="(RECORD_METADATA VARIANT, CITY TEXT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    created_topics = create_topics([base_name], with_tables=False)
    topic = created_topics[0]

    connector_config = json_connector_config(
        topic, schematization=True, validation=True
    )
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    # Wave 1: only the pre-declared CITY column is used.
    first_wave_size = 50
    first_wave = []
    for _ in range(first_wave_size):
        first_wave.append(_encode({"city": "Taipei"}))
    driver.sendBytesData(topic, first_wave, partition=0)
    wait_for_rows(table.name, first_wave_size)

    # Wave 2: introduces AGE.
    second_wave_size = 50
    second_wave = []
    for age in range(second_wave_size):
        second_wave.append(_encode({"city": "Hsinchu", "age": age}))
    driver.sendBytesData(topic, second_wave, partition=0)
    wait_for_rows(table.name, first_wave_size + second_wave_size)

    # Wave 3: introduces COUNTRY.
    third_wave_size = 50
    third_wave = []
    for offset in range(third_wave_size):
        third_wave.append(
            _encode({"city": "Kaohsiung", "age": 200 + offset, "country": "TW"})
        )
    driver.sendBytesData(topic, third_wave, partition=0)
    wait_for_rows(table.name, first_wave_size + second_wave_size + third_wave_size)

    wave1_count_rows = table.select(
        "COUNT(*) AS CNT",
        'WHERE "CITY" = \'Taipei\' AND "AGE" IS NULL AND "COUNTRY" IS NULL',
    )
    w1_null = wave1_count_rows[0]["CNT"]
    assert w1_null == first_wave_size, (
        f"Expected {first_wave_size} wave-1 rows with NULL AGE+COUNTRY, got {w1_null}"
    )

    w2_rows = table.select('"AGE", "COUNTRY"', "WHERE \"CITY\" = 'Hsinchu' LIMIT 1")
    assert w2_rows, "Expected at least one wave-2 row"
    wave2_row = w2_rows[0]
    assert wave2_row["AGE"] is not None, "Expected AGE set for wave-2 rows"
    assert wave2_row["COUNTRY"] is None, (
        f"Expected COUNTRY NULL for wave-2 rows, got {wave2_row['COUNTRY']!r}"
    )

    w3_rows = table.select('"AGE", "COUNTRY"', "WHERE \"CITY\" = 'Kaohsiung' LIMIT 1")
    assert w3_rows, "Expected at least one wave-3 row"
    wave3_row = w3_rows[0]
    assert wave3_row["AGE"] is not None, "Expected AGE set for wave-3 rows"
    assert wave3_row["COUNTRY"] == "TW", (
        f"Expected COUNTRY='TW', got {wave3_row['COUNTRY']!r}"
    )


@pytest.mark.iceberg
@pytest.mark.schema_evolution
@pytest.mark.xfail(
    strict=True,
    reason=(
        "Server-side SE (ENABLE_SCHEMA_EVOLUTION on the table, validation=false) "
        "silently discards typed (non-VARIANT) column additions on iceberg tables. "
        "Client-side SE (validation=true) does work after fixing the connector to "
        "issue ALTER ICEBERG TABLE ADD COLUMN, but this test exercises the HT path "
        "(validation=false) where server-side SE is the only mechanism.  Remove "
        "this xfail once Snowflake server-side SE supports typed columns on iceberg."
    ),
)
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_iceberg_se_json_server_side(
    driver: KafkaDriver,
    name_salt: str,
    create_iceberg_table,
    create_topics,
    create_connector,
    wait_for_rows,
):
    """JSON schema evolution into an iceberg table (server-side SE, HT mode).

    The connector is configured with ``validation=False`` (HT mode) so
    client-side validation is never initialized.  Records flow directly to
    SSv2, which relies on ``ENABLE_SCHEMA_EVOLUTION = TRUE`` for server-side
    column additions.  The test is marked as a strict expected failure.

    Two waves are sent:
      1. Wave 1 (100 records): ``{city, age}``, server-side SE adds CITY, AGE.
      2. Wave 2 (50 records): ``{city, age, country}``, server-side SE adds
         COUNTRY.
    """

    def _encode(payload: dict) -> bytes:
        # Records are sent as UTF-8 encoded JSON documents.
        return json.dumps(payload).encode("utf-8")

    base_name = "iceberg_se_json"
    table = create_iceberg_table(
        base_name,
        columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    created_topics = create_topics([base_name], with_tables=False)
    topic = created_topics[0]

    # Base JSON config with error tolerance and a dead-letter queue on top.
    connector_config = {
        **json_connector_config(topic, schematization=True, validation=False)
    }
    connector_config.update(
        {
            "errors.tolerance": "all",
            "errors.log.enable": "true",
            "errors.deadletterqueue.topic.name": f"DLQ_iceberg_se{name_salt}",
            "errors.deadletterqueue.topic.replication.factor": "1",
        }
    )
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    first_wave_size = 100
    first_wave = []
    for age in range(first_wave_size):
        first_wave.append(_encode({"city": "Hsinchu", "age": age}))
    driver.sendBytesData(topic, first_wave, partition=0)
    wait_for_rows(table.name, first_wave_size)

    second_wave_size = 50
    second_wave = []
    for offset in range(second_wave_size):
        second_wave.append(
            _encode({"city": "Taipei", "age": 100 + offset, "country": "TW"})
        )
    driver.sendBytesData(topic, second_wave, partition=0)
    wait_for_rows(table.name, first_wave_size + second_wave_size)

    # Map the first element of each schema row to the second one.
    cols = {row[0]: row[1] for row in table.schema()}
    column_names = list(cols.keys())
    assert "CITY" in cols, f"Expected CITY column, got: {column_names}"
    assert "AGE" in cols, f"Expected AGE column, got: {column_names}"
    assert "COUNTRY" in cols, (
        f"Expected COUNTRY column after wave 2, got: {column_names}"
    )

    rows = table.select('"CITY", "AGE", "COUNTRY"', "WHERE \"CITY\" = 'Taipei' LIMIT 1")
    assert rows, "Expected at least one wave-2 row with CITY = 'Taipei'"
    taipei_row = rows[0]
    assert taipei_row["CITY"] == "Taipei"
    assert taipei_row["COUNTRY"] == "TW"

    # Rows written before COUNTRY existed must read back as NULL.
    count_rows = table.select("COUNT(*) AS CNT", 'WHERE "COUNTRY" IS NULL')
    null_country_count = count_rows[0]["CNT"]
    assert null_country_count == first_wave_size, (
        f"Expected {first_wave_size} rows with NULL COUNTRY, got {null_country_count}"
    )


================================================
FILE: test/tests/pressure/test_perf_backlog_drain.py
================================================
"""
P1 Backlog Drain — profiling-friendly performance test.

Defaults: 4 partitions × 1M records × ~250 bytes = 4M rows (~1 GB).
All parameters are tunable via environment variables (see below).

Scenario:
  1. Create a single topic with DRAIN_PARTITIONS partitions (default 4).
  2. Loader phase: pre-populate the topic with DRAIN_RECORDS_PER_PARTITION
     records per partition.  Each message is a ~250-byte JSON row.
  3. KC phase: start a Snowflake Streaming connector with DRAIN_TASKS_MAX
     tasks (default 8) to drain the full topic from offset 0 ("cold start").
     Runs for up to DRAIN_KC_TIMEOUT seconds (default 900).
  4. Post-run: log final offsets, row counts, and drain time.

Usage:
  ./run_tests.sh --platform=confluent --platform-version=7.8.0 --profile --keep \\
      -- tests/pressure/test_perf_backlog_drain.py

  # Larger run (e.g. ~144M rows / ~38 GB):
  DRAIN_PARTITIONS=4 DRAIN_RECORDS_PER_PARTITION=36000000 \\
  ./run_tests.sh ... -- tests/pressure/test_perf_backlog_drain.py
"""

import json
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Tunables — override via environment variables for quick profiling runs
# ---------------------------------------------------------------------------
# Every value is read once at import time and parsed with int(); the second
# argument to os.environ.get() is the default used when the variable is unset.
# Number of partitions on the test topic; one loader job is submitted per partition.
PARTITION_COUNT = int(os.environ.get("DRAIN_PARTITIONS", "4"))
# Written to the connector's "tasks.max" setting.
TASKS_MAX = int(os.environ.get("DRAIN_TASKS_MAX", "8"))
# Records produced to each partition during the loader phase.
RECORDS_PER_PARTITION = int(os.environ.get("DRAIN_RECORDS_PER_PARTITION", "1_000_000"))
# Worker count of the thread pool that runs the loader jobs.
LOADER_THREADS = int(os.environ.get("DRAIN_LOADER_THREADS", "4"))
# Maximum number of records handed to the driver in one send call.
BATCH_SIZE = int(os.environ.get("DRAIN_BATCH_SIZE", "50_000"))
# Seconds to wait for all rows to appear in Snowflake.
KC_TIMEOUT = int(os.environ.get("DRAIN_KC_TIMEOUT", "900"))
# Nominal bytes per JSON message; used to estimate total volume and MB/s, and
# also embedded in every row as ROW_SIZE_IN_BYTES.
ROW_SIZE_APPROX = 250  # bytes per JSON message


def _make_row(partition: int, id: int) -> bytes:
    """Build a single JSON message (~250 bytes)."""
    return json.dumps(
        {
            "MESSAGE": f"p{partition}-{id}",
            "TIMESTAMP": int(time.time() * 1000),
            "ID": id,
            "PARTITION": partition,
            "ROW_SIZE_IN_BYTES": ROW_SIZE_APPROX,
        }
    ).encode("utf-8")


def _load_partition(driver, topic: str, partition: int, total: int, batch: int):
    """Send `total` records to a single partition in batches."""
    sent = 0
    while sent < total:
        chunk = min(batch, total - sent)
        values = [_make_row(partition, sent + i) for i in range(chunk)]
        driver.sendBytesData(topic, values, key=None, partition=partition)
        sent += chunk
        if sent % 200_000 == 0 or sent == total:
            logger.info(
                "Loader p%d: %d / %d (%.1f%%)",
                partition,
                sent,
                total,
                100 * sent / total,
            )
    return sent


@pytest.mark.pressure
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_perf_backlog_drain(
    driver,
    name_salt,
    create_topics,
    create_custom_connector,
    wait_for_rows,
):
    """Fill a topic up front, then time a cold-started connector draining it.

    Phases: create the topic, produce every record through a thread pool
    (one job per partition), create the connector, wait until Snowflake
    holds the expected number of rows, then log timings and assert the final
    row count.  Byte and MB/s figures are estimates based on ROW_SIZE_APPROX.
    """
    total_records = PARTITION_COUNT * RECORDS_PER_PARTITION
    estimated_bytes = total_records * ROW_SIZE_APPROX
    logger.info(
        "=== P1 Backlog Drain: %d partitions × %d records = %d total (%.1f GB) ===",
        PARTITION_COUNT,
        RECORDS_PER_PARTITION,
        total_records,
        estimated_bytes / 1e9,
    )

    # --- 1. Topic setup ----------------------------------------------------
    created_topics = create_topics(
        ["perf_backlog_drain"],
        num_partitions=PARTITION_COUNT,
    )
    topic = created_topics[0]
    logger.info("Topic created: %s (%d partitions)", topic, PARTITION_COUNT)

    # --- 2. Loader phase: fill the topic before the connector exists -------
    logger.info(
        "=== Loader phase: %d threads, %d records/partition, batch=%d ===",
        LOADER_THREADS,
        RECORDS_PER_PARTITION,
        BATCH_SIZE,
    )
    loader_started_at = time.time()
    with ThreadPoolExecutor(max_workers=LOADER_THREADS) as pool:
        partition_by_job = {}
        for partition in range(PARTITION_COUNT):
            job = pool.submit(
                _load_partition,
                driver,
                topic,
                partition,
                RECORDS_PER_PARTITION,
                BATCH_SIZE,
            )
            partition_by_job[job] = partition
        for job in as_completed(partition_by_job):
            # result() re-raises anything the loader thread raised.
            loaded = job.result()
            logger.info(
                "Partition %d loaded: %d records", partition_by_job[job], loaded
            )

    load_elapsed = time.time() - loader_started_at
    load_mb_per_s = estimated_bytes / load_elapsed / 1e6
    logger.info(
        "=== Loader done: %.1fs, %.1f MB/s ===",
        load_elapsed,
        load_mb_per_s,
    )

    # --- 3. KC phase: the connector starts cold against a full topic -------
    logger.info(
        "=== KC phase: %d tasks, timeout=%ds ===",
        TASKS_MAX,
        KC_TIMEOUT,
    )
    drain_started_at = time.time()

    overrides = {
        "tasks.max": str(TASKS_MAX),
        "topics": topic,
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.validation": os.environ.get("DRAIN_VALIDATION", "server_side"),
        "consumer.override.max.poll.interval.ms": "600000",
        "consumer.override.auto.offset.reset": "earliest",
    }
    connector = create_custom_connector(
        "perf_backlog_drain",
        {**V4_CONFIG_TEMPLATE, **overrides},
    )

    driver.wait_for_connector_running(connector.name, timeout=120)
    logger.info("Connector %s is RUNNING", connector.name)

    # --- 4. Wait for all rows to land in Snowflake -------------------------
    table_name = topic
    wait_for_rows(
        table_name,
        total_records,
        timeout=KC_TIMEOUT,
        interval=10,
        connector_name=connector.name,
    )

    drain_elapsed = time.time() - drain_started_at
    drain_mb_per_s = estimated_bytes / drain_elapsed / 1e6
    logger.info(
        "=== KC drain complete: %.1fs, %.1f MB/s ===",
        drain_elapsed,
        drain_mb_per_s,
    )

    # --- 5. Post-run stats -------------------------------------------------
    if drain_elapsed > 0:
        rows_per_second = total_records / drain_elapsed
    else:
        rows_per_second = 0

    logger.info("=== Post-run stats ===")
    logger.info("  Total records:      %d", total_records)
    logger.info(
        "  Load time:          %.1fs (%.1f MB/s)", load_elapsed, load_mb_per_s
    )
    logger.info(
        "  Drain time:         %.1fs (%.1f MB/s)", drain_elapsed, drain_mb_per_s
    )
    logger.info("  Rows/sec (drain):   %.0f", rows_per_second)

    row_count = driver.select_number_of_records(table_name)
    logger.info("  Snowflake rows:     %s", row_count)
    assert row_count == total_records, (
        f"Expected {total_records} rows but got {row_count}"
    )


================================================
FILE: test/tests/pressure/test_pressure_init.py
================================================
import json
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Number of topics created for the test.
TOPIC_COUNT = 200
# Partitions per topic.
PARTITION_COUNT = 12
# Records produced to each partition.
RECORD_COUNT = 10_000
# Worker count of the thread pool that produces the records.
THREAD_COUNT = 10


def _send_partition(driver, topic, partition, record_count):
    values = [
        json.dumps(
            {
                "numbernumbernumbernumbernumbernumbernumbernumbernumbernumbernumbernumber": str(
                    e
                )
            }
        ).encode("utf-8")
        for e in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], partition)


@pytest.mark.pressure
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_pressure_init(driver, create_topics, create_custom_connector, wait_for_rows):
    """Start one connector over many topics, then flood every partition.

    Creates TOPIC_COUNT topics with PARTITION_COUNT partitions each, creates
    a connector that subscribes to them by regex, produces RECORD_COUNT
    records to every partition through a thread pool, and finally waits for
    each topic's table to contain PARTITION_COUNT * RECORD_COUNT rows.
    """
    test_name = "test_pressure_init"

    topic_names = [f"{test_name}_{i}" for i in range(TOPIC_COUNT)]
    topics = create_topics(topic_names, num_partitions=PARTITION_COUNT)

    overrides = {
        "tasks.max": "10",
        "topics.regex": f"{test_name}.*",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.validation": "server_side",
        "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
        "snowflake.compatibility.enable.column.identifier.normalization": "false",
        # Max poll interval is raised from 5 to 10 minutes so the consumer
        # group does not rebalance constantly; needed while a large number
        # of topics is spread across the tasks.  May become unnecessary once
        # table and pipe metadata lookups are parallelized.
        "consumer.override.max.poll.interval.ms": "600000",
    }
    connector = create_custom_connector(
        test_name, {**V4_CONFIG_TEMPLATE, **overrides}
    )

    driver.startConnectorWaitTime()

    total = TOPIC_COUNT * PARTITION_COUNT
    with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor:
        futures = []
        for t in range(TOPIC_COUNT):
            for p in range(PARTITION_COUNT):
                futures.append(
                    executor.submit(_send_partition, driver, topics[t], p, RECORD_COUNT)
                )
        i = 0
        for future in as_completed(futures):
            i += 1
            # result() re-raises anything the producer thread raised.
            future.result()
            if i % 100 == 0 or i == total:
                logger.info(f"Sent {i}/{total} partitions")

    rows_per_table = PARTITION_COUNT * RECORD_COUNT
    for position, topic in enumerate(topics, start=1):
        table_name = topic.upper()
        logger.info("Verifying topic %d/%d: %s", position, TOPIC_COUNT, table_name)
        wait_for_rows(
            table_name,
            rows_per_table,
            interval=10,
            timeout=1800,
            connector_name=connector.name,
        )


================================================
FILE: test/tests/pressure/test_pressure_restart.py
================================================
import json
import logging
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.driver import KafkaDriver

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Number of topics created for the test.
TOPIC_COUNT = 10
# Partitions per topic.
PARTITION_COUNT = 3
# Records produced to each partition.
RECORD_COUNT = 200_000
# Exact row count each topic's table must reach.
EXPECTED_PER_TOPIC = PARTITION_COUNT * RECORD_COUNT
# Worker count of the thread pool that produces the records.
THREAD_COUNT = 10


def _send_partition(driver, topic, partition, record_count):
    values = [
        json.dumps(
            {
                "numbernumbernumbernumbernumbernumbernumbernumbernumbernumbernumbernumber": str(
                    e
                )
            }
        ).encode("utf-8")
        for e in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], partition)


@pytest.mark.pressure
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_pressure_restart(driver: KafkaDriver, create_topics, create_custom_connector):
    """Ingest a backlog while the connector is repeatedly disrupted.

    After producing RECORD_COUNT records to every partition of TOPIC_COUNT
    topics, the test polls each topic's table until it holds exactly
    EXPECTED_PER_TOPIC rows.  Every poll first advances a 7-step phase
    counter that restarts, pauses, resumes, closes or re-creates the
    connector.  Each topic gets 600 seconds before the test fails.
    """
    test_name = "test_pressure_restart"

    topics = create_topics(
        [f"{test_name}{i}" for i in range(TOPIC_COUNT)], num_partitions=PARTITION_COUNT
    )

    # Kept in a variable because the same config is reused when the connector
    # is re-created inside the polling loop.
    config = {
        **V4_CONFIG_TEMPLATE,
        "tasks.max": "10",
        "topics.regex": f"{test_name}.*",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.validation": "server_side",
        "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
        "snowflake.compatibility.enable.column.identifier.normalization": "false",
    }
    connector = create_custom_connector(test_name, config)

    # Produce to every (topic, partition) pair through a thread pool.
    total = TOPIC_COUNT * PARTITION_COUNT
    with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor:
        futures = [
            executor.submit(_send_partition, driver, topics[t], p, RECORD_COUNT)
            for t in range(TOPIC_COUNT)
            for p in range(PARTITION_COUNT)
        ]
        for i, future in enumerate(as_completed(futures), 1):
            # result() re-raises anything the producer thread raised.
            future.result()
            if i % 10 == 0 or i == total:
                logger.info(f"Sent {i}/{total} partitions")

    # The phase counter is shared by all topics: it is NOT reset per topic,
    # so the disruption cycle continues where the previous topic left off.
    phase = 0
    for i, topic in enumerate(topics):
        table_name = topic.upper()
        logger.info(f"Verifying topic {i + 1}/{TOPIC_COUNT}: {table_name}")
        # Per-topic deadline, measured on the monotonic clock.
        deadline = time.monotonic() + 600
        while True:
            # Advance the cycle 1..6, 0 and apply that phase's action before
            # polling.  Phase 1 matches no case, so nothing is done there.
            phase = (phase + 1) % 7
            match phase:
                case 2 | 3:
                    # Two consecutive polls each trigger a restart.
                    driver.restartConnector(connector.name)
                case 4:
                    driver.pauseConnector(connector.name)
                case 5:
                    driver.resumeConnector(connector.name)
                case 6:
                    # Close the connector; the next phase (0) re-creates it.
                    connector.close()
                case 0:
                    # Re-create the connector with the same name and config.
                    connector = create_custom_connector(test_name, config)

            count = driver.select_number_of_records(table_name)
            # Strict equality: a count above the expected value never
            # satisfies this check and ends in the timeout below.
            if count == EXPECTED_PER_TOPIC:
                break
            if time.monotonic() >= deadline:
                raise AssertionError(
                    f"Timed out waiting for {EXPECTED_PER_TOPIC} rows in {table_name} "
                    f"(got {count} after 600s)"
                )
            logger.info(
                f"Topic {table_name}: {count}/{EXPECTED_PER_TOPIC} rows, retrying in {driver.VERIFY_INTERVAL}s..."
            )
            time.sleep(driver.VERIFY_INTERVAL)


================================================
FILE: test/tests/schema_evolution/__init__.py
================================================


================================================
FILE: test/tests/schema_evolution/test_se_auto_table_creation_avro_sr.py
================================================
"""Schema evolution with auto table creation (Avro Schema Registry).

Migrated from v3 ``TestSchemaEvolutionWithAutoTableCreationAvroSR``.

Same logic as the JSON variant but data is produced via AvroProducer
with a Schema Registry.
"""

import pytest
from confluent_kafka import avro

from lib.config_migration import V4_CONFIG_TEMPLATE

# Each topic receives two sends: one of INITIAL_BATCH records followed by one
# of FLUSH_BATCH records, so RECORD_COUNT is the per-topic total.
INITIAL_BATCH = 12
FLUSH_BATCH = 300
RECORD_COUNT = INITIAL_BATCH + FLUSH_BATCH

# Avro value schema per topic; index i is used for topic i.
VALUE_SCHEMAS = [
    avro.loads("""
    {
        "type": "record",
        "name": "value_schema_0",
        "fields": [
            {"name": "PERFORMANCE_STRING", "type": "string"},
            {"name": "PERFORMANCE_CHAR", "type": "string"},
            {"name": "RATING_INT", "type": "int"}
        ]
    }
    """),
    avro.loads("""
    {
        "type": "record",
        "name": "value_schema_1",
        "fields": [
            {"name": "PERFORMANCE_STRING", "type": "string"},
            {"name": "RATING_DOUBLE", "type": "float"},
            {"name": "APPROVAL", "type": "boolean"}
        ]
    }
    """),
]

# Record payload per topic; index i pairs with VALUE_SCHEMAS[i].  Only
# PERFORMANCE_STRING is common to both payloads.
RECORDS = [
    {"PERFORMANCE_STRING": "Excellent", "PERFORMANCE_CHAR": "A", "RATING_INT": 100},
    {"PERFORMANCE_STRING": "Excellent", "RATING_DOUBLE": 0.99, "APPROVAL": True},
]

# Expected columns of the shared table mapped to a type prefix; the test
# checks that the type reported by DESCRIBE TABLE starts with this prefix.
GOLD_TYPES = {
    "PERFORMANCE_STRING": "VARCHAR",
    "PERFORMANCE_CHAR": "VARCHAR",
    "RATING_INT": "NUMBER",
    "RATING_DOUBLE": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "RECORD_METADATA": "VARIANT",
}


@pytest.mark.schema_evolution
@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_se_auto_table_creation_avro_sr(
    driver,
    connector_version,
    name_salt,
    create_connector,
    wait_for_rows,
):
    """Two Avro topics with different schemas feed one table that is not pre-created.

    Auto table creation is a v4-only feature; v3 requires pre-existing tables.
    """
    base = f"se_auto_table_creation_avro_sr{name_salt}"
    table_name = base.upper()
    topics = [f"{base}{i}" for i in range(2)]

    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Both topics are mapped onto the same destination table.
    overrides = {
        "topics": ",".join(topics),
        "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "io.confluent.connect.avro.AvroConverter",
        "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector = create_connector(v4_config={**V4_CONFIG_TEMPLATE, **overrides})
    connector_name = connector.name
    driver.startConnectorWaitTime()

    # Per topic: one small batch followed by one larger batch.
    for idx in range(len(topics)):
        record = RECORDS[idx]
        schema = VALUE_SCHEMAS[idx]
        for batch_size in (INITIAL_BATCH, FLUSH_BATCH):
            driver.sendAvroSRData(
                topics[idx],
                [record] * batch_size,
                schema,
                key=[],
                key_schema="",
                partition=0,
            )

    wait_for_rows(table_name, RECORD_COUNT * len(topics), connector_name=connector_name)

    # Column name -> reported type, from the first two DESCRIBE TABLE fields.
    cursor = driver.snowflake_conn.cursor()
    described = cursor.execute(f"DESCRIBE TABLE {table_name}").fetchall()
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        type_matches = cols[col_name].startswith(expected_prefix)
        assert type_matches, (
            f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}"
        )


================================================
FILE: test/tests/schema_evolution/test_se_auto_table_creation_json.py
================================================
"""Schema evolution with auto table creation (JSON).

Migrated from v3 ``TestSchemaEvolutionWithAutoTableCreationJson``.

The table does NOT exist initially.  The connector auto-creates it
from RECORD_METADATA, then schema evolution adds the remaining
columns from the record payload.  Two topics with different schemas
test that all columns end up in one table.
"""

import json

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Each topic receives two sends: one of INITIAL_BATCH records followed by one
# of FLUSH_BATCH records, so RECORD_COUNT is the per-topic total.
INITIAL_BATCH = 12
FLUSH_BATCH = 300
RECORD_COUNT = INITIAL_BATCH + FLUSH_BATCH

# Record payload per topic (index i is sent to topic i).  Only
# PERFORMANCE_STRING is common to both payloads.
RECORDS = [
    {"PERFORMANCE_STRING": "Excellent", "PERFORMANCE_CHAR": "A", "RATING_INT": 100},
    {"PERFORMANCE_STRING": "Excellent", "RATING_DOUBLE": 0.99, "APPROVAL": True},
]

# Expected columns of the shared table mapped to a type prefix; the test
# checks that the type reported by DESCRIBE TABLE starts with this prefix.
GOLD_TYPES = {
    "PERFORMANCE_STRING": "VARCHAR",
    "PERFORMANCE_CHAR": "VARCHAR",
    "RATING_INT": "NUMBER",
    "RATING_DOUBLE": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "RECORD_METADATA": "VARIANT",
}


@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_auto_table_creation_json(
    driver,
    connector_version,
    name_salt,
    create_connector,
    wait_for_rows,
):
    """Two JSON topics with different payloads feed one table that is not pre-created.

    After all records arrive, every column in GOLD_TYPES must exist with a
    type that starts with the expected prefix.
    """
    base = f"se_auto_table_creation_json{name_salt}"
    table_name = base.upper()
    topics = [f"{base}{i}" for i in range(2)]

    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Both topics are mapped onto the same destination table.
    overrides = {
        "topics": ",".join(topics),
        "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector = create_connector(v4_config={**V4_CONFIG_TEMPLATE, **overrides})
    connector_name = connector.name
    driver.startConnectorWaitTime()

    # Per topic: one small batch followed by one larger batch.
    for idx in range(len(topics)):
        for batch_size in (INITIAL_BATCH, FLUSH_BATCH):
            keys = []
            values = []
            for seq in range(batch_size):
                keys.append(json.dumps({"number": str(seq)}).encode("utf-8"))
                values.append(json.dumps(RECORDS[idx]).encode("utf-8"))
            driver.sendBytesData(topics[idx], values, keys)

    wait_for_rows(table_name, RECORD_COUNT * len(topics), connector_name=connector_name)

    # Column name -> reported type, from the first two DESCRIBE TABLE fields.
    cursor = driver.snowflake_conn.cursor()
    described = cursor.execute(f"DESCRIBE TABLE {table_name}").fetchall()
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        type_matches = cols[col_name].startswith(expected_prefix)
        assert type_matches, (
            f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}"
        )


================================================
FILE: test/tests/schema_evolution/test_se_avro_sr.py
================================================
"""Schema evolution with Avro Schema Registry data.

Migrated from v3 ``TestSchemaEvolutionAvroSR``.

Two topics with different Avro schemas feed into the same table.
The connector should evolve the table to accommodate all columns
from both schemas.
"""

import pytest
from confluent_kafka import avro

from lib.config_migration import V4_CONFIG_TEMPLATE

# Number of records sent to each topic.
RECORD_COUNT = 100

# Avro value schema per topic; index i is used for topic i.
VALUE_SCHEMAS = [
    avro.loads("""
    {
        "type": "record",
        "name": "value_schema_0",
        "fields": [
            {"name": "PERFORMANCE_CHAR", "type": "string"},
            {"name": "PERFORMANCE_STRING", "type": "string"},
            {"name": "RATING_INT", "type": "int"}
        ]
    }
    """),
    avro.loads("""
    {
        "type": "record",
        "name": "value_schema_1",
        "fields": [
            {"name": "RATING_DOUBLE", "type": "float"},
            {"name": "PERFORMANCE_STRING", "type": "string"},
            {"name": "APPROVAL", "type": "boolean"},
            {"name": "SOME_FLOAT_NAN", "type": "float"}
        ]
    }
    """),
]

# Record payload per topic; index i pairs with VALUE_SCHEMAS[i].  The second
# payload carries a NaN in its SOME_FLOAT_NAN field.
RECORDS = [
    {
        "PERFORMANCE_STRING": "Excellent",
        "PERFORMANCE_CHAR": "A",
        "RATING_INT": 100,
    },
    {
        "PERFORMANCE_STRING": "Excellent",
        "RATING_DOUBLE": 0.99,
        "APPROVAL": True,
        "SOME_FLOAT_NAN": float("nan"),
    },
]

# Expected columns of the shared table mapped to a type prefix; the test
# checks that the type reported by DESCRIBE TABLE starts with this prefix.
GOLD_TYPES = {
    "PERFORMANCE_STRING": "VARCHAR",
    "PERFORMANCE_CHAR": "VARCHAR",
    "RATING_INT": "NUMBER",
    "RATING_DOUBLE": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "SOME_FLOAT_NAN": "FLOAT",
    "RECORD_METADATA": "VARIANT",
}


@pytest.mark.schema_evolution
@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_se_avro_sr(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Two Avro topics with different schemas evolve one shared table.

    Restricted to v4 (auto-creation works).  v3 with SNOWPIPE_STREAMING
    cannot auto-create the table for Avro SR data with topic2table.map, and
    pre-created tables trigger pipe invalidation on ALTER TABLE.
    """
    base = f"se_avro_sr{name_salt}"
    table_name = base.upper()

    topics = [f"{base}{i}" for i in range(2)]
    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Both topics are mapped onto the same destination table.
    overrides = {
        "topics": ",".join(topics),
        "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "io.confluent.connect.avro.AvroConverter",
        "value.converter.schema.registry.url": "CONFLUENT_SCHEMA_REGISTRY",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector = create_connector(v4_config={**V4_CONFIG_TEMPLATE, **overrides})
    connector_name = connector.name
    driver.startConnectorWaitTime()

    # One send of RECORD_COUNT identical records per topic.
    for idx in range(len(topics)):
        batch = [RECORDS[idx]] * RECORD_COUNT
        driver.sendAvroSRData(
            topics[idx],
            batch,
            VALUE_SCHEMAS[idx],
            key=[],
            key_schema="",
            partition=0,
        )

    wait_for_rows(table_name, RECORD_COUNT * len(topics), connector_name=connector_name)

    # Column name -> reported type, from the first two DESCRIBE TABLE fields.
    cursor = driver.snowflake_conn.cursor()
    described = cursor.execute(f"DESCRIBE TABLE {table_name}").fetchall()
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        type_matches = cols[col_name].startswith(expected_prefix)
        assert type_matches, (
            f"Column {col_name}: expected type starting with {expected_prefix}, "
            f"got {cols[col_name]}"
        )


================================================
FILE: test/tests/schema_evolution/test_se_json_ignore_tombstone.py
================================================
"""Schema evolution with tombstone filtering (behavior.on.null.values=IGNORE).

Migrated from v3 ``TestSchemaEvolutionJsonIgnoreTombstone``.

Two topics feed one table.  Each topic sends (RECORD_COUNT - 2)
real records plus a null and an empty-string tombstone.  With
``behavior.on.null.values=IGNORE`` the tombstones are dropped, so
the expected row count is ``2 * (RECORD_COUNT - 2)``.
Schema evolution must still create all expected columns.
"""

import json

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Messages sent to each topic, INCLUDING the two tombstones; only
# RECORD_COUNT - 2 of them carry a real payload.
RECORD_COUNT = 100

# Record payload per topic (index i is sent to topic i).  Only
# PERFORMANCE_STRING is common to both payloads.
RECORDS = [
    {
        "PERFORMANCE_STRING": "Excellent",
        "PERFORMANCE_CHAR": "A",
        "RATING_INT": 100,
    },
    {
        "PERFORMANCE_STRING": "Excellent",
        "RATING_DOUBLE": 0.99,
        "APPROVAL": True,
    },
]

# Expected columns of the shared table mapped to a type prefix; the test
# checks that the type reported by DESCRIBE TABLE starts with this prefix.
GOLD_TYPES = {
    "PERFORMANCE_STRING": "VARCHAR",
    "PERFORMANCE_CHAR": "VARCHAR",
    "RATING_INT": "NUMBER",
    "RATING_DOUBLE": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "RECORD_METADATA": "VARIANT",
}


@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_json_ignore_tombstone(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Tombstones are dropped while schema evolution still adds every column.

    Each topic receives RECORD_COUNT - 2 real records followed by a None
    value and an empty-bytes value.  The connector is configured with
    behavior.on.null.values=IGNORE, so only the real records are expected
    in the table.
    """
    base = f"se_json_ignore_tombstone{name_salt}"
    table_name = base.upper()
    topics = [f"{base}{i}" for i in range(2)]

    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Both topics are mapped onto the same destination table.
    overrides = {
        "topics": ",".join(topics),
        "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "behavior.on.null.values": "IGNORE",
        "snowflake.validation": "client_side",
    }
    connector = create_connector(v4_config={**V4_CONFIG_TEMPLATE, **overrides})
    connector_name = connector.name
    driver.startConnectorWaitTime()

    real_count = RECORD_COUNT - 2
    for idx in range(len(topics)):
        # Keys are numbered consecutively across real records and tombstones.
        keys = []
        for seq in range(real_count + 2):
            keys.append(json.dumps({"number": str(seq)}).encode("utf-8"))

        values = []
        for _ in range(real_count):
            values.append(json.dumps(RECORDS[idx]).encode("utf-8"))
        # Tombstones: a null value, then an empty payload.
        values.extend([None, b""])

        driver.sendBytesData(topics[idx], values, keys)

    expected_rows = len(topics) * (RECORD_COUNT - 2)
    wait_for_rows(table_name, expected_rows, connector_name=connector_name)

    # Column name -> reported type, from the first two DESCRIBE TABLE fields.
    cursor = driver.snowflake_conn.cursor()
    described = cursor.execute(f"DESCRIBE TABLE {table_name}").fetchall()
    cols = {}
    for row in described:
        cols[row[0]] = row[1]

    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        type_matches = cols[col_name].startswith(expected_prefix)
        assert type_matches, (
            f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}"
        )


================================================
FILE: test/tests/schema_evolution/test_se_multi_topic_replace_table.py
================================================
"""Schema evolution with multiple topics and a mid-stream table replacement.

Migrated from v3 ``TestSchemaEvolutionMultiTopicDropTable``.

Two topics with different schemas feed into one table.  After the
first wave is ingested the table is replaced with CREATE OR REPLACE
TABLE.  The connector must detect the channel invalidation, re-open
channels, and re-evolve columns from both topics.
"""

import json

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Number of records sent to each topic in each wave.
RECORD_COUNT = 100

# Record payload per topic (index i is sent to topic i).  Only
# PERFORMANCE_STRING is common to both payloads.
RECORDS = [
    {
        "PERFORMANCE_STRING": "Excellent",
        "PERFORMANCE_CHAR": "A",
        "RATING_INT": 100,
    },
    {
        "PERFORMANCE_STRING": "Excellent",
        "RATING_DOUBLE": 0.99,
        "APPROVAL": True,
    },
]

# Expected columns of the shared table mapped to a type prefix; the schema
# check requires the type reported by DESCRIBE TABLE to start with it.
GOLD_TYPES = {
    "PERFORMANCE_STRING": "VARCHAR",
    "PERFORMANCE_CHAR": "VARCHAR",
    "RATING_INT": "NUMBER",
    "RATING_DOUBLE": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "RECORD_METADATA": "VARIANT",
}


def _assert_schema(driver, table_name):
    cols = {
        row[0]: row[1]
        for row in driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    }
    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        assert cols[col_name].startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}"
        )


def _send_all(driver, topics, count):
    for i, topic in enumerate(topics):
        keys = [json.dumps({"number": str(e)}).encode("utf-8") for e in range(count)]
        values = [json.dumps(RECORDS[i]).encode("utf-8") for _ in range(count)]
        driver.sendBytesData(topic, values, keys)


@pytest.mark.schema_evolution
@pytest.mark.compatibility
@pytest.mark.parametrize("connector_version", ["v3"], indirect=True)
def test_se_multi_topic_replace_table(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Replace the destination table between two ingestion waves.

    Restricted to v3: CREATE OR REPLACE TABLE mid-stream invalidates v4
    streaming channels, and the SSv2 SDK does not surface pipe invalidation
    through isClosed().
    """
    base = f"se_multi_topic_replace_table{name_salt}"
    table_name = base.upper()
    topics = [f"{base}{i}" for i in range(2)]

    for topic_name in topics:
        driver.createTopics(topic_name, partitionNum=1, replicationNum=1)

    # Both topics are mapped onto the same destination table.
    overrides = {
        "topics": ",".join(topics),
        "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "all",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    create_connector(v4_config={**V4_CONFIG_TEMPLATE, **overrides})
    driver.startConnectorWaitTime()

    rows_per_wave = RECORD_COUNT * len(topics)

    # Wave 1: ingest into the original table and check the evolved schema.
    _send_all(driver, topics, RECORD_COUNT)
    wait_for_rows(table_name, rows_per_wave)
    _assert_schema(driver, table_name)

    # Swap the table for an empty one that has only the metadata column.
    replace_sql = (
        f"CREATE OR REPLACE TABLE {table_name} "
        f"(RECORD_METADATA VARIANT) "
        f"ENABLE_SCHEMA_EVOLUTION = TRUE"
    )
    driver.snowflake_conn.cursor().execute(replace_sql)

    # Wave 2: replacing the table invalidates the old channels, which then
    # reopen with no committed offset.  Recovery falls back to the consumer
    # group offset tracked in PartitionOffsetTracker; that offset only
    # advances when preCommit runs, so it may lag the real committed
    # position.  Kafka can therefore replay wave-1 records into the new
    # table, which is why more than one wave's worth of rows is tolerated.
    _send_all(driver, topics, RECORD_COUNT)
    wait_for_rows(table_name, rows_per_wave, at_least=True)
    _assert_schema(driver, table_name)


================================================
FILE: test/tests/schema_evolution/test_se_nonnullable_json.py
================================================
"""Schema evolution with NOT NULL columns.

Migrated from v3 ``TestSchemaEvolutionNonNullableJson``.

The table starts with a NOT NULL column.  Records arrive without that
column but with new columns.  Schema evolution must:
  - Add the new columns as nullable
  - Leave every evolved column nullable (verified via DESCRIBE)
"""

import json

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Number of records produced to the topic.
RECORD_COUNT = 100

# Payload shared by every record; it carries no PERFORMANCE_STRING value.
RECORD = dict(PERFORMANCE_CHAR="A", RATING_INT=100)

# Column name -> prefix that the column type reported by DESCRIBE TABLE
# must start with.
GOLD_TYPES = dict(
    PERFORMANCE_STRING="VARCHAR",
    PERFORMANCE_CHAR="VARCHAR",
    RATING_INT="NUMBER",
    RECORD_METADATA="VARIANT",
)


@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_nonnullable_json(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Ingest records that lack the table's NOT NULL column.

    The table is created with ``PERFORMANCE_STRING STRING NOT NULL`` while the
    records only carry the ``RECORD`` fields.  After ``RECORD_COUNT`` rows have
    landed, every column reported by DESCRIBE TABLE must be nullable and every
    ``GOLD_TYPES`` column must exist with the expected type prefix.
    """
    topic = f"se_nonnullable_json{name_salt}"
    table_name = topic.upper()

    create_table_sql = (
        f"CREATE OR REPLACE TABLE {table_name} "
        "(RECORD_METADATA VARIANT, PERFORMANCE_STRING STRING NOT NULL) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )
    driver.snowflake_conn.cursor().execute(create_table_sql)
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": topic,
        "snowflake.topic2table.map": f"{topic}:{table_name}",
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    connector_name = create_connector(v4_config=connector_config).name
    driver.startConnectorWaitTime()

    # One key/value pair per record; only the key differs between records.
    keys, values = [], []
    for i in range(RECORD_COUNT):
        keys.append(json.dumps({"number": str(i)}).encode("utf-8"))
        values.append(json.dumps(RECORD).encode("utf-8"))
    driver.sendBytesData(topic, values, keys)

    wait_for_rows(table_name, RECORD_COUNT, connector_name=connector_name)

    describe_cursor = driver.snowflake_conn.cursor()
    description = describe_cursor.execute(f"DESCRIBE TABLE {table_name}").fetchall()

    # Fields 0, 1 and 3 of each DESCRIBE row are read as name, type and the
    # nullability flag respectively.
    column_types = {}
    for described in description:
        name = described[0]
        nullable_flag = described[3]
        column_types[name] = described[1]
        assert nullable_flag == "Y", (
            f"Column {name} should be nullable after schema evolution, "
            f"but null?={nullable_flag}"
        )

    for expected_name, type_prefix in GOLD_TYPES.items():
        assert expected_name in column_types, (
            f"Missing column {expected_name}, got: {list(column_types.keys())}"
        )
        actual_type = column_types[expected_name]
        assert actual_type.startswith(type_prefix), (
            f"Column {expected_name}: expected {type_prefix}, got {actual_type}"
        )


================================================
FILE: test/tests/schema_evolution/test_se_nullable_values_after_smt.py
================================================
"""Schema evolution with nullable values produced by an SMT.

Migrated from v3 ``TestSchemaEvolutionNullableValuesAfterSmt``.

An ``ExtractField$Value`` SMT extracts the ``optionalField``
sub-object.  Only every other event contains ``optionalField``, so
half the events produce null values and are dropped by
``behavior.on.null.values=IGNORE``.

``INDEX`` comes from the table DDL; schema evolution only adds
``FROM_OPTIONAL_FIELD`` from the record payload.  The original
``INDEX`` column stays NOT NULL; evolved columns must be nullable.
"""

import json

import pytest
from snowflake.connector import DictCursor

from lib.config_migration import V4_CONFIG_TEMPLATE

# Number of events produced to the topic.
TOTAL_EVENTS = 200
# Only every other event carries ``optionalField``, so half of them land.
EXPECTED_ROWS = TOTAL_EVENTS // 2


@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_nullable_values_after_smt(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Ingest through an ``ExtractField$Value`` SMT and check schema and data.

    ``TOTAL_EVENTS`` events are produced; only the even-indexed ones carry
    ``optionalField``.  The test waits for ``EXPECTED_ROWS`` rows, then checks
    the type and nullability of ``INDEX``, ``FROM_OPTIONAL_FIELD`` and
    ``RECORD_METADATA`` and finally the row contents ordered by Kafka offset.
    """
    topic = f"se_nullable_values_after_smt{name_salt}"
    table_name = topic.upper()

    create_table_sql = (
        f"CREATE OR REPLACE TABLE {table_name} "
        "(RECORD_METADATA VARIANT, INDEX NUMBER NOT NULL) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )
    driver.snowflake_conn.cursor().execute(create_table_sql)
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": topic,
        "snowflake.topic2table.map": f"{topic}:{table_name}",
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "none",
        "errors.log.enable": "true",
        "behavior.on.null.values": "IGNORE",
        "transforms": "extractField",
        "transforms.extractField.type": "org.apache.kafka.connect.transforms.ExtractField$Value",
        "transforms.extractField.field": "optionalField",
        "snowflake.validation": "client_side",
    }
    connector_name = create_connector(v4_config=connector_config).name
    driver.startConnectorWaitTime()

    def _encode_event(idx):
        # Only even-indexed events carry the sub-object the SMT extracts.
        event = {"index": idx, "someKey": "someValue"}
        if idx % 2 == 0:
            event["optionalField"] = {"INDEX": idx, "FROM_OPTIONAL_FIELD": True}
        return json.dumps(event).encode("utf-8")

    driver.sendBytesData(topic, [_encode_event(idx) for idx in range(TOTAL_EVENTS)])

    wait_for_rows(table_name, EXPECTED_ROWS, connector_name=connector_name)

    # --- Verify table schema ---
    schema_cursor = driver.snowflake_conn.cursor(DictCursor)
    described = schema_cursor.execute(f"DESCRIBE TABLE {table_name}").fetchall()
    col_map = {}
    for described_row in described:
        col_map[described_row["name"]] = described_row

    # (column name, expected type prefix, expected "null?" flag)
    expectations = (
        ("INDEX", "NUMBER", "N"),
        ("FROM_OPTIONAL_FIELD", "BOOLEAN", "Y"),
        ("RECORD_METADATA", "VARIANT", "Y"),
    )
    for col_name, type_prefix, nullable in expectations:
        assert col_name in col_map, (
            f"Missing column {col_name}, got: {list(col_map.keys())}"
        )
        actual = col_map[col_name]
        assert actual["type"].startswith(type_prefix), (
            f"Column {col_name}: expected type starting with "
            f"{type_prefix}, got {actual['type']}"
        )
        assert actual["null?"] == nullable, (
            f"Column {col_name}: expected null?={nullable}, "
            f"got {actual['null?']}"
        )

    # --- Verify data ---
    select_sql = (
        "SELECT INDEX, FROM_OPTIONAL_FIELD, "
        'RECORD_METADATA:"offset"::number AS OFFSET '
        f"FROM {table_name} ORDER BY OFFSET"
    )
    rows = driver.snowflake_conn.cursor(DictCursor).execute(select_sql).fetchall()

    assert len(rows) == EXPECTED_ROWS

    # Rows are ordered by offset, so the n-th row must hold the n-th even index.
    for row, expected_idx in zip(rows, range(0, TOTAL_EVENTS, 2)):
        actual_idx = row["INDEX"]
        assert actual_idx == expected_idx, (
            f"Expected INDEX={expected_idx}, got {actual_idx}"
        )
        assert row["FROM_OPTIONAL_FIELD"] is True


================================================
FILE: test/tests/schema_evolution/test_se_random_row_count.py
================================================
"""Schema evolution with random initial batch size.

Migrated from v3 ``TestSchemaEvolutionWithRandomRowCount``.

The initial batch size is randomised (1–299) so that the ALTER TABLE
for schema evolution can trigger at different points relative to the
flush boundary (300 records).  This catches timing-related edge cases
in the schema evolution path.
"""

import json
import random

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Size of the second batch sent to each topic; the module docstring calls
# this the flush boundary.
FLUSH_BATCH = 300

# RECORDS[i] is the payload of every record sent to the i-th topic.
RECORDS = [
    dict(PERFORMANCE_STRING="Excellent", PERFORMANCE_CHAR="A", RATING_INT=100),
    dict(PERFORMANCE_STRING="Excellent", RATING_DOUBLE=0.99, APPROVAL=True),
]

# Column name -> prefix that the column type reported by DESCRIBE TABLE
# must start with.
GOLD_TYPES = dict(
    PERFORMANCE_STRING="VARCHAR",
    PERFORMANCE_CHAR="VARCHAR",
    RATING_INT="NUMBER",
    RATING_DOUBLE="FLOAT",
    APPROVAL="BOOLEAN",
    RECORD_METADATA="VARIANT",
)


@pytest.mark.schema_evolution
@pytest.mark.compatibility
def test_se_random_row_count(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Evolve one table from two topics using a randomised first batch.

    Each topic receives ``initial_batch`` records followed by ``FLUSH_BATCH``
    records; the i-th topic carries ``RECORDS[i]``.  Once all rows have
    landed, the table must expose every ``GOLD_TYPES`` column with the
    expected type prefix.
    """
    # The bound is FLUSH_BATCH itself so the first batch is always smaller
    # than the second one (1 .. FLUSH_BATCH - 1).
    initial_batch = random.randrange(1, FLUSH_BATCH)
    # Report the randomised size: pytest shows captured stdout for failing
    # tests, which is the only way to reproduce a size-dependent failure.
    print(f"test_se_random_row_count: initial_batch={initial_batch}")
    record_count = initial_batch + FLUSH_BATCH

    base = f"se_random_row_count{name_salt}"
    table_name = base.upper()
    topics = [f"{base}{i}" for i in range(2)]

    for t in topics:
        driver.createTopics(t, partitionNum=1, replicationNum=1)

    connector = create_connector(
        v4_config={
            **V4_CONFIG_TEMPLATE,
            "topics": ",".join(topics),
            # Both topics are mapped to the same table.
            "snowflake.topic2table.map": ",".join(f"{t}:{table_name}" for t in topics),
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "errors.tolerance": "none",
            "errors.log.enable": "true",
            "snowflake.validation": "client_side",
        }
    )
    connector_name = connector.name
    driver.startConnectorWaitTime()

    for i, topic in enumerate(topics):
        # Randomised batch first, then a full FLUSH_BATCH-sized batch.
        for batch_size in (initial_batch, FLUSH_BATCH):
            keys = [
                json.dumps({"number": str(e)}).encode("utf-8")
                for e in range(batch_size)
            ]
            values = [json.dumps(RECORDS[i]).encode("utf-8") for _ in range(batch_size)]
            driver.sendBytesData(topic, values, keys)

    wait_for_rows(table_name, record_count * len(topics), connector_name=connector_name)

    # DESCRIBE rows are reduced to {first field: second field}, read here as
    # column name -> column type.
    cols = {
        row[0]: row[1]
        for row in driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    }
    for col_name, expected_prefix in GOLD_TYPES.items():
        assert col_name in cols, f"Missing column {col_name}, got: {list(cols.keys())}"
        assert cols[col_name].startswith(expected_prefix), (
            f"Column {col_name}: expected {expected_prefix}, got {cols[col_name]}"
        )


================================================
FILE: test/tests/schema_evolution/test_se_replace_table.py
================================================
"""Schema evolution recovery after CREATE OR REPLACE TABLE.

Migrated from v3 ``TestSchemaEvolutionDropTable``.

Sends records so the table evolves new columns, then replaces the
table with CREATE OR REPLACE TABLE.  The connector should detect the
channel invalidation, re-open the channel, and re-evolve the schema
from scratch.
"""

import json

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Number of records produced in each wave.
RECORD_COUNT = 100

# Payload shared by every record.
RECORD = dict(
    PERFORMANCE_STRING="Excellent",
    PERFORMANCE_CHAR="A",
    RATING_INT=100,
)

# Column name -> prefix that the column type reported by DESCRIBE TABLE
# must start with.
GOLD_TYPES = dict(
    PERFORMANCE_STRING="VARCHAR",
    PERFORMANCE_CHAR="VARCHAR",
    RATING_INT="NUMBER",
    RECORD_METADATA="VARIANT",
)


def _assert_schema(driver, table_name):
    """Assert that every ``GOLD_TYPES`` column exists with the expected type.

    Runs ``DESCRIBE TABLE`` through ``driver.snowflake_conn`` and maps the
    first field of each result row to the second one.  Each expected column
    must be present and its type must start with the expected prefix.
    """
    described = (
        driver.snowflake_conn.cursor()
        .execute(f"DESCRIBE TABLE {table_name}")
        .fetchall()
    )
    actual_types = {}
    for entry in described:
        actual_types[entry[0]] = entry[1]

    for expected_name, type_prefix in GOLD_TYPES.items():
        assert expected_name in actual_types, (
            f"Missing column {expected_name}, got: {list(actual_types.keys())}"
        )
        found = actual_types[expected_name]
        assert found.startswith(type_prefix), (
            f"Column {expected_name}: expected {type_prefix}, got {found}"
        )


def _send_records(driver, topic, count):
    """Send ``count`` copies of ``RECORD`` to ``topic``.

    Record ``i`` is keyed with the JSON object ``{"number": "<i>"}``; keys and
    values are UTF-8 encoded JSON handed to ``driver.sendBytesData``.
    """
    keys, values = [], []
    for i in range(count):
        keys.append(json.dumps({"number": str(i)}).encode("utf-8"))
        values.append(json.dumps(RECORD).encode("utf-8"))
    driver.sendBytesData(topic, values, keys)


@pytest.mark.schema_evolution
@pytest.mark.compatibility
@pytest.mark.parametrize("connector_version", ["v3"], indirect=True)
def test_se_replace_table(
    driver,
    connector_version,
    name_salt,
    create_connector,
    snowflake_table,
    wait_for_rows,
):
    """Replace the table between two waves of records.

    Restricted to v3: CREATE OR REPLACE TABLE mid-stream invalidates v4
    streaming channels, and the SSv2 SDK does not surface pipe invalidation
    through isClosed().

    Both waves send ``RECORD_COUNT`` records and must end with
    ``RECORD_COUNT`` rows and the ``GOLD_TYPES`` schema in the table.
    """
    topic = f"se_replace_table{name_salt}"
    table_name = topic.upper()

    # The same statement creates the table up front and replaces it later.
    create_or_replace_sql = (
        f"CREATE OR REPLACE TABLE {table_name} "
        "(RECORD_METADATA VARIANT) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )

    driver.snowflake_conn.cursor().execute(create_or_replace_sql)
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector_config = {
        **V4_CONFIG_TEMPLATE,
        "topics": topic,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "errors.tolerance": "all",
        "errors.log.enable": "true",
        "snowflake.validation": "client_side",
    }
    create_connector(v4_config=connector_config)
    driver.startConnectorWaitTime()

    # Wave 1: the table evolves the RECORD columns.
    _send_records(driver, topic, RECORD_COUNT)
    wait_for_rows(table_name, RECORD_COUNT)
    _assert_schema(driver, table_name)

    # Swap the table for an empty one (simulating an ops incident).
    driver.snowflake_conn.cursor().execute(create_or_replace_sql)

    # Wave 2: the missing columns must be evolved again.
    _send_records(driver, topic, RECORD_COUNT)
    wait_for_rows(table_name, RECORD_COUNT)
    _assert_schema(driver, table_name)


================================================
FILE: test/tests/test_auto_table_creation.py
================================================
import pytest
from confluent_kafka import avro
from confluent_kafka.schema_registry import Schema, SchemaRegistryClient
from time import sleep

from lib.fixtures.table import Table

# Base name shared by the connector config file, the Kafka topic and the
# Snowflake table used by this test (topic and table also get a salt suffix).
FILE_NAME = "travis_correct_auto_table_creation"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of identical records produced to the topic.
RECORD_COUNT = 100

# Avro value schema registered in Schema Registry under "<topic>-value".
VALUE_SCHEMA_STR = """
{
    "type":"record",
    "name":"value_schema",
    "fields":[
        {"name":"id","type":"int"},
        {"name":"first_name","type":"string"},
        {"name":"rating","type":"float"},
        {"name":"approval","type":"boolean"},
        {"name":"info_map","type":{"type":"map","values":"string"}}
    ]
}
"""

# Parsed form of the same schema, passed to the producer when sending records.
VALUE_SCHEMA = avro.loads(VALUE_SCHEMA_STR)

# Expected column name -> Snowflake type, compared after any "(...)" suffix
# has been stripped from the reported type.
GOLD_SCHEMA = {
    "ID": "NUMBER",
    "FIRST_NAME": "VARCHAR",
    "RATING": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "INFO_MAP": "VARIANT",
    "RECORD_METADATA": "VARIANT",
}

# Payload of every record sent to the topic.
RECORD = {
    "id": 100,
    "first_name": "Zekai",
    "rating": 0.99,
    "approval": True,
    "info_map": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"},
}


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_auto_table_creation(
    driver, name_salt, create_connector_from_file, wait_for_rows
):
    """Verify auto table creation with Avro Schema Registry.

    The table is NOT pre-created — the connector should auto-create it
    based on the registered Avro schema.  Verifies column types match
    the expected schema.  A column that is not listed in ``GOLD_SCHEMA``
    fails the test with an explicit assertion message.
    """
    table = Table(driver, f"{FILE_NAME}{name_salt}".upper())
    topic = f"{FILE_NAME}{name_salt}"

    # Register schema with Schema Registry
    sr_client = SchemaRegistryClient({"url": driver.schemaRegistryAddress})
    sr_client.register_schema(f"{topic}-value", Schema(VALUE_SCHEMA_STR, "AVRO"))

    # Create Kafka topic (but NOT the Snowflake table)
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    try:
        create_connector_from_file(CONFIG_FILE)
        driver.startConnectorWaitTime()

        # -- Send --
        values = [RECORD for _ in range(RECORD_COUNT)]
        driver.sendAvroSRData(topic, values, VALUE_SCHEMA)
        sleep(2)

        # -- Verify row count --
        wait_for_rows(table.name, RECORD_COUNT)

        # -- Verify auto-created table schema --
        col_info = table.schema()

        col_names = []
        for col in col_info:
            col_name = col[0]
            col_names.append(col_name)
            # Drop any "(...)" suffix from the reported type before comparing.
            sf_type = col[1].split("(", 1)[0]
            # Fail with context instead of a bare KeyError on the lookup below.
            assert col_name in GOLD_SCHEMA, (
                f"Unexpected column {col_name} of type {sf_type}; "
                f"expected only {list(GOLD_SCHEMA)}"
            )
            assert GOLD_SCHEMA[col_name] == sf_type, (
                f"Column {col_name}: expected type {GOLD_SCHEMA[col_name]}, got {sf_type}"
            )

        for expected_col in GOLD_SCHEMA:
            assert expected_col in col_names, f"Missing column {expected_col}"
    finally:
        # The topic is removed whether or not the checks passed.
        driver.deleteTopic(topic)


================================================
FILE: test/tests/test_auto_table_creation_topic2table.py
================================================
import pytest
from confluent_kafka import avro
from confluent_kafka.schema_registry import Schema, SchemaRegistryClient
from time import sleep

from lib.fixtures.table import Table

# Base name shared by the connector config file, the Kafka topics and the
# Snowflake table used by this test (topics and table also get a salt suffix).
FILE_NAME = "travis_correct_auto_table_creation_topic2table"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of topics feeding the single table.
TOPIC_COUNT = 2
# Number of records produced to each topic.
RECORD_COUNT = 100

# VALUE_SCHEMA_STRS[i] is the Avro value schema registered for the i-th topic.
# The two schemas only share the "id" field.
VALUE_SCHEMA_STRS = [
    """
    {
        "type":"record",
        "name":"value_schema_0",
        "fields":[
            {"name":"id","type":"int"},
            {"name":"approval","type":"boolean"},
            {"name":"info_map","type":{"type":"map","values":"string"}}
        ]
    }
    """,
    """
    {
        "type":"record",
        "name":"value_schema_1",
        "fields":[
            {"name":"id","type":"int"},
            {"name":"first_name","type":"string"},
            {"name":"rating","type":"float"}
        ]
    }
    """,
]

# Parsed schemas, index-aligned with VALUE_SCHEMA_STRS.
VALUE_SCHEMAS = [avro.loads(s) for s in VALUE_SCHEMA_STRS]

# Expected column name -> Snowflake type for the union of both schemas,
# compared after any "(...)" suffix has been stripped from the reported type.
GOLD_SCHEMA = {
    "ID": "NUMBER",
    "FIRST_NAME": "VARCHAR",
    "RATING": "FLOAT",
    "APPROVAL": "BOOLEAN",
    "INFO_MAP": "VARIANT",
    "RECORD_METADATA": "VARIANT",
}

# RECORDS[i] is the payload of every record sent to the i-th topic.
RECORDS = [
    {
        "id": 100,
        "approval": True,
        "info_map": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"},
    },
    {"id": 100, "first_name": "Zekai", "rating": 0.99},
]


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_auto_table_creation_topic2table(
    driver, name_salt, create_connector_from_file, wait_for_rows
):
    """Verify auto table creation with two topics mapped to one table.

    Two Avro schemas are registered for two topics.  Both topics map to
    the same Snowflake table via topic2table.map.  The connector should
    auto-create the table with the union of all fields.  A column that is
    not listed in ``GOLD_SCHEMA`` fails the test with an explicit assertion
    message.
    """
    table = Table(driver, f"{FILE_NAME}{name_salt}".upper())
    topics = [f"{FILE_NAME}{name_salt}{i}" for i in range(TOPIC_COUNT)]

    # Register schemas and create Kafka topics
    sr_client = SchemaRegistryClient({"url": driver.schemaRegistryAddress})
    for i, topic in enumerate(topics):
        sr_client.register_schema(
            f"{topic}-value", Schema(VALUE_SCHEMA_STRS[i], "AVRO")
        )
        driver.createTopics(topic, partitionNum=1, replicationNum=1)

    try:
        create_connector_from_file(CONFIG_FILE)
        driver.startConnectorWaitTime()

        # -- Send --
        for i, topic in enumerate(topics):
            values = [RECORDS[i] for _ in range(RECORD_COUNT)]
            driver.sendAvroSRData(topic, values, VALUE_SCHEMAS[i])
            sleep(2)

        # -- Verify total row count (both topics → one table) --
        wait_for_rows(table.name, RECORD_COUNT * TOPIC_COUNT)

        # -- Verify auto-created table schema (union of both schemas) --
        col_info = table.schema()

        col_names = []
        for col in col_info:
            col_name = col[0]
            col_names.append(col_name)
            # Drop any "(...)" suffix from the reported type before comparing.
            sf_type = col[1].split("(", 1)[0]
            # Fail with context instead of a bare KeyError on the lookup below.
            assert col_name in GOLD_SCHEMA, (
                f"Unexpected column {col_name} of type {sf_type}; "
                f"expected only {list(GOLD_SCHEMA)}"
            )
            assert GOLD_SCHEMA[col_name] == sf_type, (
                f"Column {col_name}: expected type {GOLD_SCHEMA[col_name]}, got {sf_type}"
            )

        for expected_col in GOLD_SCHEMA:
            assert expected_col in col_names, f"Missing column {expected_col}"
    finally:
        # The topics are removed whether or not the checks passed.
        for topic in topics:
            driver.deleteTopic(topic)


================================================
FILE: test/tests/test_avrosr_avrosr.py
================================================
import json

import pytest
from confluent_kafka import avro
from lib.matchers import ANY_INT

# Base name shared by the connector config file, the Kafka topic and the
# Snowflake table used by this test.
FILE_NAME = "travis_correct_avrosr_avrosr"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of key/value pairs produced to the topic.
RECORD_COUNT = 100

# Avro schema of the record key: a single int "id".
KEY_SCHEMA = avro.loads("""
{
    "type": "record",
    "name": "key_schema",
    "fields": [
        {"name": "id", "type": "int"}
    ]
}
""")

# Avro schema of the record value.  Besides plain fields it declares float
# and double fields that the test fills with NaN and +/- infinity.
VALUE_SCHEMA = avro.loads("""
{
    "type": "record",
    "name": "value_schema",
    "fields": [
        {"name": "id", "type": "int"},
        {"name": "firstName", "type": "string"},
        {"name": "time", "type": "int"},
        {"name": "someFloat", "type": "float"},
        {"name": "someFloatNaN", "type": "float"},
        {"name": "someFloatPositiveInfinity", "type": "float"},
        {"name": "someFloatNegativeInfinity", "type": "float"},
        {"name": "someDouble", "type": "double"},
        {"name": "someDoubleNaN", "type": "double"},
        {"name": "someDoublePositiveInfinity", "type": "double"},
        {"name": "someDoubleNegativeInfinity", "type": "double"}
    ]
}
""")


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_avrosr_avrosr(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Ingest Avro (Schema Registry) keys and values into a typed table.

    Sends ``RECORD_COUNT`` key/value pairs, waits for all rows and then checks
    the column values and ``RECORD_METADATA`` of the first selected row.

    The expected values capture v3 reference behavior (the test was ported
    from v3).  v4 parity was confirmed 2026-03-31; v3 itself cannot run
    because of an SR classloader conflict.
    """
    column_definitions = [
        "record_metadata variant",
        "id number",
        "firstName varchar",
        "time number",
        "someFloat number",
        "someFloatNaN varchar",
        "someFloatPositiveInfinity varchar",
        "someFloatNegativeInfinity varchar",
        "someDouble number",
        "someDoubleNaN varchar",
        "someDoublePositiveInfinity varchar",
        "someDoubleNegativeInfinity varchar",
    ]
    table = create_table(
        FILE_NAME.upper(),
        columns="(" + ", ".join(column_definitions) + ")",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    # Every value shares these fields; only "id" changes per record.
    shared_fields = {
        "firstName": "abc0",
        "time": 1835,
        "someFloat": 21.37,
        "someFloatNaN": "NaN",
        "someFloatPositiveInfinity": "inf",
        "someFloatNegativeInfinity": "-inf",
        "someDouble": 15.10,
        "someDoubleNaN": "NaN",
        "someDoublePositiveInfinity": "inf",
        "someDoubleNegativeInfinity": "-inf",
    }
    keys, values = [], []
    for i in range(RECORD_COUNT):
        keys.append({"id": i})
        values.append({"id": i, **shared_fields})
    driver.sendAvroSRData(topic, values, VALUE_SCHEMA, keys, KEY_SCHEMA)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify first row content --
    row = table.select("*")[0]

    expected_columns = {
        "ID": 0,
        "FIRSTNAME": "abc0",
        "TIME": 1835,
        "SOMEFLOAT": 21,
        "SOMEFLOATNAN": "NaN",
        "SOMEFLOATPOSITIVEINFINITY": "Inf",
        "SOMEFLOATNEGATIVEINFINITY": "-Inf",
        "SOMEDOUBLE": 15,
        "SOMEDOUBLENAN": "NaN",
        "SOMEDOUBLEPOSITIVEINFINITY": "Inf",
        "SOMEDOUBLENEGATIVEINFINITY": "-Inf",
    }
    for column, expected in expected_columns.items():
        assert row[column] == expected

    expected_metadata = {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "key": {"id": 0},
        "offset": 0,
        "partition": 0,
        "topic": topic,
    }
    record_metadata = json.loads(row["RECORD_METADATA"])
    assert record_metadata == expected_metadata


================================================
FILE: test/tests/test_channel_invalidation.py
================================================
"""E2E tests for channel invalidation recovery.

Uses SYSTEM$STREAMING_CHANNEL_INVALIDATE to set ERR_CHANNEL_MUST_BE_REOPENED
on streaming channels and verifies the connector recovers with no data loss.

Recovery mechanism: After server-side invalidation, the SDK discovers the error
on the next background flush (~25s), marks the channel locally invalid, and the
next appendRow() throws synchronously -> Failsafe fallback -> reopenChannel.

Requires: SNOW-3291474 (system function) deployed to the test account.
JIRA: SNOW-3097571
"""

import logging
import time

import pytest
import snowflake.connector

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.utils import RecordProducer

# Module-level logger used by the helpers and tests in this module.
logger = logging.getLogger(__name__)

# Note on table naming: the v4 connector with no sanitization config creates tables
# using the exact topic name (case-preserved). Queries use quote_name() which wraps
# names in double-quotes (case-sensitive), so table_name must match topic exactly.
# Do NOT use topic.upper() — that only works with sanitization enabled.
#
# Connector configuration shared by the tests in this module.
# NOTE(review): "topics" is a fixed placeholder here although every test
# creates its own salted topic; presumably create_connector substitutes the
# real topic name — confirm against the fixture.
CONNECTOR_CONFIG = {
    **V4_CONFIG_TEMPLATE,
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "errors.tolerance": "all",
    "errors.log.enable": "true",
    "snowflake.validation": "client_side",
}


def invalidate_channel(driver, credentials, table_name, topic, partition=0):
    """Invalidate one streaming channel via SYSTEM$STREAMING_CHANNEL_INVALIDATE.

    The channel name is ``<TABLE_NAME upper-cased>_<topic>_<partition>`` and
    the pipe is ``<database>.<schema>."<table_name>-STREAMING"``.

    The calling test is skipped when the account does not provide the system
    function (error number 2140 or an "Unknown function" message); any other
    ProgrammingError propagates.

    Returns the value produced by the system function, which must contain
    ``ERR_CHANNEL_MUST_BE_REOPENED``.
    """
    channel_name = f"{table_name.upper()}_{topic}_{partition}"
    pipe_fqn = f'{credentials.database}.{credentials.schema}."{table_name}-STREAMING"'
    logger.info(f"Invalidating channel={channel_name} on pipe={pipe_fqn}")

    cursor = driver.snowflake_conn.cursor()
    try:
        statement = (
            f"SELECT SYSTEM$STREAMING_CHANNEL_INVALIDATE('{pipe_fqn}', '{channel_name}')"
        )
        result = cursor.execute(statement).fetchone()[0]
    except snowflake.connector.errors.ProgrammingError as e:
        function_missing = e.errno == 2140 or "Unknown function" in str(e)
        if not function_missing:
            raise
        # pytest.skip raises, so control never continues past this call.
        pytest.skip(
            f"SYSTEM$STREAMING_CHANNEL_INVALIDATE is not available on this "
            f"Snowflake account — skipping channel invalidation test ({e})"
        )

    logger.info(f"Invalidation result: {result}")
    assert "ERR_CHANNEL_MUST_BE_REOPENED" in result, f"Invalidation failed: {result}"
    return result


def _send_to_partition(driver, topic, n, partition):
    """Send n JSON records to a specific partition.

    RecordProducer.send() hardcodes partition=0, so multi-partition tests need
    this helper to route records to specific partitions.
    """
    import json

    values = [json.dumps({"number": str(i)}).encode() for i in range(n)]
    driver.sendBytesData(topic, values, [], partition)


def _drip_feed_to_partitions(driver, topic, partitions, batch_size=10, interval=1.0):
    """Start a daemon thread that keeps sending small batches to ``partitions``.

    Each round sends one batch of ``batch_size`` records to every partition in
    turn, then waits up to ``interval`` seconds.  Record numbers keep
    increasing across batches and partitions.

    Returns ``(stop_event, thread)``; setting ``stop_event`` ends the loop.
    """
    import json
    import threading

    stop_event = threading.Event()
    produced = 0  # running total of records sent, shared with the worker

    def _produce():
        nonlocal produced
        while not stop_event.is_set():
            for partition in partitions:
                batch = []
                for offset in range(batch_size):
                    record = {"number": str(produced + offset)}
                    batch.append(json.dumps(record).encode())
                driver.sendBytesData(topic, batch, [], partition)
                produced += batch_size
            # Returns early once the stop event is set.
            stop_event.wait(interval)

    thread = threading.Thread(target=_produce, daemon=True)
    thread.start()
    logger.info(
        f"Started multi-partition drip-feed to partitions {partitions} "
        f"(batch_size={batch_size}, interval={interval}s)"
    )
    return stop_event, thread


def _wait_for_stall(driver, table_name, rows_before, timeout=90):
    """Wait until ingestion stalls (row count stable for 15s). Returns stalled row count.

    The row count is polled every 5 seconds, starting from ``rows_before``.
    Ingestion counts as stalled after three consecutive polls that return the
    same count.  A missing count (``None``) is treated as 0.

    If ``timeout`` seconds pass without a stall, a warning is logged and the
    last observed count is returned anyway, so callers keep working as before.
    """
    stable_count = 0
    last_rows = rows_before
    stalled = False
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        time.sleep(5)
        current = driver.select_number_of_records(table_name)
        current = int(current) if current is not None else 0
        if current == last_rows:
            stable_count += 1
        else:
            # The count moved: restart the stability window from this value.
            stable_count = 0
            last_rows = current
        if stable_count >= 3:
            stalled = True
            break
    if stalled:
        logger.info(f"Ingestion stalled at {last_rows} rows (was {rows_before})")
    else:
        # Do not report a stall that was never observed.
        logger.warning(
            f"Ingestion did not stall within {timeout}s; "
            f"last observed {last_rows} rows (was {rows_before})"
        )
    return last_rows


def _get_partition_row_counts(driver, table_name):
    """Query per-partition row counts from record_metadata."""
    rows = (
        driver.snowflake_conn.cursor()
        .execute(
            f"SELECT record_metadata:partition::int AS p, count(*) AS c "
            f'FROM "{table_name}" GROUP BY p ORDER BY p'
        )
        .fetchall()
    )
    return {int(r[0]): int(r[1]) for r in rows}


def _assert_task_running(driver, connector_name):
    """Assert all connector tasks are RUNNING."""
    status = driver.get_connector_status(connector_name)
    assert status is not None, f"Connector {connector_name} not found"
    tasks = status.get("tasks", [])
    assert tasks, f"Connector {connector_name} has no tasks"
    for task in tasks:
        state = task.get("state")
        assert state == "RUNNING", (
            f"Task {task.get('id')} is {state}, not RUNNING. "
            f"Trace: {task.get('trace', '')[:500]}"
        )


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_during_active_ingestion(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 1: Invalidate a channel while records are actively being ingested.

    Starts continuous production, waits for some rows to land, then invalidates
    mid-stream while records are still flowing.  The test passes when the row
    count grows by at least 50 beyond the stalled count and every connector
    task is still RUNNING.

    The continuous producer is always stopped, even when a wait or assertion
    fails, so a failing run does not leave its background producer running.
    """
    topic = f"test_invalidation_during_active_ingestion{name_salt}"
    # The table name must match the topic exactly (no upper-casing).
    table_name = topic
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    producer = RecordProducer(driver, topic)

    # Start continuous production and wait for some rows to commit
    producer.start_continuous(batch_size=10, interval=0.5)
    try:
        wait_for_rows(table_name, 50, at_least=True, connector_name=connector.name)
        rows_before = int(driver.select_number_of_records(table_name))
        logger.info(f"Phase 1: {rows_before} rows committed while actively ingesting")

        # Invalidate mid-stream — records are still flowing
        invalidate_channel(driver, credentials, table_name, topic, partition=0)

        # Wait for stall to confirm invalidation took effect
        stalled_rows = _wait_for_stall(driver, table_name, rows_before)

        # Continue drip-feeding to trigger synchronous recovery
        wait_for_rows(
            table_name,
            stalled_rows + 50,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        producer.stop_continuous()
    _assert_task_running(driver, connector.name)

    final_rows = int(driver.select_number_of_records(table_name))
    logger.info(f"Final row count: {final_rows} (stalled at {stalled_rows})")
    assert final_rows > stalled_rows


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_between_batches(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 2: Invalidate a channel while it is idle between batches.

    Ingests a first wave, invalidates the idle channel, sends a second batch
    and waits for ingestion to stall.  A continuous drip-feed must then raise
    the row count by at least 50 beyond the stalled count, with every
    connector task still RUNNING.

    The continuous producer is always stopped, even when the wait fails, so a
    failing run does not leave its background producer running.
    """
    topic = f"test_invalidation_between_batches{name_salt}"
    # The table name must match the topic exactly (no upper-casing).
    table_name = topic
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    producer = RecordProducer(driver, topic)

    # Wave 1: ingest and wait for full commit
    producer.send(100)
    wait_for_rows(table_name, 100, connector_name=connector.name)
    rows_before = int(driver.select_number_of_records(table_name))
    logger.info(f"Wave 1 committed ({rows_before} rows)")

    # Invalidate while idle
    invalidate_channel(driver, credentials, table_name, topic, partition=0)

    # Send batch to trigger flush failure, then wait for stall
    producer.send(100)
    stalled_rows = _wait_for_stall(driver, table_name, rows_before)

    # Drip-feed to trigger synchronous recovery
    producer.start_continuous(batch_size=10, interval=1.0)
    try:
        wait_for_rows(
            table_name,
            stalled_rows + 50,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        producer.stop_continuous()
    _assert_task_running(driver, connector.name)

    final_rows = int(driver.select_number_of_records(table_name))
    logger.info(f"Final row count: {final_rows} (was {rows_before})")
    assert final_rows > rows_before


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_all_partitions(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 3: Invalidate all channels simultaneously on a multi-partition topic.

    Sends records to each partition explicitly, invalidates all channels,
    and verifies each partition recovers (its row count grows past the
    pre-invalidation count).

    The drip-feed thread is signalled and joined in a ``finally`` block so a
    failed wait does not leave it producing after the test.
    """
    topic = f"test_invalidation_all_partitions{name_salt}"
    table_name = topic
    num_partitions = 3
    records_per_partition = 100
    driver.createTopics(topic, partitionNum=num_partitions, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    # Wave 1: send records to each partition explicitly
    for p in range(num_partitions):
        _send_to_partition(driver, topic, records_per_partition, partition=p)
    total_wave1 = num_partitions * records_per_partition
    wait_for_rows(table_name, total_wave1, connector_name=connector.name)
    rows_before = int(driver.select_number_of_records(table_name))

    # Verify records landed on all partitions
    partition_counts_before = _get_partition_row_counts(driver, table_name)
    logger.info(f"Wave 1 per-partition counts: {partition_counts_before}")
    for p in range(num_partitions):
        assert partition_counts_before.get(p, 0) > 0, (
            f"Partition {p} has no records before invalidation"
        )

    # Invalidate all partitions
    for p in range(num_partitions):
        invalidate_channel(driver, credentials, table_name, topic, partition=p)
    logger.info(f"All {num_partitions} channels invalidated")

    # Send to each partition to trigger flush failure, then wait for stall
    for p in range(num_partitions):
        _send_to_partition(driver, topic, 50, partition=p)
    stalled_rows = _wait_for_stall(driver, table_name, rows_before)

    # Drip-feed to ALL partitions to trigger recovery on each channel
    stop_event, thread = _drip_feed_to_partitions(
        driver, topic, list(range(num_partitions))
    )
    try:
        wait_for_rows(
            table_name,
            stalled_rows + 50,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        # Always stop the drip-feed thread, even when the wait above timed out.
        stop_event.set()
        thread.join(timeout=5)
    _assert_task_running(driver, connector.name)

    # Verify each partition has more rows than before
    partition_counts_after = _get_partition_row_counts(driver, table_name)
    logger.info(f"Post-recovery per-partition counts: {partition_counts_after}")
    for p in range(num_partitions):
        assert partition_counts_after.get(p, 0) > partition_counts_before.get(p, 0), (
            f"Partition {p} did not recover: "
            f"before={partition_counts_before.get(p, 0)}, "
            f"after={partition_counts_after.get(p, 0)}"
        )


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_with_connector_restart(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 4: Invalidate a channel and then restart the connector.

    A restart clears the SDK state and reopens the channel fresh. The server-side
    error code is tied to the old client sequencer; the new channel gets a fresh
    sequencer, so the error doesn't apply. Recovery is implicit via the restart.

    The drip-feed producer is stopped in a ``finally`` block so a failed wait
    does not leave it running after the test.
    """
    topic = f"test_invalidation_with_connector_restart{name_salt}"
    table_name = topic
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    producer = RecordProducer(driver, topic)

    # Wave 1
    producer.send(100)
    wait_for_rows(table_name, 100, connector_name=connector.name)
    rows_before = int(driver.select_number_of_records(table_name))
    logger.info(f"Wave 1 committed ({rows_before} rows)")

    # Invalidate then restart
    invalidate_channel(driver, credentials, table_name, topic, partition=0)
    driver.restartConnector(connector.name)
    driver.wait_for_connector_running(connector.name)
    logger.info("Connector restarted after invalidation")

    # Drip-feed after restart
    producer.start_continuous(batch_size=10, interval=1.0)
    try:
        wait_for_rows(
            table_name,
            rows_before + 100,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        # Always stop production, even when the wait above timed out.
        producer.stop_continuous()
    _assert_task_running(driver, connector.name)

    final_rows = int(driver.select_number_of_records(table_name))
    logger.info(f"Final row count: {final_rows} (was {rows_before})")
    assert final_rows > rows_before


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_one_partition_others_healthy(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 5: Invalidate one partition while others remain healthy.

    Sends records to each partition explicitly, invalidates only partition 1,
    and verifies that every partition (the invalidated one and the two healthy
    ones) ends up with more rows than it had before the invalidation.

    The drip-feed thread is signalled and joined in a ``finally`` block so a
    failed wait does not leave it producing after the test.
    """
    topic = f"test_invalidation_one_partition_others_healthy{name_salt}"
    table_name = topic
    num_partitions = 3
    records_per_partition = 100
    driver.createTopics(topic, partitionNum=num_partitions, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    # Wave 1: send to each partition
    for p in range(num_partitions):
        _send_to_partition(driver, topic, records_per_partition, partition=p)
    total_wave1 = num_partitions * records_per_partition
    wait_for_rows(table_name, total_wave1, connector_name=connector.name)

    partition_counts_before = _get_partition_row_counts(driver, table_name)
    logger.info(f"Wave 1 per-partition counts: {partition_counts_before}")
    for p in range(num_partitions):
        assert partition_counts_before.get(p, 0) > 0, (
            f"Partition {p} has no records before invalidation"
        )

    # Invalidate only partition 1
    invalidate_channel(driver, credentials, table_name, topic, partition=1)
    logger.info("Partition 1 invalidated, partitions 0 and 2 untouched")

    # Send to all partitions — partitions 0,2 should ingest immediately,
    # partition 1 will stall then recover
    for p in range(num_partitions):
        _send_to_partition(driver, topic, 50, partition=p)
    # Baseline for the stall check. It is read after the second wave has been
    # sent, so it may already include some rows from that wave.
    rows_after_second_send = int(driver.select_number_of_records(table_name))

    # Wait for stall on partition 1, then drip-feed to ALL partitions
    stalled_rows = _wait_for_stall(driver, table_name, rows_after_second_send)

    stop_event, thread = _drip_feed_to_partitions(
        driver, topic, list(range(num_partitions))
    )
    try:
        wait_for_rows(
            table_name,
            stalled_rows + 50,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        # Always stop the drip-feed thread, even when the wait above timed out.
        stop_event.set()
        thread.join(timeout=5)
    _assert_task_running(driver, connector.name)

    # Verify partition 1 recovered and all partitions have more data
    partition_counts_after = _get_partition_row_counts(driver, table_name)
    logger.info(f"Post-recovery per-partition counts: {partition_counts_after}")

    # Partition 1 (invalidated) must have recovered
    assert partition_counts_after.get(1, 0) > partition_counts_before.get(1, 0), (
        f"Partition 1 did not recover: "
        f"before={partition_counts_before.get(1, 0)}, "
        f"after={partition_counts_after.get(1, 0)}"
    )
    # Healthy partitions should also have more rows
    for p in [0, 2]:
        assert partition_counts_after.get(p, 0) > partition_counts_before.get(p, 0), (
            f"Healthy partition {p} lost data: "
            f"before={partition_counts_before.get(p, 0)}, "
            f"after={partition_counts_after.get(p, 0)}"
        )


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_offset_consistency(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 6: Verify offset consistency after invalidation recovery.

    Checks the full offset range (0..max_offset) has no gaps. Duplicates are OK.

    The drip-feed producer is stopped and the Snowflake cursor is closed in
    ``finally`` blocks so a failure does not leak either of them.
    """
    topic = f"test_invalidation_offset_consistency{name_salt}"
    table_name = topic
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    producer = RecordProducer(driver, topic)

    # Wave 1
    producer.send(100)
    wait_for_rows(table_name, 100, connector_name=connector.name)
    rows_before = int(driver.select_number_of_records(table_name))
    logger.info(f"Wave 1 committed ({rows_before} rows)")

    # Invalidate, wait for stall, drip-feed recovery
    invalidate_channel(driver, credentials, table_name, topic, partition=0)
    producer.send(100)
    stalled_rows = _wait_for_stall(driver, table_name, rows_before)

    producer.start_continuous(batch_size=10, interval=1.0)
    try:
        wait_for_rows(
            table_name,
            stalled_rows + 50,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        # Always stop production, even when the wait above timed out.
        producer.stop_continuous()
    _assert_task_running(driver, connector.name)

    # Verify offset integrity: the full range 0..max_offset must have no gaps.
    # With the recordProcessed fix (SNOW-3344243), the offset rewind replays all
    # records that were in-flight during the flush-failure window, so no data is lost.
    cur = driver.snowflake_conn.cursor()
    try:
        offsets = sorted(
            row[0]
            for row in cur.execute(
                f"SELECT DISTINCT record_metadata:offset::int AS off "
                f'FROM "{table_name}" ORDER BY off'
            ).fetchall()
        )
    finally:
        # Release the cursor whether or not the query succeeded.
        cur.close()

    # Fail with a readable message instead of an IndexError on offsets[-1].
    assert offsets, f'No offsets found in "{table_name}"'

    total_rows = int(driver.select_number_of_records(table_name))
    distinct_offsets = len(offsets)
    max_offset = offsets[-1]

    logger.info(
        f"Offset check: {total_rows} total rows, {distinct_offsets} distinct offsets, "
        f"range [0..{max_offset}]"
    )

    # No gaps in the full range 0..max_offset
    expected_offsets = set(range(max_offset + 1))
    actual_offsets = set(offsets)
    missing = expected_offsets - actual_offsets
    assert not missing, (
        f"Missing offsets (gaps) in range [0..{max_offset}]: "
        f"{sorted(missing)[:20]}{'...' if len(missing) > 20 else ''}"
    )

    duplicates = total_rows - distinct_offsets
    if duplicates > 0:
        logger.info(f"Found {duplicates} duplicate rows (expected after recovery)")


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_invalidation_during_flush(
    driver, credentials, name_salt, create_connector, wait_for_rows
):
    """Test 7: Invalidate a channel right after data starts flowing (races with flush).

    Sends an initial batch, waits for the pipe to be created, then immediately
    sends more and invalidates — the invalidation races with the flush.

    The drip-feed producer is stopped in a ``finally`` block so a failed wait
    does not leave it running after the test.
    """
    topic = f"test_invalidation_during_flush{name_salt}"
    table_name = topic
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    producer = RecordProducer(driver, topic)

    # First batch: ensure the pipe/channel exists
    producer.send(100)
    wait_for_rows(table_name, 100, connector_name=connector.name)
    rows_before = int(driver.select_number_of_records(table_name))
    logger.info(f"Initial batch committed ({rows_before} rows), pipe exists")

    # Send second batch and immediately invalidate — races with flush
    producer.send(100)
    invalidate_channel(driver, credentials, table_name, topic, partition=0)
    logger.info(
        "Invalidated immediately after sending second batch (racing with flush)"
    )

    # Wait for stall, then drip-feed for recovery
    stalled_rows = _wait_for_stall(driver, table_name, rows_before)

    producer.start_continuous(batch_size=10, interval=1.0)
    try:
        wait_for_rows(
            table_name,
            stalled_rows + 50,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        # Always stop production, even when the wait above timed out.
        producer.stop_continuous()
    _assert_task_running(driver, connector.name)

    final_rows = int(driver.select_number_of_records(table_name))
    logger.info(f"Final row count: {final_rows} (stalled at {stalled_rows})")
    assert final_rows > stalled_rows


================================================
FILE: test/tests/test_channel_invalidation_recovery.py
================================================
"""E2E test: KC task should recover after channel invalidation, not die.

Reproduces the bug where AppendRowWithRetryAndFallbackPolicy successfully
recovers a channel after InvalidChannelError but re-throws the exception,
causing the KC framework to kill the task as "unrecoverable".
"""

import logging

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.utils import RecordProducer
from tests.test_channel_invalidation import (
    _assert_task_running,
    _wait_for_stall,
    invalidate_channel,
)

logger = logging.getLogger(__name__)

# Number of records sent per producer.send() call in the test below; also used
# to size the row-count thresholds it waits for.
RECORD_BATCH = 100
# v4 connector settings for the recovery test: the shared template with the
# overrides listed here (single task, string keys, schemaless JSON values,
# no error tolerance, client-side validation).
CONNECTOR_CONFIG = {
    **V4_CONFIG_TEMPLATE,
    # NOTE(review): the test produces to its own salted topic, so this value
    # looks like a placeholder that create_connector replaces — confirm.
    "topics": "SNOWFLAKE_TEST_TOPIC",
    "tasks.max": "1",
    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
    "value.converter": "org.apache.kafka.connect.json.JsonConverter",
    "value.converter.schemas.enable": "false",
    "errors.tolerance": "none",
    "errors.log.enable": "true",
    "snowflake.validation": "client_side",
    "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    "snowflake.compatibility.enable.column.identifier.normalization": "true",
}


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_channel_invalidation_recovery(
    driver,
    credentials,
    name_salt,
    create_connector,
    wait_for_rows,
):
    """Channel invalidation should not kill the KC task.

    Steps:
    1. Start connector, produce records, verify ingestion works.
    2. Invalidate channel via SYSTEM$STREAMING_CHANNEL_INVALIDATE.
    3. Send records (buffered by SDK), wait for ingestion to stall — proves
       the SDK flush failed.
    4. Drip-feed new records — appendRow throws SFException synchronously,
       triggering the Failsafe fallback → reopenChannel.
    5. Assert: task is RUNNING, new rows arrive in Snowflake.

    Without the fix (PR #1401), step 4 re-throws after recovery and kills the task.

    The drip-feed producer is stopped in a ``finally`` block so a failed wait
    does not leave it running after the test.
    """
    topic = f"test_channel_invalidation_recovery{name_salt}"
    table_name = topic.upper()
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    connector = create_connector(v4_config=CONNECTOR_CONFIG)
    driver.wait_for_connector_running(connector.name)

    # -- Phase 1: Baseline ingestion --
    producer = RecordProducer(driver, topic)
    producer.send(RECORD_BATCH)
    wait_for_rows(table_name, RECORD_BATCH, connector_name=connector.name)
    rows_before = int(driver.select_number_of_records(table_name))
    logger.info(f"Phase 1: {rows_before} rows ingested")

    # -- Phase 2: Invalidate the channel --
    invalidate_channel(driver, credentials, table_name, topic, partition=0)

    # -- Phase 3: Trigger flush failure and verify stall --
    producer.send(RECORD_BATCH)
    stalled_rows = _wait_for_stall(driver, table_name, rows_before)
    assert stalled_rows == rows_before, (
        f"Expected ingestion to stall at {rows_before} rows after invalidation, "
        f"but rows advanced to {stalled_rows}. "
        f"SYSTEM$STREAMING_CHANNEL_INVALIDATE may not have taken effect."
    )

    # -- Phase 4: Trigger synchronous recovery via drip-feed --
    producer.start_continuous(batch_size=10, interval=1.0)
    try:
        # Pass connector_name here as well, matching the phase 1 wait above.
        wait_for_rows(
            table_name,
            rows_before + RECORD_BATCH,
            at_least=True,
            connector_name=connector.name,
            timeout=120,
        )
    finally:
        # Always stop production, even when the wait above timed out.
        producer.stop_continuous()
    rows_after = int(driver.select_number_of_records(table_name))

    _assert_task_running(driver, connector.name)
    assert rows_after > rows_before, (
        f"No new rows after recovery (before={rows_before}, after={rows_after})"
    )
    logger.info(
        f"Recovery verified: {rows_before} → {stalled_rows} (stalled) → "
        f"{rows_after} (recovered), task RUNNING"
    )


================================================
FILE: test/tests/test_column_identifier_normalization.py
================================================
"""E2E tests for column identifier normalization."""

import json

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE

# Module-level marker: applies the ``correctness`` mark to every test in this file.
pytestmark = pytest.mark.correctness

# Column list plus table option handed to ``create_table``. It declares both a
# quoted lowercase "city" column and an unquoted CITY column, and turns schema
# evolution on for the table.
TWO_CITY_DDL = '(ID NUMBER, "city" VARCHAR, CITY VARCHAR, RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE'
# Values for the ``normalization`` test parameter and their matching pytest ids.
NORM_MATRIX = [True, False]
NORM_IDS = ["norm=on", "norm=off"]


@pytest.mark.parametrize("normalization", NORM_MATRIX, ids=NORM_IDS)
def test_with_validation(
    driver,
    name_salt,
    connector_version,
    create_connector,
    create_table,
    wait_for_rows,
    normalization,
):
    """Client-side validation with schema evolution enabled on the table.

    KCv3 always normalizes, so the v3 + norm=OFF combination is skipped.
    The record with ID 3 triggers schema evolution that adds the ``age`` and
    ``AGE`` columns.
    """
    if not normalization and connector_version == "v3":
        pytest.skip("KCv3 always normalizes; norm=OFF is KCv4-only")

    suffix = "val_n1" if normalization else "val_n0"
    base_name = f"column_identifier_normalization_{suffix}"
    table = create_table(base_name.upper(), columns=TWO_CITY_DDL)
    topic = f"{base_name}{name_salt}"
    dlq_topic = f"DLQ_NORM_{name_salt}_{suffix}"

    # Assemble the connector settings first, then register the connector.
    connector_settings = {
        **V4_CONFIG_TEMPLATE,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "true",
        "snowflake.compatibility.enable.column.identifier.normalization": (
            "true" if normalization else "false"
        ),
        "snowflake.validation": "client_side",
        "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
        "errors.tolerance": "all",
        "errors.log.enable": "true",
        "errors.deadletterqueue.topic.name": dlq_topic,
        "errors.deadletterqueue.topic.replication.factor": "1",
        "topics": topic,
        "jmx": "true",
    }
    connector = create_connector(v4_config=connector_settings)
    driver.startConnectorWaitTime()

    # With normalization on, the lowercase keys are sent wrapped in double quotes.
    if normalization:
        city_key, age_key = '"city"', '"age"'
    else:
        city_key, age_key = "city", "age"

    payloads = [
        {"ID": 0, city_key: "lower_0", "CITY": "upper_0"},
        {"ID": 1, city_key: "lower_only"},
        {"ID": 2, "CITY": "upper_only"},
        {"ID": 3, age_key: 10, "AGE": 20},
    ]
    encoded = [json.dumps(payload).encode("utf-8") for payload in payloads]
    driver.sendBytesData(topic, encoded, partition=0)
    wait_for_rows(table.name, 4, connector_name=connector.name)

    # ID 0 carries both keys; each lands in its own column.
    both_cities = table.select("*", 'WHERE "ID" = 0')[0]
    assert both_cities["city"] == "lower_0"
    assert both_cities["CITY"] == "upper_0"

    lower_only = table.select("*", 'WHERE "ID" = 1')[0]
    assert lower_only["city"] == "lower_only"

    upper_only = table.select("*", 'WHERE "ID" = 2')[0]
    assert upper_only["CITY"] == "upper_only"

    # ID 3 has no city values but fills the two evolved age columns.
    evolved = table.select("*", 'WHERE "ID" = 3')[0]
    assert evolved["city"] is None
    assert evolved["CITY"] is None
    assert evolved["age"] == 10
    assert evolved["AGE"] == 20

    column_types = dict((entry[0], entry[1]) for entry in table.schema())
    assert "age" in column_types
    assert "AGE" in column_types


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("normalization", NORM_MATRIX, ids=NORM_IDS)
def test_without_validation(
    driver,
    name_salt,
    connector_version,
    create_connector,
    create_table,
    wait_for_rows,
    normalization,
):
    """Server-side validation with schema evolution enabled; KCv4 only.

    The server-side MBCN case-insensitive fallback writes a value to ALL
    columns whose names match case-insensitively, and server-side schema
    evolution uppercases the names of newly added columns.
    """
    suffix = "noval_n1" if normalization else "noval_n0"
    base_name = f"column_identifier_normalization_{suffix}"
    table = create_table(base_name.upper(), columns=TWO_CITY_DDL)
    topic = f"{base_name}{name_salt}"
    dlq_topic = f"DLQ_NORM_{name_salt}_{suffix}"

    # Assemble the connector settings first, then register the connector.
    connector_settings = {
        **V4_CONFIG_TEMPLATE,
        "tasks.max": "1",
        "key.converter": "org.apache.kafka.connect.storage.StringConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter.schemas.enable": "false",
        "snowflake.enable.schematization": "true",
        "snowflake.compatibility.enable.column.identifier.normalization": (
            "true" if normalization else "false"
        ),
        "snowflake.validation": "server_side",
        "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
        "errors.tolerance": "all",
        "errors.log.enable": "true",
        "errors.deadletterqueue.topic.name": dlq_topic,
        "errors.deadletterqueue.topic.replication.factor": "1",
        "topics": topic,
        "jmx": "true",
    }
    connector = create_connector(v4_config=connector_settings)
    driver.startConnectorWaitTime()

    # With normalization on, the lowercase keys are sent wrapped in double quotes.
    if normalization:
        city_key, age_key = '"city"', '"age"'
    else:
        city_key, age_key = "city", "age"

    payloads = [
        {"ID": 0, city_key: "lower_0", "CITY": "upper_0"},
        {"ID": 1, city_key: "lower_only"},
        {"ID": 2, "CITY": "upper_only"},
        {"ID": 3, age_key: 10},
    ]
    encoded = [json.dumps(payload).encode("utf-8") for payload in payloads]
    driver.sendBytesData(topic, encoded, partition=0)
    wait_for_rows(table.name, 4, connector_name=connector.name)

    both_cities = table.select("*", 'WHERE "ID" = 0')[0]
    assert both_cities["city"] == "lower_0"
    assert both_cities["CITY"] == "upper_0"

    # MBCN CI fallback: a single key is written to both matching columns.
    lower_only = table.select("*", 'WHERE "ID" = 1')[0]
    assert lower_only["city"] == "lower_only"
    assert lower_only["CITY"] == "lower_only"

    upper_only = table.select("*", 'WHERE "ID" = 2')[0]
    assert upper_only["city"] == "upper_only"
    assert upper_only["CITY"] == "upper_only"

    # Server-side schema evolution uppercases the new column name.
    evolved = table.select("*", 'WHERE "ID" = 3')[0]
    assert evolved["city"] is None
    assert evolved["CITY"] is None
    assert evolved["AGE"] == 10

    column_types = dict((entry[0], entry[1]) for entry in table.schema())
    assert "AGE" in column_types


================================================
FILE: test/tests/test_confluent_protobuf_protobuf.py
================================================
import json

import pytest
from confluent_kafka import SerializingProducer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.protobuf import ProtobufSerializer

from lib.matchers import ANY_INT

# Base name shared by the table (upper-cased), the topic (with the run's name
# salt appended) and the connector config file.
FILE_NAME = "travis_correct_confluent_protobuf_protobuf"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of identical records produced, and the row count the test waits for.
RECORD_COUNT = 100


def _build_sensor(sensor_pb2):
    sensor = sensor_pb2.SensorReading()
    sensor.dateTime = 1234
    sensor.reading = 321.321
    sensor.device.deviceID = "555-4321"
    sensor.device.enabled = True
    sensor.float_val = 4321.4321
    sensor.int32_val = (1 << 31) - 1
    sensor.sint32_val = (1 << 31) - 1
    sensor.sint64_val = (1 << 63) - 1
    sensor.uint32_val = (1 << 32) - 1
    sensor.bytes_val = b"\xde\xad"
    sensor.double_array_val.extend([1 / 3, 32.21, 434324321])
    sensor.uint64_val = (1 << 64) - 1
    return sensor


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_confluent_protobuf_protobuf(
    sensor_pb2,
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    wait_for_rows,
    request,
):
    """Produce protobuf keys and values via schema registry and check the first row."""
    # Assertions below capture v3 reference behavior (test ported from v3).
    # v4 parity confirmed 2026-03-31. v3 cannot run due to SR classloader conflict.
    platform = request.config.getoption("--platform-version") or ""
    if platform.startswith("8."):
        pytest.skip("BlueApron protobuf converter incompatible with Confluent 8.x")

    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, record_content variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Producer whose key and value are both serialized as SensorReading
    # through the schema registry.
    registry = SchemaRegistryClient({"url": driver.schemaRegistryAddress})
    key_serializer = ProtobufSerializer(sensor_pb2.SensorReading, registry)
    value_serializer = ProtobufSerializer(sensor_pb2.SensorReading, registry)
    producer_settings = {
        "bootstrap.servers": driver.kafkaAddress,
        "key.serializer": key_serializer,
        "value.serializer": value_serializer,
    }
    producer = SerializingProducer(producer_settings)

    # The same message is used as both key and value of every record.
    message = _build_sensor(sensor_pb2)
    for _unused in range(RECORD_COUNT):
        producer.produce(topic, message, message)
        producer.poll(0)
    producer.flush()

    # All records must land before the content is inspected.
    wait_for_rows(table.name, RECORD_COUNT)

    first_row = table.select("record_metadata, record_content")[0]

    # JSON form expected for the sensor message, both as record key and content.
    expected_sensor = {
        "bytes_val": "3q0=",
        "dateTime": 1234,
        "device": {"deviceID": "555-4321", "enabled": True},
        "double_array_val": [0.3333333333333333, 32.21, 434324321.0],
        "float_val": 4321.432,
        "int32_val": 2147483647,
        "reading": 321.321,
        "sint32_val": 2147483647,
        "sint64_val": 9223372036854775807,
        "uint32_val": 4294967295,
        "uint64_val": -1,
    }
    expected_metadata = {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "key": expected_sensor,
        "offset": ANY_INT,
        "partition": ANY_INT,
        "topic": topic,
    }

    metadata = json.loads(first_row["RECORD_METADATA"])
    assert metadata == expected_metadata

    content = json.loads(first_row["RECORD_CONTENT"])
    assert content == expected_sensor


================================================
FILE: test/tests/test_default_pipe_features.py
================================================
"""E2E tests for FR7 default pipe features: identity and default columns.

These tests verify that the Kafka Connector correctly handles tables with
AUTOINCREMENT (identity) columns and columns with DEFAULT values. The
primary concern is client-side validation: the RowValidator must not reject
records that omit server-filled columns.

v4-only — no v3 equivalent (FR7 requires SSv2 default pipe support).
"""

import json
import logging

import pytest

from lib.config_migration import V4_CONFIG_TEMPLATE
from lib.driver import KafkaDriver

logger = logging.getLogger(__name__)

# Number of records the tests in this module send and then wait for.
RECORD_COUNT = 20


def _connector_config(topic: str, *, validation: bool) -> dict:
    """Return v4 connector settings for the default pipe feature tests.

    The shared template is copied and then overridden with the settings
    below. ``validation`` picks ``client_side`` when truthy and
    ``server_side`` otherwise; ``topic`` becomes the ``topics`` entry.
    """
    validation_mode = "server_side"
    if validation:
        validation_mode = "client_side"

    config = dict(V4_CONFIG_TEMPLATE)
    config.update(
        {
            "tasks.max": "1",
            "key.converter": "org.apache.kafka.connect.storage.StringConverter",
            "value.converter": "org.apache.kafka.connect.json.JsonConverter",
            "value.converter.schemas.enable": "false",
            "snowflake.enable.schematization": "true",
            "snowflake.validation": validation_mode,
            "snowflake.compatibility.enable.column.identifier.normalization": "true",
            "topics": topic,
        }
    )
    return config


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"])
def test_identity_column(
    driver: KafkaDriver,
    create_table,
    create_topics,
    create_connector,
    wait_for_rows,
    validation: bool,
):
    """Ingest records into a table whose ID column is AUTOINCREMENT.

    The records carry only ``data``; no value is sent for the identity
    column, and the test expects the stored IDs to be 1..RECORD_COUNT.
    """
    mode = "compat" if validation else "ht"
    base_name = f"fr7_identity_{mode}"

    column_defs = [
        "ID NUMBER AUTOINCREMENT START 1 INCREMENT 1",
        "RECORD_METADATA VARIANT",
        "DATA VARCHAR",
    ]
    table = create_table(
        base_name,
        columns="(" + ", ".join(column_defs) + ")",
        cleanup_topic=False,
    )
    topic = create_topics([base_name], with_tables=False)[0]

    config = _connector_config(topic, validation=validation)
    create_connector(v4_config=config)
    driver.startConnectorWaitTime()

    # One JSON payload per record; the ID column is deliberately left out.
    payloads = []
    for index in range(RECORD_COUNT):
        payloads.append(json.dumps({"data": f"row_{index}"}).encode("utf-8"))
    driver.sendBytesData(topic, payloads, partition=0)

    wait_for_rows(table.name, RECORD_COUNT)

    stored = table.select('"ID", "DATA"', 'ORDER BY "ID"')
    assert len(stored) == RECORD_COUNT
    ids = [entry["ID"] for entry in stored]
    expected_ids = list(range(1, RECORD_COUNT + 1))
    assert ids == expected_ids, (
        f"Expected sequential IDs, got {ids}"
    )
    assert stored[0]["DATA"] == "row_0"


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"])
def test_default_timestamp_column(
    driver: KafkaDriver,
    create_table,
    create_topics,
    create_connector,
    wait_for_rows,
    validation: bool,
):
    """Ingest into a table with a DEFAULT CURRENT_TIMESTAMP() NOT NULL column.

    The record does NOT include a value for the timestamp column.
    The server should auto-fill the current timestamp.
    """
    tag = "compat" if validation else "ht"
    base_name = f"fr7_defts_{tag}"

    table = create_table(
        base_name,
        columns=(
            "(RECORD_METADATA VARIANT, "
            "DATA VARCHAR, "
            "CREATED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP() NOT NULL)"
        ),
        cleanup_topic=False,
    )
    topic = create_topics([base_name], with_tables=False)[0]

    create_connector(v4_config=_connector_config(topic, validation=validation))
    driver.startConnectorWaitTime()

    records = [
        json.dumps({"data": f"row_{i}"}).encode("utf-8") for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, records, partition=0)

    wait_for_rows(table.name, RECORD_COUNT)

    # Order explicitly: LIMIT without ORDER BY may return any row, which would
    # make the "row_0" assertion below flaky. "row_0" sorts first among the
    # DATA values sent above.
    rows = table.select('"DATA", "CREATED_AT"', 'ORDER BY "DATA" LIMIT 1')
    assert rows, "Expected at least one row"
    assert rows[0]["CREATED_AT"] is not None, (
        "CREATED_AT should be filled by server default"
    )
    assert rows[0]["DATA"] == "row_0"


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"])
def test_default_numeric_column(
    driver: KafkaDriver,
    create_table,
    create_topics,
    create_connector,
    wait_for_rows,
    validation: bool,
):
    """Ingest into a table with a DEFAULT 0 NOT NULL numeric column.

    The record does NOT include a value for the status column.
    The server should auto-fill with the default value 0.

    Runs once per ``validation`` value (ids ``compat`` / ``ht``); the flag is
    forwarded to ``_connector_config`` and also picks the table/topic suffix.
    """
    tag = "compat" if validation else "ht"
    base_name = f"fr7_defnum_{tag}"

    table = create_table(
        base_name,
        columns=(
            "(RECORD_METADATA VARIANT, DATA VARCHAR, STATUS NUMBER DEFAULT 0 NOT NULL)"
        ),
        cleanup_topic=False,
    )
    topic = create_topics([base_name], with_tables=False)[0]

    create_connector(v4_config=_connector_config(topic, validation=validation))
    driver.startConnectorWaitTime()

    # Payloads only carry "data"; STATUS is deliberately left out.
    records = [
        json.dumps({"data": f"row_{i}"}).encode("utf-8") for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, records, partition=0)

    wait_for_rows(table.name, RECORD_COUNT)

    # A bare LIMIT 1 may return any row, which made the "row_0" assertion below
    # order-dependent. "row_0" is the smallest DATA value among the row_<i>
    # payloads, so ordering by DATA pins the sampled row deterministically.
    rows = table.select('"DATA", "STATUS"', 'ORDER BY "DATA" LIMIT 1')
    assert rows, "Expected at least one row"
    assert rows[0]["STATUS"] == 0, (
        f"STATUS should be 0 (server default), got {rows[0]['STATUS']}"
    )
    assert rows[0]["DATA"] == "row_0"


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
@pytest.mark.parametrize("validation", [True, False], ids=["compat", "ht"])
def test_mixed_identity_and_defaults(
    driver: KafkaDriver,
    create_table,
    create_topics,
    create_connector,
    wait_for_rows,
    validation: bool,
):
    """Check a table that mixes identity, defaulted and plain columns.

    Records carry only ``data``. The remaining columns are expected to be
    populated on the server side:
    - ID: auto-increment (first row is 1)
    - CREATED_AT: CURRENT_TIMESTAMP()
    - STATUS: default 1
    """
    mode = "compat" if validation else "ht"
    base_name = f"fr7_mixed_{mode}"

    # One entry per column; joined below into the "(col, col, ...)" DDL fragment.
    column_defs = [
        "ID NUMBER AUTOINCREMENT",
        "RECORD_METADATA VARIANT",
        "DATA VARCHAR",
        "CREATED_AT TIMESTAMP_NTZ DEFAULT CURRENT_TIMESTAMP() NOT NULL",
        "STATUS NUMBER DEFAULT 1 NOT NULL",
    ]
    table = create_table(
        base_name,
        columns="(" + ", ".join(column_defs) + ")",
        cleanup_topic=False,
    )
    topic = create_topics([base_name], with_tables=False)[0]

    connector_cfg = _connector_config(topic, validation=validation)
    create_connector(v4_config=connector_cfg)
    driver.startConnectorWaitTime()

    payloads = []
    for i in range(RECORD_COUNT):
        payloads.append(json.dumps({"data": f"row_{i}"}).encode("utf-8"))
    driver.sendBytesData(topic, payloads, partition=0)

    wait_for_rows(table.name, RECORD_COUNT)

    # Only the lowest-ID row is inspected; up to five are fetched.
    fetched = table.select(
        '"ID", "DATA", "CREATED_AT", "STATUS"', 'ORDER BY "ID" LIMIT 5'
    )
    assert len(fetched) >= 1
    first = fetched[0]
    assert first["ID"] == 1, f"Expected ID=1, got {first['ID']}"
    assert first["DATA"] == "row_0"
    assert first["CREATED_AT"] is not None, "CREATED_AT should be filled by default"
    assert first["STATUS"] == 1, f"STATUS should be 1 (default), got {first['STATUS']}"


================================================
FILE: test/tests/test_error_table.py
================================================
"""E2E tests for Snowflake Error Table support in v4 high-throughput mode.

Verifies:
1. Table WITHOUT error logging + v4-ht → connector starts, invalid data silently dropped
2. Table WITH error logging + v4-ht → connector starts, invalid data captured in error table
3. Value overflow / NOT NULL violation (column missing from the record) + v4-ht → rows captured in error table
4. Same bad record: v4-compat routes to DLQ, v4-ht routes to error table
"""

import json
import logging
import os
import time
from pathlib import Path
from typing import Callable

import pytest
from snowflake.connector.errors import ProgrammingError

from lib.driver import KafkaDriver, quote_name
from lib.fixtures.table import Table

logger = logging.getLogger(__name__)

TEMPLATE_DIR = Path("rest_request_template")
BASE_TEMPLATE = "datatype_ingestion.json"
STABILIZATION_SLEEP = int(os.environ.get("TEST_STABILIZATION_SLEEP", "30"))


def _v4_ht_config() -> dict:
    """Build a v4-ht connector config from the base template.

    Always sets errors.tolerance=all so that channel errors (row rejections
    reported by Snowflake) are tolerated and we can observe error table behavior
    rather than task failure.
    """
    base = json.loads((TEMPLATE_DIR / BASE_TEMPLATE).read_text())
    config = dict(base["config"])
    config["snowflake.enable.schematization"] = "true"
    config["snowflake.validation"] = "server_side"
    config["errors.tolerance"] = "all"
    config["snowflake.streaming.validate.compatibility.with.classic"] = "false"
    return config


def _v4_compat_dlq_config(dlq_topic: str) -> dict:
    """Build a v4-compat connector config with DLQ routing."""
    base = json.loads((TEMPLATE_DIR / BASE_TEMPLATE).read_text())
    config = dict(base["config"])
    config["snowflake.enable.schematization"] = "true"
    config["snowflake.validation"] = "client_side"
    config["errors.tolerance"] = "all"
    config["errors.deadletterqueue.topic.name"] = dlq_topic
    config["errors.deadletterqueue.topic.replication.factor"] = "1"
    config["errors.deadletterqueue.context.headers.enable"] = "true"
    config["snowflake.streaming.validate.compatibility.with.classic"] = "false"
    return config


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_error_table_without_error_logging(
    driver: KafkaDriver,
    create_table: Callable,
    create_custom_connector: Callable,
):
    """v4-ht targeting a table WITHOUT ERROR_LOGGING — connector starts, errors silently dropped.

    One valid and one invalid record (non-numeric VAL for a NUMBER column) are
    sent. After a fixed stabilization sleep the test checks that no connector
    task failed, that at least one row landed, and that the error table is
    either empty or cannot be queried at all.
    """
    table: Table = create_table(
        "et_no_logging",
        columns="(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT)",
    )
    driver.createTopics(table.name, partitionNum=1, replicationNum=1)

    connector = create_custom_connector("et_no_logging", _v4_ht_config())
    driver.startConnectorWaitTime()

    records = [
        json.dumps({"ID": "valid_1", "VAL": 42}).encode(),
        json.dumps({"ID": "invalid_1", "VAL": "not_a_number"}).encode(),
    ]
    keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))]
    driver.sendBytesData(table.name, records, keys)

    time.sleep(STABILIZATION_SLEEP)

    failed = driver.get_failed_tasks(connector.name)
    assert not failed, f"Connector task failed: {failed}"

    count = driver.select_number_of_records(table.name)
    assert count >= 1, f"Expected at least 1 row, got {count}"

    # Without ERROR_LOGGING, ERROR_TABLE() raises (no error logging enabled) or returns 0 rows.
    # Both outcomes confirm that no error logging is happening.
    cursor = driver.snowflake_conn.cursor()
    try:
        cursor.execute(f"SELECT * FROM ERROR_TABLE({quote_name(table.name)})")
        error_rows = cursor.fetchall()
        assert len(error_rows) == 0, (
            f"Expected 0 error table rows without ERROR_LOGGING, got {len(error_rows)}"
        )
        logger.info("Error table query returned 0 rows (no error logging enabled)")
    except ProgrammingError:
        # ERROR_TABLE() raises ProgrammingError when ERROR_LOGGING is not enabled — expected
        logger.info("Error table query raised as expected (ERROR_LOGGING not enabled)")
    finally:
        # Release the cursor on every path (success, expected error, failed assert).
        cursor.close()


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_error_table_with_error_logging(
    driver: KafkaDriver,
    create_table: Callable,
    create_custom_connector: Callable,
):
    """v4-ht targeting a table WITH ERROR_LOGGING — invalid data captured in error table.

    One valid and two invalid records (string / nested object for a NUMBER
    column) are sent. After a fixed stabilization sleep the test checks that no
    connector task failed, that at least one row landed, and that the error
    table holds at least two rows, each with a non-null ERROR_CODE.
    """
    table: Table = create_table(
        "et_with_logging",
        columns=(
            "(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT) ERROR_LOGGING = TRUE"
        ),
    )
    driver.createTopics(table.name, partitionNum=1, replicationNum=1)

    connector = create_custom_connector("et_with_logging", _v4_ht_config())
    driver.startConnectorWaitTime()

    records = [
        json.dumps({"ID": "valid_1", "VAL": 42}).encode(),
        json.dumps({"ID": "invalid_1", "VAL": "not_a_number"}).encode(),
        json.dumps({"ID": "invalid_2", "VAL": {"nested": True}}).encode(),
    ]
    keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))]
    driver.sendBytesData(table.name, records, keys)

    time.sleep(STABILIZATION_SLEEP)

    failed = driver.get_failed_tasks(connector.name)
    assert not failed, f"Connector task failed: {failed}"

    count = driver.select_number_of_records(table.name)
    assert count >= 1, f"Expected at least 1 row, got {count}"

    cursor = driver.snowflake_conn.cursor()
    try:
        cursor.execute(f"SELECT * FROM ERROR_TABLE({quote_name(table.name)})")
        col_names = [desc[0] for desc in cursor.description]
        error_rows = cursor.fetchall()
    finally:
        # Everything needed is already in memory; release the cursor even if the query fails.
        cursor.close()
    logger.info("Error table rows (with logging): %d", len(error_rows))

    assert len(error_rows) >= 2, (
        f"Expected at least 2 error rows (2 invalid records sent), got {len(error_rows)}"
    )
    for row in error_rows:
        row_dict = dict(zip(col_names, row))
        logger.info("Error table entry: %s", row_dict)
        assert row_dict.get("ERROR_CODE") is not None, (
            f"Error table row missing ERROR_CODE: {row_dict}"
        )


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_error_table_schema_mismatch(
    driver: KafkaDriver,
    create_table: Callable,
    create_custom_connector: Callable,
):
    """v4-ht: both value validation and schema mismatch errors land in the error table.

    Two distinct rejection reasons are exercised:
    - Value validation: ID value too long for VARCHAR(5) constraint.
    - Schema mismatch: VAL column is NOT NULL but absent from the record
      (SSv2 treats the missing key as NULL, violating the constraint).

    After a fixed stabilization sleep the test checks that no connector task
    failed, that at least one row landed, and that the error table holds at
    least two rows, each with a non-null ERROR_CODE.
    """
    table: Table = create_table(
        "et_schema_mismatch",
        columns=(
            "(ID VARCHAR(5) NOT NULL, VAL NUMBER NOT NULL, RECORD_METADATA VARIANT)"
            " ERROR_LOGGING = TRUE"
        ),
    )
    driver.createTopics(table.name, partitionNum=1, replicationNum=1)

    connector = create_custom_connector("et_schema_mismatch", _v4_ht_config())
    driver.startConnectorWaitTime()

    records = [
        # Valid record.
        json.dumps({"ID": "ok", "VAL": 42}).encode(),
        # Value validation: ID exceeds VARCHAR(5).
        json.dumps({"ID": "toolong", "VAL": 10}).encode(),
        # Schema mismatch: VAL is NOT NULL but missing from the payload.
        json.dumps({"ID": "miss"}).encode(),
    ]
    keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))]
    driver.sendBytesData(table.name, records, keys)

    time.sleep(STABILIZATION_SLEEP)

    failed = driver.get_failed_tasks(connector.name)
    assert not failed, f"Connector task failed: {failed}"

    count = driver.select_number_of_records(table.name)
    assert count >= 1, f"Expected at least 1 row (valid record), got {count}"

    cursor = driver.snowflake_conn.cursor()
    try:
        cursor.execute(f"SELECT * FROM ERROR_TABLE({quote_name(table.name)})")
        col_names = [desc[0] for desc in cursor.description]
        error_rows = cursor.fetchall()
    finally:
        # Everything needed is already in memory; release the cursor even if the query fails.
        cursor.close()
    logger.info("Schema mismatch error table rows: %d", len(error_rows))

    assert len(error_rows) >= 2, (
        "Expected at least 2 error rows (value overflow + NOT NULL violation),"
        f" got {len(error_rows)}"
    )
    for row in error_rows:
        row_dict = dict(zip(col_names, row))
        assert row_dict.get("ERROR_CODE") is not None, (
            f"Error table row missing ERROR_CODE: {row_dict}"
        )


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_error_table_vs_dlq_routing(
    driver: KafkaDriver,
    create_table: Callable,
    create_custom_connector: Callable,
):
    """Same bad record routes differently depending on validation mode.

    v4-compat (client_side validation + DLQ): invalid records are caught
    client-side and routed to the DLQ topic.

    v4-ht (server_side validation + error table): invalid records pass through
    to Snowflake, which rejects them into the error table.

    Both connectors run at the same time against separate tables/topics and
    receive the same three records (one valid, two invalid).
    """
    # v4-compat table — schema mismatch caught client-side → DLQ.
    table_compat: Table = create_table(
        "et_routing_compat",
        columns="(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT)",
    )
    # v4-ht table — schema mismatch caught server-side → error table.
    table_ht: Table = create_table(
        "et_routing_ht",
        columns="(ID VARCHAR NOT NULL, VAL NUMBER, RECORD_METADATA VARIANT) ERROR_LOGGING = TRUE",
    )

    driver.createTopics(table_compat.name, partitionNum=1, replicationNum=1)
    driver.createTopics(table_ht.name, partitionNum=1, replicationNum=1)

    # DLQ topic for the compat connector; must exist before the connector starts.
    dlq_topic = f"dlq_{table_compat.name.lower()}"
    driver.createTopics(dlq_topic, partitionNum=1, replicationNum=1)

    # Each connector uses default topic→table routing (connector name = topic = table).
    # No topic2table.map needed.
    connector_compat = create_custom_connector(
        "et_routing_compat", _v4_compat_dlq_config(dlq_topic)
    )
    connector_ht = create_custom_connector("et_routing_ht", _v4_ht_config())

    driver.startConnectorWaitTime()

    # Send the same records to both topics: one valid, two invalid.
    records = [
        json.dumps({"ID": "valid_1", "VAL": 42}).encode(),
        json.dumps({"ID": "invalid_1", "VAL": "not_a_number"}).encode(),
        json.dumps({"ID": "invalid_2", "VAL": {"nested": True}}).encode(),
    ]
    keys = [json.dumps({"number": str(i)}).encode() for i in range(len(records))]
    driver.sendBytesData(table_compat.name, records, keys)
    driver.sendBytesData(table_ht.name, records, keys)

    # Two connectors running simultaneously — allow extra time.
    time.sleep(2 * STABILIZATION_SLEEP)

    assert not driver.get_failed_tasks(connector_compat.name), (
        "Compat connector task failed"
    )
    assert not driver.get_failed_tasks(connector_ht.name), "HT connector task failed"

    # Both tables should have the valid record.
    assert driver.select_number_of_records(table_compat.name) >= 1, (
        "Expected at least 1 row in compat table"
    )
    assert driver.select_number_of_records(table_ht.name) >= 1, (
        "Expected at least 1 row in HT table"
    )

    # v4-compat: invalid records land in DLQ, not error table.
    dlq_count = driver.consume_messages_dlq({"config": connector_compat.config}, 0, 1)
    assert dlq_count >= 2, (
        f"Expected at least 2 records in DLQ (v4-compat), got {dlq_count}"
    )

    # v4-ht: invalid records land in error table, not DLQ.
    cursor = driver.snowflake_conn.cursor()
    try:
        cursor.execute(f"SELECT * FROM ERROR_TABLE({quote_name(table_ht.name)})")
        error_rows = cursor.fetchall()
    finally:
        # Rows are already fetched; release the cursor even if the query fails.
        cursor.close()
    logger.info("v4-ht error table rows: %d", len(error_rows))
    assert len(error_rows) >= 2, (
        f"Expected at least 2 error table rows (v4-ht), got {len(error_rows)}"
    )


================================================
FILE: test/tests/test_json_json.py
================================================
import json

from lib.matchers import ANY_INT

FILE_NAME = "travis_correct_json_json"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


def test_json_json(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    table = create_table(
        FILE_NAME.upper(),
        columns='(record_metadata variant, "NUMBER" varchar)',
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    keys = [json.dumps({"number": str(i)}).encode("utf-8") for i in range(RECORD_COUNT)]
    values = [
        json.dumps({"number": str(i)}).encode("utf-8") for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, values, keys)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify first row content --
    record_metadata = json.loads(
        table.select_scalar(
            "record_metadata", "ORDER BY record_metadata:offset LIMIT 1"
        )
    )

    assert record_metadata == {
        "SnowflakeConnectorPushTime": ANY_INT,
        "key": {"number": "0"},
        "offset": 0,
        "partition": 0,
    }


================================================
FILE: test/tests/test_kc_delete_create.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_delete_create"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_delete_create(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Delete the connector, create it again, and expect both batches in the table.

    Sequence: send batch 1, delete the connector, re-create it from the same
    config file, send batch 2, then wait for ``2 * RECORD_COUNT`` rows.
    """
    # The topic and the connector are both addressed by the same salted name.
    salted_name = f"{FILE_NAME}{name_salt}"

    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(salted_name, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in while the original connector exists.
    _send_batch(driver, salted_name, RECORD_COUNT)

    # Remove the connector, followed by a fixed pause.
    driver.deleteConnector(salted_name)
    sleep(SLEEP_TIME)

    # Re-create it from the same template, again followed by a fixed pause.
    driver.createConnector(
        name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
    )
    sleep(SLEEP_TIME)

    # Batch 2.
    _send_batch(driver, salted_name, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 2
    wait_for_rows(target_table.name, expected_rows, connector_name=salted_name)


================================================
FILE: test/tests/test_kc_delete_create_chaos.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_delete_create_chaos"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_delete_create_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Delete and re-create the connector while data keeps arriving.

    Sequence: send batch 1, delete the connector, immediately send batch 2,
    re-create the connector and wait until it reports running, send batch 3,
    then wait for ``3 * RECORD_COUNT`` rows.
    """
    # The topic and the connector are both addressed by the same salted name.
    salted_name = f"{FILE_NAME}{name_salt}"

    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(salted_name, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in while the original connector exists.
    _send_batch(driver, salted_name, RECORD_COUNT)

    # Batch 2 is produced right after the delete request, then a fixed pause.
    driver.deleteConnector(salted_name)
    _send_batch(driver, salted_name, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # Bring the connector back and block until it is running.
    driver.createConnector(
        name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
    )
    driver.wait_for_connector_running(salted_name)

    # Batch 3.
    _send_batch(driver, salted_name, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 3
    wait_for_rows(target_table.name, expected_rows, connector_name=salted_name)


================================================
FILE: test/tests/test_kc_delete_resume.py
================================================
import json
import pytest
from time import sleep

FILE_NAME = "test_kc_delete_resume"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_delete_resume(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Verify that resuming a deleted connector is a no-op.

    Sequence:
      1. Send batch 1 → wait for ingestion → ingested
      2. Delete connector
      3. Resume connector → fails silently (connector was deleted)
      4. Send batch 2 → NOT ingested (no running connector)
      5. Wait a settle period, then verify the row count

    Only batch 1 should appear in the table (RECORD_COUNT rows).
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    connector_name = f"{FILE_NAME}{name_salt}"
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send batch 1 and wait for it to be ingested before deleting --
    _send_batch(driver, topic, RECORD_COUNT)
    wait_for_rows(table.name, RECORD_COUNT, connector_name=connector_name)

    # -- Delete connector --
    driver.deleteConnector(connector_name)
    sleep(SLEEP_TIME)

    # -- Resume (should fail since connector was deleted) --
    driver.resumeConnector(connector_name)
    sleep(SLEEP_TIME)

    # -- Send batch 2 (no connector running, so this won't be ingested) --
    _send_batch(driver, topic, RECORD_COUNT)

    # The table already holds RECORD_COUNT rows at this point, so checking right
    # away would pass even if a connector were still alive and about to ingest
    # batch 2. Give any such ingestion time to show up before verifying.
    sleep(SLEEP_TIME)

    # -- Verify only batch 1 was ingested --
    wait_for_rows(table.name, RECORD_COUNT, connector_name=connector_name)
    final_count = table.select_scalar("count(*)")
    assert final_count == RECORD_COUNT, (
        f"Expected exactly {RECORD_COUNT} rows in {table.name}, got {final_count} — "
        f"batch 2 must not be ingested after the connector was deleted"
    )


================================================
FILE: test/tests/test_kc_delete_resume_chaos.py
================================================
import json
import time
import pytest
from time import sleep

FILE_NAME = "test_kc_delete_resume_chaos"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_delete_resume_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Verify connector behavior during delete with pressure and a failed resume.

    Sequence:
      1. Send batch 1 → wait for ingestion → ingested
      2. Delete connector
      3. Send batch 2 (pressure during deletion) → partially ingested
      4. Resume connector → fails silently (connector was deleted)
      5. Send batch 3 → NOT ingested (no running connector)
      6. Wait a settle period, then verify the row count bounds

    Expected: between RECORD_COUNT and 2 × RECORD_COUNT rows
    (batch 1 always ingested, some of batch 2 may be ingested before
    the deletion completes; batch 3 is never ingested because resume
    cannot recreate a deleted connector).
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    connector_name = f"{FILE_NAME}{name_salt}"
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send batch 1 and wait for it to be ingested --
    _send_batch(driver, topic, RECORD_COUNT)
    wait_for_rows(table.name, RECORD_COUNT, connector_name=connector_name)

    # -- Delete connector + pressure (batch 2 sent during deletion) --
    driver.deleteConnector(connector_name)
    _send_batch(driver, topic, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # -- Resume (should fail since connector was deleted) --
    driver.resumeConnector(connector_name)
    sleep(SLEEP_TIME)

    # -- Send batch 3 (no connector running) --
    _send_batch(driver, topic, RECORD_COUNT)

    # Batch 1 is already in the table, so the lower-bound poll below succeeds on
    # its first iteration. Without a settle period the upper-bound check would
    # run before a (wrongly) running connector could ingest batch 3.
    sleep(SLEEP_TIME)

    # -- Verify: between 1 and 2 batches ingested --
    # Cannot use wait_for_rows (exact match) since batch 2 may partially arrive
    # (deleteConnector returns immediately without waiting for full shutdown),
    # making the total non-deterministic. Poll until count >= RECORD_COUNT instead.
    deadline = time.monotonic() + 60
    while True:
        count = table.select_scalar("count(*)")
        if count >= RECORD_COUNT:
            break
        if time.monotonic() >= deadline:
            raise AssertionError(
                f"Expected at least {RECORD_COUNT} rows in {table.name}, got {count}"
            )
        sleep(5)

    upper_bound = RECORD_COUNT * 2
    assert count <= upper_bound, (
        f"Expected at most {upper_bound} rows, got {count} — "
        f"unexpected duplication or batch 3 was ingested"
    )


================================================
FILE: test/tests/test_kc_pause_create.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_pause_create"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_pause_create(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Pause the connector, submit a create request for it, and expect both batches.

    Sequence: send batch 1, pause the connector, call ``createConnector`` with
    the same config file and wait until the connector reports running, send
    batch 2, then wait for ``2 * RECORD_COUNT`` rows.
    """
    # The topic and the connector are both addressed by the same salted name.
    salted_name = f"{FILE_NAME}{name_salt}"

    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(salted_name, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in before the pause.
    _send_batch(driver, salted_name, RECORD_COUNT)

    # Pause, followed by a fixed wait.
    driver.pauseConnector(salted_name)
    sleep(SLEEP_TIME)

    # Submit the create request and block until the connector is running.
    driver.createConnector(
        name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
    )
    driver.wait_for_connector_running(salted_name)

    # Batch 2.
    _send_batch(driver, salted_name, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 2
    wait_for_rows(target_table.name, expected_rows, connector_name=salted_name)


================================================
FILE: test/tests/test_kc_pause_create_chaos.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_pause_create_chaos"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_pause_create_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Pause and re-submit the connector while data keeps arriving.

    Sequence: send batch 1, pause the connector, immediately send batch 2,
    call ``createConnector`` with the same config file and wait until the
    connector reports running, send batch 3, then wait for
    ``3 * RECORD_COUNT`` rows.
    """
    # The topic and the connector are both addressed by the same salted name.
    salted_name = f"{FILE_NAME}{name_salt}"

    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(salted_name, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in before the pause.
    _send_batch(driver, salted_name, RECORD_COUNT)

    # Batch 2 is produced right after the pause request, then a fixed wait.
    driver.pauseConnector(salted_name)
    _send_batch(driver, salted_name, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # Submit the create request and block until the connector is running.
    driver.createConnector(
        name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
    )
    driver.wait_for_connector_running(salted_name)

    # Batch 3.
    _send_batch(driver, salted_name, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 3
    wait_for_rows(target_table.name, expected_rows, connector_name=salted_name)


================================================
FILE: test/tests/test_kc_pause_resume.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_pause_resume"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_pause_resume(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Pause and resume the connector between two batches; both must land.

    Sequence: send batch 1, pause the connector, resume it, send batch 2,
    then wait for ``2 * RECORD_COUNT`` rows. Each state change is followed by
    a fixed ``SLEEP_TIME`` pause.
    """
    # The topic and the connector are both addressed by the same salted name.
    salted_name = f"{FILE_NAME}{name_salt}"

    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(salted_name, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in before the pause.
    _send_batch(driver, salted_name, RECORD_COUNT)

    # Pause, then resume, with a fixed wait after each.
    driver.pauseConnector(salted_name)
    sleep(SLEEP_TIME)

    driver.resumeConnector(salted_name)
    sleep(SLEEP_TIME)

    # Batch 2.
    _send_batch(driver, salted_name, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 2
    wait_for_rows(target_table.name, expected_rows, connector_name=salted_name)


================================================
FILE: test/tests/test_kc_pause_resume_chaos.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_pause_resume_chaos"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_pause_resume_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Pause and resume the connector while data keeps arriving.

    Sequence: send batch 1, pause the connector, immediately send batch 2,
    resume the connector, send batch 3, then wait for ``3 * RECORD_COUNT``
    rows.
    """
    # The topic and the connector are both addressed by the same salted name.
    salted_name = f"{FILE_NAME}{name_salt}"

    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(salted_name, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in before the pause.
    _send_batch(driver, salted_name, RECORD_COUNT)

    # Batch 2 is produced right after the pause request, then a fixed wait.
    driver.pauseConnector(salted_name)
    _send_batch(driver, salted_name, RECORD_COUNT)
    sleep(SLEEP_TIME)

    # Resume, followed by a fixed wait.
    driver.resumeConnector(salted_name)
    sleep(SLEEP_TIME)

    # Batch 3.
    _send_batch(driver, salted_name, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 3
    wait_for_rows(target_table.name, expected_rows, connector_name=salted_name)


================================================
FILE: test/tests/test_kc_recreate.py
================================================
import json
from time import sleep

import pytest

FILE_NAME = "test_kc_recreate"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 1000
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_recreate(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Re-submit the connector config twice between two batches; both must land.

    Sequence: send batch 1, call ``createConnector`` twice with the same
    config file (a fixed ``SLEEP_TIME`` pause after each), send batch 2, then
    wait for ``2 * RECORD_COUNT`` rows.
    """
    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 goes in before any re-creation.
    _send_batch(driver, topic, RECORD_COUNT)

    # Two back-to-back create requests for the connector that already exists.
    for _attempt in range(2):
        driver.createConnector(
            name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
        )
        sleep(SLEEP_TIME)

    # Batch 2.
    _send_batch(driver, topic, RECORD_COUNT)

    expected_rows = RECORD_COUNT * 2
    wait_for_rows(target_table.name, expected_rows)


================================================
FILE: test/tests/test_kc_recreate_chaos.py
================================================
import json
from time import sleep

import pytest

# Base name shared by the target table (upper-cased), the topic (with the
# per-run salt appended) and the connector config file.
FILE_NAME = "test_kc_recreate_chaos"
# Connector config template handed to create_connector_from_file and
# driver.createConnector.
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of records produced by each _send_batch call in this module.
RECORD_COUNT = 100
# Seconds to sleep after each repeated createConnector call.
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_recreate_chaos(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Re-submit the connector config twice between two batches of records.

    Sequence: send a batch, call ``driver.createConnector`` twice with the
    unchanged config file (sleeping ``SLEEP_TIME`` seconds after each call),
    send a second batch, then wait for ``RECORD_COUNT * 2`` rows in the table.
    """
    topic = f"{FILE_NAME}{name_salt}"
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )

    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send 1/2, create (idempotent) twice with pressure, send 2/2 --
    # NOTE(review): no records are produced between or during the two
    # createConnector calls, so the "pressure" mentioned above is not visible
    # in this body — confirm whether a send was meant to happen here.
    _send_batch(driver, topic, RECORD_COUNT)

    for _ in range(2):
        driver.createConnector(
            name_salt=name_salt, rest_request_template_filename=CONFIG_FILE
        )
        sleep(SLEEP_TIME)

    _send_batch(driver, topic, RECORD_COUNT)

    # Both batches must end up in the table.
    expected_rows = 2 * RECORD_COUNT
    wait_for_rows(table.name, expected_rows)


================================================
FILE: test/tests/test_kc_restart.py
================================================
import json
from time import sleep

import pytest

# Base name shared by the target table (upper-cased), the topic and the
# connector name (both with the per-run salt appended), and the config file.
FILE_NAME = "test_kc_restart"
# Connector config template handed to create_connector_from_file.
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of records produced by each _send_batch call in this module.
RECORD_COUNT = 1000
# NOTE(review): not referenced by the code visible in this module — confirm
# whether it is still needed.
SLEEP_TIME = 10


def _send_batch(driver, topic, record_count):
    values = [
        json.dumps({"column1": str(i)}).encode("utf-8") for i in range(record_count)
    ]
    driver.sendBytesData(topic, values, [], 0)
    sleep(2)


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_kc_restart(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Records sent before, between and after two restarts all reach the table.

    A batch is sent before each restart step (first ``restartConnector``, then
    ``restartConnectorAndTasks``); after every restart the test waits for the
    connector to report running. A final batch follows, and the table must
    reach ``RECORD_COUNT * 3`` rows.
    """
    salted_name = f"{FILE_NAME}{name_salt}"
    topic = salted_name
    connector_name = salted_name

    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, column1 varchar)",
    )
    driver.createTopics(topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Batch 1 -> restart connector -> batch 2 -> restart connector + tasks.
    for restart in (driver.restartConnector, driver.restartConnectorAndTasks):
        _send_batch(driver, topic, RECORD_COUNT)
        restart(connector_name)
        driver.wait_for_connector_running(connector_name)

    # Batch 3 arrives after both restarts.
    _send_batch(driver, topic, RECORD_COUNT)

    expected_rows = 3 * RECORD_COUNT
    wait_for_rows(table.name, expected_rows, connector_name=connector_name)


================================================
FILE: test/tests/test_multiple_topic_to_one_table_snowpipe_streaming.py
================================================
import json
from time import sleep

import pytest

# Tag every test in this module with the "correctness" marker.
pytestmark = pytest.mark.correctness

# Base name for the target table (upper-cased), the topic names and the config file.
FILE_NAME = "travis_correct_multiple_topic_to_one_table_snowpipe_streaming"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of topics created by the test (named FILE_NAME + salt + index).
TOPIC_COUNT = 3
# Partitions per topic.
PARTITION_COUNT = 3
# Records produced to every partition of every topic.
RECORDS_PER_PARTITION = 1000


def test_multiple_topic_to_one_table_snowpipe_streaming(
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Several multi-partition topics are ingested into the single table created here.

    ``TOPIC_COUNT`` topics with ``PARTITION_COUNT`` partitions each receive
    ``RECORDS_PER_PARTITION`` records per partition. The test then checks the
    total row count, that no (offset, partition) pair occurs more than
    ``TOPIC_COUNT`` times, that every partition holds the full set of distinct
    offsets, and that every partition received rows from every topic.

    The extra Kafka topics are removed in a ``finally`` block so that a failed
    assertion does not leave them behind.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, field1 varchar)",
    )

    topics = []
    try:
        for i in range(TOPIC_COUNT):
            t = f"{FILE_NAME}{name_salt}{i}"
            driver.createTopics(t, partitionNum=PARTITION_COUNT, replicationNum=1)
            # Only track topics whose creation call returned, so cleanup never
            # tries to delete a topic that was not created.
            topics.append(t)

        create_connector_from_file(CONFIG_FILE)
        driver.startConnectorWaitTime()

        # -- Send --
        for topic in topics:
            for p in range(PARTITION_COUNT):
                values = [
                    json.dumps({"field1": str(e)}).encode("utf-8")
                    for e in range(RECORDS_PER_PARTITION)
                ]
                driver.sendBytesData(topic, values, [], partition=p)
                sleep(2)

        total_expected = RECORDS_PER_PARTITION * PARTITION_COUNT * TOPIC_COUNT

        # -- Verify row count --
        wait_for_rows(table.name, total_expected)

        # -- Verify no over-duplication (each offset+partition combo appears at most TOPIC_COUNT times) --
        result = table.select(
            'record_metadata:"offset"::string AS offset_no, '
            'record_metadata:"partition"::string AS partition_no',
            f"GROUP BY offset_no, partition_no HAVING count(*) > {TOPIC_COUNT}",
        )
        assert not result, f"Over-duplication detected: {result[0]}"

        # -- Verify unique offsets per partition --
        rows = table.select(
            'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
            'record_metadata:"partition"::number AS partition_no',
            "GROUP BY partition_no ORDER BY partition_no",
        )
        assert len(rows) == PARTITION_COUNT
        for p in range(PARTITION_COUNT):
            assert rows[p]["UNIQUE_OFFSETS"] == RECORDS_PER_PARTITION
            assert rows[p]["PARTITION_NO"] == p

        # -- Verify all topics contributed to each partition --
        topic_rows = table.select(
            'count(DISTINCT record_metadata:"topic"::string) AS topic_no, '
            'record_metadata:"partition"::number AS partition_no',
            "GROUP BY partition_no ORDER BY partition_no",
        )
        assert len(topic_rows) == PARTITION_COUNT
        for p in range(PARTITION_COUNT):
            assert topic_rows[p]["TOPIC_NO"] == TOPIC_COUNT
            assert topic_rows[p]["PARTITION_NO"] == p
    finally:
        # -- Cleanup extra Kafka topics (table/main topic handled by fixture) --
        # Runs on failure as well as success.
        for t in topics:
            driver.deleteTopic(t)


================================================
FILE: test/tests/test_native_complex_smt.py
================================================
import json

from lib.matchers import ANY_INT, RegexMatch

# Base name for the target table (upper-cased), the salted topic and the config file.
FILE_NAME = "travis_correct_native_complex_smt"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of records produced (and rows expected) by the test below.
RECORD_COUNT = 100


def test_native_complex_smt(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Verify a complex SMT chain: ValueToKey + ExtractField$Key + ReplaceField$Value.

    Connector config transforms:
      1. ValueToKey(fields=c1) — copies c1 into the key
      2. ExtractField$Key(field=c1) — extracts c1 as the key
      3. ReplaceField$Value(blacklist=c2) — drops c2 from the value

    After transforms, the key holds the c1 value and the value retains only c1.

    The content check targets the row with offset 0 explicitly, because a
    SELECT without ORDER BY gives no guarantee about which row comes first.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, c1 variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send 100 records --
    values = [
        json.dumps({"c1": {"int": str(i)}, "c2": "Suppose to be dropped."}).encode(
            "utf-8"
        )
        for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, values)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify the offset-0 row: key extracted, c2 dropped --
    rows = table.select("*", 'WHERE record_metadata:"offset"::number = 0')
    assert rows, "Expected row with offset 0"
    row = rows[0]

    assert json.loads(row["RECORD_METADATA"]) == {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "key": {"int": RegexMatch(r"\d+")},
        "offset": 0,
        "partition": 0,
        "topic": topic,
    }
    assert json.loads(row["C1"]) == {"int": RegexMatch(r"\d+")}


================================================
FILE: test/tests/test_native_string_json_without_schema.py
================================================
import json

from lib.matchers import ANY_INT

# Base name for the target table (upper-cased), the salted topic and the config file.
FILE_NAME = "travis_correct_native_string_json_without_schema"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of records produced (and rows expected) by the test below.
RECORD_COUNT = 100


def test_native_string_json_without_schema(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Verify that an SMT (ReplaceField$Value blacklisting 'c2') drops the c2
    field before ingestion, leaving only the 'val' field.

    Connector config uses StringConverter key + JsonConverter value with a
    ReplaceField transform that removes 'c2'.

    The content check targets the row with offset 0 explicitly, because a
    SELECT without ORDER BY gives no guarantee about which row comes first.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, val varchar)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send 100 records with 'val' and 'c2' (c2 will be dropped by SMT) --
    values = [
        json.dumps({"val": str(i), "c2": "Suppose to be dropped."}).encode("utf-8")
        for i in range(RECORD_COUNT)
    ]
    driver.sendBytesData(topic, values)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify the offset-0 row: only 'val' survives the SMT --
    rows = table.select("*", 'WHERE record_metadata:"offset"::number = 0')
    assert rows, "Expected row with offset 0"
    row = rows[0]

    assert json.loads(row["RECORD_METADATA"]) == {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "offset": 0,
        "partition": 0,
        "topic": topic,
    }
    assert row["VAL"] == "0"


================================================
FILE: test/tests/test_native_string_protobuf.py
================================================
import json

from lib.matchers import ANY_INT

# Base name for the target table (upper-cased), the salted topic and the config file.
FILE_NAME = "travis_correct_native_string_protobuf"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of identical serialized messages produced by the test below.
RECORD_COUNT = 100


def _build_sensor(sensor_pb2):
    sensor = sensor_pb2.SensorReading()
    sensor.dateTime = 1234
    sensor.reading = 321.321
    sensor.device.deviceID = "555-4321"
    sensor.device.enabled = True
    sensor.float_val = 4321.4321
    sensor.int32_val = (1 << 31) - 1
    sensor.sint32_val = (1 << 31) - 1
    sensor.sint64_val = (1 << 63) - 1
    sensor.uint32_val = (1 << 32) - 1
    sensor.bytes_val = b"\xde\xad"
    sensor.double_array_val.extend([1 / 3, 32.21, 434324321])
    sensor.uint64_val = (1 << 64) - 1
    return sensor


def test_native_string_protobuf(
    sensor_pb2,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Serialized protobuf messages are ingested and decoded into RECORD_CONTENT.

    ``RECORD_COUNT`` copies of the message built by ``_build_sensor`` are
    produced; after the row count is reached, the metadata and content of the
    row with offset 0 are compared against the expected JSON.

    The offset-0 row is selected explicitly: a bare ``LIMIT 1`` without a
    filter or ordering may return any row, which would make the ``offset``
    assertion unreliable.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, record_content variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    sensor = _build_sensor(sensor_pb2)
    values = [sensor.SerializeToString() for _ in range(RECORD_COUNT)]
    driver.sendBytesData(topic, values)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify content of the offset-0 row --
    rows = table.select(
        "record_metadata, record_content",
        'WHERE record_metadata:"offset"::number = 0 LIMIT 1',
    )
    assert rows, "Expected row with offset 0"
    row = rows[0]

    record_metadata = json.loads(row["RECORD_METADATA"])
    assert record_metadata == {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "offset": 0,
        "partition": 0,
        "topic": topic,
    }

    record_content = json.loads(row["RECORD_CONTENT"])
    assert record_content == {
        "bytes_val": "3q0=",
        "dateTime": 1234,
        "device": {"deviceID": "555-4321", "enabled": True},
        "double_array_val": [0.3333333333333333, 32.21, 4.343243210000000e08],
        "float_val": 4321.432,
        "int32_val": 2147483647,
        "reading": 321.321,
        "sint32_val": 2147483647,
        "sint64_val": 9223372036854775807,
        "uint32_val": 4294967295,
        "uint64_val": 18446744073709551615,
    }


================================================
FILE: test/tests/test_nullable_values_after_smt.py
================================================
import json

# Base name for the target table (upper-cased), the salted topic and the config file.
FILE_NAME = "nullable_values_after_smt"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of events produced by the test; only even-indexed ones carry optionalField.
TOTAL_EVENTS = 200
EXPECTED_ROWS = 100  # only every-other event has optionalField


def test_nullable_values_after_smt(
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Only events that carry ``optionalField`` produce rows after the SMT.

    ``TOTAL_EVENTS`` events are produced; every even-indexed one contains an
    ``optionalField`` sub-object. The table is expected to end up with
    ``EXPECTED_ROWS`` rows whose ``index`` and Kafka offset both equal the
    index of the originating event.

    The fetched rows are sorted by offset before the comparison, because the
    query has no ORDER BY and the result order is therefore not guaranteed.
    """
    table = create_table(
        FILE_NAME.upper(),
        columns="(index number, from_optional_field boolean, record_metadata variant)",
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    # The connector config has an SMT that extracts only the optionalField sub-object.
    # Events without optionalField are dropped (behavior.on.null.values = IGNORE).
    values = []
    for idx in range(TOTAL_EVENTS):
        event = {"index": idx, "someKey": "someValue"}
        if idx % 2 == 0:
            event["optionalField"] = {"index": idx, "from_optional_field": True}
        values.append(json.dumps(event).encode("utf-8"))

    driver.sendBytesData(topic, values)

    # -- Verify row count --
    wait_for_rows(table.name, EXPECTED_ROWS)

    # -- Verify content --
    rows = table.select(
        "index, from_optional_field, record_metadata:offset::number AS offset",
    )

    # Sort client-side so the comparison does not depend on the order in which
    # the warehouse happens to return the rows.
    parsed = sorted(
        (
            {
                "index": r["INDEX"],
                "from_optional_field": r["FROM_OPTIONAL_FIELD"],
                "offset": r["OFFSET"],
            }
            for r in rows
        ),
        key=lambda item: item["offset"],
    )
    expected = [
        {"index": idx, "from_optional_field": True, "offset": idx}
        for idx in range(0, TOTAL_EVENTS, 2)
    ]
    assert parsed == expected


================================================
FILE: test/tests/test_schema_evolution_streaming.py
================================================
import json

import pytest
from lib.fixtures.table import Table

# Tag every test in this module with the "schema_evolution" marker.
pytestmark = pytest.mark.schema_evolution

# Base name for tables (upper-cased) and topics, and stem of the shared config file.
FILE_NAME = "snowpipe_streaming_schema_evolution"
CONFIG_FILE = f"{FILE_NAME}.json"


def _assert_success_rows(table, schematization, record_count):
    """Shared assertions for successful schema evolution tests."""
    cols = {row[0]: row[1] for row in table.schema()}

    if schematization:
        assert "CITY" in cols, f"Expected CITY column, got: {list(cols.keys())}"
        assert "AGE" in cols, f"Expected AGE column, got: {list(cols.keys())}"

        rows = table.select(
            '"CITY", "AGE"',
            'WHERE RECORD_METADATA:"offset"::number = 0',
        )
        assert rows, "Expected row with offset 0"
        assert rows[0]["CITY"] == "Hsinchu"
        assert rows[0]["AGE"] == 0
    else:
        assert "RECORD_CONTENT" in cols, (
            f"Expected RECORD_CONTENT column, got: {list(cols.keys())}"
        )

        rows = table.select(
            "RECORD_CONTENT",
            'WHERE RECORD_METADATA:"offset"::number = 0',
        )
        assert rows, "Expected row with offset 0"
        content = json.loads(rows[0]["RECORD_CONTENT"])
        assert content["city"] == "Hsinchu"
        assert content["age"] == 0

    count = table.select_scalar("count(*)")
    assert count == record_count, f"Expected {record_count} rows, got {count}"


def _assert_dlq(driver, config, table, record_count):
    """Shared assertions for DLQ tests."""
    offsets_in_dlq = driver.consume_messages_dlq(config, 0, record_count - 1)
    assert offsets_in_dlq == record_count, (
        f"Expected {record_count} records in DLQ, got {offsets_in_dlq}"
    )

    count = table.select_scalar("count(*)")
    assert count == 0, f"Expected 0 rows in table (DLQ), got {count}"


def test_schema_evolution_add_columns(
    driver, create_connector_from_file, create_table, create_topics, wait_for_rows
):
    """Records with fields unknown to the table cause new columns to appear.

    The table starts with only RECORD_METADATA and
    ENABLE_SCHEMA_EVOLUTION = TRUE. After 100 ``{city, age}`` records are
    ingested, the shared success assertions expect flat CITY and AGE columns
    and the full row count.
    """
    expected_rows = 100

    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    created_topics = create_topics([FILE_NAME], with_tables=False)
    topic = created_topics[0]

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    payloads = []
    for age in range(expected_rows):
        payloads.append(json.dumps({"city": "Hsinchu", "age": age}).encode("utf-8"))
    driver.sendBytesData(topic, payloads, [], partition=0)

    wait_for_rows(table.name, expected_rows)

    _assert_success_rows(table, schematization=True, record_count=expected_rows)


def test_schema_evolution_multi_wave(
    driver, create_connector_from_file, create_table, create_topics, wait_for_rows
):
    """Two successive waves widen the table schema one step at a time.

    Wave 1 sends ``{city, age}``; wave 2 sends ``{city, age, country}``.
    Afterwards the table must expose CITY, AGE and COUNTRY, the first wave-2
    row (offset equal to the wave-1 size) must carry the wave-2 values, and
    the number of rows with NULL COUNTRY must equal the wave-1 size.
    """
    first_wave_size = 50
    second_wave_size = 50

    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    topic = create_topics([FILE_NAME], with_tables=False)[0]

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Wave 1: city + age only.
    first_wave = []
    for age in range(first_wave_size):
        first_wave.append(json.dumps({"city": "Hsinchu", "age": age}).encode("utf-8"))
    driver.sendBytesData(topic, first_wave, [], partition=0)
    wait_for_rows(table.name, first_wave_size)

    # Wave 2: adds the country field.
    second_wave = []
    for step in range(second_wave_size):
        record = {"city": "Taipei", "age": 100 + step, "country": "TW"}
        second_wave.append(json.dumps(record).encode("utf-8"))
    driver.sendBytesData(topic, second_wave, [], partition=0)
    wait_for_rows(table.name, first_wave_size + second_wave_size)

    column_types = {entry[0]: entry[1] for entry in table.schema()}
    assert "CITY" in column_types
    assert "AGE" in column_types
    assert "COUNTRY" in column_types, (
        f"Expected COUNTRY column after wave 2, got: {list(column_types.keys())}"
    )

    # The first record of wave 2 sits at the offset right after wave 1.
    boundary_rows = table.select(
        '"CITY", "AGE", "COUNTRY"',
        f'WHERE RECORD_METADATA:"offset"::number = {first_wave_size}',
    )
    assert boundary_rows, f"Expected row at offset {first_wave_size}"
    first_new = boundary_rows[0]
    assert first_new["CITY"] == "Taipei"
    assert first_new["COUNTRY"] == "TW"

    null_rows = table.select("count(*)", "WHERE COUNTRY IS NULL")
    null_country_count = null_rows[0]["COUNT(*)"]
    assert null_country_count == first_wave_size, (
        f"Expected {first_wave_size} rows with NULL country, got {null_country_count}"
    )


def test_schema_evolution_disabled_mid_stream(
    driver, create_connector_from_file, create_table, create_topics, wait_for_rows
):
    """Turn ENABLE_SCHEMA_EVOLUTION off on the table between two waves.

    Wave 1 evolves the schema while the flag is TRUE. The flag is then set to
    FALSE with an ALTER TABLE, and wave 2 introduces a new ``country`` field.
    The test expects the COUNTRY column to exist afterwards and all rows of
    both waves to be present.
    """
    first_wave_size = 50
    second_wave_size = 50

    table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = TRUE",
        cleanup_topic=False,
    )
    topic = create_topics([FILE_NAME], with_tables=False)[0]

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Wave 1: evolve schema while ENABLE_SCHEMA_EVOLUTION=TRUE
    first_wave = []
    for age in range(first_wave_size):
        first_wave.append(json.dumps({"city": "Hsinchu", "age": age}).encode("utf-8"))
    driver.sendBytesData(topic, first_wave, [], partition=0)
    wait_for_rows(table.name, first_wave_size)

    _assert_success_rows(table, schematization=True, record_count=first_wave_size)

    # Disable schema evolution on the table
    disable_sql = "ALTER TABLE identifier(%s) SET ENABLE_SCHEMA_EVOLUTION = FALSE"
    cursor = driver.snowflake_conn.cursor()
    cursor.execute(disable_sql, (table.name,))

    # Wave 2: new column COUNTRY — DDL is still attempted and succeeds
    # because the test role has OWNERSHIP privilege.
    second_wave = []
    for step in range(second_wave_size):
        record = {"city": "Taipei", "age": 100 + step, "country": "TW"}
        second_wave.append(json.dumps(record).encode("utf-8"))
    driver.sendBytesData(topic, second_wave, [], partition=0)

    expected_total = first_wave_size + second_wave_size
    wait_for_rows(table.name, expected_total)

    column_types = {entry[0]: entry[1] for entry in table.schema()}
    assert "COUNTRY" in column_types, (
        f"Expected COUNTRY column (DDL succeeded via OWNERSHIP), got: {list(column_types.keys())}"
    )

    actual_total = table.select_scalar("count(*)")
    assert actual_total == expected_total, (
        f"Expected {expected_total} rows, got {actual_total}"
    )


def test_schema_evolution_happy_path(
    driver, create_connector_from_file, create_table, create_topics, wait_for_rows
):
    """Records whose fields already exist as columns are ingested unchanged.

    The table is pre-created with RECORD_METADATA, CITY and AGE, so the
    ``{city, age}`` records need no new columns. After all rows arrive, the
    offset-0 row must hold ``"Hsinchu"`` / ``0``.
    """
    expected_rows = 100
    table_definition = (
        "(RECORD_METADATA VARIANT, CITY VARCHAR, AGE NUMBER) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )

    table = create_table(
        FILE_NAME.upper(),
        columns=table_definition,
        cleanup_topic=False,
    )
    topic = create_topics([FILE_NAME], with_tables=False)[0]

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    payloads = []
    for age in range(expected_rows):
        payloads.append(json.dumps({"city": "Hsinchu", "age": age}).encode("utf-8"))
    driver.sendBytesData(topic, payloads, [], partition=0)

    wait_for_rows(table.name, expected_rows)

    matches = table.select(
        '"CITY", "AGE"',
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )
    assert matches, "Expected row with offset 0"
    first = matches[0]
    assert first["CITY"] == "Hsinchu"
    assert first["AGE"] == 0


def test_schema_evolution_drop_not_null(
    driver, create_connector_from_file, create_table, create_topics, wait_for_rows
):
    """Records that omit a NOT NULL column are still ingested.

    The table starts with ``STATUS VARCHAR NOT NULL``; the records only carry
    ``city`` and ``age``. The test expects CITY and AGE to be added, STATUS to
    remain, and every ingested row to have NULL in STATUS.
    """
    expected_rows = 50
    table_definition = (
        "(RECORD_METADATA VARIANT, STATUS VARCHAR NOT NULL) "
        "ENABLE_SCHEMA_EVOLUTION = TRUE"
    )

    table = create_table(
        FILE_NAME.upper(),
        columns=table_definition,
        cleanup_topic=False,
    )
    topic = create_topics([FILE_NAME], with_tables=False)[0]

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    payloads = []
    for age in range(expected_rows):
        payloads.append(json.dumps({"city": "Hsinchu", "age": age}).encode("utf-8"))
    driver.sendBytesData(topic, payloads, [], partition=0)

    wait_for_rows(table.name, expected_rows)

    column_types = {entry[0]: entry[1] for entry in table.schema()}
    assert "CITY" in column_types, (
        f"Expected CITY column, got: {list(column_types.keys())}"
    )
    assert "AGE" in column_types, (
        f"Expected AGE column, got: {list(column_types.keys())}"
    )
    assert "STATUS" in column_types

    null_rows = table.select("count(*)", "WHERE STATUS IS NULL")
    null_status_count = null_rows[0]["COUNT(*)"]
    assert null_status_count == expected_rows, (
        f"Expected {expected_rows} rows with NULL STATUS, got {null_status_count}"
    )


@pytest.mark.parametrize("schema_evo", [True, False], ids=["evo=on", "evo=off"])
@pytest.mark.parametrize(
    "schematization", [True, False], ids=["schema=on", "schema=off"]
)
@pytest.mark.parametrize("validation", [True, False], ids=["valid=on", "valid=off"])
def test_schema_evolution_config_variants(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    create_topics,
    wait_for_rows,
    schema_evo,
    schematization,
    validation,
):
    """Config matrix: ENABLE_SCHEMA_EVOLUTION x schematization x validation.

    Skips (v3 only, each with a reason that documents the v3/v4 difference):
      - validation=False: the V1 Ingest SDK always validates client-side.
      - schema_evo=True with schematization=False: not supported by KC v3.

    v4 (KC v4):
      - Client-side validation works for both schematization=on and off.
      - schematization=on: validates individual columns (CITY, AGE, etc.)
      - schematization=off: validates RECORD_CONTENT/RECORD_METADATA VARIANT
        columns against the table schema.
      - validation is toggled through the ``snowflake.validation`` override
        (``client_side`` / ``server_side``).

    Expected outcome for the combinations that run:
      - schematization=False, or schema_evo=True: 100 records are ingested and
        the shared success assertions are applied.
      - schematization=True, schema_evo=False, validation=True: 5 records are
        sent and all of them must land in the DLQ with the table left empty.
      - schematization=True, schema_evo=False, validation=False: the test
        returns right after the connector starts; the server-side Error Table
        handles the failures and nothing is asserted client-side.
    """
    running_v3 = connector_version == "v3"
    if running_v3 and not validation:
        pytest.skip(
            "KC v3 uses V1 Ingest SDK which always performs client-side "
            "validation; validation cannot be disabled"
        )
    if running_v3 and schema_evo and not schematization:
        pytest.skip(
            "KC v3 does not support schema evolution when schematization is off"
        )

    # Each parameter combination gets its own topic / table / DLQ names.
    tags = [
        "evo" if schema_evo else "noevo",
        "sch" if schematization else "nosch",
        "val" if validation else "noval",
    ]
    variant_name = "_".join([FILE_NAME] + tags)
    topic = create_topics([variant_name], with_tables=False)[0]
    dlq_topic = f"DLQ_MATRIX_{variant_name}_{name_salt}"

    if schematization and not schema_evo:
        # Pre-create with schema evo disabled so extra columns are rejected.
        table = create_table(
            variant_name.upper(),
            columns="(RECORD_METADATA VARIANT) ENABLE_SCHEMA_EVOLUTION = FALSE",
            cleanup_topic=False,
        )
    else:
        table = Table(driver, topic.upper())

    overrides = {
        "topics": topic,
        "snowflake.enable.schematization": str(schematization).lower(),
        "snowflake.validation": "client_side" if validation else "server_side",
        "errors.deadletterqueue.topic.name": dlq_topic,
    }

    config = create_connector_from_file(CONFIG_FILE, config_overrides=overrides)
    driver.startConnectorWaitTime()

    def _city_age_records(how_many):
        return [
            json.dumps({"city": "Hsinchu", "age": age}).encode("utf-8")
            for age in range(how_many)
        ]

    # Ingestion is expected to succeed either because data goes into
    # RECORD_CONTENT as VARIANT (schematization off) or because schema
    # evolution adds the missing columns.
    ingestion_expected = (not schematization) or schema_evo
    if ingestion_expected:
        record_count = 100
        driver.sendBytesData(topic, _city_age_records(record_count), [], partition=0)

        wait_for_rows(table.name, record_count)

        _assert_success_rows(table, schematization, record_count)
        return

    if not validation:
        # No client-side validation -> server handles the error via Error Table.
        # DLQ routing only works when client validation is on.
        return

    record_count = 5
    driver.sendBytesData(topic, _city_age_records(record_count), [], partition=0)

    _assert_dlq(driver, config, table, record_count)


================================================
FILE: test/tests/test_schema_mapping.py
================================================
import datetime
import json

import pytest

# Tag every test in this module with the "correctness" marker.
pytestmark = pytest.mark.correctness

# Base name for the target table (upper-cased), the salted topic and the config file.
FILE_NAME = "travis_correct_schema_mapping"
CONFIG_FILE = f"{FILE_NAME}.json"
# Number of identical records produced (and rows expected) by the test below.
RECORD_COUNT = 100

# JSON payload sent as the value of every record; one field per column type
# under test.
RECORD = {
    "PERFORMANCE_STRING": "Excellent",
    # KCv3 used embedded quotes ('"case_sensitive..."') because the custom
    # SnowflakeJsonConverter stripped them.  The standard JsonConverter
    # preserves the key as-is, so we omit the embedded quotes.
    "case_sensitive_PERFORMANCE_CHAR": "A",
    # Base64-encoded binary value for server_side validation mode, which passes strings
    # directly to the Ingest SDK. A server-side parameter controls whether the SDK
    # interprets binary strings as base64 or hex; base64 is the expected default.
    "PERFORMANCE_BINARY": "/////w==",
    "RATING_INT": 100,
    "RATING_DOUBLE": 0.99,
    "APPROVAL": True,
    "APPROVAL_DATE": "2022-06-15",
    "APPROVAL_TIME": "23:59:59.999999",
    "INFO_ARRAY": ["HELLO", "WORLD"],
    "INFO": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"},
    "INFO_OBJECT": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"},
}

# Expected column values read back from the table, keyed by column name.
# String results are compared after all whitespace is removed, which is why
# the JSON-like entries below are written without any spaces.
GOLD_VALUES = {
    "PERFORMANCE_STRING": "Excellent",
    "case_sensitive_PERFORMANCE_CHAR": "A",
    "PERFORMANCE_BINARY": b"\xff\xff\xff\xff",
    "RATING_INT": 100,
    "RATING_DOUBLE": 0.99,
    "APPROVAL": True,
    "APPROVAL_DATE": datetime.date(2022, 6, 15),
    "APPROVAL_TIME": datetime.time(23, 59, 59, 999999),
    "INFO_ARRAY": r'["HELLO","WORLD"]',
    "INFO": r'{"TREE_1":"APPLE","TREE_2":"PINEAPPLE"}',
    "INFO_OBJECT": r'{"TREE_1":"APPLE","TREE_2":"PINEAPPLE"}',
}


@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_schema_mapping(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Each JSON field must land in its typed column with the expected value.

    The table is pre-created with STRING, CHAR, BINARY, NUMBER, DOUBLE,
    BOOLEAN, DATE, TIME, ARRAY, VARIANT and OBJECT columns plus
    RECORD_METADATA. After ``RECORD_COUNT`` copies of ``RECORD`` are ingested,
    the first returned row is compared column by column against
    ``GOLD_VALUES``.
    """
    column_definitions = [
        "PERFORMANCE_STRING STRING",
        '"case_sensitive_PERFORMANCE_CHAR" CHAR',
        "PERFORMANCE_BINARY BINARY",
        "RATING_INT NUMBER",
        "RATING_DOUBLE DOUBLE",
        "APPROVAL BOOLEAN",
        "APPROVAL_DATE DATE",
        "APPROVAL_TIME TIME",
        "INFO_ARRAY ARRAY",
        "INFO VARIANT",
        "INFO_OBJECT OBJECT",
        "RECORD_METADATA VARIANT",
    ]
    table = create_table(
        FILE_NAME.upper(),
        columns="(" + ", ".join(column_definitions) + ")",
    )
    topic = f"{FILE_NAME}{name_salt}"

    # TODO: SNOW-3236195: RowValidator uppercases unquoted column names via
    # LiteralQuoteUtils.unquoteColumnName(), but DESCRIBE TABLE preserves case for
    # quoted columns (e.g. "case_sensitive_PERFORMANCE_CHAR"). This causes a false
    # structural error. Fix by normalizing both sides in RowValidator.
    overrides = {
        "snowflake.validation": "server_side",
        "snowflake.compatibility.enable.autogenerated.table.name.sanitization": "true",
    }
    create_connector_from_file(CONFIG_FILE, config_overrides=overrides)
    driver.startConnectorWaitTime()

    # Produce RECORD_COUNT identical values, each with its own numbered key.
    keys = []
    values = []
    for seq in range(RECORD_COUNT):
        keys.append(json.dumps({"number": str(seq)}).encode("utf-8"))
        values.append(json.dumps(RECORD).encode("utf-8"))
    driver.sendBytesData(topic, values, keys)

    wait_for_rows(table.name, RECORD_COUNT)

    # Every row carries the same payload, so checking one row is sufficient.
    first_row = table.select("*")[0]

    for column, expected in GOLD_VALUES.items():
        observed = first_row[column]
        if isinstance(observed, str):
            # Remove formatting whitespace added by Snowflake
            comparable = "".join(observed.split())
        else:
            comparable = observed
        assert comparable == expected, (
            f"Column {column}: expected {expected!r}, got {observed!r}"
        )


================================================
FILE: test/tests/test_schema_not_supported_converter.py
================================================
import json
import time

import pytest

# Tag every test in this module with the "correctness" marker.
pytestmark = pytest.mark.correctness

# Base name for the target table (upper-cased), the salted topic and the config file.
FILE_NAME = "travis_correct_schema_not_supported_converter"
CONFIG_FILE = f"{FILE_NAME}.json"

# JSON payload sent as the value of every record. Note that the CHAR key
# contains literal double quotes as part of the key itself.
RECORD = {
    "PERFORMANCE_STRING": "Excellent",
    '"case_sensitive_PERFORMANCE_CHAR"': "A",
    "PERFORMANCE_HEX": "FFFFFFFF",
    "RATING_INT": 100,
    "RATING_DOUBLE": 0.99,
    "APPROVAL": "true",
    "APPROVAL_DATE": "2022-06-15",
    "APPROVAL_TIME": "23:59:59.999999",
    "INFO_ARRAY": ["HELLO", "WORLD"],
    "INFO": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"},
    "INFO_OBJECT": {"TREE_1": "APPLE", "TREE_2": "PINEAPPLE"},
}


def test_schema_not_supported_converter(
    driver, name_salt, create_connector_from_file, create_table
):
    """Check that nothing is ingested by the connector built from CONFIG_FILE.

    Creates the typed target table, starts the connector, produces 100 keyed
    JSON copies of RECORD, waits 30 seconds and asserts the table is still empty.
    """
    column_spec = (
        '(PERFORMANCE_STRING STRING, "case_sensitive_PERFORMANCE_CHAR" CHAR, '
        "PERFORMANCE_HEX BINARY, RATING_INT NUMBER, RATING_DOUBLE DOUBLE, "
        "APPROVAL BOOLEAN, APPROVAL_DATE DATE, APPROVAL_TIME TIME, "
        "INFO_ARRAY ARRAY, INFO VARIANT, INFO_OBJECT OBJECT)"
    )
    target_table = create_table(FILE_NAME.upper(), columns=column_spec)
    kafka_topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Produce 100 messages: distinct JSON keys, identical JSON bodies.
    message_keys = []
    message_values = []
    for index in range(100):
        message_keys.append(json.dumps({"number": str(index)}).encode("utf-8"))
        message_values.append(json.dumps(RECORD).encode("utf-8"))
    driver.sendBytesData(kafka_topic, message_values, message_keys)

    # There is no positive event to wait for, so pause for a fixed period
    # and then confirm that the table stayed empty.
    time.sleep(30)
    count = target_table.select_scalar("count(*)")
    assert count == 0, (
        f"Expected 0 rows but got {count}; unsupported converter should reject all records"
    )


================================================
FILE: test/tests/test_snowpipe_streaming_legacy_avro_sr.py
================================================
"""RECORD_CONTENT mode with Avro SR converter.

Verifies that Avro-encoded records land correctly in the legacy
RECORD_CONTENT / RECORD_METADATA VARIANT columns when
snowflake.enable.schematization=false.

v3 parity cannot be verified: even with v4 removed, v3's bundled SR
classes clash with the Confluent 7.8.0 platform's SR classes
(ServiceConfigurationError: CelExecutor not a subtype of RuleExecutor).
Assertions reflect expected Avro deserialization behavior (JSON object
with correct field values). v4-only.
"""

import json

import pytest
from confluent_kafka import avro

FILE_NAME = "snowpipe_streaming_legacy_avro_sr"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100

VALUE_SCHEMA = avro.loads("""
{
    "type": "record",
    "name": "value_schema",
    "fields": [
        {"name": "id", "type": "int"},
        {"name": "firstName", "type": "string"},
        {"name": "time", "type": "int"}
    ]
}
""")


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_avro_sr(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Avro SR messages must arrive as JSON objects in RECORD_CONTENT.

    Sends RECORD_COUNT Avro records, waits for them to land, then inspects
    the row stored for offset 0: its content fields and its metadata keys.
    """

    def _decode_variant(raw):
        # The column value may be JSON wrapped inside a JSON string; unwrap
        # a second time when the first pass yields a string.
        decoded = json.loads(raw)
        return json.loads(decoded) if isinstance(decoded, str) else decoded

    legacy_table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Only the id differs between the produced records.
    avro_records = []
    for record_id in range(RECORD_COUNT):
        avro_records.append({"id": record_id, "firstName": "abc0", "time": 1835})
    driver.sendAvroSRData(kafka_topic, avro_records, VALUE_SCHEMA)

    wait_for_rows(legacy_table.name, RECORD_COUNT)

    # Pick the row by offset rather than by position in the result set.
    matches = legacy_table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )
    row = matches[0]

    payload = _decode_variant(row["RECORD_CONTENT"])
    assert payload["id"] == 0, (
        f"Expected id=0 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )
    assert payload["firstName"] == "abc0", (
        f"Expected firstName=abc0 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )
    assert payload["time"] == 1835, (
        f"Expected time=1835 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    meta = _decode_variant(row["RECORD_METADATA"])
    for key in ("offset", "partition", "topic"):
        assert key in meta, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )


================================================
FILE: test/tests/test_snowpipe_streaming_legacy_byte_array_converter.py
================================================
import base64
import json

import pytest

FILE_NAME = "snowpipe_streaming_legacy_byte_array_converter"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


# Assertions capture v3 reference behavior (verified dual on Confluent 7.8.0,
# 2026-03-31). Validation mode is irrelevant for RECORD_CONTENT — the entire
# payload goes into a VARIANT column with no type checking.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_byte_array_converter(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Raw byte payloads must appear base64-encoded in RECORD_CONTENT.

    Produces RECORD_COUNT byte payloads to partition 0 of a single-partition
    topic, waits for all rows, then checks the offset-0 row for the expected
    base64 text and for the basic RECORD_METADATA keys.
    NOTE(review): the converter and schematization settings come from
    CONFIG_FILE, which this test does not show — confirm there.
    """
    legacy_table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Build the numbered binary payloads one by one and publish them unkeyed.
    raw_payloads = []
    for i in range(RECORD_COUNT):
        raw_payloads.append(f"binary payload {i}".encode("utf-8"))
    driver.sendBytesData(kafka_topic, raw_payloads, [], partition=0)

    wait_for_rows(legacy_table.name, RECORD_COUNT)

    # Inspect the row that belongs to the very first produced message.
    matches = legacy_table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )
    row = matches[0]

    stored_text = str(row["RECORD_CONTENT"])
    expected_b64 = base64.b64encode(b"binary payload 0").decode("utf-8")
    assert expected_b64 in stored_text, (
        f"Expected base64 '{expected_b64}' in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    # Metadata may be JSON nested in a JSON string; decode again if needed.
    meta = json.loads(row["RECORD_METADATA"])
    meta = json.loads(meta) if isinstance(meta, str) else meta
    for key in ("offset", "partition", "topic"):
        assert key in meta, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )


================================================
FILE: test/tests/test_snowpipe_streaming_legacy_string_converter.py
================================================
import json

import pytest

FILE_NAME = "snowpipe_streaming_legacy_string_converter"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


# Assertions capture v3 reference behavior (verified dual on Confluent 7.8.0,
# 2026-03-31). Validation mode is irrelevant for RECORD_CONTENT — the entire
# payload goes into a VARIANT column with no type checking.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_string_converter(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Plain string payloads must be stored in the RECORD_CONTENT column.

    Produces RECORD_COUNT UTF-8 strings to partition 0 of a single-partition
    topic, waits for all rows, then checks that the offset-0 row contains the
    first payload and that RECORD_METADATA carries the basic keys.
    NOTE(review): the converter and schematization settings come from
    CONFIG_FILE, which this test does not show — confirm there.
    """
    legacy_table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Build the numbered text payloads one by one and publish them unkeyed.
    text_payloads = []
    for i in range(RECORD_COUNT):
        text_payloads.append(f"hello world {i}".encode("utf-8"))
    driver.sendBytesData(kafka_topic, text_payloads, [], partition=0)

    wait_for_rows(legacy_table.name, RECORD_COUNT)

    # Inspect the row that belongs to the very first produced message.
    matches = legacy_table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )
    row = matches[0]

    stored_text = str(row["RECORD_CONTENT"])
    assert "hello world 0" in stored_text, (
        f"Expected 'hello world 0' in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    # Metadata may be JSON nested in a JSON string; decode again if needed.
    meta = json.loads(row["RECORD_METADATA"])
    meta = json.loads(meta) if isinstance(meta, str) else meta
    for key in ("offset", "partition", "topic"):
        assert key in meta, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )


================================================
FILE: test/tests/test_snowpipe_streaming_legacy_string_json.py
================================================
import json

import pytest

FILE_NAME = "snowpipe_streaming_legacy_string_json"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


# Assertions capture v3 reference behavior (verified dual on Confluent 7.8.0,
# 2026-03-31). Validation mode is irrelevant for RECORD_CONTENT — the entire
# payload goes into a VARIANT column with no type checking.
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_legacy_string_json(
    connector_version,
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """JSON payloads must land in RECORD_CONTENT with their original fields.

    Produces RECORD_COUNT JSON objects to partition 0 of a single-partition
    topic, waits for all rows, then verifies the "city" and "age" fields of
    the offset-0 row and the basic RECORD_METADATA keys.
    """

    def _decode_variant(raw):
        # VARIANT may hold the payload as a JSON-encoded string
        # (double-encoded); decode a second time in that case.
        decoded = json.loads(raw)
        return json.loads(decoded) if isinstance(decoded, str) else decoded

    legacy_table = create_table(
        FILE_NAME.upper(),
        columns="(RECORD_METADATA variant, RECORD_CONTENT variant)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=1, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # The "age" field doubles as the record's sequence number.
    json_payloads = []
    for age in range(RECORD_COUNT):
        json_payloads.append(json.dumps({"city": "Portland", "age": age}).encode("utf-8"))
    driver.sendBytesData(kafka_topic, json_payloads, [], partition=0)

    wait_for_rows(legacy_table.name, RECORD_COUNT)

    # Inspect the row that belongs to the very first produced message.
    matches = legacy_table.select(
        "RECORD_CONTENT, RECORD_METADATA",
        'WHERE RECORD_METADATA:"offset"::number = 0',
    )
    row = matches[0]

    payload = _decode_variant(row["RECORD_CONTENT"])
    assert payload["city"] == "Portland", (
        f"Expected city=Portland in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )
    assert payload["age"] == 0, (
        f"Expected age=0 in RECORD_CONTENT, got: {row['RECORD_CONTENT']}"
    )

    meta = _decode_variant(row["RECORD_METADATA"])
    for key in ("offset", "partition", "topic"):
        assert key in meta, (
            f"RECORD_METADATA missing '{key}': {row['RECORD_METADATA']}"
        )


================================================
FILE: test/tests/test_snowpipe_streaming_schema_mapping_dlq.py
================================================
import json

import pytest

FILE_NAME = "snowpipe_streaming_schema_mapping_dlq"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORDS_PER_TYPE = 10

# Correct records are ingested; incorrect records go to DLQ
EXPECTED_IN_TABLE = RECORDS_PER_TYPE  # only correct records
EXPECTED_IN_DLQ = 2 * RECORDS_PER_TYPE  # two types of incorrect records


@pytest.mark.skip(reason="Requires client-side validation")
def test_snowpipe_streaming_schema_mapping_dlq(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """Records that break the schema mapping must go to the DLQ, not the table.

    Three batches of RECORDS_PER_TYPE records are produced, in this order:
      1. a non-numeric string for the NUMBER column,
      2. a JSON array where an object is expected,
      3. a well-formed record.

    The table is expected to receive only batch (3); the DLQ is expected to
    receive batches (1) and (2).
    """
    target_table = create_table(
        FILE_NAME.upper(),
        columns="(PERFORMANCE_STRING STRING, RATING_INT NUMBER, RECORD_METADATA VARIANT)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    connector_config = create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # The two faulty batches are sent first, the valid batch last.
    batches = (
        {"PERFORMANCE_STRING": "Excellent", "RATING_INT": "NO-a-NO"},
        [{"PERFORMANCE_STRING": "Excellent", "RATING_INT": 100}],
        {"PERFORMANCE_STRING": "Excellent", "RATING_INT": 100},
    )
    for body in batches:
        _send_records(driver, kafka_topic, body, RECORDS_PER_TYPE)

    # Only the valid batch should be counted in the table.
    wait_for_rows(target_table.name, EXPECTED_IN_TABLE)

    # The faulty batches should be readable from the DLQ.
    offsets_in_dlq = driver.consume_messages_dlq(
        connector_config, 0, EXPECTED_IN_DLQ - 1
    )
    assert offsets_in_dlq == EXPECTED_IN_DLQ, (
        f"Expected {EXPECTED_IN_DLQ} records in DLQ, got {offsets_in_dlq}"
    )

    # Spot-check the first ingested row against the valid record.
    first_row = target_table.select("*")[0]

    for field, gold in {"PERFORMANCE_STRING": "Excellent", "RATING_INT": 100}.items():
        actual = first_row[field]
        # String values are compared with all whitespace stripped out.
        comparable = "".join(actual.split()) if isinstance(actual, str) else actual
        assert comparable == gold, f"Column {field}: expected {gold!r}, got {actual!r}"


def _send_records(driver, topic, record, count):
    """Produce ``count`` copies of ``record`` to ``topic`` through ``driver``.

    Each message body is ``record`` serialized as UTF-8 JSON; each message key
    is the UTF-8 JSON object ``{"number": "<index>"}`` for its position.
    """
    encoded_keys = []
    encoded_values = []
    for index in range(count):
        encoded_keys.append(json.dumps({"number": str(index)}).encode("utf-8"))
        encoded_values.append(json.dumps(record).encode("utf-8"))
    driver.sendBytesData(topic, encoded_values, encoded_keys)


================================================
FILE: test/tests/test_snowpipe_streaming_string_avro_sr.py
================================================
from time import sleep

import pytest
from confluent_kafka import avro

FILE_NAME = "travis_correct_snowpipe_streaming_string_avro_sr"
CONFIG_FILE = f"{FILE_NAME}.json"
PARTITION_COUNT = 3
RECORDS_PER_PARTITION = 1000

VALUE_SCHEMA = avro.loads("""
{
    "type": "record",
    "name": "value_schema",
    "fields": [
        {"name": "id", "type": "int"},
        {"name": "firstName", "type": "string"},
        {"name": "time", "type": "int"},
        {"name": "someFloat", "type": "float"},
        {"name": "someFloatNaN", "type": "float"}
    ]
}
""")


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_snowpipe_streaming_string_avro_sr(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Avro SR records sent to several partitions must be ingested exactly once.

    Produces RECORDS_PER_PARTITION records to each of PARTITION_COUNT
    partitions, waits for the full row count, and then checks that no
    (offset, partition) pair is duplicated and that each partition holds
    RECORDS_PER_PARTITION distinct offsets.
    """
    # Expectations were ported from the v3 test suite; only v4 is parametrized here.
    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, id number, firstName string, "
        "time number, someFloat number, someFloatNaN string)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=PARTITION_COUNT, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Produce one batch per partition, pausing briefly between batches.
    for partition_id in range(PARTITION_COUNT):
        batch = []
        for record_id in range(RECORDS_PER_PARTITION):
            batch.append(
                {
                    "id": record_id,
                    "firstName": "abc0",
                    "time": 1835,
                    "someFloat": 21.37,
                    "someFloatNaN": "NaN",
                }
            )
        driver.sendAvroSRData(
            kafka_topic,
            batch,
            VALUE_SCHEMA,
            key=[],
            key_schema="",
            partition=partition_id,
        )
        sleep(2)

    expected_rows = RECORDS_PER_PARTITION * PARTITION_COUNT
    wait_for_rows(target_table.name, expected_rows)

    # Any (offset, partition) pair seen more than once is a duplicate.
    result = target_table.select(
        'record_metadata:"offset"::string AS offset_no, '
        'record_metadata:"partition"::string AS partition_no',
        "GROUP BY offset_no, partition_no HAVING count(*) > 1",
    )
    assert not result, f"Duplicate detected: {result[0]}"

    # One summary row per partition, ordered by partition number.
    per_partition = target_table.select(
        'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
        'record_metadata:"partition"::number AS partition_no',
        "GROUP BY partition_no ORDER BY partition_no",
    )
    assert len(per_partition) == PARTITION_COUNT
    for partition_id in range(PARTITION_COUNT):
        summary = per_partition[partition_id]
        assert summary["UNIQUE_OFFSETS"] == RECORDS_PER_PARTITION
        assert summary["PARTITION_NO"] == partition_id


================================================
FILE: test/tests/test_snowpipe_streaming_string_json.py
================================================
import json
from time import sleep

FILE_NAME = "travis_correct_snowpipe_streaming_string_json"
CONFIG_FILE = f"{FILE_NAME}.json"
PARTITION_COUNT = 3
RECORDS_PER_PARTITION = 1000


def test_snowpipe_streaming_string_json(
    driver, name_salt, create_connector_from_file, create_table, wait_for_rows
):
    """JSON records sent to several partitions must be ingested exactly once.

    Each partition receives RECORDS_PER_PARTITION messages: the last two are a
    ``None`` value and an empty byte string. The test expects every message to
    produce a row, with no duplicated (offset, partition) pairs, the full set
    of distinct offsets per partition, and SnowflakeConnectorPushTime set on
    every row.
    """
    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, fieldName varchar)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=PARTITION_COUNT, replicationNum=1)

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Produce one batch per partition, pausing briefly between batches.
    for partition_id in range(PARTITION_COUNT):
        batch = [
            json.dumps({"fieldName": str(seq)}).encode("utf-8")
            for seq in range(RECORDS_PER_PARTITION - 2)
        ]
        # Finish the batch with a null value and an empty payload.
        batch += [None, b""]

        driver.sendBytesData(kafka_topic, batch, [], partition=partition_id)
        sleep(2)

    total_expected = RECORDS_PER_PARTITION * PARTITION_COUNT
    wait_for_rows(target_table.name, total_expected)

    # Any (offset, partition) pair seen more than once is a duplicate.
    result = target_table.select(
        'record_metadata:"offset"::string AS offset_no, '
        'record_metadata:"partition"::string AS partition_no',
        "GROUP BY offset_no, partition_no HAVING count(*) > 1",
    )
    assert not result, f"Duplicate detected: {result[0]}"

    # One summary row per partition, ordered by partition number.
    rows = target_table.select(
        'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
        'record_metadata:"partition"::number AS partition_no',
        "GROUP BY partition_no ORDER BY partition_no",
    )
    assert len(rows) == PARTITION_COUNT
    for p in range(PARTITION_COUNT):
        assert rows[p]["UNIQUE_OFFSETS"] == RECORDS_PER_PARTITION, (
            f"Partition {p}: expected {RECORDS_PER_PARTITION} unique offsets, "
            f"got {rows[p]['UNIQUE_OFFSETS']}"
        )
        assert rows[p]["PARTITION_NO"] == p

    # Count the rows whose metadata carries a non-null push time.
    push_time_rows = target_table.select(
        "count(*)",
        "WHERE NOT is_null_value(record_metadata:SnowflakeConnectorPushTime)",
    )
    push_time_count = push_time_rows[0]["COUNT(*)"]
    assert push_time_count == total_expected, (
        f"Empty ConnectorPushTime detected ({push_time_count}/{total_expected})"
    )


================================================
FILE: test/tests/test_snowpipe_streaming_string_json_dlq.py
================================================
import time

import pytest

pytestmark = pytest.mark.correctness

FILE_NAME = "snowpipe_streaming_string_json_dlq"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 5
EXPECTED_IN_TABLE = 0
EXPECTED_IN_DLQ = 5


def test_snowpipe_streaming_string_json_dlq(
    driver, name_salt, create_connector_from_file, create_table
):
    """Malformed JSON payloads must end up in the DLQ and never in the table.

    Produces RECORD_COUNT copies of a syntactically invalid JSON payload,
    waits 30 seconds, asserts the table row count equals EXPECTED_IN_TABLE and
    that the DLQ yields EXPECTED_IN_DLQ messages.
    """
    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, record_content variant)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=1, replicationNum=1)

    connector_config = create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # The same broken payload is repeated for every message.
    broken_payload = b'{invalid_string"}'
    driver.sendBytesData(
        kafka_topic, [broken_payload] * RECORD_COUNT, [], partition=0
    )

    # There is no positive event to wait for, so pause for a fixed period
    # and then check the table row count.
    time.sleep(30)
    count = target_table.select_scalar("count(*)")
    assert count == EXPECTED_IN_TABLE, (
        f"Expected {EXPECTED_IN_TABLE} rows but got {count}"
    )

    # Every produced message should be readable from the DLQ topic.
    offsets_in_dlq = driver.consume_messages_dlq(
        connector_config, 0, EXPECTED_IN_DLQ - 1
    )
    assert offsets_in_dlq == EXPECTED_IN_DLQ, (
        f"Expected {EXPECTED_IN_DLQ} offsets in DLQ, got {offsets_in_dlq}"
    )


================================================
FILE: test/tests/test_snowpipe_streaming_string_json_ignore_tombstone.py
================================================
import json
from time import sleep

FILE_NAME = "test_snowpipe_streaming_string_json_ignore_tombstone"
CONFIG_FILE = f"{FILE_NAME}.json"
PARTITION_COUNT = 3
RECORDS_PER_PARTITION = 1000
# Both None and "" are treated as tombstones in streaming mode (community converters).
EXPECTED_PER_PARTITION = RECORDS_PER_PARTITION - 2

# TODO: KC v3 uses case-sensitive field names matching. But the column names are upper case by default.
LONG_FIELD = "NUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBERNUMBER"


def test_snowpipe_streaming_string_json_ignore_tombstone(
    driver,
    name_salt,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Tombstone-like messages must be skipped when ingesting several partitions.

    Every partition receives RECORDS_PER_PARTITION messages, the last two
    being a ``None`` value and an empty byte string. The test expects those
    two to be dropped, leaving EXPECTED_PER_PARTITION rows per partition, with
    no duplicated (offset, partition) pairs and EXPECTED_PER_PARTITION
    distinct offsets per partition.
    NOTE(review): the null-value handling mode is set in CONFIG_FILE, which
    this test does not show — confirm there.
    """
    target_table = create_table(
        FILE_NAME.upper(),
        columns=f'(record_metadata variant, "{LONG_FIELD}" varchar)',
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    driver.createTopics(kafka_topic, partitionNum=PARTITION_COUNT, replicationNum=1)

    connector_config = create_connector_from_file(CONFIG_FILE)
    connector_name = connector_config["name"]
    driver.startConnectorWaitTime()

    # Produce one batch per partition, pausing briefly between batches.
    for partition_id in range(PARTITION_COUNT):
        batch = [
            json.dumps({LONG_FIELD: str(seq)}).encode("utf-8")
            for seq in range(RECORDS_PER_PARTITION - 2)
        ]
        # Finish with a null value and an empty payload; both are expected
        # to be treated as tombstones and skipped.
        batch += [None, b""]

        driver.sendBytesData(kafka_topic, batch, [], partition=partition_id)
        sleep(2)

    expected_rows = EXPECTED_PER_PARTITION * PARTITION_COUNT
    wait_for_rows(target_table.name, expected_rows, connector_name=connector_name)

    # Any (offset, partition) pair seen more than once is a duplicate.
    result = target_table.select(
        'record_metadata:"offset"::string AS offset_no, '
        'record_metadata:"partition"::string AS partition_no',
        "GROUP BY offset_no, partition_no HAVING count(*) > 1",
    )
    assert not result, f"Duplicate detected: {result[0]}"

    # One summary row per partition, ordered by partition number.
    rows = target_table.select(
        'count(DISTINCT record_metadata:"offset"::number) AS unique_offsets, '
        'record_metadata:"partition"::number AS partition_no',
        "GROUP BY partition_no ORDER BY partition_no",
    )
    assert len(rows) == PARTITION_COUNT
    for p in range(PARTITION_COUNT):
        assert rows[p]["UNIQUE_OFFSETS"] == EXPECTED_PER_PARTITION, (
            f"Partition {p}: expected {EXPECTED_PER_PARTITION} unique offsets, "
            f"got {rows[p]['UNIQUE_OFFSETS']}"
        )
        assert rows[p]["PARTITION_NO"] == p


================================================
FILE: test/tests/test_string_avrosr.py
================================================
import json

import pytest
from confluent_kafka import avro
from lib.matchers import ANY_INT

FILE_NAME = "travis_correct_string_avrosr"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100

VALUE_SCHEMA = avro.loads("""
{
    "type": "record",
    "name": "value_schema",
    "fields": [
        {"name": "id", "type": "int"},
        {"name": "firstName", "type": "string"},
        {"name": "time", "type": "int"}
    ]
}
""")


@pytest.mark.confluent_only
@pytest.mark.parametrize("connector_version", ["v4"], indirect=True)
def test_string_avrosr(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    """Avro SR records must be mapped onto the table's typed columns.

    Sends RECORD_COUNT Avro records, waits for them to land, then checks the
    column values and the RECORD_METADATA of the first row returned.
    """
    # Expectations were ported from the v3 test suite; only v4 is parametrized here.
    target_table = create_table(
        FILE_NAME.upper(),
        columns="(record_metadata variant, id number, firstName varchar, time number)",
    )
    kafka_topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # Only the id differs between the produced records.
    avro_records = []
    for record_id in range(RECORD_COUNT):
        avro_records.append({"id": record_id, "firstName": "abc0", "time": 1835})
    driver.sendAvroSRData(kafka_topic, avro_records, VALUE_SCHEMA)

    wait_for_rows(target_table.name, RECORD_COUNT)

    first_row = target_table.select("*")[0]

    assert first_row["ID"] == 0
    assert first_row["FIRSTNAME"] == "abc0"
    assert first_row["TIME"] == 1835

    # The timestamps are only matched as "any integer" via the ANY_INT matcher.
    actual_metadata = json.loads(first_row["RECORD_METADATA"])
    expected_metadata = {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "offset": 0,
        "partition": 0,
        "topic": kafka_topic,
    }
    assert actual_metadata == expected_metadata


================================================
FILE: test/tests/test_string_json.py
================================================
import json

from lib.matchers import ANY_INT

FILE_NAME = "travis_correct_string_json"
CONFIG_FILE = f"{FILE_NAME}.json"
RECORD_COUNT = 100


def _build_records() -> list[bytes | None]:
    """Assemble the message values produced by the test.

    The list holds ``RECORD_COUNT - 2`` UTF-8 JSON objects of the form
    ``{"number": "<index>"}``, followed by a ``None`` value (tombstone) and
    finally an empty byte string.
    """
    payloads: list[bytes | None] = []
    for index in range(RECORD_COUNT - 2):
        payloads.append(json.dumps({"number": str(index)}).encode("utf-8"))
    # The two special trailing values: a tombstone and an empty payload.
    payloads.extend([None, b""])
    return payloads


def test_string_json(
    driver,
    name_salt,
    connector_version,
    create_connector_from_file,
    create_table,
    wait_for_rows,
):
    table = create_table(
        FILE_NAME.upper(),
        columns='(record_metadata variant, "NUMBER" varchar)',
    )
    topic = f"{FILE_NAME}{name_salt}"

    create_connector_from_file(CONFIG_FILE)
    driver.startConnectorWaitTime()

    # -- Send --
    headers = [("header1", "value1"), ("header2", "{}")]
    records = _build_records()
    driver.sendBytesData(topic, records, [], 0, headers)

    # -- Verify row count --
    wait_for_rows(table.name, RECORD_COUNT)

    # -- Verify first row content --
    # Snowflake does not guarantee row ordering without ORDER BY, so we must
    # select the specific record at offset 0 rather than relying on insertion order.
    rows = table.select("record_metadata", "WHERE record_metadata:offset::int = 0")
    record_metadata = json.loads(rows[0]["RECORD_METADATA"])

    match connector_version:
        case "v3":
            expected_header2 = []
        case "v4":
            expected_header2 = "[]"

    assert record_metadata == {
        "CreateTime": ANY_INT,
        "SnowflakeConnectorPushTime": ANY_INT,
        "headers": {
            "header1": "value1",
            "header2": expected_header2,
        },
        "offset": 0,
        "partition": 0,
        "topic": topic,
    }


================================================
FILE: upload_jar.sh
================================================
#!/usr/bin/env bash
#
# Uploads ./target/snowflake-kafka-connector-<version>.jar to the connectors
# Nexus repository as snowflake-kafka-connector-$USER-<version>.jar.
# The version is read from pom.xml; the Nexus credentials are read from
# 1Password. Requires xmllint, op, jq and curl on the PATH.

# Make a pipeline fail when any of its stages fails. Without this, a failing
# `op` would be hidden behind jq's exit status in the checks below.
set -o pipefail

if ! VERSION=$(xmllint --xpath '/*[local-name()="project"]/*[local-name()="version"]/text()' pom.xml)
then
  echo "failed to read version from pom.xml"
  exit 1
fi
echo "version to upload: $VERSION"

# Check the local artifact first, so that the remote copy is never deleted
# when there is nothing to replace it with.
JAR="./target/snowflake-kafka-connector-$VERSION.jar"
if [ ! -f "$JAR" ]
then
  echo "jar to upload not found: $JAR"
  exit 1
fi

# An empty result means no item carries the tag; treat that as a failure too.
if ! API_KEY_SECRET_ID=$(op item list --tags "connectors-nexus-api-key" --format json | jq -r '.[].id') || [ -z "$API_KEY_SECRET_ID" ]
then
  echo "failed to find required api key in 1password"
  exit 1
fi

if ! USER_PASS=$(op item get "$API_KEY_SECRET_ID" --format json | jq -r '.fields[] | select(.type=="CONCEALED") | .value') || [ -z "$USER_PASS" ]
then
  echo 'failed to read user:password from 1password'
  exit 1
fi

FILE="https://nexus.int.snowflakecomputing.com/repository/connectors/snowflake-kafka-connector-$USER-$VERSION.jar"

# Best-effort delete: the file may not exist yet, so a failure here is ignored.
# Credentials and URL are quoted to prevent word splitting and globbing.
echo "trying to delete $FILE...."
curl -X DELETE \
  -u "$USER_PASS" \
  "$FILE"

# --fail makes curl exit non-zero on an HTTP error, which becomes the
# exit status of this script.
echo "uploading new file to $FILE..."
curl --fail \
  --upload-file "$JAR" \
  -u "$USER_PASS" \
  -w "\nHTTP Status: %{http_code}\n" \
  "$FILE"