Repository: jcustenborder/kafka-connect-spooldir
Branch: master
Commit: 7506b3da0701
Files: 140
Total size: 2.1 MB
Directory structure:
gitextract_f2vujxj0/
├── .gitignore
├── Jenkinsfile
├── LICENSE
├── README.md
├── bin/
│ └── debug.sh
├── config/
│ ├── AvroExample.properties
│ ├── CSVExample.json
│ ├── CSVExample.properties
│ ├── CSVSchemaGenerator.properties
│ ├── JsonExample.properties
│ └── connect-avro-docker.properties
├── docker-compose.yml
├── pom.xml
└── src/
├── main/
│ └── java/
│ └── com/
│ └── github/
│ └── jcustenborder/
│ └── kafka/
│ └── connect/
│ └── spooldir/
│ ├── AbstractCleanUpPolicy.java
│ ├── AbstractSchemaGenerator.java
│ ├── AbstractSourceConnector.java
│ ├── AbstractSourceConnectorConfig.java
│ ├── AbstractSourceTask.java
│ ├── AbstractSpoolDirSourceConnector.java
│ ├── AbstractSpoolDirSourceConnectorConfig.java
│ ├── AbstractSpoolDirSourceTask.java
│ ├── AbstractTaskPartitionerPredicate.java
│ ├── CsvSchemaGenerator.java
│ ├── FileComparator.java
│ ├── InputFile.java
│ ├── InputFileDequeue.java
│ ├── JsonSchemaGenerator.java
│ ├── Metadata.java
│ ├── SpoolDirAvroSourceConnector.java
│ ├── SpoolDirAvroSourceConnectorConfig.java
│ ├── SpoolDirAvroSourceTask.java
│ ├── SpoolDirBinaryFileSourceConnector.java
│ ├── SpoolDirBinaryFileSourceConnectorConfig.java
│ ├── SpoolDirBinaryFileSourceTask.java
│ ├── SpoolDirCsvSourceConnector.java
│ ├── SpoolDirCsvSourceConnectorConfig.java
│ ├── SpoolDirCsvSourceTask.java
│ ├── SpoolDirJsonSourceConnector.java
│ ├── SpoolDirJsonSourceConnectorConfig.java
│ ├── SpoolDirJsonSourceTask.java
│ ├── SpoolDirLineDelimitedSourceConnector.java
│ ├── SpoolDirLineDelimitedSourceConnectorConfig.java
│ ├── SpoolDirLineDelimitedSourceTask.java
│ ├── SpoolDirSchemaLessJsonSourceConnector.java
│ ├── SpoolDirSchemaLessJsonSourceConnectorConfig.java
│ ├── SpoolDirSchemaLessJsonSourceTask.java
│ ├── elf/
│ │ ├── SchemaConversion.java
│ │ ├── SchemaConversionBuilder.java
│ │ ├── SpoolDirELFSourceConnector.java
│ │ ├── SpoolDirELFSourceConnectorConfig.java
│ │ ├── SpoolDirELFSourceTask.java
│ │ └── converters/
│ │ ├── LocalDateLogFieldConverter.java
│ │ ├── LocalTimeLogFieldConverter.java
│ │ ├── LogFieldConverter.java
│ │ ├── LogFieldConverterFactory.java
│ │ ├── PrimitiveLogFieldConverter.java
│ │ └── TimestampLogFieldConverter.java
│ └── package-info.java
└── test/
├── java/
│ └── com/
│ └── github/
│ └── jcustenborder/
│ └── kafka/
│ └── connect/
│ └── spooldir/
│ ├── AbstractCleanUpPolicyTest.java
│ ├── AbstractSchemaGeneratorTest.java
│ ├── AbstractSpoolDirSourceConnectorTest.java
│ ├── AbstractSpoolDirSourceTaskTest.java
│ ├── ByNameAbstractTaskPartitionerPredicateTest.java
│ ├── CsvSchemaGeneratorTest.java
│ ├── DeleteCleanupPolicySubDirsNoRetainTest.java
│ ├── DeleteCleanupPolicySubDirsRetainTest.java
│ ├── DeleteCleanupPolicyTest.java
│ ├── DocumentationTest.java
│ ├── FileComparatorTest.java
│ ├── JsonSchemaGeneratorTest.java
│ ├── MinimumFileAgePredicateTest.java
│ ├── MoveByDateCleanupPolicySubDirsNoRetainTest.java
│ ├── MoveByDateCleanupPolicySubDirsRetainTest.java
│ ├── MoveByDateCleanupPolicyTest.java
│ ├── MoveCleanupPolicySubDirsNoRetainTest.java
│ ├── MoveCleanupPolicySubDirsRetainTest.java
│ ├── MoveCleanupPolicyTest.java
│ ├── NamedTest.java
│ ├── NoneCleanupPolicyTest.java
│ ├── ProcessingFileExistsPredicateTest.java
│ ├── SpoolDirAvroSourceTaskTest.java
│ ├── SpoolDirBinaryFileSourceTaskTest.java
│ ├── SpoolDirCsvSourceConnectorConfigTest.java
│ ├── SpoolDirCsvSourceConnectorTest.java
│ ├── SpoolDirCsvSourceTaskSubDirsNoRetainTest.java
│ ├── SpoolDirCsvSourceTaskSubDirsRetainTest.java
│ ├── SpoolDirCsvSourceTaskTest.java
│ ├── SpoolDirJsonSourceConnectorTest.java
│ ├── SpoolDirJsonSourceTaskTest.java
│ ├── SpoolDirLineDelimitedSourceTaskTest.java
│ ├── SpoolDirSchemaLessJsonSourceTaskTest.java
│ ├── TestCase.java
│ ├── TestDataUtils.java
│ └── elf/
│ ├── SchemaConversionBuilderTest.java
│ └── SpoolDirELFSourceTaskTest.java
└── resources/
├── com/
│ └── github/
│ └── jcustenborder/
│ └── kafka/
│ └── connect/
│ └── spooldir/
│ ├── SpoolBinaryFileSourceConnector/
│ │ ├── binary.json
│ │ └── fromXML.json
│ ├── SpoolDirBinaryFileSourceConnector/
│ │ ├── binary.json
│ │ └── fromXML.json
│ ├── SpoolDirCsvSourceConnector/
│ │ ├── schema.json
│ │ ├── schemaheaders.json
│ │ └── tsv.json
│ ├── SpoolDirJsonSourceConnector/
│ │ └── test.json
│ ├── SpoolDirLineDelimitedSourceConnector/
│ │ └── fix.json
│ ├── avro/
│ │ ├── FieldsMatch.data
│ │ └── FieldsMatch.json
│ ├── binary/
│ │ ├── DataHasMoreFields.data
│ │ └── DataHasMoreFields.json
│ ├── csv/
│ │ ├── BlankLines.data
│ │ ├── BlankLines.json
│ │ ├── DataHasMoreFields.data
│ │ ├── DataHasMoreFields.json
│ │ ├── FieldsMatch.data
│ │ ├── FieldsMatch.json
│ │ ├── FileModeFieldFieldsMatch.data
│ │ ├── FileModeFieldFieldsMatch.json
│ │ ├── SchemaHasMoreFields.data
│ │ ├── SchemaHasMoreFields.json
│ │ ├── SourceOffset.data
│ │ ├── SourceOffset.json
│ │ ├── WithHeaderSkipLines.data
│ │ ├── WithHeaderSkipLines.json
│ │ ├── WithoutHeader.data
│ │ └── WithoutHeader.json
│ ├── elf/
│ │ ├── SpoolDirELFSourceConnector/
│ │ │ └── example.json
│ │ └── elf/
│ │ ├── FieldsMatch.data
│ │ └── FieldsMatch.json
│ ├── json/
│ │ ├── DataHasMoreFields.data
│ │ ├── DataHasMoreFields.json
│ │ ├── FieldsMatch.data
│ │ ├── FieldsMatch.json
│ │ ├── FileModeFieldFieldsMatch.data
│ │ ├── FileModeFieldFieldsMatch.json
│ │ ├── SchemaHasMoreFields.data
│ │ ├── SchemaHasMoreFields.json
│ │ ├── SourceOffset.data
│ │ └── SourceOffset.json
│ └── schemalessjson/
│ ├── DataHasMoreFields.data
│ └── DataHasMoreFields.json
└── logback.xml
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
target
*.iml
.okhttpcache
ELFTesting.properties
.checkstyle
.factorypath
.idea/
================================================
FILE: Jenkinsfile
================================================
#!groovy
@Library('jenkins-pipeline') import com.github.jcustenborder.jenkins.pipeline.KafkaConnectPipeline
def pipe = new KafkaConnectPipeline()
pipe.execute()
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# Introduction
[Documentation](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir) | [Confluent Hub](https://www.confluent.io/hub/jcustenborder/kafka-connect-spooldir)
This Kafka Connect connector provides the capability to watch a directory for files and read the data as new files are written to the input directory. Each record in an input file is converted based on the user-supplied schema. The connectors in this project handle a variety of use cases, such as ingesting JSON, CSV, TSV, Avro, or binary files.
# Installation
## Confluent Hub
The following command can be used to install the plugin directly from the Confluent Hub using the
[Confluent Hub Client](https://docs.confluent.io/current/connect/managing/confluent-hub/client.html).
```bash
confluent-hub install jcustenborder/kafka-connect-spooldir:latest
```
## Manually
The zip file that is deployed to the [Confluent Hub](https://www.confluent.io/hub/jcustenborder/kafka-connect-spooldir) is available under
`target/components/packages/`. You can manually extract this zip file, which includes all dependencies. The dependencies
required to deploy the plugin are also available under `target/kafka-connect-target`. Make sure that you include all of the dependencies
that are required to run the plugin.
1. Create a directory under the `plugin.path` on your Connect worker.
2. Copy all of the dependencies into the newly created subdirectory.
3. Restart the Connect worker.
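As a rough sketch of these steps, assuming the plugin has been built locally with `mvn clean package` and that the worker's `plugin.path` is `/usr/share/kafka/plugins` (an assumption; adjust to your environment):
```bash
# Assumption: plugin.path=/usr/share/kafka/plugins on the Connect worker.
PLUGIN_DIR=/usr/share/kafka/plugins/kafka-connect-spooldir
mkdir -p "$PLUGIN_DIR"
# Copy the jars assembled under target/kafka-connect-target into the new directory.
find target/kafka-connect-target/usr/share/kafka-connect/kafka-connect-spooldir \
  -type f -name '*.jar' -exec cp {} "$PLUGIN_DIR" \;
# Restart the Connect worker so that it discovers the new plugin.
```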
# Source Connectors
## [Schema Less Json Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirSchemaLessJsonSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirSchemaLessJsonSourceConnector
```
This connector is used to [stream](https://en.wikipedia.org/wiki/JSON_Streaming) JSON files from a directory. The connector will read each file node by node, writing the result to Kafka. For example, if your data file contains several JSON objects, the connector will read from `{` to `}` for each object and write each object to Kafka.
### Important
This connector does not try to convert the JSON records to a schema. The recommended converter to use is the StringConverter. Example: `value.converter=org.apache.kafka.connect.storage.StringConverter`
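A minimal configuration sketch for this connector follows (standalone properties format); the connector name, topic, directory paths, and file pattern are illustrative assumptions. The full set of options is described under Configuration below.
```properties
name=schemaless-json-source
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirSchemaLessJsonSourceConnector
# Placeholder topic and paths; the directories must exist and be writable.
topic=json-topic
input.path=/data/input
finished.path=/data/finished
error.path=/data/error
input.file.pattern=^.*\.json$
# Records are passed through as strings, so use the StringConverter.
value.converter=org.apache.kafka.connect.storage.StringConverter
```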
### Configuration
#### File System
##### `error.path`
The directory in which to place files that have errors. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory from which to read the files that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Whether the task should halt when it encounters an error or continue to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of buffer for the BufferedInputStream that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes used to determine the sort order of the files. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `file.charset`
Character set to read the file with.
*Importance:* LOW
*Type:* STRING
*Default Value:* UTF-8
*Validator:* Big5,Big5-HKSCS,CESU-8,EUC-JP,EUC-KR,GB18030,GB2312,GBK,IBM-Thai,IBM00858,IBM01140,IBM01141,IBM01142,IBM01143,IBM01144,IBM01145,IBM01146,IBM01147,IBM01148,IBM01149,IBM037,IBM1026,IBM1047,IBM273,IBM277,IBM278,IBM280,IBM284,IBM285,IBM290,IBM297,IBM420,IBM424,IBM437,IBM500,IBM775,IBM850,IBM852,IBM855,IBM857,IBM860,IBM861,IBM862,IBM863,IBM864,IBM865,IBM866,IBM868,IBM869,IBM870,IBM871,IBM918,ISO-2022-CN,ISO-2022-JP,ISO-2022-JP-2,ISO-2022-KR,ISO-8859-1,ISO-8859-13,ISO-8859-15,ISO-8859-16,ISO-8859-2,ISO-8859-3,ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,ISO-8859-9,JIS_X0201,JIS_X0212-1990,KOI8-R,KOI8-U,Shift_JIS,TIS-620,US-ASCII,UTF-16,UTF-16BE,UTF-16LE,UTF-32,UTF-32BE,UTF-32LE,UTF-8,windows-1250,windows-1251,windows-1252,windows-1253,windows-1254,windows-1255,windows-1256,windows-1257,windows-1258,windows-31j,x-Big5-HKSCS-2001,x-Big5-Solaris,x-euc-jp-linux,x-EUC-TW,x-eucJP-Open,x-IBM1006,x-IBM1025,x-IBM1046,x-IBM1097,x-IBM1098,x-IBM1112,x-IBM1122,x-IBM1123,x-IBM1124,x-IBM1129,x-IBM1166,x-IBM1364,x-IBM1381,x-IBM1383,x-IBM29626C,x-IBM300,x-IBM33722,x-IBM737,x-IBM833,x-IBM834,x-IBM856,x-IBM874,x-IBM875,x-IBM921,x-IBM922,x-IBM930,x-IBM933,x-IBM935,x-IBM937,x-IBM939,x-IBM942,x-IBM942C,x-IBM943,x-IBM943C,x-IBM948,x-IBM949,x-IBM949C,x-IBM950,x-IBM964,x-IBM970,x-ISCII91,x-ISO-2022-CN-CNS,x-ISO-2022-CN-GB,x-iso-8859-11,x-JIS0208,x-JISAutoDetect,x-Johab,x-MacArabic,x-MacCentralEurope,x-MacCroatian,x-MacCyrillic,x-MacDingbat,x-MacGreek,x-MacHebrew,x-MacIceland,x-MacRoman,x-MacRomania,x-MacSymbol,x-MacThai,x-MacTurkish,x-MacUkraine,x-MS932_0213,x-MS950-HKSCS,x-MS950-HKSCS-XP,x-mswin-936,x-PCK,x-SJIS_0213,x-UTF-16LE-BOM,X-UTF-32BE-BOM,X-UTF-32LE-BOM,x-windows-50220,x-windows-50221,x-windows-874,x-windows-949,x-windows-950,x-windows-iso2022jp
##### `task.count`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Timestamps
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD`, the timestamp will be read from a field in the value; this field cannot be optional, must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html), and is specified in `timestamp.field`. If set to `FILE_TIME`, the last modified time of the file will be used. If set to `PROCESS_TIME`, the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
## [Json Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirJsonSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirJsonSourceConnector
```
This connector is used to [stream](https://en.wikipedia.org/wiki/JSON_Streaming) JSON files from a directory while converting the data based on the schema supplied in the configuration.
### Important
There are some caveats to running this connector with `schema.generation.enabled = true`. If schema generation is enabled, the connector will start by reading one of the files that match `input.file.pattern` in the path specified by `input.path`. If there are no files when the connector starts or is restarted, the connector will fail to start. If other files contain different fields, those fields will not be detected. The recommended approach is to specify a schema that the files will be parsed with. This ensures that the data written to Kafka by this connector will be consistent across files that have inconsistent columns. For example, if some files have an optional column that is not always included, create a schema that includes the column marked as optional.
### Note
If you want to import the JSON node by node from the file and do not care about schemas, do not use this connector with schema generation enabled. Take a look at the Schema Less Json Source Connector instead.
### Tip
To get a starting point for a schema, you can use the following command to generate an all-String schema. This will give you the basic structure of a schema. From there you can change the types to match what you expect.
```bash
mvn clean package
export CLASSPATH="$(find target/kafka-connect-target/usr/share/kafka-connect/kafka-connect-spooldir -type f -name '*.jar' | tr '\n' ':')"
kafka-run-class com.github.jcustenborder.kafka.connect.spooldir.AbstractSchemaGenerator -t json -f src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data -c config/JsonExample.properties -i id
```
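A configuration sketch with schema generation enabled follows; the connector name, topic, paths, key field, and generated schema names are illustrative assumptions.
```properties
name=json-source
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirJsonSourceConnector
# Placeholder topic and paths; the directories must exist and be writable.
topic=json-topic
input.path=/data/input
finished.path=/data/finished
error.path=/data/error
input.file.pattern=^.*\.json$
# Generate key and value schemas instead of supplying key.schema/value.schema.
schema.generation.enabled=true
schema.generation.key.fields=id
schema.generation.key.name=com.example.Key
schema.generation.value.name=com.example.Value
```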
### Configuration
#### File System
##### `error.path`
The directory in which to place files that have errors. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory from which to read the files that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Whether the task should halt when it encounters an error or continue to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of buffer for the BufferedInputStream that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes used to determine the sort order of the files. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `task.count`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Schema
##### `key.schema`
The schema for the key written to Kafka.
*Importance:* HIGH
*Type:* STRING
##### `value.schema`
The schema for the value written to Kafka.
*Importance:* HIGH
*Type:* STRING
#### Schema Generation
##### `schema.generation.enabled`
Flag to determine if schemas should be dynamically generated. If set to true, `key.schema` and `value.schema` can be omitted, but `schema.generation.key.name` and `schema.generation.value.name` must be set.
*Importance:* MEDIUM
*Type:* BOOLEAN
##### `schema.generation.key.fields`
The field(s) to use to build a key schema. This is only used during schema generation.
*Importance:* MEDIUM
*Type:* LIST
##### `schema.generation.key.name`
The name of the generated key schema.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* com.github.jcustenborder.kafka.connect.model.Key
##### `schema.generation.value.name`
The name of the generated value schema.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* com.github.jcustenborder.kafka.connect.model.Value
#### Timestamps
##### `timestamp.field`
The field in the value schema that will contain the parsed timestamp for the record. This field cannot be marked as optional and must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html).
*Importance:* MEDIUM
*Type:* STRING
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD`, the timestamp will be read from a field in the value; this field cannot be optional, must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html), and is specified in `timestamp.field`. If set to `FILE_TIME`, the last modified time of the file will be used. If set to `PROCESS_TIME`, the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
##### `parser.timestamp.date.formats`
The date formats that are expected in the file. This is a list of strings that will be used to parse the date fields in order. The most accurate date format should be first in the list. See the Java [SimpleDateFormat documentation](https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html) for more information.
*Importance:* LOW
*Type:* LIST
*Default Value:* [yyyy-MM-dd'T'HH:mm:ss, yyyy-MM-dd' 'HH:mm:ss]
##### `parser.timestamp.timezone`
The timezone that all of the dates will be parsed with.
*Importance:* LOW
*Type:* STRING
*Default Value:* UTC
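For example, the record timestamp can be taken from a field in the value by combining these settings; the field name `timestamp` and the date format shown are illustrative assumptions.
```properties
# Use a Timestamp field from the value instead of the processing time.
timestamp.mode=FIELD
timestamp.field=timestamp
parser.timestamp.date.formats=yyyy-MM-dd'T'HH:mm:ss
parser.timestamp.timezone=UTC
```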
## [Binary File Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirBinaryFileSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirBinaryFileSourceConnector
```
This connector is used to read an entire file as a byte array and write the data to Kafka.
### Warning
Large files will be read as a single byte array. This means that the process could run out of memory or try to send a message to Kafka that is greater than the maximum message size. If this happens, an exception will be thrown.
### Important
The recommended converter to use is the ByteArrayConverter. Example: `value.converter=org.apache.kafka.connect.converters.ByteArrayConverter`
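A minimal configuration sketch for this connector follows; the connector name, topic, paths, and file pattern are illustrative assumptions. The full set of options is described under Configuration below.
```properties
name=binary-file-source
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirBinaryFileSourceConnector
# Placeholder topic and paths; the directories must exist and be writable.
topic=binary-topic
input.path=/data/input
finished.path=/data/finished
error.path=/data/error
input.file.pattern=^.*\.bin$
# Each file becomes a single record whose value is the raw bytes.
value.converter=org.apache.kafka.connect.converters.ByteArrayConverter
```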
### Configuration
#### File System
##### `error.path`
The directory in which to place files that have errors. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory from which to read the files that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Whether the task should halt when it encounters an error or continue to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of buffer for the BufferedInputStream that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes used to determine the sort order of the files. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `task.count`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Timestamps
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD`, the timestamp will be read from a field in the value; this field cannot be optional, must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html), and is specified in `timestamp.field`. If set to `FILE_TIME`, the last modified time of the file will be used. If set to `PROCESS_TIME`, the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
## [Line Delimited Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirLineDelimitedSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirLineDelimitedSourceConnector
```
This connector is used to read a file line by line and write the data to Kafka.
### Important
The recommended converter to use is the StringConverter. Example: `value.converter=org.apache.kafka.connect.storage.StringConverter`
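A minimal configuration sketch for this connector follows; the connector name, topic, paths, and file pattern are illustrative assumptions. The full set of options is described under Configuration below.
```properties
name=line-delimited-source
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirLineDelimitedSourceConnector
# Placeholder topic and paths; the directories must exist and be writable.
topic=lines-topic
input.path=/data/input
finished.path=/data/finished
error.path=/data/error
input.file.pattern=^.*\.txt$
# Each line becomes a record value, so use the StringConverter.
value.converter=org.apache.kafka.connect.storage.StringConverter
```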
### Configuration
#### File System
##### `error.path`
The directory in which to place files that have errors. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory from which to read the files that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Whether the task should halt when it encounters an error or continue to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of buffer for the BufferedInputStream that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes used to determine the sort order of the files. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `file.charset`
Character set to read the file with.
*Importance:* LOW
*Type:* STRING
*Default Value:* UTF-8
*Validator:* Big5,Big5-HKSCS,CESU-8,EUC-JP,EUC-KR,GB18030,GB2312,GBK,IBM-Thai,IBM00858,IBM01140,IBM01141,IBM01142,IBM01143,IBM01144,IBM01145,IBM01146,IBM01147,IBM01148,IBM01149,IBM037,IBM1026,IBM1047,IBM273,IBM277,IBM278,IBM280,IBM284,IBM285,IBM290,IBM297,IBM420,IBM424,IBM437,IBM500,IBM775,IBM850,IBM852,IBM855,IBM857,IBM860,IBM861,IBM862,IBM863,IBM864,IBM865,IBM866,IBM868,IBM869,IBM870,IBM871,IBM918,ISO-2022-CN,ISO-2022-JP,ISO-2022-JP-2,ISO-2022-KR,ISO-8859-1,ISO-8859-13,ISO-8859-15,ISO-8859-16,ISO-8859-2,ISO-8859-3,ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,ISO-8859-9,JIS_X0201,JIS_X0212-1990,KOI8-R,KOI8-U,Shift_JIS,TIS-620,US-ASCII,UTF-16,UTF-16BE,UTF-16LE,UTF-32,UTF-32BE,UTF-32LE,UTF-8,windows-1250,windows-1251,windows-1252,windows-1253,windows-1254,windows-1255,windows-1256,windows-1257,windows-1258,windows-31j,x-Big5-HKSCS-2001,x-Big5-Solaris,x-euc-jp-linux,x-EUC-TW,x-eucJP-Open,x-IBM1006,x-IBM1025,x-IBM1046,x-IBM1097,x-IBM1098,x-IBM1112,x-IBM1122,x-IBM1123,x-IBM1124,x-IBM1129,x-IBM1166,x-IBM1364,x-IBM1381,x-IBM1383,x-IBM29626C,x-IBM300,x-IBM33722,x-IBM737,x-IBM833,x-IBM834,x-IBM856,x-IBM874,x-IBM875,x-IBM921,x-IBM922,x-IBM930,x-IBM933,x-IBM935,x-IBM937,x-IBM939,x-IBM942,x-IBM942C,x-IBM943,x-IBM943C,x-IBM948,x-IBM949,x-IBM949C,x-IBM950,x-IBM964,x-IBM970,x-ISCII91,x-ISO-2022-CN-CNS,x-ISO-2022-CN-GB,x-iso-8859-11,x-JIS0208,x-JISAutoDetect,x-Johab,x-MacArabic,x-MacCentralEurope,x-MacCroatian,x-MacCyrillic,x-MacDingbat,x-MacGreek,x-MacHebrew,x-MacIceland,x-MacRoman,x-MacRomania,x-MacSymbol,x-MacThai,x-MacTurkish,x-MacUkraine,x-MS932_0213,x-MS950-HKSCS,x-MS950-HKSCS-XP,x-mswin-936,x-PCK,x-SJIS_0213,x-UTF-16LE-BOM,X-UTF-32BE-BOM,X-UTF-32LE-BOM,x-windows-50220,x-windows-50221,x-windows-874,x-windows-949,x-windows-950,x-windows-iso2022jp
##### `task.count`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Timestamps
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD`, the timestamp will be read from a field in the value; this field cannot be optional, must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html), and is specified in `timestamp.field`. If set to `FILE_TIME`, the last modified time of the file will be used. If set to `PROCESS_TIME`, the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
## [Avro Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirAvroSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirAvroSourceConnector
```
This connector is used to read Avro data files from the file system and write their contents to Kafka.
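A minimal configuration sketch for this connector follows; the connector name, topic, paths, and file pattern are illustrative assumptions. No schema properties are needed here because Avro data files embed their own schema.
```properties
name=avro-source
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirAvroSourceConnector
# Placeholder topic and paths; the directories must exist and be writable.
topic=avro-topic
input.path=/data/input
finished.path=/data/finished
error.path=/data/error
input.file.pattern=^.*\.avro$
```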
### Configuration
#### File System
##### `error.path`
The directory in which to place files that have errors. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory from which to read the files that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Whether the task should halt when it encounters an error or continue to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of buffer for the BufferedInputStream that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes used to determine the sort order of the files. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `task.count`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Timestamps
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD`, the timestamp will be read from a field in the value; this field cannot be optional, must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html), and is specified in `timestamp.field`. If set to `FILE_TIME`, the last modified time of the file will be used. If set to `PROCESS_TIME`, the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
## [Extended Log File Format Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirELFSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.elf.SpoolDirELFSourceConnector
```
This connector is used to stream [Extended Log File Format](https://www.w3.org/TR/WD-logfile.html) files from a directory while converting the data to a strongly typed schema.
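A minimal configuration sketch for this connector follows; the connector name, topic, paths, and file pattern are illustrative assumptions. The full set of options is described under Configuration below.
```properties
name=elf-source
connector.class=com.github.jcustenborder.kafka.connect.spooldir.elf.SpoolDirELFSourceConnector
# Placeholder topic and paths; the directories must exist and be writable.
topic=elf-topic
input.path=/data/input
finished.path=/data/finished
error.path=/data/error
input.file.pattern=^.*\.log$
```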
### Configuration
#### File System
##### `error.path`
The directory in which to place files that have errors. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory from which to read the files that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Whether the task should halt when it encounters an error or continue to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of buffer for the BufferedInputStream that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes used to determine the sort order of the files. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `task.count`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting used by the connector to instruct a task which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Timestamps
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD` then the timestamp will be read from a field in the value. This field cannot be optional and must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html). Specify the field in `timestamp.field`. If set to `FILE_TIME` then the last modified time of the file will be used. If set to `PROCESS_TIME` the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
## [CSV Source Connector](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spooldir/sources/SpoolDirCsvSourceConnector.html)
```
com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector
```
The SpoolDirCsvSourceConnector will monitor the directory specified in `input.path` for files and read them as CSV, converting each of the records to the strongly typed equivalent specified in `key.schema` and `value.schema`.
### Important
There are some caveats to running this connector with `schema.generation.enabled = true`. If schema generation is enabled, the connector will start by reading one of the files that match `input.file.pattern` in the path specified by `input.path`. If there are no files when the connector starts or is restarted, the connector will fail to start. If there are different fields in other files, they will not be detected. The recommended path is to specify a schema that the files will be parsed with. This ensures that data written by this connector to Kafka will be consistent across files that have inconsistent columns. For example, if some files have an optional column that is not always included, create a schema that includes the column marked as optional.
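As a minimal sketch, assuming a hypothetical file with a required `id` column and a `comment` column that only appears in some files, the value schema would mark `comment` as optional:
```properties
# Hypothetical schema: files without the "comment" column still parse.
value.schema={"name":"com.example.Example","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false},"comment":{"type":"STRING","isOptional":true}}}
```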
### Tip
To get a starting point for a schema you can use the following command to generate an all String schema. This will give you the basic structure of a schema. From there you can change the types to match what you expect.
```bash
mvn clean package
export CLASSPATH="$(find target/kafka-connect-target/usr/share/kafka-connect/kafka-connect-spooldir -type f -name '*.jar' | tr '\n' ':')"
kafka-run-class com.github.jcustenborder.kafka.connect.spooldir.AbstractSchemaGenerator -t csv -f src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data -c config/CSVExample.properties -i id
```
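By default the generated configuration is written to stdout; pass `-o <file>` to write it to a file instead. Verify the generated types before using the configuration, since every generated field is an optional String.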
### Configuration
#### File System
##### `error.path`
The directory to place files that have error(s). This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `input.file.pattern`
Regular expression to check input file names against. This expression must match the entire filename. The equivalent of Matcher.matches().
*Importance:* HIGH
*Type:* STRING
##### `input.path`
The directory to read files from that will be processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
*Validator:* Absolute path to a directory that exists and is writable.
##### `finished.path`
The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.
*Importance:* HIGH
*Type:* STRING
##### `halt.on.error`
Determines whether the task should halt when it encounters an error or continue on to the next file.
*Importance:* HIGH
*Type:* BOOLEAN
*Default Value:* true
##### `cleanup.policy`
Determines how the connector should clean up files that have been successfully processed. NONE leaves the files in place, which could cause them to be reprocessed if the connector is restarted. DELETE removes the file from the filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to a finished directory with subdirectories by date.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* MOVE
*Validator:* Matches: ``NONE``, ``DELETE``, ``MOVE``, ``MOVEBYDATE``
##### `task.partitioner`
The task partitioner implementation is used when the connector is configured to use more than one task. This is used by each task to identify which files will be processed by that task. This ensures that each file is only assigned to one task.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* ByName
*Validator:* Matches: ``ByName``
##### `file.buffer.size.bytes`
The size of the buffer for the `BufferedInputStream` that will be used to interact with the file system.
*Importance:* LOW
*Type:* INT
*Default Value:* 131072
*Validator:* [1,...]
##### `file.minimum.age.ms`
The amount of time in milliseconds after the file was last written to before the file can be processed.
*Importance:* LOW
*Type:* LONG
*Default Value:* 0
*Validator:* [0,...]
##### `files.sort.attributes`
The attributes each file will use to determine the sort order. `Name` is the name of the file. `Length` is the length of the file, preferring larger files first. `LastModified` is the LastModified attribute of the file, preferring older files first.
*Importance:* LOW
*Type:* LIST
*Default Value:* [NameAsc]
*Validator:* Matches: ``NameAsc``, ``NameDesc``, ``LengthAsc``, ``LengthDesc``, ``LastModifiedAsc``, ``LastModifiedDesc``
##### `processing.file.extension`
Before a file is processed, a flag is created in its directory to indicate the file is being handled. The flag file has the same name as the file, but with this property appended as a suffix.
*Importance:* LOW
*Type:* STRING
*Default Value:* .PROCESSING
*Validator:* Matches regex( ^.*\..+$ )
#### General
##### `topic`
The Kafka topic to write the data to.
*Importance:* HIGH
*Type:* STRING
##### `batch.size`
The number of records that should be returned with each batch.
*Importance:* LOW
*Type:* INT
*Default Value:* 1000
##### `csv.case.sensitive.field.names`
Flag to determine if the field names in the header row should be treated as case sensitive.
*Importance:* LOW
*Type:* BOOLEAN
##### `csv.rfc.4180.parser.enabled`
Flag to determine if the RFC 4180 parser should be used instead of the default parser.
*Importance:* LOW
*Type:* BOOLEAN
##### `empty.poll.wait.ms`
The amount of time to wait if a poll returns an empty list of records.
*Importance:* LOW
*Type:* LONG
*Default Value:* 500
*Validator:* [1,...,9223372036854775807]
##### `task.count`
Internal setting to the connector used to instruct a task on which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 1
*Validator:* [1,...]
##### `task.index`
Internal setting to the connector used to instruct a task on which files to select. The connector will override this setting.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
*Validator:* [0,...]
#### Schema
##### `key.schema`
The schema for the key written to Kafka.
*Importance:* HIGH
*Type:* STRING
##### `value.schema`
The schema for the value written to Kafka.
*Importance:* HIGH
*Type:* STRING
#### CSV Parsing
##### `csv.first.row.as.header`
Flag to indicate if the first row of data contains the header of the file. If true, the position of the columns will be determined by the first row of the CSV. The column positions will be matched against the fields of the schema supplied in `value.schema`. If set to true, the number of columns must be greater than or equal to the number of fields in the schema.
*Importance:* MEDIUM
*Type:* BOOLEAN
##### `csv.escape.char`
The character, as an integer, to use when a special character is encountered. The default escape character is a backslash `\` (92).
*Importance:* LOW
*Type:* INT
*Default Value:* 92
##### `csv.file.charset`
Character set to read the file with.
*Importance:* LOW
*Type:* STRING
*Default Value:* UTF-8
*Validator:* Big5,Big5-HKSCS,CESU-8,EUC-JP,EUC-KR,GB18030,GB2312,GBK,IBM-Thai,IBM00858,IBM01140,IBM01141,IBM01142,IBM01143,IBM01144,IBM01145,IBM01146,IBM01147,IBM01148,IBM01149,IBM037,IBM1026,IBM1047,IBM273,IBM277,IBM278,IBM280,IBM284,IBM285,IBM290,IBM297,IBM420,IBM424,IBM437,IBM500,IBM775,IBM850,IBM852,IBM855,IBM857,IBM860,IBM861,IBM862,IBM863,IBM864,IBM865,IBM866,IBM868,IBM869,IBM870,IBM871,IBM918,ISO-2022-CN,ISO-2022-JP,ISO-2022-JP-2,ISO-2022-KR,ISO-8859-1,ISO-8859-13,ISO-8859-15,ISO-8859-16,ISO-8859-2,ISO-8859-3,ISO-8859-4,ISO-8859-5,ISO-8859-6,ISO-8859-7,ISO-8859-8,ISO-8859-9,JIS_X0201,JIS_X0212-1990,KOI8-R,KOI8-U,Shift_JIS,TIS-620,US-ASCII,UTF-16,UTF-16BE,UTF-16LE,UTF-32,UTF-32BE,UTF-32LE,UTF-8,windows-1250,windows-1251,windows-1252,windows-1253,windows-1254,windows-1255,windows-1256,windows-1257,windows-1258,windows-31j,x-Big5-HKSCS-2001,x-Big5-Solaris,x-euc-jp-linux,x-EUC-TW,x-eucJP-Open,x-IBM1006,x-IBM1025,x-IBM1046,x-IBM1097,x-IBM1098,x-IBM1112,x-IBM1122,x-IBM1123,x-IBM1124,x-IBM1129,x-IBM1166,x-IBM1364,x-IBM1381,x-IBM1383,x-IBM29626C,x-IBM300,x-IBM33722,x-IBM737,x-IBM833,x-IBM834,x-IBM856,x-IBM874,x-IBM875,x-IBM921,x-IBM922,x-IBM930,x-IBM933,x-IBM935,x-IBM937,x-IBM939,x-IBM942,x-IBM942C,x-IBM943,x-IBM943C,x-IBM948,x-IBM949,x-IBM949C,x-IBM950,x-IBM964,x-IBM970,x-ISCII91,x-ISO-2022-CN-CNS,x-ISO-2022-CN-GB,x-iso-8859-11,x-JIS0208,x-JISAutoDetect,x-Johab,x-MacArabic,x-MacCentralEurope,x-MacCroatian,x-MacCyrillic,x-MacDingbat,x-MacGreek,x-MacHebrew,x-MacIceland,x-MacRoman,x-MacRomania,x-MacSymbol,x-MacThai,x-MacTurkish,x-MacUkraine,x-MS932_0213,x-MS950-HKSCS,x-MS950-HKSCS-XP,x-mswin-936,x-PCK,x-SJIS_0213,x-UTF-16LE-BOM,X-UTF-32BE-BOM,X-UTF-32LE-BOM,x-windows-50220,x-windows-50221,x-windows-874,x-windows-949,x-windows-950,x-windows-iso2022jp
##### `csv.ignore.leading.whitespace`
Sets the ignore leading whitespace setting - if true, white space in front of a quote in a field is ignored.
*Importance:* LOW
*Type:* BOOLEAN
*Default Value:* true
##### `csv.ignore.quotations`
Sets the ignore quotations mode - if true, quotations are ignored.
*Importance:* LOW
*Type:* BOOLEAN
##### `csv.keep.carriage.return`
Flag to determine if the carriage return at the end of the line should be maintained.
*Importance:* LOW
*Type:* BOOLEAN
##### `csv.null.field.indicator`
Indicator to determine how the CSV Reader can determine if a field is null. Valid values are EMPTY_SEPARATORS, EMPTY_QUOTES, BOTH, NEITHER. For more information see http://opencsv.sourceforge.net/apidocs/com/opencsv/enums/CSVReaderNullFieldIndicator.html.
*Importance:* LOW
*Type:* STRING
*Default Value:* NEITHER
*Validator:* Matches: ``EMPTY_SEPARATORS``, ``EMPTY_QUOTES``, ``BOTH``, ``NEITHER``
##### `csv.quote.char`
The character that is used to quote a field. This is typically needed when the `csv.separator.char` character appears within the data.
*Importance:* LOW
*Type:* INT
*Default Value:* 34
##### `csv.separator.char`
The character that separates each field, in the form of an integer. Typically in a CSV this is a comma (44); a TSV would use a tab (9). If `csv.separator.char` is defined as null (0), then the RFC 4180 parser is used by default. This is the equivalent of `csv.rfc.4180.parser.enabled = true`.
*Importance:* LOW
*Type:* INT
*Default Value:* 44
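For instance, a minimal sketch of a tab-separated (TSV) setup, with an illustrative file pattern:
```properties
# Illustrative TSV sketch: tab (ASCII 9) as the field separator.
input.file.pattern=^.*\.tsv$
csv.separator.char=9
```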
##### `csv.skip.lines`
Number of lines to skip at the beginning of the file.
*Importance:* LOW
*Type:* INT
*Default Value:* 0
##### `csv.strict.quotes`
Sets the strict quotes setting - if true, characters outside the quotes are ignored.
*Importance:* LOW
*Type:* BOOLEAN
##### `csv.verify.reader`
Flag to determine if the reader should be verified.
*Importance:* LOW
*Type:* BOOLEAN
*Default Value:* true
#### Schema Generation
##### `schema.generation.enabled`
Flag to determine if schemas should be dynamically generated. If set to true, `key.schema` and `value.schema` can be omitted, but `schema.generation.key.name` and `schema.generation.value.name` must be set.
*Importance:* MEDIUM
*Type:* BOOLEAN
##### `schema.generation.key.fields`
The field(s) to use to build a key schema. This is only used during schema generation.
*Importance:* MEDIUM
*Type:* LIST
##### `schema.generation.key.name`
The name of the generated key schema.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* com.github.jcustenborder.kafka.connect.model.Key
##### `schema.generation.value.name`
The name of the generated value schema.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* com.github.jcustenborder.kafka.connect.model.Value
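Taken together, a minimal schema-generation sketch (the schema names and key field are illustrative) might be:
```properties
# Illustrative sketch: generate schemas from the first matching file
# and build the key from the "id" field.
schema.generation.enabled=true
schema.generation.key.fields=id
schema.generation.key.name=com.example.users.UserKey
schema.generation.value.name=com.example.users.User
```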
#### Timestamps
##### `timestamp.field`
The field in the value schema that will contain the parsed timestamp for the record. This field cannot be marked as optional and must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html).
*Importance:* MEDIUM
*Type:* STRING
##### `timestamp.mode`
Determines how the connector will set the timestamp for the [ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). If set to `FIELD` then the timestamp will be read from a field in the value. This field cannot be optional and must be a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html). Specify the field in `timestamp.field`. If set to `FILE_TIME` then the last modified time of the file will be used. If set to `PROCESS_TIME` the time the record is read will be used.
*Importance:* MEDIUM
*Type:* STRING
*Default Value:* PROCESS_TIME
*Validator:* Matches: ``FIELD``, ``FILE_TIME``, ``PROCESS_TIME``
##### `parser.timestamp.date.formats`
The date formats that are expected in the file. This is a list of strings that will be used to parse the date fields in order. The most accurate date format should be first in the list. See the Java [SimpleDateFormat](https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html) documentation for more info.
*Importance:* LOW
*Type:* LIST
*Default Value:* [yyyy-MM-dd'T'HH:mm:ss, yyyy-MM-dd' 'HH:mm:ss]
##### `parser.timestamp.timezone`
The timezone that all of the dates will be parsed with.
*Importance:* LOW
*Type:* STRING
*Default Value:* UTC
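As a sketch, taking the record timestamp from a `last_login` field (a Timestamp field used in the example schemas in this repository) would combine these settings:
```properties
# Illustrative sketch: read the ConnectRecord timestamp from the value.
timestamp.mode=FIELD
timestamp.field=last_login
parser.timestamp.date.formats=yyyy-MM-dd'T'HH:mm:ss
parser.timestamp.timezone=UTC
```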
# Development
## Building the source
```bash
mvn clean package
```
## Contributions
Contributions are always welcome! Before you start any development, please create an issue and
start a discussion. Then create a pull request against your newly created issue and we'll be happy
to see if we can merge it. First and foremost, any time you're adding code to the code base you
need to include test coverage. Make sure that you run `mvn clean package` before submitting your
pull request to ensure that all of the tests, checkstyle rules, and the package build successfully.
================================================
FILE: bin/debug.sh
================================================
#!/usr/bin/env bash
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
: ${INPUT_PATH:='/tmp/spooldir/input'}
: ${ERROR_PATH:='/tmp/spooldir/error'}
: ${FINISHED_PATH:='/tmp/spooldir/finished'}
: ${DEBUG_SUSPEND_FLAG:='y'}
export KAFKA_DEBUG='n'
export DEBUG_SUSPEND_FLAG='n'
# export KAFKA_OPTS='-agentpath:/Applications/YourKit-Java-Profiler-2017.02.app/Contents/Resources/bin/mac/libyjpagent.jnilib=disablestacktelemetry,exceptions=disable,delay=10000'
set -e
# mvn clean package
if [ ! -d "${INPUT_PATH}" ]; then
mkdir -p "${INPUT_PATH}"
fi
if [ ! -d "${ERROR_PATH}" ]; then
mkdir -p "${ERROR_PATH}"
fi
if [ ! -d "${FINISHED_PATH}" ]; then
mkdir -p "${FINISHED_PATH}"
fi
cp /Users/jeremy/Downloads/csv-spooldir-source.csv "${INPUT_PATH}/csv-spooldir-source.csv"
# cp src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data "${INPUT_PATH}/FieldsMatch.csv"
# cp src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data "${INPUT_PATH}/FieldsMatch.json"
# connect-standalone config/connect-avro-docker.properties config/CSVSchemaGenerator.properties
# connect-standalone config/connect-avro-docker.properties config/JsonExample.properties
# connect-standalone config/connect-avro-docker.properties config/AvroExample.properties
export DOCKER_IMAGE="confluentinc/cp-kafka-connect:5.5.2-1-ubi8"
docker run --rm --network=kafka-connect-spooldir_default \
-p "5005:5005" \
-v "/tmp/spooldir:/tmp/spooldir" \
-v "$(pwd)/config:/config" \
-v "$(pwd)/target/kafka-connect-target/usr/share/kafka-connect:/plugins" \
"${DOCKER_IMAGE}" /bin/connect-standalone /config/connect-avro-docker.properties /config/CSVSchemaGenerator.properties
================================================
FILE: config/AvroExample.properties
================================================
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name=AvroSpoolDir
tasks.max=1
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirAvroSourceConnector
input.file.pattern=^.*\.avro$
halt.on.error=false
topic=testing
input.path=/Users/jeremy/data/stackoverflow
finished.path=/tmp/spooldir/finished
error.path=/tmp/spooldir/error
batch.size = 5000
cleanup.policy = NONE
================================================
FILE: config/CSVExample.json
================================================
{
"name": "CsvSpoolDir",
"config": {
"tasks.max": "1",
"connector.class": "com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector",
"input.file.pattern": "^.*\\.csv$",
"halt.on.error": "false",
"topic": "testing"
"csv.first.row.as.header": "true",
"csv.null.field.indicator": "EMPTY_SEPARATORS",
"input.path": "/tmp/spooldir/input",
"finished.path": "/tmp/spooldir/finished",
"error.path": "/tmp/spooldir/error",
"key.schema": "{\"name\":\"com.example.users.UserKey\",\"type\":\"STRUCT\",\"isOptional\":false,\"fieldSchemas\":{\"id\":{\"type\":\"INT64\",\"isOptional\":false}}}",
"value.schema": "{\"name\":\"com.example.users.User\",\"type\":\"STRUCT\",\"isOptional\":false,\"fieldSchemas\":{\"id\":{\"type\":\"INT64\",\"isOptional\":false},\"first_name\":{\"type\":\"STRING\",\"isOptional\":true},\"last_name\":{\"type\":\"STRING\",\"isOptional\":true},\"email\":{\"type\":\"STRING\",\"isOptional\":true},\"gender\":{\"type\":\"STRING\",\"isOptional\":true},\"ip_address\":{\"type\":\"STRING\",\"isOptional\":true},\"last_login\":{\"name\":\"org.apache.kafka.connect.data.Timestamp\",\"type\":\"INT64\",\"version\":1,\"isOptional\":true},\"account_balance\":{\"name\":\"org.apache.kafka.connect.data.Decimal\",\"type\":\"BYTES\",\"version\":1,\"parameters\":{\"scale\":\"2\"},\"isOptional\":true},\"country\":{\"type\":\"STRING\",\"isOptional\":true},\"favorite_color\":{\"type\":\"STRING\",\"isOptional\":true}}}"
}
}
================================================
FILE: config/CSVExample.properties
================================================
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name=CsvSpoolDir
tasks.max=1
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector
input.file.pattern=^.*\.csv$
halt.on.error=false
topic=testing
key.schema={"name":"com.example.users.UserKey","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false}}}
value.schema={"name":"com.example.users.User","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false},"first_name":{"type":"STRING","isOptional":true},"last_name":{"type":"STRING","isOptional":true},"email":{"type":"STRING","isOptional":true},"gender":{"type":"STRING","isOptional":true},"ip_address":{"type":"STRING","isOptional":true},"last_login":{"name":"org.apache.kafka.connect.data.Timestamp","type":"INT64","version":1,"isOptional":true},"account_balance":{"name":"org.apache.kafka.connect.data.Decimal","type":"BYTES","version":1,"parameters":{"scale":"2"},"isOptional":true},"country":{"type":"STRING","isOptional":true},"favorite_color":{"type":"STRING","isOptional":true}}}
csv.first.row.as.header=true
csv.null.field.indicator=EMPTY_SEPARATORS
input.path=/tmp/spooldir/input
finished.path=/tmp/spooldir/finished
error.path=/tmp/spooldir/error
batch.size = 5000
cleanup.policy = DELETE
file.buffer.size.bytes = 1048576
================================================
FILE: config/CSVSchemaGenerator.properties
================================================
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name=CsvSpoolDir
tasks.max=1
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector
input.file.pattern=^.*\.csv$
halt.on.error=false
topic=testing
csv.first.row.as.header=true
csv.null.field.indicator=EMPTY_SEPARATORS
input.path=/tmp/spooldir/input
finished.path=/tmp/spooldir/finished
error.path=/tmp/spooldir/error
batch.size = 5000
cleanup.policy = DELETE
file.buffer.size.bytes = 1048576
schema.generation.enabled=true
================================================
FILE: config/JsonExample.properties
================================================
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name=JsonSpoolDir
tasks.max=1
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirJsonSourceConnector
input.file.pattern=^.*\.json$
finished.path=/tmp/spooldir/finished
input.path=/tmp/spooldir/input
error.path=/tmp/spooldir/error
halt.on.error=false
topic=testing
key.schema={"name":"com.example.users.UserKey","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false}}}
value.schema={"name":"com.example.users.User","type":"STRUCT","isOptional":false,"fieldSchemas":{"id":{"type":"INT64","isOptional":false},"first_name":{"type":"STRING","isOptional":true},"last_name":{"type":"STRING","isOptional":true},"email":{"type":"STRING","isOptional":true},"gender":{"type":"STRING","isOptional":true},"ip_address":{"type":"STRING","isOptional":true},"last_login":{"name":"org.apache.kafka.connect.data.Timestamp","type":"INT64","version":1,"isOptional":true},"account_balance":{"name":"org.apache.kafka.connect.data.Decimal","type":"BYTES","version":1,"parameters":{"scale":"2"},"isOptional":true},"country":{"type":"STRING","isOptional":true},"favorite_color":{"type":"STRING","isOptional":true}}}
================================================
FILE: config/connect-avro-docker.properties
================================================
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
group.id=foo
bootstrap.servers=kafka:9092
key.converter=io.confluent.connect.avro.AvroConverter
key.converter.schema.registry.url=http://schema-registry:8081
value.converter=io.confluent.connect.avro.AvroConverter
value.converter.schema.registry.url=http://schema-registry:8081
internal.key.converter=org.apache.kafka.connect.json.JsonConverter
internal.value.converter=org.apache.kafka.connect.json.JsonConverter
internal.key.converter.schemas.enable=false
internal.value.converter.schemas.enable=false
offset.storage.file.filename=/tmp/connect.offsets
plugin.path=target/kafka-connect-target/usr/share/kafka-connect,/plugins
config.storage.replication.factor=1
config.storage.topic=connect_config
offset.storage.replication.factor=1
offset.storage.topic=connect_offset
status.storage.replication.factor=1
status.storage.topic=connect_status
================================================
FILE: docker-compose.yml
================================================
#
# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
version: "2"
services:
zookeeper:
image: confluentinc/cp-zookeeper:6.0.0
ports:
- "2181:2181"
environment:
ZOOKEEPER_CLIENT_PORT: 2181
kafka:
image: confluentinc/cp-kafka:6.0.0
depends_on:
- zookeeper
ports:
- "9092:9092"
environment:
KAFKA_ZOOKEEPER_CONNECT: "zookeeper:2181"
KAFKA_ADVERTISED_LISTENERS: "plaintext://kafka:9092"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
schema-registry:
image: confluentinc/cp-schema-registry:6.0.0
depends_on:
- kafka
- zookeeper
ports:
- "8081:8081"
environment:
SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: "zookeeper:2181"
SCHEMA_REGISTRY_HOST_NAME: schema-registry
connect:
image: confluentinc/cp-kafka-connect:5.5.1-1-ubi8
depends_on:
- kafka
- zookeeper
- schema-registry
ports:
- "5005:5005"
environment:
CONNECT_BOOTSTRAP_SERVERS: kafka:9092
CONNECT_GROUP_ID: spooldir
================================================
FILE: pom.xml
================================================
<?xml version="1.0"?>
<!--
Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.github.jcustenborder.kafka.connect</groupId>
<artifactId>kafka-connect-parent</artifactId>
<version>2.8.0-1</version>
</parent>
<artifactId>kafka-connect-spooldir</artifactId>
<version>2.0-SNAPSHOT</version>
<name>kafka-connect-spooldir</name>
<description>A Kafka Connect connector reading delimited files from the file system.
</description>
<url>https://github.com/jcustenborder/kafka-connect-spooldir</url>
<inceptionYear>2016</inceptionYear>
<licenses>
<license>
<name>Apache License 2.0</name>
<url>https://github.com/jcustenborder/kafka-connect-spooldir/LICENSE</url>
<distribution>repo</distribution>
</license>
</licenses>
<developers>
<developer>
<id>jcustenborder</id>
<name>Jeremy Custenborder</name>
<url>https://github.com/jcustenborder</url>
<roles>
<role>Committer</role>
</roles>
</developer>
</developers>
<scm>
<connection>scm:git:https://github.com/jcustenborder/kafka-connect-spooldir.git</connection>
<developerConnection>scm:git:git@github.com:jcustenborder/kafka-connect-spooldir.git
</developerConnection>
<url>https://github.com/jcustenborder/kafka-connect-spooldir</url>
</scm>
<issueManagement>
<system>github</system>
<url>https://github.com/jcustenborder/kafka-connect-spooldir/issues</url>
</issueManagement>
<dependencies>
<dependency>
<groupId>com.github.jcustenborder.kafka.connect</groupId>
<artifactId>connect-utils-parser</artifactId>
</dependency>
<dependency>
<groupId>net.sourceforge.argparse4j</groupId>
<artifactId>argparse4j</artifactId>
<version>0.7.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>4.6</version>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
<version>1.9.4</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-connect-avro-converter</artifactId>
<version>5.2.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>[1.19,)</version>
</dependency>
<dependency>
<groupId>com.github.jcustenborder.parsers</groupId>
<artifactId>extended-log-format</artifactId>
<version>[0.0.2.12, 0.0.2.1000)</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>io.confluent</groupId>
<artifactId>kafka-connect-maven-plugin</artifactId>
<version>0.11.2</version>
<executions>
<execution>
<id>hub</id>
<goals>
<goal>kafka-connect</goal>
</goals>
<configuration>
<confluentControlCenterIntegration>true</confluentControlCenterIntegration>
<documentationUrl>https://docs.confluent.io/kafka-connect-spooldir/current/index.html
</documentationUrl>
<componentTypes>
<componentType>source</componentType>
</componentTypes>
<tags>
<tag>File</tag>
<tag>Flume</tag>
<tag>csv</tag>
<tag>json</tag>
</tags>
<title>Kafka Connect Spooldir</title>
<supportProviderName>Confluent, Inc.</supportProviderName>
<supportUrl>https://docs.confluent.io/kafka-connect-spooldir/current/index.html</supportUrl>
<supportSummary><![CDATA[This connector is <a href="https://www.confluent.io/subscription/">supported by Confluent</a> as part of a
<a href="https://www.confluent.io/product/confluent-platform/">Confluent Platform</a> subscription.]]></supportSummary>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractCleanUpPolicy.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
abstract class AbstractCleanUpPolicy implements Closeable {
private static final Logger log = LoggerFactory.getLogger(AbstractCleanUpPolicy.class);
private static final SimpleDateFormat DATE_FORMATTER = new SimpleDateFormat("yyyy-MM-dd");
protected final InputFile inputFile;
protected final File errorPath;
protected final File finishedPath;
protected AbstractCleanUpPolicy(InputFile inputFile, File errorPath, File finishedPath) {
this.inputFile = inputFile;
this.errorPath = errorPath;
this.finishedPath = finishedPath;
}
public static AbstractCleanUpPolicy create(AbstractSourceConnectorConfig config, InputFile inputFile) throws IOException {
final AbstractCleanUpPolicy result;
switch (config.cleanupPolicy) {
case MOVE:
result = new Move(inputFile, config.errorPath, config.finishedPath);
break;
case MOVEBYDATE:
result = new MoveByDate(inputFile, config.errorPath, config.finishedPath);
break;
case DELETE:
result = new Delete(inputFile, config.errorPath, config.finishedPath);
break;
case NONE:
result = new None(inputFile, config.errorPath, config.finishedPath);
break;
default:
throw new UnsupportedOperationException(
String.format("%s is not supported", config.cleanupPolicy)
);
}
return result;
}
protected boolean createDirectory(File directory) {
if (directory.exists()) {
return true;
}
if (!directory.mkdir()) {
log.error("Cannot make directory - " + directory.getAbsolutePath());
return false;
}
if (!directory.setWritable(true)) {
log.error("Cannot make directory writable - " + directory.getAbsolutePath());
return false;
}
return true;
}
@Override
public void close() throws IOException {
this.inputFile.close();
}
/**
* Method is used to handle file cleanup when processing the file has errored.
*/
public void error() throws IOException {
close();
log.error(
"Error during processing, moving {} to {}.",
this.inputFile,
this.errorPath
);
this.inputFile.moveToDirectory(this.errorPath);
}
/**
* Method is used to handle file cleanup when processing the file was successful.
*/
public void success() throws IOException {
close();
}
static class Move extends AbstractCleanUpPolicy {
protected Move(InputFile inputFile, File errorPath, File finishedPath) {
super(inputFile, errorPath, finishedPath);
}
@Override
public void success() throws IOException {
super.success();
this.inputFile.moveToDirectory(this.finishedPath);
}
}
static class MoveByDate extends AbstractCleanUpPolicy {
protected MoveByDate(InputFile inputFile, File errorPath, File finishedPath) {
super(inputFile, errorPath, finishedPath);
}
@Override
public void success() throws IOException {
super.success();
// Setup directory named as the file created date
File subDirectory = new File(this.finishedPath, DATE_FORMATTER.format(this.inputFile.lastModified()));
log.trace("Finished path: {}", subDirectory);
if (createDirectory(subDirectory)) {
this.inputFile.moveToDirectory(subDirectory);
} else {
this.inputFile.moveToDirectory(this.finishedPath);
}
}
}
static class Delete extends AbstractCleanUpPolicy {
protected Delete(InputFile inputFile, File errorPath, File finishedPath) {
super(inputFile, errorPath, finishedPath);
}
@Override
public void success() throws IOException {
super.success();
this.inputFile.delete();
}
}
static class None extends AbstractCleanUpPolicy {
protected None(InputFile inputFile, File errorPath, File finishedPath) {
super(inputFile, errorPath, finishedPath);
}
@Override
public void success() throws IOException {
super.success();
log.trace("Leaving {}", this.inputFile);
}
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSchemaGenerator.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import net.sourceforge.argparse4j.ArgumentParsers;
import net.sourceforge.argparse4j.inf.ArgumentParser;
import net.sourceforge.argparse4j.inf.ArgumentParserException;
import net.sourceforge.argparse4j.inf.Namespace;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.AbstractMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public abstract class AbstractSchemaGenerator<CONFIG extends AbstractSpoolDirSourceConnectorConfig> {
static final String DUMMY_SCHEMA;
static final Map<String, Object> DEFAULTS;
private static final Logger log = LoggerFactory.getLogger(AbstractSchemaGenerator.class);
static {
String dummySchema;
try {
dummySchema = ObjectMapperFactory.INSTANCE.writeValueAsString(SchemaBuilder.struct().build());
} catch (JsonProcessingException e) {
dummySchema = null;
}
DUMMY_SCHEMA = dummySchema;
}
static {
Map<String, Object> defaultSettings = new LinkedHashMap<>();
defaultSettings.put(AbstractSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, ".*");
defaultSettings.put(AbstractSourceConnectorConfig.INPUT_PATH_CONFIG, "/tmp/input");
defaultSettings.put(AbstractSourceConnectorConfig.FINISHED_PATH_CONFIG, "/tmp/finish");
defaultSettings.put(AbstractSourceConnectorConfig.ERROR_PATH_CONFIG, "/tmp/error");
defaultSettings.put(AbstractSpoolDirSourceConnectorConfig.VALUE_SCHEMA_CONF, DUMMY_SCHEMA);
defaultSettings.put(AbstractSpoolDirSourceConnectorConfig.KEY_SCHEMA_CONF, DUMMY_SCHEMA);
defaultSettings.put(AbstractSourceConnectorConfig.TOPIC_CONF, "dummy");
defaultSettings.put(AbstractSpoolDirSourceConnectorConfig.SCHEMA_GENERATION_ENABLED_CONF, "true");
DEFAULTS = ImmutableMap.copyOf(defaultSettings);
}
protected CONFIG config;
public AbstractSchemaGenerator(Map<String, ?> settings) {
Map<String, Object> copySettings = new LinkedHashMap<>(settings);
for (Map.Entry<String, Object> kvp : DEFAULTS.entrySet()) {
if (!copySettings.containsKey(kvp.getKey())) {
copySettings.put(kvp.getKey(), kvp.getValue());
}
}
this.config = config(copySettings);
}
public static void main(String... args) throws Exception {
ArgumentParser parser = ArgumentParsers.newArgumentParser("CsvSchemaGenerator")
.defaultHelp(true)
.description("Generate a schema based on a file.");
parser.addArgument("-t", "--type")
.required(true)
.choices("csv", "json")
.help("The type of generator to use.");
parser.addArgument("-c", "--config")
.type(File.class);
parser.addArgument("-f", "--file")
.type(File.class)
.required(true)
.help("The data file to generate the schema from.");
parser.addArgument("-i", "--id")
.nargs("*")
.help("Field(s) to use as an identifier.");
parser.addArgument("-o", "--output")
.type(File.class)
.help("Output location to write the configuration to. Stdout is default.");
Namespace ns = null;
try {
ns = parser.parseArgs(args);
} catch (ArgumentParserException ex) {
parser.handleError(ex);
System.exit(1);
}
File inputFile = ns.get("file");
List<String> ids = ns.getList("id");
if (null == ids) {
ids = ImmutableList.of();
}
Map<String, Object> settings = new LinkedHashMap<>();
File inputPropertiesFile = ns.get("config");
if (null != inputPropertiesFile) {
Properties inputProperties = new Properties();
try (FileInputStream inputStream = new FileInputStream(inputPropertiesFile)) {
inputProperties.load(inputStream);
}
for (String s : inputProperties.stringPropertyNames()) {
Object v = inputProperties.getProperty(s);
settings.put(s, v);
}
}
final AbstractSchemaGenerator generator;
final String type = ns.getString("type");
if ("csv".equalsIgnoreCase(type)) {
generator = new CsvSchemaGenerator(settings);
} else if ("json".equalsIgnoreCase(type)) {
generator = new JsonSchemaGenerator(settings);
} else {
throw new UnsupportedOperationException(
String.format("'%s' is not a supported schema generator type", type)
);
}
Map.Entry<Schema, Schema> kvp = generator.generate(inputFile, ids);
Properties properties = new Properties();
properties.putAll(settings);
properties.setProperty(AbstractSpoolDirSourceConnectorConfig.KEY_SCHEMA_CONF, ObjectMapperFactory.INSTANCE.writeValueAsString(kvp.getKey()));
properties.setProperty(AbstractSpoolDirSourceConnectorConfig.VALUE_SCHEMA_CONF, ObjectMapperFactory.INSTANCE.writeValueAsString(kvp.getValue()));
String output = ns.getString("output");
final String comment = "Configuration was dynamically generated. Please verify before submitting.";
if (Strings.isNullOrEmpty(output)) {
properties.store(System.out, comment);
} else {
try (FileOutputStream outputStream = new FileOutputStream(output)) {
properties.store(outputStream, comment);
}
}
}
protected abstract CONFIG config(Map<String, ?> settings);
protected abstract Map<String, Schema.Type> determineFieldTypes(InputStream inputStream) throws IOException;
void addField(SchemaBuilder builder, String name, Schema.Type schemaType) {
log.trace("addField() - name = {} schemaType = {}", name, schemaType);
builder.field(
name,
SchemaBuilder.type(schemaType).optional().build()
);
}
public Map.Entry<Schema, Schema> generate(File inputFile, List<String> keyFields) throws IOException {
log.trace("generate() - inputFile = '{}', keyFields = {}", inputFile, keyFields);
final Map<String, Schema.Type> fieldTypes;
log.info("Determining fields from {}", inputFile);
try (InputStream inputStream = new FileInputStream(inputFile)) {
fieldTypes = determineFieldTypes(inputStream);
}
log.trace("generate() - Building key schema.");
SchemaBuilder keySchemaBuilder = SchemaBuilder.struct()
.name(this.config.schemaGenerationKeyName);
for (String keyFieldName : keyFields) {
log.trace("generate() - Adding keyFieldName field '{}'", keyFieldName);
if (fieldTypes.containsKey(keyFieldName)) {
Schema.Type schemaType = fieldTypes.get(keyFieldName);
addField(keySchemaBuilder, keyFieldName, schemaType);
} else {
log.warn("Key field '{}' is not in the data.", keyFieldName);
}
}
log.trace("generate() - Building value schema.");
SchemaBuilder valueSchemaBuilder = SchemaBuilder.struct()
.name(this.config.schemaGenerationValueName);
for (Map.Entry<String, Schema.Type> kvp : fieldTypes.entrySet()) {
addField(valueSchemaBuilder, kvp.getKey(), kvp.getValue());
}
return new AbstractMap.SimpleEntry<>(keySchemaBuilder.build(), valueSchemaBuilder.build());
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnector.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
import org.apache.kafka.connect.source.SourceConnector;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
public abstract class AbstractSourceConnector<CONF extends AbstractSourceConnectorConfig> extends SourceConnector {
protected Map<String, String> settings;
protected CONF config;
protected abstract CONF config(Map<String, ?> settings);
@Override
public void start(Map<String, String> settings) {
this.config = config(settings);
this.settings = settings;
}
@Override
public List<Map<String, String>> taskConfigs(int taskCount) {
List<Map<String, String>> result = new ArrayList<>();
for (int i = 0; i < taskCount; i++) {
Map<String, String> taskConfig = new LinkedHashMap<>(this.settings);
taskConfig.put(AbstractSourceConnectorConfig.TASK_INDEX_CONF, Integer.toString(i));
taskConfig.put(AbstractSourceConnectorConfig.TASK_COUNT_CONF, Integer.toString(taskCount));
result.add(taskConfig);
}
return result;
}
@Override
public void stop() {
}
@Override
public String version() {
return VersionUtil.version(this.getClass());
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnectorConfig.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder;
import com.github.jcustenborder.kafka.connect.utils.config.ConfigUtils;
import com.github.jcustenborder.kafka.connect.utils.config.ValidEnum;
import com.github.jcustenborder.kafka.connect.utils.config.ValidPattern;
import com.github.jcustenborder.kafka.connect.utils.config.recommenders.Recommenders;
import com.github.jcustenborder.kafka.connect.utils.config.validators.Validators;
import com.github.jcustenborder.kafka.connect.utils.config.validators.filesystem.ValidDirectoryWritable;
import com.google.common.collect.ImmutableList;
import com.google.common.io.PatternFilenameFilter;
import org.apache.kafka.common.config.AbstractConfig;
import org.apache.kafka.common.config.ConfigDef;
import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
public abstract class AbstractSourceConnectorConfig extends AbstractConfig {
public static final String FINISHED_PATH_CONFIG = "finished.path";
public static final String ERROR_PATH_CONFIG = "error.path";
public static final String INPUT_FILE_PATTERN_CONF = "input.file.pattern";
public static final String HALT_ON_ERROR_CONF = "halt.on.error";
public static final String FILE_MINIMUM_AGE_MS_CONF = "file.minimum.age.ms";
public static final String FILE_SORT_ATTRIBUTES_CONF = "files.sort.attributes";
public static final String INPUT_PATH_WALK_RECURSIVELY = "input.path.walk.recursively";
public static final boolean INPUT_PATH_WALK_RECURSIVELY_DEFAULT = false;
static final String INPUT_PATH_WALK_RECURSIVELY_DOC = "If enabled, any sub-directories dropped under `input.path` will be recursively walked looking for files matching the configured `input.file.pattern`. After processing is complete the discovered sub directory structure (as well as files within them) will handled according to the configured `cleanup.policy` (i.e. moved or deleted etc). For each discovered file, the walked sub-directory path will be set as a header named `file.relative.path`";
public static final String CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH = "cleanup.policy.maintain.relative.path";
static final boolean CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH_DEFAULT = false;
static final String CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH_DOC = "If `" + INPUT_PATH_WALK_RECURSIVELY + "` is enabled in combination with this flag being `true`, the walked sub-directories which contained files will be retained as-is under the `input.path`. The actual files within the sub-directories will moved (with a copy of the sub-dir structure) or deleted as per the `cleanup.policy` defined, but the parent sub-directory structure will remain.";
public static final String PROCESSING_FILE_EXTENSION_CONF = "processing.file.extension";
//RecordProcessorConfig
public static final String BATCH_SIZE_CONF = "batch.size";
public static final String PROCESSING_FILE_EXTENSION_DEFAULT = ".PROCESSING";
public static final String TOPIC_CONF = "topic";
public static final String EMPTY_POLL_WAIT_MS_CONF = "empty.poll.wait.ms";
public static final String CLEANUP_POLICY_CONF = "cleanup.policy";
public static final String CLEANUP_POLICY_DOC = "Determines how the connector should cleanup the " +
"files that have been successfully processed. NONE leaves the files in place which could " +
"cause them to be reprocessed if the connector is restarted. DELETE removes the file from the " +
"filesystem. MOVE will move the file to a finished directory. MOVEBYDATE will move the file to " +
"a finished directory with subdirectories by date";
public static final String GROUP_FILESYSTEM = "File System";
public static final String GROUP_GENERAL = "General";
//DirectoryMonitorConfig
//PollingDirectoryMonitorConfig
public static final String INPUT_PATH_CONFIG = "input.path";
public static final String TIMESTAMP_MODE_CONF = "timestamp.mode";
public static final String GROUP_TIMESTAMP = "Timestamps";
static final String BATCH_SIZE_DOC = "The number of records that should be returned with each batch.";
static final int BATCH_SIZE_DEFAULT = 1000;
static final String TOPIC_DOC = "The Kafka topic to write the data to.";
static final String INPUT_PATH_DOC = "The directory to read files that will be processed. This directory must exist and be writable by the user running Kafka Connect.";
static final String FINISHED_PATH_DOC = "The directory to place files that have been successfully processed. This directory must exist and be writable by the user running Kafka Connect.";
static final String ERROR_PATH_DOC = "The directory to place files in which have error(s). This directory must exist and be writable by the user running Kafka Connect.";
static final String INPUT_FILE_PATTERN_DOC = "Regular expression to check input file names against. This expression " +
"must match the entire filename. The equivalent of Matcher.matches().";
static final String HALT_ON_ERROR_DOC = "Should the task halt when it encounters an error or continue to the next file.";
static final String FILE_MINIMUM_AGE_MS_DOC = "The amount of time in milliseconds after the file was last written to before the file can be processed.";
static final String PROCESSING_FILE_EXTENSION_DOC = "Before a file is processed, a flag is created in its directory to indicate the file is being handled. " +
"The flag file has the same name as the file, but with this property appended as a suffix.";
static final String EMPTY_POLL_WAIT_MS_DOC = "The amount of time to wait if a poll returns an empty list of records.";
static final String TIMESTAMP_FIELD_DOC = "The field in the value schema that will contain the parsed timestamp for the record. " +
"This field cannot be marked as optional and must be a " +
"[Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html)";
static final String TIMESTAMP_MODE_DOC = "Determines how the connector will set the timestamp for the " +
"[ConnectRecord](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/connector/ConnectRecord.html#timestamp()). " +
"If set to `Field` then the timestamp will be read from a field in the value. This field cannot be optional and must be " +
"a [Timestamp](https://kafka.apache.org/0102/javadoc/org/apache/kafka/connect/data/Schema.html). Specify the field " +
" in `" + AbstractSpoolDirSourceConnectorConfig.TIMESTAMP_FIELD_CONF + "`. " +
"If set to `FILE_TIME` then " +
"the last modified time of the file will be used. If set to `PROCESS_TIME` the time the record is read will be used.";
static final String FILE_SORT_ATTRIBUTES_DOC = "The attributes each file will use to determine the sort order. " +
"`Name` is name of the file. `Length` is the length of the file preferring larger files first. `LastModified` is " +
"the LastModified attribute of the file preferring older files first.";
public static final String TASK_INDEX_CONF = "task.index";
static final String TASK_INDEX_DOC = "Internal setting to the connector used to instruct a " +
"task on which files to select. The connector will override this setting.";
public static final String TASK_COUNT_CONF = "task.count";
static final String TASK_COUNT_DOC = "Internal setting to the connector used to instruct a " +
"task on which files to select. The connector will override this setting.";
public static final String TASK_PARTITIONER_CONF = "task.partitioner";
static final String TASK_PARTITIONER_DOC = "The task partitioner implementation is used when the " +
"connector is configured to use more than one task. This is used by each task to identify " +
"which files will be processed by that task. This ensures that each file is only assigned to " +
"one task.";
public static final String FILE_BUFFER_SIZE_CONF = "file.buffer.size.bytes";
static final String FILE_BUFFER_SIZE_DOC = "The size of buffer for the BufferedInputStream that will be used to " +
"interact with the file system.";
public final File inputPath;
public final File finishedPath;
public final File errorPath;
public final boolean haltOnError;
public final long minimumFileAgeMS;
public final int batchSize;
public final String topic;
public final long emptyPollWaitMs;
public final String processingFileExtension;
public final TimestampMode timestampMode;
public final CleanupPolicy cleanupPolicy;
public final PatternFilenameFilter inputFilenameFilter;
public final List<FileAttribute> fileSortAttributes;
public final int taskIndex;
public final int taskCount;
public final TaskPartitioner taskPartitioner;
public final boolean bufferedInputStream;
public final int fileBufferSizeBytes;
public final boolean inputPathWalkRecursively;
public final boolean inputPathWalkRecursivelyRetainSubDirs;
public final boolean finishedPathRequired() {
boolean result;
switch (this.cleanupPolicy) {
case MOVE:
case MOVEBYDATE:
result = true;
break;
default:
result = false;
}
return result;
}
public AbstractSourceConnectorConfig(ConfigDef definition, Map<?, ?> originals, boolean bufferedInputStream) {
super(definition, originals);
this.bufferedInputStream = bufferedInputStream;
this.inputPath = ConfigUtils.getAbsoluteFile(this, INPUT_PATH_CONFIG);
this.cleanupPolicy = ConfigUtils.getEnum(CleanupPolicy.class, this, CLEANUP_POLICY_CONF);
if (finishedPathRequired()) {
this.finishedPath = ConfigUtils.getAbsoluteFile(this, FINISHED_PATH_CONFIG);
} else {
this.finishedPath = null;
}
this.errorPath = ConfigUtils.getAbsoluteFile(this, ERROR_PATH_CONFIG);
this.haltOnError = this.getBoolean(HALT_ON_ERROR_CONF);
this.minimumFileAgeMS = this.getLong(FILE_MINIMUM_AGE_MS_CONF);
this.batchSize = this.getInt(BATCH_SIZE_CONF);
this.topic = this.getString(TOPIC_CONF);
this.emptyPollWaitMs = this.getLong(EMPTY_POLL_WAIT_MS_CONF);
this.processingFileExtension = this.getString(PROCESSING_FILE_EXTENSION_CONF);
this.timestampMode = ConfigUtils.getEnum(TimestampMode.class, this, TIMESTAMP_MODE_CONF);
final String inputPatternText = this.getString(INPUT_FILE_PATTERN_CONF);
final Pattern inputPattern = Pattern.compile(inputPatternText);
this.inputFilenameFilter = new PatternFilenameFilter(inputPattern);
this.fileSortAttributes = ConfigUtils.getEnums(FileAttribute.class, this, FILE_SORT_ATTRIBUTES_CONF);
this.taskIndex = getInt(TASK_INDEX_CONF);
this.taskCount = getInt(TASK_COUNT_CONF);
this.taskPartitioner = ConfigUtils.getEnum(TaskPartitioner.class, this, TASK_PARTITIONER_CONF);
this.inputPathWalkRecursively = this.getBoolean(INPUT_PATH_WALK_RECURSIVELY);
this.inputPathWalkRecursivelyRetainSubDirs = this.getBoolean(CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH);
if (bufferedInputStream) {
this.fileBufferSizeBytes = getInt(FILE_BUFFER_SIZE_CONF);
} else {
this.fileBufferSizeBytes = 0;
}
}
protected static ConfigDef config(boolean bufferedInputStream) {
final ConfigDef result = new ConfigDef()
.define(
ConfigKeyBuilder.of(TOPIC_CONF, ConfigDef.Type.STRING)
.documentation(TOPIC_DOC)
.group(GROUP_GENERAL)
.importance(ConfigDef.Importance.HIGH)
.build()
).define(
ConfigKeyBuilder.of(BATCH_SIZE_CONF, ConfigDef.Type.INT)
.documentation(BATCH_SIZE_DOC)
.importance(ConfigDef.Importance.LOW)
.defaultValue(BATCH_SIZE_DEFAULT)
.group(GROUP_GENERAL)
.build()
).define(
ConfigKeyBuilder.of(EMPTY_POLL_WAIT_MS_CONF, ConfigDef.Type.LONG)
.documentation(EMPTY_POLL_WAIT_MS_DOC)
.importance(ConfigDef.Importance.LOW)
.defaultValue(500L)
.validator(ConfigDef.Range.between(1L, Long.MAX_VALUE))
.group(GROUP_GENERAL)
.build()
)
.define(
ConfigKeyBuilder.of(CLEANUP_POLICY_CONF, ConfigDef.Type.STRING)
.documentation(CLEANUP_POLICY_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.validator(ValidEnum.of(CleanupPolicy.class))
.defaultValue(CleanupPolicy.MOVE.toString())
.group(GROUP_FILESYSTEM)
.build()
)
.define(
ConfigKeyBuilder.of(INPUT_PATH_CONFIG, ConfigDef.Type.STRING)
.documentation(INPUT_PATH_DOC)
.importance(ConfigDef.Importance.HIGH)
.validator(ValidDirectoryWritable.of())
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(FINISHED_PATH_CONFIG, ConfigDef.Type.STRING)
.documentation(FINISHED_PATH_DOC)
.importance(ConfigDef.Importance.HIGH)
.defaultValue("")
.recommender(
Recommenders.visibleIf(CLEANUP_POLICY_CONF, CleanupPolicy.MOVE.toString())
)
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(ERROR_PATH_CONFIG, ConfigDef.Type.STRING)
.documentation(ERROR_PATH_DOC)
.importance(ConfigDef.Importance.HIGH)
.validator(ValidDirectoryWritable.of())
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(INPUT_FILE_PATTERN_CONF, ConfigDef.Type.STRING)
.documentation(INPUT_FILE_PATTERN_DOC)
.importance(ConfigDef.Importance.HIGH)
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(HALT_ON_ERROR_CONF, ConfigDef.Type.BOOLEAN)
.documentation(HALT_ON_ERROR_DOC)
.importance(ConfigDef.Importance.HIGH)
.defaultValue(true)
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(FILE_MINIMUM_AGE_MS_CONF, ConfigDef.Type.LONG)
.documentation(FILE_MINIMUM_AGE_MS_DOC)
.importance(ConfigDef.Importance.LOW)
.group(GROUP_FILESYSTEM)
.defaultValue(0L)
.validator(ConfigDef.Range.atLeast(0L))
.build()
).define(
ConfigKeyBuilder.of(PROCESSING_FILE_EXTENSION_CONF, ConfigDef.Type.STRING)
.documentation(PROCESSING_FILE_EXTENSION_DOC)
.importance(ConfigDef.Importance.LOW)
.validator(ValidDirectoryWritable.of())
.group(GROUP_FILESYSTEM)
.defaultValue(PROCESSING_FILE_EXTENSION_DEFAULT)
.validator(ValidPattern.of("^.*\\..+$"))
.build()
).define(
ConfigKeyBuilder.of(TIMESTAMP_MODE_CONF, ConfigDef.Type.STRING)
.documentation(TIMESTAMP_MODE_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.group(GROUP_TIMESTAMP)
.defaultValue(TimestampMode.PROCESS_TIME.toString())
.validator(ValidEnum.of(TimestampMode.class))
.build()
).define(
ConfigKeyBuilder.of(FILE_SORT_ATTRIBUTES_CONF, ConfigDef.Type.LIST)
.documentation(FILE_SORT_ATTRIBUTES_DOC)
.importance(ConfigDef.Importance.LOW)
.validator(Validators.validEnum(FileAttribute.class))
.group(GROUP_FILESYSTEM)
.defaultValue(ImmutableList.of(FileAttribute.NameAsc.name()))
.build()
).define(
ConfigKeyBuilder.of(TASK_INDEX_CONF, ConfigDef.Type.INT)
.documentation(TASK_INDEX_DOC)
.importance(ConfigDef.Importance.LOW)
.validator(ConfigDef.Range.atLeast(0))
.group(GROUP_GENERAL)
.defaultValue(0)
.build()
).define(
ConfigKeyBuilder.of(TASK_COUNT_CONF, ConfigDef.Type.INT)
.documentation(TASK_COUNT_DOC)
.importance(ConfigDef.Importance.LOW)
.validator(ConfigDef.Range.atLeast(1))
.group(GROUP_GENERAL)
.defaultValue(1)
.build()
).define(
ConfigKeyBuilder.of(TASK_PARTITIONER_CONF, ConfigDef.Type.STRING)
.documentation(TASK_PARTITIONER_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.validator(Validators.validEnum(TaskPartitioner.class))
.defaultValue(TaskPartitioner.ByName.toString())
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(INPUT_PATH_WALK_RECURSIVELY, ConfigDef.Type.BOOLEAN)
.documentation(INPUT_PATH_WALK_RECURSIVELY_DOC)
.importance(ConfigDef.Importance.LOW)
.defaultValue(INPUT_PATH_WALK_RECURSIVELY_DEFAULT)
.group(GROUP_FILESYSTEM)
.build()
).define(
ConfigKeyBuilder.of(CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH, ConfigDef.Type.BOOLEAN)
.documentation(CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH_DOC)
.importance(ConfigDef.Importance.LOW)
.defaultValue(CLEANUP_POLICY_MAINTAIN_RELATIVE_PATH_DEFAULT)
.group(GROUP_FILESYSTEM)
.build()
);
if (bufferedInputStream) {
result.define(
ConfigKeyBuilder.of(FILE_BUFFER_SIZE_CONF, ConfigDef.Type.INT)
.documentation(FILE_BUFFER_SIZE_DOC)
.importance(ConfigDef.Importance.LOW)
.validator(ConfigDef.Range.atLeast(1))
.defaultValue(128 * 1024)
.group(GROUP_FILESYSTEM)
.build()
);
}
return result;
}
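// Illustrative sketch (not part of the original source): a minimal settings map for this
// ConfigDef, using the constant names defined above rather than literal key strings. The
// topic name, paths, and pattern below are hypothetical values.
//
//   Map<String, String> settings = new HashMap<>();
//   settings.put(TOPIC_CONF, "spooldir-topic");
//   settings.put(INPUT_PATH_CONFIG, "/data/input");
//   settings.put(ERROR_PATH_CONFIG, "/data/error");
//   settings.put(FINISHED_PATH_CONFIG, "/data/finished"); // required because CLEANUP_POLICY_CONF defaults to MOVE
//   settings.put(INPUT_FILE_PATTERN_CONF, "^.*\\.csv$");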
public enum TimestampMode {
FIELD,
FILE_TIME,
PROCESS_TIME
}
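// Illustrative note: FIELD reads the record timestamp from a Timestamp field in the value,
// FILE_TIME uses the file's last-modified time, and PROCESS_TIME leaves the record timestamp
// null so it is assigned when the record is produced (see addRecord() in
// AbstractSpoolDirSourceTask).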
public enum CleanupPolicy {
NONE,
DELETE,
MOVE,
MOVEBYDATE
}
public enum FileAttribute {
NameAsc,
NameDesc,
LengthAsc,
LengthDesc,
LastModifiedAsc,
LastModifiedDesc
}
public enum TaskPartitioner {
ByName
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceTask.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.source.SourceTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableMap;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public abstract class AbstractSourceTask<CONF extends AbstractSourceConnectorConfig> extends SourceTask {
private static final Logger log = LoggerFactory.getLogger(AbstractSourceTask.class);
protected Map<String, ?> sourcePartition;
protected CONF config;
private final Stopwatch processingTime = Stopwatch.createUnstarted();
protected InputFile inputFile;
private boolean hasRecords = false;
private static void checkDirectory(String key, File directoryPath) {
if (log.isInfoEnabled()) {
log.info("Checking if directory {} '{}' exists.",
key,
directoryPath
);
}
String errorMessage = String.format(
"Directory for '%s' '%s' does not exist ",
key,
directoryPath
);
if (!directoryPath.isDirectory()) {
throw new ConnectException(
errorMessage,
new FileNotFoundException(directoryPath.getAbsolutePath())
);
}
if (log.isInfoEnabled()) {
log.info("Checking to ensure {} '{}' is writable ", key, directoryPath);
}
errorMessage = String.format(
"Directory for '%s' '%s' it not writable.",
key,
directoryPath
);
File temporaryFile = null;
try {
temporaryFile = File.createTempFile(".permission", ".testing", directoryPath);
} catch (IOException ex) {
throw new ConnectException(
errorMessage,
ex
);
} finally {
try {
if (null != temporaryFile && temporaryFile.exists()) {
Preconditions.checkState(temporaryFile.delete(), "Unable to delete temp file in %s", directoryPath);
}
} catch (Exception ex) {
if (log.isWarnEnabled()) {
log.warn("Exception thrown while deleting {}.", temporaryFile, ex);
}
}
}
}
protected abstract CONF config(Map<String, ?> settings);
protected abstract void configure(InputFile inputFile, Long lastOffset) throws IOException;
protected abstract List<SourceRecord> process() throws IOException;
protected abstract long recordOffset();
@Override
public void start(Map<String, String> settings) {
this.config = config(settings);
checkDirectory(AbstractSourceConnectorConfig.INPUT_PATH_CONFIG, this.config.inputPath);
checkDirectory(AbstractSourceConnectorConfig.ERROR_PATH_CONFIG, this.config.errorPath);
if (this.config.finishedPathRequired()) {
checkDirectory(AbstractSourceConnectorConfig.FINISHED_PATH_CONFIG, this.config.finishedPath);
}
this.inputFileDequeue = new InputFileDequeue(this.config);
}
@Override
public void stop() {
log.info("Stopping task.");
try {
if (null != this.inputFile) {
this.inputFile.close();
}
if (null != this.cleanUpPolicy) {
this.cleanUpPolicy.close();
}
} catch (IOException ex) {
log.error("Exception thrown while closing {}", this.inputFile);
}
}
@Override
public String version() {
return VersionUtil.version(this.getClass());
}
InputFileDequeue inputFileDequeue;
int emptyCount = 0;
long recordCount;
@Override
public List<SourceRecord> poll() throws InterruptedException {
log.trace("poll()");
List<SourceRecord> results = read();
if (results.isEmpty()) {
emptyCount++;
if (emptyCount > 1) {
log.trace("read() returned empty list. Sleeping {} ms.", this.config.emptyPollWaitMs);
Thread.sleep(this.config.emptyPollWaitMs);
}
return null;
}
emptyCount = 0;
log.trace("read() returning {} result(s)", results.size());
return results;
}
/**
 * Formats a byte count in a human-readable form. Special thanks to
 * https://stackoverflow.com/questions/3758606/how-to-convert-byte-size-into-human-readable-format-in-java
 *
 * @param bytes The number of bytes to format.
 * @param si    If true, use SI units (powers of 1000); otherwise use binary units (powers of 1024).
 * @return The formatted byte count.
 */
public static String humanReadableByteCount(long bytes, boolean si) {
final int unit = si ? 1000 : 1024;
if (bytes < unit) return bytes + " B";
int exp = (int) (Math.log(bytes) / Math.log(unit));
String pre = (si ? "kMGTPE" : "KMGTPE").charAt(exp - 1) + (si ? "" : "i");
return String.format("%.1f %sB", bytes / Math.pow(unit, exp), pre);
}
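// Illustrative examples (derived from the formula above): humanReadableByteCount(1536, false)
// returns "1.5 KiB" and humanReadableByteCount(1048576, false) returns "1.0 MiB", while
// humanReadableByteCount(1536, true) returns "1.5 kB".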
private void recordProcessingTime() {
final long secondsElapsed = processingTime.elapsed(TimeUnit.SECONDS);
final long bytesPerSecond;
if (0L == secondsElapsed || 0L == this.inputFile.length()) {
bytesPerSecond = 0L;
} else {
bytesPerSecond = this.inputFile.length() / secondsElapsed;
}
if (bytesPerSecond > 0) {
log.info(
"Finished processing {} record(s) in {} second(s). Processing speed {} per second.",
this.recordCount,
secondsElapsed,
humanReadableByteCount(bytesPerSecond, false)
);
} else {
log.info(
"Finished processing {} record(s) in {} second(s).",
this.recordCount,
secondsElapsed
);
}
}
AbstractCleanUpPolicy cleanUpPolicy;
public List<SourceRecord> read() {
try {
if (!hasRecords) {
if (null != this.inputFile) {
recordProcessingTime();
this.inputFile.close();
this.cleanUpPolicy.success();
this.inputFile = null;
}
log.trace("read() - polling for next file.");
InputFile nextFile = this.inputFileDequeue.poll();
log.trace("read() - nextFile = '{}'", nextFile);
if (null == nextFile) {
log.trace("read() - No next file found.");
return new ArrayList<>();
}
this.inputFile = nextFile;
try {
this.sourcePartition = ImmutableMap.of(
"fileName", this.inputFile.getName()
);
log.info("Opening {}", this.inputFile);
Long lastOffset = null;
log.trace("looking up offset for {}", this.sourcePartition);
Map<String, Object> offset = this.context.offsetStorageReader().offset(this.sourcePartition);
if (null != offset && !offset.isEmpty()) {
Number number = (Number) offset.get("offset");
lastOffset = number.longValue();
}
this.cleanUpPolicy = AbstractCleanUpPolicy.create(this.config, this.inputFile);
this.recordCount = 0;
log.trace("read() - calling configure(lastOffset={})", lastOffset);
configure(this.inputFile, lastOffset);
} catch (Exception ex) {
throw new ConnectException(ex);
}
processingTime.reset();
processingTime.start();
}
List<SourceRecord> records = process();
this.hasRecords = !records.isEmpty();
return records;
} catch (Exception ex) {
long recordOffset;
try {
recordOffset = recordOffset();
} catch (Exception e) {
log.error("Exception thrown while calling recordOffset()", e);
recordOffset = -1;
}
log.error("Exception encountered processing line {} of {}.", recordOffset, this.inputFile, ex);
try {
this.cleanUpPolicy.error();
} catch (IOException e) {
log.warn("Exception while while closing cleanup policy", ex);
}
if (this.config.haltOnError) {
throw new ConnectException(ex);
} else {
return new ArrayList<>();
}
}
}
protected Map<String, ?> offset() {
return ImmutableMap.of(
"offset",
recordOffset()
);
}
protected SourceRecord record(
SchemaAndValue key,
SchemaAndValue value,
Long timestamp) {
Map<String, ?> sourceOffset = offset();
SourceRecord result = new SourceRecord(
this.sourcePartition,
sourceOffset,
this.config.topic,
null,
null != key ? key.schema() : null,
null != key ? key.value() : null,
value.schema(),
value.value(),
timestamp,
this.inputFile.metadata().headers(recordOffset())
);
return result;
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnector.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.VersionUtil;
import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public abstract class AbstractSpoolDirSourceConnector<CONF extends AbstractSpoolDirSourceConnectorConfig> extends AbstractSourceConnector<CONF> {
private static final Logger log = LoggerFactory.getLogger(AbstractSpoolDirSourceConnector.class);
protected abstract AbstractSchemaGenerator<CONF> generator(Map<String, String> settings);
@Override
public String version() {
return VersionUtil.version(this.getClass());
}
@Override
public void start(final Map<String, String> input) {
super.start(input);
this.config = config(input);
final Map<String, String> settings = new LinkedHashMap<>(input);
if (this.config.schemasRequired() && (null == this.config.valueSchema || null == this.config.keySchema)) {
log.info("Key or Value schema was not defined. Running schema generator.");
AbstractSchemaGenerator<CONF> generator = generator(settings);
try {
List<File> inputFiles = Arrays.stream(this.config.inputPath.listFiles(this.config.inputFilenameFilter))
.limit(5)
.collect(Collectors.toList());
Preconditions.checkState(
!inputFiles.isEmpty(),
"Could not find any input file(s) to infer schema from."
);
Map<String, Map.Entry<Schema, Schema>> schemas = new HashMap<>();
Multimap<String, File> schemaToFiles = HashMultimap.create();
for (File inputFile : inputFiles) {
Map.Entry<Schema, Schema> schemaEntry = generator.generate(inputFile, this.config.keyFields);
String schema = ObjectMapperFactory.INSTANCE.writeValueAsString(schemaEntry.getValue());
schemaToFiles.put(schema, inputFile);
schemas.put(schema, schemaEntry);
}
Map<String, Collection<File>> schemaToFilesMap = schemaToFiles.asMap();
if (1 != schemaToFilesMap.keySet().size()) {
StringBuilder builder = new StringBuilder();
builder.append("More than one schema was found for the input pattern.\n");
for (String schema : schemaToFilesMap.keySet()) {
builder.append("Schema: ");
builder.append(schema);
builder.append("\n");
for (File f : schemaToFilesMap.get(schema)) {
builder.append(" ");
builder.append(f);
builder.append("\n");
}
}
throw new DataException(builder.toString());
}
Map.Entry<Schema, Schema> schemaPair = null;
for (Map.Entry<Schema, Schema> s : schemas.values()) {
schemaPair = s;
break;
}
if (null == schemaPair) {
throw new DataException("Schema could not be generated.");
}
final String keySchema = ObjectMapperFactory.INSTANCE.writeValueAsString(schemaPair.getKey());
log.info("Setting {} to {}", AbstractSpoolDirSourceConnectorConfig.KEY_SCHEMA_CONF, keySchema);
final String valueSchema = ObjectMapperFactory.INSTANCE.writeValueAsString(schemaPair.getValue());
log.info("Setting {} to {}", AbstractSpoolDirSourceConnectorConfig.VALUE_SCHEMA_CONF, valueSchema);
settings.put(AbstractSpoolDirSourceConnectorConfig.KEY_SCHEMA_CONF, keySchema);
settings.put(AbstractSpoolDirSourceConnectorConfig.VALUE_SCHEMA_CONF, valueSchema);
} catch (IOException e) {
throw new ConnectException("Exception thrown while generating schema", e);
}
}
this.settings = settings;
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnectorConfig.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.github.jcustenborder.kafka.connect.utils.config.ConfigKeyBuilder;
import com.github.jcustenborder.kafka.connect.utils.config.recommenders.Recommenders;
import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigDef.Type;
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
@SuppressWarnings("WeakerAccess")
public abstract class AbstractSpoolDirSourceConnectorConfig extends AbstractSourceConnectorConfig {
public static final String TIMESTAMP_FIELD_CONF = "timestamp.field";
public static final String KEY_SCHEMA_CONF = "key.schema";
public static final String VALUE_SCHEMA_CONF = "value.schema";
public static final String PARSER_TIMESTAMP_DATE_FORMATS_CONF = "parser.timestamp.date.formats";
public static final String PARSER_TIMESTAMP_TIMEZONE_CONF = "parser.timestamp.timezone";
public static final String SCHEMA_GENERATION_KEY_FIELDS_CONF = "schema.generation.key.fields";
public static final String SCHEMA_GENERATION_KEY_NAME_CONF = "schema.generation.key.name";
public static final String SCHEMA_GENERATION_VALUE_NAME_CONF = "schema.generation.value.name";
public static final String SCHEMA_GENERATION_ENABLED_CONF = "schema.generation.enabled";
public static final String GROUP_SCHEMA_GENERATION = "Schema Generation";
public static final String GROUP_SCHEMA = "Schema";
static final String KEY_SCHEMA_DOC = "The schema for the key written to Kafka.";
static final String VALUE_SCHEMA_DOC = "The schema for the value written to Kafka.";
static final String PARSER_TIMESTAMP_DATE_FORMATS_DOC = "The date formats that are expected in the file. This is a list " +
"of strings that will be used to parse the date fields in order. The most accurate date format should be the first " +
"in the list. Take a look at the Java documentation for more info. " +
"https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html";
static final List<String> PARSER_TIMESTAMP_DATE_FORMATS_DEFAULT = Arrays.asList("yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd' 'HH:mm:ss");
static final String PARSER_TIMESTAMP_TIMEZONE_DOC = "The timezone that all of the dates will be parsed with.";
static final String PARSER_TIMESTAMP_TIMEZONE_DEFAULT = "UTC";
static final String SCHEMA_GENERATION_KEY_FIELDS_DOC = "The field(s) to use to build a key schema. This is only used during schema generation.";
static final String SCHEMA_GENERATION_KEY_NAME_DOC = "The name of the generated key schema.";
static final String SCHEMA_GENERATION_VALUE_NAME_DOC = "The name of the generated value schema.";
static final String SCHEMA_GENERATION_ENABLED_DOC = "Flag to determine if schemas should be dynamically generated. If set " +
" to true, `" + KEY_SCHEMA_CONF + "` and `" + VALUE_SCHEMA_CONF + "` can be omitted, but `" + SCHEMA_GENERATION_KEY_NAME_CONF + "` " +
"and `" + SCHEMA_GENERATION_VALUE_NAME_CONF + "` must be set.";
private static final Logger log = LoggerFactory.getLogger(AbstractSpoolDirSourceConnectorConfig.class);
public final Schema keySchema;
public final Schema valueSchema;
public final SimpleDateFormat[] parserTimestampDateFormats;
public final TimeZone parserTimestampTimezone;
public final String timestampField;
public final List<String> keyFields;
public final boolean schemaGenerationEnabled;
public final String schemaGenerationKeyName;
public final String schemaGenerationValueName;
public AbstractSpoolDirSourceConnectorConfig(final boolean isTask, boolean bufferedInputStream, ConfigDef configDef, Map<String, ?> settings) {
super(configDef, settings, bufferedInputStream);
this.keyFields = this.getList(SCHEMA_GENERATION_KEY_FIELDS_CONF);
this.schemaGenerationEnabled = this.getBoolean(SCHEMA_GENERATION_ENABLED_CONF);
this.schemaGenerationKeyName = this.getString(SCHEMA_GENERATION_KEY_NAME_CONF);
this.schemaGenerationValueName = this.getString(SCHEMA_GENERATION_VALUE_NAME_CONF);
String timestampTimezone = this.getString(PARSER_TIMESTAMP_TIMEZONE_CONF);
this.parserTimestampTimezone = TimeZone.getTimeZone(timestampTimezone);
List<SimpleDateFormat> results = new ArrayList<>();
List<String> formats = this.getList(PARSER_TIMESTAMP_DATE_FORMATS_CONF);
for (String s : formats) {
SimpleDateFormat dateFormat = new SimpleDateFormat(s);
dateFormat.setTimeZone(this.parserTimestampTimezone);
results.add(dateFormat);
}
this.parserTimestampDateFormats = results.toArray(new SimpleDateFormat[results.size()]);
this.keySchema = readSchema(KEY_SCHEMA_CONF);
this.valueSchema = readSchema(VALUE_SCHEMA_CONF);
if (!this.schemaGenerationEnabled) {
Preconditions.checkNotNull(
this.keySchema,
"'%s' must be set if '%s' = false.",
KEY_SCHEMA_CONF,
SCHEMA_GENERATION_ENABLED_CONF
);
Preconditions.checkNotNull(
this.valueSchema,
"'%s' must be set if '%s' = false.",
VALUE_SCHEMA_CONF,
SCHEMA_GENERATION_ENABLED_CONF
);
} else {
Preconditions.checkState(
!Strings.isNullOrEmpty(this.schemaGenerationKeyName),
"'%s' must be set if '%s' = true.",
SCHEMA_GENERATION_KEY_NAME_CONF,
SCHEMA_GENERATION_ENABLED_CONF
);
Preconditions.checkState(
!Strings.isNullOrEmpty(this.schemaGenerationValueName),
"'%s' must be set if '%s' = true.",
SCHEMA_GENERATION_VALUE_NAME_CONF,
SCHEMA_GENERATION_ENABLED_CONF
);
}
if (TimestampMode.FIELD == this.timestampMode) {
this.timestampField = this.getString(TIMESTAMP_FIELD_CONF);
if (Strings.isNullOrEmpty(this.timestampField)) {
throw new ConnectException(
String.format(
"When `%s` is set to `%s`, `%s` must be set to a timestamp field. Cannot be null or empty.",
TIMESTAMP_MODE_CONF,
TimestampMode.FIELD,
TIMESTAMP_FIELD_CONF
)
);
}
log.trace("ctor() - Looking for timestamp field '{}'", this.timestampField);
Field timestampField = this.valueSchema.field(this.timestampField);
if (null == timestampField ||
timestampField.schema().isOptional() ||
!Timestamp.LOGICAL_NAME.equals(timestampField.schema().name())) {
String example;
try {
example = ObjectMapperFactory.INSTANCE.writeValueAsString(Timestamp.SCHEMA);
} catch (JsonProcessingException e) {
example = null;
}
log.trace("ctor() - example: {}", example);
throw new ConnectException(
String.format(
"Field '%s' must be present and set to a timestamp and cannot be optional. Example %s",
this.timestampField,
example
)
);
}
} else {
this.timestampField = null;
}
if (schemasRequired() && (isTask && null == this.valueSchema)) {
throw new DataException(
String.format("'%s' must be set to a valid schema.", VALUE_SCHEMA_CONF)
);
}
}
protected static ConfigDef config(boolean bufferedInputStream) {
ConfigDef.Recommender schemaRecommender = new ConfigDef.Recommender() {
@Override
public List<Object> validValues(String key, Map<String, Object> settings) {
return ImmutableList.of();
}
@Override
public boolean visible(String key, Map<String, Object> settings) {
boolean schemaGenerationEnabled = (boolean) settings.get(SCHEMA_GENERATION_ENABLED_CONF);
if (KEY_SCHEMA_CONF.endsWith(key)) {
return !schemaGenerationEnabled;
}
if (VALUE_SCHEMA_CONF.endsWith(key)) {
return !schemaGenerationEnabled;
}
if (SCHEMA_GENERATION_KEY_NAME_CONF.endsWith(key)) {
return schemaGenerationEnabled;
}
if (SCHEMA_GENERATION_VALUE_NAME_CONF.endsWith(key)) {
return schemaGenerationEnabled;
}
if (SCHEMA_GENERATION_KEY_FIELDS_CONF.endsWith(key)) {
return schemaGenerationEnabled;
}
return true;
}
};
return AbstractSourceConnectorConfig.config(bufferedInputStream)
.define(
ConfigKeyBuilder.of(KEY_SCHEMA_CONF, Type.STRING)
.documentation(KEY_SCHEMA_DOC)
.importance(ConfigDef.Importance.HIGH)
.group(GROUP_SCHEMA)
.defaultValue("")
.width(ConfigDef.Width.LONG)
.build()
).define(
ConfigKeyBuilder.of(VALUE_SCHEMA_CONF, Type.STRING)
.documentation(VALUE_SCHEMA_DOC)
.importance(ConfigDef.Importance.HIGH)
.group(GROUP_SCHEMA)
.defaultValue("")
.width(ConfigDef.Width.LONG)
.build()
).define(
ConfigKeyBuilder.of(SCHEMA_GENERATION_ENABLED_CONF, Type.BOOLEAN)
.documentation(SCHEMA_GENERATION_ENABLED_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.group(GROUP_SCHEMA_GENERATION)
.defaultValue(false)
.recommender(schemaRecommender)
.build()
)
.define(
ConfigKeyBuilder.of(SCHEMA_GENERATION_KEY_FIELDS_CONF, Type.LIST)
.documentation(SCHEMA_GENERATION_KEY_FIELDS_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.group(GROUP_SCHEMA_GENERATION)
.defaultValue(ImmutableList.of())
.recommender(schemaRecommender)
.build()
).define(
ConfigKeyBuilder.of(SCHEMA_GENERATION_KEY_NAME_CONF, Type.STRING)
.documentation(SCHEMA_GENERATION_KEY_NAME_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.group(GROUP_SCHEMA_GENERATION)
.defaultValue("com.github.jcustenborder.kafka.connect.model.Key")
.recommender(schemaRecommender)
.build()
).define(
ConfigKeyBuilder.of(SCHEMA_GENERATION_VALUE_NAME_CONF, Type.STRING)
.documentation(SCHEMA_GENERATION_VALUE_NAME_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.group(GROUP_SCHEMA_GENERATION)
.defaultValue("com.github.jcustenborder.kafka.connect.model.Value")
.recommender(schemaRecommender)
.build()
)
.define(
ConfigKeyBuilder.of(PARSER_TIMESTAMP_TIMEZONE_CONF, Type.STRING)
.documentation(PARSER_TIMESTAMP_TIMEZONE_DOC)
.importance(ConfigDef.Importance.LOW)
.group(GROUP_TIMESTAMP)
.defaultValue(PARSER_TIMESTAMP_TIMEZONE_DEFAULT)
.build()
).define(
ConfigKeyBuilder.of(PARSER_TIMESTAMP_DATE_FORMATS_CONF, Type.LIST)
.documentation(PARSER_TIMESTAMP_DATE_FORMATS_DOC)
.importance(ConfigDef.Importance.LOW)
.group(GROUP_TIMESTAMP)
.defaultValue(PARSER_TIMESTAMP_DATE_FORMATS_DEFAULT)
.build()
).define(
ConfigKeyBuilder.of(TIMESTAMP_FIELD_CONF, Type.STRING)
.documentation(TIMESTAMP_FIELD_DOC)
.importance(ConfigDef.Importance.MEDIUM)
.group(GROUP_TIMESTAMP)
.defaultValue("")
.recommender(
Recommenders.visibleIf(TIMESTAMP_MODE_CONF, TimestampMode.FIELD.toString())
)
.build()
);
}
public abstract boolean schemasRequired();
Schema readSchema(final String key) {
String schema = this.getString(key);
Schema result;
if (Strings.isNullOrEmpty(schema)) {
result = null;
} else {
try {
result = ObjectMapperFactory.INSTANCE.readValue(schema, Schema.class);
} catch (IOException e) {
throw new DataException("Could not read schema from '" + key + "'", e);
}
}
return result;
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceTask.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.data.Parser;
import com.github.jcustenborder.kafka.connect.utils.data.type.DateTypeParser;
import com.github.jcustenborder.kafka.connect.utils.data.type.TimeTypeParser;
import com.github.jcustenborder.kafka.connect.utils.data.type.TimestampTypeParser;
import com.github.jcustenborder.kafka.connect.utils.data.type.TypeParser;
import com.google.common.collect.ImmutableMap;
import org.apache.kafka.connect.data.Date;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.data.Time;
import org.apache.kafka.connect.data.Timestamp;
import org.apache.kafka.connect.source.SourceRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Map;
public abstract class AbstractSpoolDirSourceTask<CONF extends AbstractSpoolDirSourceConnectorConfig> extends AbstractSourceTask<CONF> {
private static final Logger log = LoggerFactory.getLogger(AbstractSpoolDirSourceTask.class);
protected Parser parser;
@Override
public void start(Map<String, String> settings) {
super.start(settings);
this.parser = new Parser();
Map<Schema, TypeParser> dateTypeParsers = ImmutableMap.of(
Timestamp.SCHEMA, new TimestampTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats),
Date.SCHEMA, new DateTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats),
Time.SCHEMA, new TimeTypeParser(this.config.parserTimestampTimezone, this.config.parserTimestampDateFormats)
);
for (Map.Entry<Schema, TypeParser> kvp : dateTypeParsers.entrySet()) {
this.parser.registerTypeParser(kvp.getKey(), kvp.getValue());
}
}
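// Note: registering these parsers lets the shared Parser convert raw string values into
// Connect Date/Time/Timestamp logical types using the configured
// parser.timestamp.date.formats and parser.timestamp.timezone settings.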
protected void addRecord(List<SourceRecord> records, SchemaAndValue key, SchemaAndValue value) {
final Long timestamp;
switch (this.config.timestampMode) {
case FIELD:
Struct valueStruct = (Struct) value.value();
log.trace("addRecord() - Reading date from timestamp field '{}'", this.config.timestampField);
final java.util.Date date = (java.util.Date) valueStruct.get(this.config.timestampField);
timestamp = date.getTime();
break;
case FILE_TIME:
timestamp = this.inputFile.lastModified();
break;
case PROCESS_TIME:
timestamp = null;
break;
default:
throw new UnsupportedOperationException(
String.format("Unsupported timestamp mode. %s", this.config.timestampMode)
);
}
SourceRecord sourceRecord = record(
key,
value,
timestamp
);
recordCount++;
records.add(sourceRecord);
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractTaskPartitionerPredicate.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.google.common.hash.Hashing;
import org.apache.kafka.common.config.ConfigException;
import java.io.File;
import java.util.function.Predicate;
abstract class AbstractTaskPartitionerPredicate implements Predicate<File> {
final int index;
final int count;
protected AbstractTaskPartitionerPredicate(int index, int count) {
this.index = index;
this.count = count;
}
public static Predicate<File> create(AbstractSourceConnectorConfig config) {
Predicate<File> result;
if (config.taskCount == 1) {
result = new None(config.taskIndex, config.taskCount);
} else {
switch (config.taskPartitioner) {
case ByName:
result = new ByName(config.taskIndex, config.taskCount);
break;
default:
throw new ConfigException(
AbstractSourceConnectorConfig.TASK_PARTITIONER_CONF,
config.taskPartitioner.toString(),
"Unsupported value"
);
}
}
return result;
}
/**
 * Pass-through implementation used when only one task is configured. It bypasses the
 * partitioning check so every file matches.
 */
static class None extends AbstractTaskPartitionerPredicate {
None(int index, int count) {
super(index, count);
}
@Override
public boolean test(File file) {
return true;
}
}
/**
 * Partitions files across tasks by hashing the file name, so a given file name is always
 * assigned to the same task.
 */
static class ByName extends AbstractTaskPartitionerPredicate {
protected ByName(int index, int count) {
super(index, count);
}
@Override
public boolean test(File file) {
final int hashcode = Math.abs(
Hashing.adler32()
.hashUnencodedChars(file.getName())
.asInt()
);
return (hashcode % this.count) == index;
}
}
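// Illustrative note: a file is assigned to the task whose index equals
// Math.abs(adler32(fileName)) % task.count, so a given file name always maps to the same
// task and each file is processed by exactly one task.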
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGenerator.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.ICSVParser;
import org.apache.kafka.connect.data.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.LinkedHashMap;
import java.util.Map;
public class CsvSchemaGenerator extends AbstractSchemaGenerator<SpoolDirCsvSourceConnectorConfig> {
private static final Logger log = LoggerFactory.getLogger(CsvSchemaGenerator.class);
public CsvSchemaGenerator(Map<String, ?> settings) {
super(settings);
}
@Override
protected SpoolDirCsvSourceConnectorConfig config(Map<String, ?> settings) {
return new SpoolDirCsvSourceConnectorConfig(false, settings);
}
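// Illustrative note (not in the original source): for a CSV file whose header row is
// "id,first_name", determineFieldTypes() yields {id=STRING, first_name=STRING}. When
// firstRowAsHeader is false the column names are synthesized as column01, column02, ...
// Every field is typed STRING because CSV itself carries no type information.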
@Override
protected Map<String, Schema.Type> determineFieldTypes(InputStream inputStream) throws IOException {
Map<String, Schema.Type> typeMap = new LinkedHashMap<>();
ICSVParser parserBuilder = this.config.createCSVParserBuilder();
try (InputStreamReader reader = new InputStreamReader(inputStream)) {
CSVReaderBuilder readerBuilder = this.config.createCSVReaderBuilder(reader, parserBuilder);
try (CSVReader csvReader = readerBuilder.build()) {
String[] headers = null;
if (this.config.firstRowAsHeader) {
headers = csvReader.readNext();
}
String[] row = csvReader.readNext();
if (null == headers) {
headers = new String[row.length];
for (int i = 1; i <= row.length; i++) {
headers[(i - 1)] = String.format("column%02d", i);
}
}
for (String s : headers) {
typeMap.put(s, Schema.Type.STRING);
}
}
}
return typeMap;
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparator.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.google.common.collect.ComparisonChain;
import java.io.File;
import java.util.Comparator;
import java.util.List;
class FileComparator implements Comparator<File> {
final List<AbstractSourceConnectorConfig.FileAttribute> attributes;
FileComparator(List<AbstractSourceConnectorConfig.FileAttribute> attributes) {
this.attributes = attributes;
}
@Override
public int compare(File f1, File f2) {
ComparisonChain chain = ComparisonChain.start();
for (AbstractSourceConnectorConfig.FileAttribute fileAttribute : this.attributes) {
switch (fileAttribute) {
case NameAsc:
chain = chain.compare(f1.getName(), f2.getName());
break;
case NameDesc:
chain = chain.compare(f2.getName(), f1.getName());
break;
case LengthAsc: // We prefer smaller files first.
chain = chain.compare(f1.length(), f2.length());
break;
case LengthDesc: // We prefer larger files first.
chain = chain.compare(f2.length(), f1.length());
break;
case LastModifiedAsc:
chain = chain.compare(f1.lastModified(), f2.lastModified());
break;
case LastModifiedDesc:
chain = chain.compare(f2.lastModified(), f1.lastModified());
break;
default:
throw new UnsupportedOperationException(
String.format("%s is not a supported FileAttribute.", fileAttribute)
);
}
}
return chain.result();
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/InputFile.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;
import org.apache.commons.compress.compressors.CompressorException;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
public class InputFile implements Closeable {
private static final Logger log = LoggerFactory.getLogger(InputFile.class);
private final File file;
private final File processingFlag;
private final String name;
private final String path;
private final long length;
private final long lastModified;
private final Metadata metadata;
private final AbstractSourceConnectorConfig config;
private final String inputPathSubDir;
InputStreamReader inputStreamReader;
LineNumberReader lineNumberReader;
InputStream inputStream;
InputFile(AbstractSourceConnectorConfig config, File file) {
this.config = config;
this.file = file;
this.name = this.file.getName();
this.path = this.file.getPath();
this.lastModified = this.file.lastModified();
this.length = this.file.length();
String processingFileName = file.getName() + config.processingFileExtension;
this.processingFlag = new File(file.getParentFile(), processingFileName);
this.inputPathSubDir = determineRelativePath(file, config.inputPath);
this.metadata = new Metadata(file, this.inputPathSubDir);
}
static final Map<String, String> SUPPORTED_COMPRESSION_TYPES = ImmutableMap.of(
"bz2", CompressorStreamFactory.BZIP2,
"gz", CompressorStreamFactory.GZIP,
"snappy", CompressorStreamFactory.SNAPPY_RAW,
"lz4", CompressorStreamFactory.LZ4_BLOCK,
"z", CompressorStreamFactory.Z
);
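// Illustrative note: the extension is taken from the full file name, so "orders.csv.gz"
// has extension "gz" and openStream() wraps the raw stream in a GZIP decompressor stream.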
private static String determineRelativePath(File inputFile, File inputPath) {
Path relative = inputPath.toPath().relativize(inputFile.toPath());
String subDir = relative.toString();
if ("".equals(subDir)) {
return null;
}
return subDir;
}
public String inputPathSubDir() {
return this.inputPathSubDir;
}
public File file() {
return this.file;
}
public File processingFlag() {
return this.processingFlag;
}
public Metadata metadata() {
return this.metadata;
}
public InputStream inputStream() {
return this.inputStream;
}
public InputStream openStream() throws IOException {
if (null != this.inputStream) {
throw new IOException(
String.format("File %s is already open", this.file)
);
}
final String extension = Files.getFileExtension(file.getName());
log.trace("openStream() - fileName = '{}' extension = '{}'", file, extension);
this.inputStream = new FileInputStream(this.file);
if (this.config.bufferedInputStream) {
log.trace(
"openStream() - Wrapping '{}' in a BufferedInputStream with bufferSize = {}",
this.file,
this.config.fileBufferSizeBytes
);
this.inputStream = new BufferedInputStream(this.inputStream, this.config.fileBufferSizeBytes);
}
if (SUPPORTED_COMPRESSION_TYPES.containsKey(extension)) {
final String compressor = SUPPORTED_COMPRESSION_TYPES.get(extension);
log.info("Decompressing {} as {}", file, compressor);
final CompressorStreamFactory compressorStreamFactory = new CompressorStreamFactory();
try {
this.inputStream = compressorStreamFactory.createCompressorInputStream(
compressor,
this.inputStream
);
} catch (CompressorException e) {
throw new IOException("Exception thrown while creating compressor stream " + compressor, e);
}
}
startProcessing();
return inputStream;
}
public void startProcessing() throws IOException {
log.info("Creating processing flag {}", this.processingFlag);
Files.touch(this.processingFlag);
}
public InputStreamReader openInputStreamReader(Charset charset) throws IOException {
if (null == this.inputStreamReader) {
InputStream inputStream = null != this.inputStream ? this.inputStream : openStream();
this.inputStreamReader = new InputStreamReader(inputStream, charset);
}
return this.inputStreamReader;
}
public InputStreamReader inputStreamReader() {
return this.inputStreamReader;
}
public LineNumberReader openLineNumberReader(Charset charset) throws IOException {
if (null == this.lineNumberReader) {
InputStreamReader inputStreamReader = this.inputStreamReader != null ?
this.inputStreamReader : openInputStreamReader(charset);
this.lineNumberReader = new LineNumberReader(inputStreamReader);
}
return this.lineNumberReader;
}
public LineNumberReader lineNumberReader() {
return this.lineNumberReader;
}
@Override
public String toString() {
return this.file.toString();
}
@Override
public void close() throws IOException {
if (null != this.lineNumberReader) {
this.lineNumberReader.close();
}
if (null != this.inputStreamReader) {
this.inputStreamReader.close();
}
if (null != this.inputStream) {
log.info("Closing {}", this.file);
this.inputStream.close();
}
if (this.processingFlag.exists()) {
log.info("Removing processing flag {}", this.processingFlag);
if (!this.processingFlag.delete()) {
log.warn("Could not remove processing flag {}", this.processingFlag);
}
}
}
public String getName() {
return name;
}
public String getPath() {
return path;
}
public long length() {
return this.length;
}
public long lastModified() {
return this.lastModified;
}
private List<File> getInputPathSubDirsToCleanup() {
List<File> inputPathSubDirsToCleanup = null;
if (this.inputPathSubDir != null && !config.inputPathWalkRecursivelyRetainSubDirs) {
inputPathSubDirsToCleanup = new ArrayList<File>();
File lastSubDir = this.config.inputPath;
for (String subDirName : this.inputPathSubDir.split(File.separator)) {
lastSubDir = new File(lastSubDir, subDirName);
inputPathSubDirsToCleanup.add(lastSubDir);
}
Collections.reverse(inputPathSubDirsToCleanup);
}
return inputPathSubDirsToCleanup;
}
private void cleanupInputDirSubDirs() {
List<File> inputPathSubDirsToCleanup = this.getInputPathSubDirsToCleanup();
if (inputPathSubDirsToCleanup != null) {
for (File subDir : inputPathSubDirsToCleanup) {
try {
final File[] children = subDir.listFiles();
if (children == null || children.length == 0) {
if (!subDir.delete()) {
log.error("Failed to delete input.path sub-directory: {}", subDir);
} else {
log.info("Cleaned up input.path sub-directory: {}", subDir);
}
} else {
log.info("Cannot clean up input.path sub-directory as it is not empty: {}", subDir);
}
} catch (SecurityException e) {
log.error("SecurityException thrown while trying to delete input.path sub-directory: {}", subDir, e);
}
}
}
}
public void moveToDirectory(File outputDirectory) {
if (this.inputPathSubDir != null) {
outputDirectory = new File(outputDirectory, this.inputPathSubDir);
if (!outputDirectory.isDirectory()) {
outputDirectory.mkdirs();
}
}
File outputFile = new File(outputDirectory, this.file.getName());
try {
if (this.file.exists()) {
log.info("Moving {} to {}", this.file, outputFile);
Files.move(this.file, outputFile);
}
} catch (IOException e) {
log.error("Exception thrown while trying to move {} to {}", this.file, outputFile, e);
}
this.cleanupInputDirSubDirs();
}
public void delete() {
log.info("Deleting {}", this.file);
if (!this.file.delete()) {
log.warn("Could not delete {}", this.file);
}
this.cleanupInputDirSubDirs();
}
public boolean exists() {
return this.file.exists();
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/InputFileDequeue.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.google.common.collect.ForwardingDeque;
import com.google.common.io.PatternFilenameFilter;
import org.apache.kafka.common.utils.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class InputFileDequeue extends ForwardingDeque<InputFile> {
private static final Logger log = LoggerFactory.getLogger(InputFileDequeue.class);
private final AbstractSourceConnectorConfig config;
private final FileComparator fileComparator;
private final Predicate<File> processingFileExists;
private final Predicate<File> fileMinimumAge;
private final Predicate<File> filePartitionSelector;
public InputFileDequeue(AbstractSourceConnectorConfig config) {
this.config = config;
this.fileComparator = new FileComparator(config.fileSortAttributes);
this.processingFileExists = new ProcessingFileExistsPredicate(config.processingFileExtension);
this.fileMinimumAge = new MinimumFileAgePredicate(config.minimumFileAgeMS);
this.filePartitionSelector = AbstractTaskPartitionerPredicate.create(config);
}
Deque<InputFile> files;
static File processingFile(String processingFileExtension, File input) {
String fileName = input.getName() + processingFileExtension;
return new File(input.getParentFile(), fileName);
}
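// Illustrative note: assuming a processing file extension of ".PROCESSING" (a hypothetical
// value), processingFile(".PROCESSING", new File("/in/a.csv")) resolves to
// /in/a.csv.PROCESSING in the same directory as the input file.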
@Override
protected Deque<InputFile> delegate() {
if (null != files && !files.isEmpty()) {
return files;
}
log.trace("delegate() - Searching for file(s) in {}", this.config.inputPath);
final File[] input;
if (this.config.inputPathWalkRecursively) {
final PatternFilenameFilter walkerFilenameFilter = this.config.inputFilenameFilter;
Predicate<File> filenameFilterPredicate = file -> walkerFilenameFilter.accept(file.getParentFile(), file.getName());
try (Stream<Path> filesWalk = Files.walk(this.config.inputPath.toPath())) {
input = filesWalk.map(Path::toFile)
.filter(File::isFile)
.filter(filenameFilterPredicate)
.toArray(File[]::new);
} catch (IOException e) {
log.error("Unexpected eror walking {}: {}", this.config.inputPath.toPath(), e.getMessage(), e);
return new ArrayDeque<>();
}
} else {
input = this.config.inputPath.listFiles(this.config.inputFilenameFilter);
}
if (null == input || input.length == 0) {
log.info("No files matching {} were found in {}", AbstractSourceConnectorConfig.INPUT_FILE_PATTERN_CONF, this.config.inputPath);
return new ArrayDeque<>();
}
log.trace("delegate() - Found {} potential file(s).", input.length);
this.files = Arrays.stream(input)
.filter(this.filePartitionSelector)
.filter(this.processingFileExists)
.filter(this.fileMinimumAge)
.sorted(this.fileComparator)
.map(f -> new InputFile(this.config, f))
.collect(Collectors.toCollection(ArrayDeque::new));
return this.files;
}
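// Note on ordering: each scan first filters the candidates (task partition, no in-flight
// processing flag, minimum age) and only then sorts the survivors with the configured
// FileComparator before queueing them.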
static class ProcessingFileExistsPredicate implements Predicate<File> {
final String processingFileExtension;
ProcessingFileExistsPredicate(String processingFileExtension) {
this.processingFileExtension = processingFileExtension;
}
@Override
public boolean test(File file) {
File processingFile = processingFile(this.processingFileExtension, file);
log.trace("Checking for processing file: {}", processingFile);
return !processingFile.exists();
}
}
static class MinimumFileAgePredicate implements Predicate<File> {
final long minimumFileAgeMS;
final Time time;
/**
* @param minimumFileAgeMS Minimum time since last write in milliseconds.
*/
MinimumFileAgePredicate(long minimumFileAgeMS) {
this(minimumFileAgeMS, Time.SYSTEM);
}
/**
 * Constructor used only for testing.
 *
 * @param minimumFileAgeMS Minimum time since last write in milliseconds.
 * @param time             Time source; injectable so tests can control the clock.
 */
MinimumFileAgePredicate(long minimumFileAgeMS, Time time) {
this.minimumFileAgeMS = minimumFileAgeMS;
this.time = time;
}
@Override
public boolean test(File file) {
long fileAgeMS = this.time.milliseconds() - file.lastModified();
if (fileAgeMS < 0L) {
log.warn("File {} has a date in the future.", file);
}
if (fileAgeMS >= this.minimumFileAgeMS) {
return true;
} else {
log.debug("Skipping {} because it does not meet the minimum age.", file);
return false;
}
}
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGenerator.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.JsonNode;
import com.github.jcustenborder.kafka.connect.utils.jackson.ObjectMapperFactory;
import org.apache.kafka.connect.data.Schema;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
public class JsonSchemaGenerator extends AbstractSchemaGenerator<SpoolDirJsonSourceConnectorConfig> {
public JsonSchemaGenerator(Map<String, ?> settings) {
super(settings);
}
@Override
protected SpoolDirJsonSourceConnectorConfig config(Map<String, ?> settings) {
return new SpoolDirJsonSourceConnectorConfig(false, settings);
}
@Override
protected Map<String, Schema.Type> determineFieldTypes(InputStream inputStream) throws IOException {
Map<String, Schema.Type> typeMap = new LinkedHashMap<>();
JsonFactory factory = new JsonFactory();
try (JsonParser parser = factory.createParser(inputStream)) {
Iterator<JsonNode> iterator = ObjectMapperFactory.INSTANCE.readValues(parser, JsonNode.class);
while (iterator.hasNext()) {
JsonNode node = iterator.next();
if (node.isObject()) {
Iterator<String> fieldNames = node.fieldNames();
while (fieldNames.hasNext()) {
typeMap.put(fieldNames.next(), Schema.Type.STRING);
}
break;
}
}
}
return typeMap;
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/Metadata.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;
import org.apache.kafka.connect.header.ConnectHeaders;
import org.apache.kafka.connect.header.Headers;
import java.io.File;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Class used to capture metadata about an InputFile and expose it as record headers.
 */
class Metadata {
static final String HEADER_PATH = "file.path";
static final String HEADER_PARENT_DIR_NAME = "file.parent.dir.name";
static final String HEADER_NAME = "file.name";
static final String HEADER_NAME_WITHOUT_EXTENSION = "file.name.without.extension";
static final String HEADER_LAST_MODIFIED = "file.last.modified";
static final String HEADER_LENGTH = "file.length";
static final String HEADER_OFFSET = "file.offset";
static final String HEADER_FILE_RELATIVE_PATH = "file.relative.path";
final String path;
final String name;
final String nameWithoutExtension;
final Date lastModified;
final long length;
final String relativePath;
String parentDirName = null;
public static final Map<String, String> HEADER_DESCRIPTIONS;
static {
Map<String, String> result = new LinkedHashMap<>();
result.put(HEADER_PATH, "The absolute path to the file ingested.");
    result.put(HEADER_PARENT_DIR_NAME, "The parent directory name of the file ingested.");
result.put(HEADER_NAME, "The name part of the file ingested.");
result.put(HEADER_NAME_WITHOUT_EXTENSION, "The file name without the extension part of the file.");
result.put(HEADER_LAST_MODIFIED, "The last modified date of the file.");
result.put(HEADER_LENGTH, "The size of the file in bytes.");
result.put(HEADER_OFFSET, "The offset for this piece of data within the file.");
    result.put(HEADER_FILE_RELATIVE_PATH, "The file's parent sub-directory, relative to the input.path.");
HEADER_DESCRIPTIONS = ImmutableMap.copyOf(result);
}
public static final String HEADER_DOCS;
static {
StringBuilder builder = new StringBuilder();
HEADER_DESCRIPTIONS.forEach((key, value) -> {
builder.append("* `");
builder.append(key);
builder.append("` - ");
builder.append(value);
builder.append('\n');
});
HEADER_DOCS = builder.toString();
}
public Metadata(File file, String relativePath) {
this.path = file.getAbsolutePath();
this.name = file.getName();
this.lastModified = new Date(file.lastModified());
this.length = file.length();
this.nameWithoutExtension = Files.getNameWithoutExtension(this.name);
if (file.getParentFile() != null) {
this.parentDirName = file.getParentFile().getName();
}
this.relativePath = relativePath;
}
  /**
   * Copies the metadata of the file into the headers of a record.
   *
   * @param offset The offset for this piece of data within the file.
   * @return Returns a Headers object populated with the metadata of the file.
   */
public Headers headers(long offset) {
ConnectHeaders headers = new ConnectHeaders();
headers.addString(HEADER_NAME, this.name);
headers.addString(HEADER_NAME_WITHOUT_EXTENSION, this.nameWithoutExtension);
headers.addString(HEADER_PATH, this.path);
headers.addString(HEADER_PARENT_DIR_NAME, this.parentDirName);
headers.addLong(HEADER_LENGTH, this.length);
headers.addLong(HEADER_OFFSET, offset);
headers.addTimestamp(HEADER_LAST_MODIFIED, this.lastModified);
if (this.relativePath != null) {
headers.addString(HEADER_FILE_RELATIVE_PATH, this.relativePath);
}
return headers;
}
}
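A sketch of what Metadata.headers() yields for a record (hypothetical, not part of the repository; Metadata is package-private, so the sketch assumes the same package, and the file path and offset are made up):
package com.github.jcustenborder.kafka.connect.spooldir;
import org.apache.kafka.connect.header.Headers;
import java.io.File;
public class MetadataHeadersSketch {
  public static void main(String[] args) {
    File file = new File("/data/input/sub/orders.csv"); // hypothetical layout
    Metadata metadata = new Metadata(file, "sub");      // "sub" = dir relative to input.path
    Headers headers = metadata.headers(12L);            // 12 = offset within the file
    // Prints file.name, file.path, file.length, file.offset, and the rest.
    headers.forEach(h -> System.out.println(h.key() + " = " + h.value()));
  }
}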
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnector.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.config.Description;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
import com.github.jcustenborder.kafka.connect.utils.config.Title;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import java.util.Map;
@Title("Avro Source Connector")
@Description("This connector is used to read avro data files from the file system and write their contents " +
"to Kafka. The schema of the file is used to read the data and produce it to Kafka")
@DocumentationImportant("This connector has a dependency on the Confluent Schema Registry specifically kafka-connect-avro-converter. " +
"This dependency is not shipped along with the connector to ensure that there are not potential version mismatch issues. " +
"The easiest way to ensure this component is available is to use one of the Confluent packages or containers for deployment.")
public class SpoolDirAvroSourceConnector extends AbstractSourceConnector<SpoolDirAvroSourceConnectorConfig> {
@Override
protected SpoolDirAvroSourceConnectorConfig config(Map<String, ?> settings) {
return new SpoolDirAvroSourceConnectorConfig(settings);
}
@Override
public Class<? extends Task> taskClass() {
return SpoolDirAvroSourceTask.class;
}
@Override
public ConfigDef config() {
return SpoolDirAvroSourceConnectorConfig.config();
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnectorConfig.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import org.apache.kafka.common.config.ConfigDef;
import java.util.Map;
public class SpoolDirAvroSourceConnectorConfig extends AbstractSourceConnectorConfig {
public SpoolDirAvroSourceConnectorConfig(Map<?, ?> originals) {
super(config(), originals, true);
}
public static ConfigDef config() {
return AbstractSourceConnectorConfig.config(true);
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTask.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import io.confluent.connect.avro.AvroData;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.DatumReader;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.source.SourceRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class SpoolDirAvroSourceTask extends AbstractSourceTask<SpoolDirAvroSourceConnectorConfig> {
private static final Logger log = LoggerFactory.getLogger(SpoolDirAvroSourceTask.class);
long recordOffset;
AvroData avroData = new AvroData(1024);
DataFileReader<GenericContainer> dataFileReader;
DatumReader<GenericContainer> datumReader = new GenericDatumReader<>();
@Override
protected SpoolDirAvroSourceConnectorConfig config(Map<String, ?> settings) {
return new SpoolDirAvroSourceConnectorConfig(settings);
}
@Override
protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
if (null != this.dataFileReader) {
this.dataFileReader.close();
}
inputFile.startProcessing();
this.dataFileReader = new DataFileReader<>(inputFile.file(), datumReader);
this.recordOffset = 0;
    // Resume support: when Connect supplies a previously committed offset,
    // skip the records that were already produced before the restart.
    if (null != lastOffset) {
      while (recordOffset < lastOffset && this.dataFileReader.hasNext()) {
        this.dataFileReader.next();
        recordOffset++;
      }
    }
}
@Override
protected List<SourceRecord> process() throws IOException {
int recordCount = 0;
List<SourceRecord> records = new ArrayList<>(this.config.batchSize);
GenericContainer container = null;
    while (recordCount < this.config.batchSize && dataFileReader.hasNext()) { // cap the batch at batchSize records
container = dataFileReader.next(container);
SchemaAndValue value = avroData.toConnectData(this.dataFileReader.getSchema(), container);
SourceRecord sourceRecord = record(null, value, null);
records.add(sourceRecord);
recordCount++;
recordOffset++;
}
return records;
}
@Override
protected long recordOffset() {
return recordOffset;
}
}
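The configure() method above implements resume-on-restart by discarding already-produced records. A standalone sketch of the same skip-by-index pattern against a plain Avro container file (hypothetical class name, path, and offset; assumes the Avro dependency used above):
package com.github.jcustenborder.kafka.connect.spooldir;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericDatumReader;
import java.io.File;
public class AvroResumeSketch {
  public static void main(String[] args) throws Exception {
    File input = new File("/data/input/orders.avro"); // hypothetical file
    long lastOffset = 3L;                             // offset recovered from Connect
    try (DataFileReader<GenericContainer> reader =
             new DataFileReader<>(input, new GenericDatumReader<>())) {
      long recordOffset = 0;
      while (recordOffset < lastOffset && reader.hasNext()) {
        reader.next(); // discard records that were already produced
        recordOffset++;
      }
      while (reader.hasNext()) {
        System.out.printf("record %d: %s%n", recordOffset++, reader.next());
      }
    }
  }
}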
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.config.Description;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationWarning;
import com.github.jcustenborder.kafka.connect.utils.config.Title;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import java.util.Map;
@Title("Binary File Source Connector")
@Description("This connector is used to read an entire file as a byte array write the data to Kafka.")
@DocumentationImportant("The recommended converter to use is the ByteArrayConverter. " +
"Example: `value.converter=org.apache.kafka.connect.storage.ByteArrayConverter`")
@DocumentationWarning("Large files will be read as a single byte array. This means that the process could " +
    "run out of memory or try to send a message to Kafka that is greater than the max message size. If this happens, " +
    "an exception will be thrown.")
public class SpoolDirBinaryFileSourceConnector extends AbstractSourceConnector<SpoolDirBinaryFileSourceConnectorConfig> {
@Override
protected SpoolDirBinaryFileSourceConnectorConfig config(Map<String, ?> settings) {
return new SpoolDirBinaryFileSourceConnectorConfig(settings);
}
@Override
public Class<? extends Task> taskClass() {
return SpoolDirBinaryFileSourceTask.class;
}
@Override
public ConfigDef config() {
return SpoolDirBinaryFileSourceConnectorConfig.config();
}
}
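A settings sketch for this connector reflecting the ByteArrayConverter note above (hypothetical class name; the topic name and directories are assumptions, while the keys are the connector's standard topic/path/pattern settings):
package com.github.jcustenborder.kafka.connect.spooldir;
import java.util.LinkedHashMap;
import java.util.Map;
public class BinaryConnectorSettingsSketch {
  public static void main(String[] args) {
    Map<String, String> settings = new LinkedHashMap<>();
    settings.put("connector.class",
        "com.github.jcustenborder.kafka.connect.spooldir.SpoolDirBinaryFileSourceConnector");
    settings.put("value.converter", "org.apache.kafka.connect.storage.ByteArrayConverter");
    settings.put("topic", "binary-files");     // assumed topic name
    settings.put("input.path", "/data/input"); // assumed directories
    settings.put("error.path", "/data/error");
    settings.put("finished.path", "/data/finished");
    settings.put("input.file.pattern", "^.*\\.bin$");
    settings.forEach((k, v) -> System.out.println(k + "=" + v));
  }
}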
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnectorConfig.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import org.apache.kafka.common.config.ConfigDef;
import java.util.Map;
public class SpoolDirBinaryFileSourceConnectorConfig extends AbstractSourceConnectorConfig {
public SpoolDirBinaryFileSourceConnectorConfig(Map<?, ?> originals) {
super(config(), originals, true);
}
public static ConfigDef config() {
return AbstractSourceConnectorConfig.config(true);
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTask.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.google.common.io.ByteStreams;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.source.SourceRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class SpoolDirBinaryFileSourceTask extends AbstractSourceTask<SpoolDirBinaryFileSourceConnectorConfig> {
private static final Logger log = LoggerFactory.getLogger(SpoolDirBinaryFileSourceTask.class);
@Override
protected SpoolDirBinaryFileSourceConnectorConfig config(Map<String, ?> settings) {
return new SpoolDirBinaryFileSourceConnectorConfig(settings);
}
@Override
protected void configure(InputFile inputFile, Long lastOffset) throws IOException {
inputFile.openStream();
}
@Override
protected List<SourceRecord> process() throws IOException {
List<SourceRecord> records = new ArrayList<>(1);
if (this.inputFile.inputStream().available() > 0) {
byte[] buffer = ByteStreams.toByteArray(this.inputFile.inputStream());
records.add(
record(
null,
new SchemaAndValue(Schema.BYTES_SCHEMA, buffer),
null
)
);
}
return records;
}
@Override
protected long recordOffset() {
return 0;
}
}
================================================
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector.java
================================================
/**
* Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.jcustenborder.kafka.connect.spooldir;
import com.github.jcustenborder.kafka.connect.utils.config.Description;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationImportant;
import com.github.jcustenborder.kafka.connect.utils.config.DocumentationTip;
import com.github.jcustenborder.kafka.connect.utils.config.Title;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import java.util.Map;
@Title("CSV Source Connector")
@Description("The SpoolDirCsvSourceConnector will monitor the directory specified in `input.path` for files and read them as a CSV " +
"converting each of the records to the strongly typed equivalent specified in `key.schema` and `value.schema`.")
@DocumentationTip("To get a starting point for a schema you can use the following command to generate an all String schema. This " +
"will give you the basic structure of a schema. From there you can changes the types to match what you expect.\n\n" +
".. code-block:: bash\n\n" +
" mvn clean package\n" +
" export CLASSPATH=\"$(find target/kafka-connect-target/usr/share/kafka-connect/kafka-connect-spooldir -type f -name '*.jar' | tr '\\n' ':')\"\n" +
" kafka-run-class com.github.jcustenborder.kafka.connect.spooldir.AbstractSchemaGenerator -t csv -f src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data -c config/CSVExample.properties -i id\n" +
"")
@DocumentationImportant("There are some caveats to running this connector with `" + SpoolDirCsvSourceConnectorConfig.SCHEMA_GENERATION_ENABLED_CONF +
" = true`. If schema generation is enabled the connector will start by reading one of the files that match `" + SpoolDirCsvSourceConnectorConfig.INPUT_FILE_PATTERN_CONF +
"` in the path specified by `" + SpoolDirCsvSourceConnectorConfig.INPUT_PATH_CONFIG + "`. If there are no files when the connector starts or is restarted " +
"the connector will fail to start. If there are different fields in other files they will not be detected. The recommended path is to specify a schema that the " +
"files will be parsed with. This will ensure that data written by this connector to Kafka will be consistent across files that have inconsistent columns. For example " +
"if some files have an optional column that is not always included, create a schema that includes the column marked as optional.")
public class SpoolDirCsvSourceConnector extends AbstractSpoolDirSourceConnector<SpoolDirCsvSourceConnectorConfig> {
@Override
  protected SpoolDirCsvSourceConnectorConfig config(Map<String, ?> settings) {
    return new SpoolDirCsvSourceConnectorConfig(false, settings);
  }
  @Override
  protected AbstractSchemaGenerator<SpoolDirCsvSourceConnectorConfig> generator(Map<String, ?> settings) {
    return new CsvSchemaGenerator(settings);
  }
  @Override
  public Class<? extends Task> taskClass() {
    return SpoolDirCsvSourceTask.class;
  }
  @Override
  public ConfigDef config() {
    return SpoolDirCsvSourceConnectorConfig.config();
  }
}
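A settings sketch following the recommendation above to supply an explicit schema rather than rely on schema generation (hypothetical class name, paths, and topic; the elided "..." values stand for the JSON schema strings produced by a tool such as AbstractSchemaGenerator):
package com.github.jcustenborder.kafka.connect.spooldir;
import java.util.LinkedHashMap;
import java.util.Map;
public class CsvConnectorSettingsSketch {
  public static void main(String[] args) {
    Map<String, String> settings = new LinkedHashMap<>();
    settings.put("connector.class",
        "com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector");
    settings.put("topic", "csv-data");         // assumed topic name
    settings.put("input.path", "/data/input"); // assumed directories
    settings.put("error.path", "/data/error");
    settings.put("finished.path", "/data/finished");
    settings.put("input.file.pattern", "^.*\\.csv$");
    settings.put("csv.first.row.as.header", "true");
    settings.put("key.schema", "...");   // explicit key schema (JSON), elided here
    settings.put("value.schema", "..."); // explicit value schema (JSON), elided here
    settings.forEach((k, v) -> System.out.println(k + "=" + v));
  }
}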
SYMBOL INDEX (407 symbols across 81 files)
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractCleanUpPolicy.java
class AbstractCleanUpPolicy (line 26) | abstract class AbstractCleanUpPolicy implements Closeable {
method AbstractCleanUpPolicy (line 34) | protected AbstractCleanUpPolicy(InputFile inputFile, File errorPath, F...
method create (line 41) | public static AbstractCleanUpPolicy create(AbstractSourceConnectorConf...
method createDirectory (line 65) | protected boolean createDirectory(File directory) {
method close (line 80) | @Override
method error (line 88) | public void error() throws IOException {
method success (line 101) | public void success() throws IOException {
class Move (line 105) | static class Move extends AbstractCleanUpPolicy {
method Move (line 106) | protected Move(InputFile inputFile, File errorPath, File finishedPat...
method success (line 110) | @Override
class MoveByDate (line 117) | static class MoveByDate extends AbstractCleanUpPolicy {
method MoveByDate (line 118) | protected MoveByDate(InputFile inputFile, File errorPath, File finis...
method success (line 122) | @Override
class Delete (line 137) | static class Delete extends AbstractCleanUpPolicy {
method Delete (line 138) | protected Delete(InputFile inputFile, File errorPath, File finishedP...
method success (line 142) | @Override
class None (line 149) | static class None extends AbstractCleanUpPolicy {
method None (line 150) | protected None(InputFile inputFile, File errorPath, File finishedPat...
method success (line 154) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSchemaGenerator.java
class AbstractSchemaGenerator (line 43) | public abstract class AbstractSchemaGenerator<CONFIG extends AbstractSpo...
method AbstractSchemaGenerator (line 75) | public AbstractSchemaGenerator(Map<String, ?> settings) {
method main (line 87) | public static void main(String... args) throws Exception {
method config (line 170) | protected abstract CONFIG config(Map<String, ?> settings);
method determineFieldTypes (line 172) | protected abstract Map<String, Schema.Type> determineFieldTypes(InputS...
method addField (line 174) | void addField(SchemaBuilder builder, String name, Schema.Type schemaTy...
method generate (line 182) | public Map.Entry<Schema, Schema> generate(File inputFile, List<String>...
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnector.java
class AbstractSourceConnector (line 26) | public abstract class AbstractSourceConnector<CONF extends AbstractSourc...
method config (line 29) | protected abstract CONF config(Map<String, ?> settings);
method start (line 31) | @Override
method taskConfigs (line 37) | @Override
method stop (line 51) | @Override
method version (line 56) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnectorConfig.java
class AbstractSourceConnectorConfig (line 35) | public abstract class AbstractSourceConnectorConfig extends AbstractConf...
method finishedPathRequired (line 136) | public final boolean finishedPathRequired() {
method AbstractSourceConnectorConfig (line 152) | public AbstractSourceConnectorConfig(ConfigDef definition, Map<?, ?> o...
method config (line 190) | protected static ConfigDef config(boolean bufferedInputStream) {
type TimestampMode (line 348) | public enum TimestampMode {
type CleanupPolicy (line 354) | public enum CleanupPolicy {
type FileAttribute (line 361) | public enum FileAttribute {
type TaskPartitioner (line 370) | public enum TaskPartitioner {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceTask.java
class AbstractSourceTask (line 37) | public abstract class AbstractSourceTask<CONF extends AbstractSourceConn...
method checkDirectory (line 46) | private static void checkDirectory(String key, File directoryPath) {
method config (line 99) | protected abstract CONF config(Map<String, ?> settings);
method configure (line 101) | protected abstract void configure(InputFile inputFile, Long lastOffset...
method process (line 103) | protected abstract List<SourceRecord> process() throws IOException;
method recordOffset (line 105) | protected abstract long recordOffset();
method start (line 107) | @Override
method stop (line 121) | @Override
method version (line 136) | @Override
method poll (line 145) | @Override
method humanReadableByteCount (line 174) | public static String humanReadableByteCount(long bytes, boolean si) {
method recordProcessingTime (line 182) | private void recordProcessingTime() {
method read (line 211) | public List<SourceRecord> read() {
method offset (line 279) | protected Map<String, ?> offset() {
method record (line 286) | protected SourceRecord record(
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnector.java
class AbstractSpoolDirSourceConnector (line 39) | public abstract class AbstractSpoolDirSourceConnector<CONF extends Abstr...
method generator (line 42) | protected abstract AbstractSchemaGenerator<CONF> generator(Map<String,...
method version (line 44) | @Override
method start (line 49) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnectorConfig.java
class AbstractSpoolDirSourceConnectorConfig (line 44) | @SuppressWarnings("WeakerAccess")
method AbstractSpoolDirSourceConnectorConfig (line 88) | public AbstractSpoolDirSourceConnectorConfig(final boolean isTask, boo...
method config (line 192) | protected static ConfigDef config(boolean bufferedInputStream) {
method schemasRequired (line 304) | public abstract boolean schemasRequired();
method readSchema (line 306) | Schema readSchema(final String key) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceTask.java
class AbstractSpoolDirSourceTask (line 37) | public abstract class AbstractSpoolDirSourceTask<CONF extends AbstractSp...
method start (line 41) | @Override
method addRecord (line 57) | protected void addRecord(List<SourceRecord> records, SchemaAndValue ke...
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractTaskPartitionerPredicate.java
class AbstractTaskPartitionerPredicate (line 24) | abstract class AbstractTaskPartitionerPredicate implements Predicate<Fil...
method AbstractTaskPartitionerPredicate (line 28) | protected AbstractTaskPartitionerPredicate(int index, int count) {
method create (line 33) | public static Predicate<File> create(AbstractSourceConnectorConfig con...
class None (line 58) | static class None extends AbstractTaskPartitionerPredicate {
method None (line 59) | None(int index, int count) {
method test (line 63) | @Override
class ByName (line 72) | static class ByName extends AbstractTaskPartitionerPredicate {
method ByName (line 74) | protected ByName(int index, int count) {
method test (line 78) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGenerator.java
class CsvSchemaGenerator (line 31) | public class CsvSchemaGenerator extends AbstractSchemaGenerator<SpoolDir...
method CsvSchemaGenerator (line 34) | public CsvSchemaGenerator(Map<String, ?> settings) {
method config (line 38) | @Override
method determineFieldTypes (line 43) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparator.java
class FileComparator (line 24) | class FileComparator implements Comparator<File> {
method FileComparator (line 27) | FileComparator(List<AbstractSourceConnectorConfig.FileAttribute> attri...
method compare (line 31) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/InputFile.java
class InputFile (line 40) | public class InputFile implements Closeable {
method InputFile (line 55) | InputFile(AbstractSourceConnectorConfig config, File file) {
method determineRelativePath (line 77) | private static String determineRelativePath(File inputPath, File input...
method inputPathSubDir (line 86) | public String inputPathSubDir() {
method file (line 90) | public File file() {
method processingFlag (line 94) | public File processingFlag() {
method metadata (line 98) | public Metadata metadata() {
method inputStream (line 103) | public InputStream inputStream() {
method openStream (line 107) | public InputStream openStream() throws IOException {
method startProcessing (line 146) | public void startProcessing() throws IOException {
method openInputStreamReader (line 151) | public InputStreamReader openInputStreamReader(Charset charset) throws...
method inputStreamReader (line 160) | public InputStreamReader inputStreamReader() {
method openLineNumberReader (line 164) | public LineNumberReader openLineNumberReader(Charset charset) throws I...
method lineNumberReader (line 173) | public LineNumberReader lineNumberReader() {
method toString (line 178) | @Override
method close (line 183) | @Override
method getName (line 203) | public String getName() {
method getPath (line 207) | public String getPath() {
method length (line 211) | public long length() {
method lastModified (line 215) | public long lastModified() {
method getInputPathSubDirsToCleanup (line 219) | private List<File> getInputPathSubDirsToCleanup() {
method cleanupInputDirSubDirs (line 233) | private void cleanupInputDirSubDirs() {
method moveToDirectory (line 255) | public void moveToDirectory(File outputDirectory) {
method delete (line 278) | public void delete() {
method exists (line 287) | public boolean exists() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/InputFileDequeue.java
class InputFileDequeue (line 35) | public class InputFileDequeue extends ForwardingDeque<InputFile> {
method InputFileDequeue (line 44) | public InputFileDequeue(AbstractSourceConnectorConfig config) {
method processingFile (line 54) | static File processingFile(String processingFileExtension, File input) {
method delegate (line 59) | @Override
class ProcessingFileExistsPredicate (line 102) | static class ProcessingFileExistsPredicate implements Predicate<File> {
method ProcessingFileExistsPredicate (line 105) | ProcessingFileExistsPredicate(String processingFileExtension) {
method test (line 109) | @Override
class MinimumFileAgePredicate (line 117) | static class MinimumFileAgePredicate implements Predicate<File> {
method MinimumFileAgePredicate (line 124) | MinimumFileAgePredicate(long minimumFileAgeMS) {
method MinimumFileAgePredicate (line 134) | MinimumFileAgePredicate(long minimumFileAgeMS, Time time) {
method test (line 140) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGenerator.java
class JsonSchemaGenerator (line 30) | public class JsonSchemaGenerator extends AbstractSchemaGenerator<SpoolDi...
method JsonSchemaGenerator (line 31) | public JsonSchemaGenerator(Map<String, ?> settings) {
method config (line 35) | @Override
method determineFieldTypes (line 40) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/Metadata.java
class Metadata (line 31) | class Metadata {
method Metadata (line 82) | public Metadata(File file, String relativePath) {
method headers (line 101) | public Headers headers(long offset) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnector.java
class SpoolDirAvroSourceConnector (line 26) | @Title("Avro Source Connector")
method config (line 33) | @Override
method taskClass (line 38) | @Override
method config (line 43) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnectorConfig.java
class SpoolDirAvroSourceConnectorConfig (line 22) | public class SpoolDirAvroSourceConnectorConfig extends AbstractSourceCon...
method SpoolDirAvroSourceConnectorConfig (line 25) | public SpoolDirAvroSourceConnectorConfig(Map<?, ?> originals) {
method config (line 29) | public static ConfigDef config() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTask.java
class SpoolDirAvroSourceTask (line 33) | public class SpoolDirAvroSourceTask extends AbstractSourceTask<SpoolDirA...
method config (line 41) | @Override
method configure (line 46) | @Override
method process (line 64) | @Override
method recordOffset (line 80) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector.java
class SpoolDirBinaryFileSourceConnector (line 27) | @Title("Binary File Source Connector")
method config (line 35) | @Override
method taskClass (line 40) | @Override
method config (line 45) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnectorConfig.java
class SpoolDirBinaryFileSourceConnectorConfig (line 22) | public class SpoolDirBinaryFileSourceConnectorConfig extends AbstractSou...
method SpoolDirBinaryFileSourceConnectorConfig (line 23) | public SpoolDirBinaryFileSourceConnectorConfig(Map<?, ?> originals) {
method config (line 27) | public static ConfigDef config() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTask.java
class SpoolDirBinaryFileSourceTask (line 30) | public class SpoolDirBinaryFileSourceTask extends AbstractSourceTask<Spo...
method config (line 33) | @Override
method configure (line 38) | @Override
method process (line 43) | @Override
method recordOffset (line 60) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector.java
class SpoolDirCsvSourceConnector (line 27) | @Title("CSV Source Connector")
method config (line 44) | @Override
method generator (line 49) | @Override
method taskClass (line 54) | @Override
method config (line 59) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorConfig.java
class SpoolDirCsvSourceConnectorConfig (line 37) | class SpoolDirCsvSourceConnectorConfig extends AbstractSpoolDirSourceCon...
method SpoolDirCsvSourceConnectorConfig (line 124) | public SpoolDirCsvSourceConnectorConfig(final boolean isTask, Map<Stri...
method config (line 144) | static ConfigDef config() {
method getChar (line 287) | final char getChar(String key) {
method createCSVParserBuilder (line 292) | public ICSVParser createCSVParserBuilder() {
method createCSVReaderBuilder (line 316) | public CSVReaderBuilder createCSVReaderBuilder(Reader reader, ICSVPars...
method schemasRequired (line 325) | @Override
class CharsetValidator (line 330) | static class CharsetValidator implements ConfigDef.Validator {
method of (line 331) | static CharsetValidator of() {
method ensureValid (line 335) | @Override
method toString (line 349) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTask.java
class SpoolDirCsvSourceTask (line 36) | public class SpoolDirCsvSourceTask extends AbstractSpoolDirSourceTask<Sp...
method config (line 43) | @Override
method configure (line 48) | @Override
method start (line 83) | @Override
method recordOffset (line 88) | @Override
method process (line 100) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnector.java
class SpoolDirJsonSourceConnector (line 28) | @Title("Json Source Connector")
method config (line 47) | @Override
method generator (line 52) | @Override
method taskClass (line 57) | @Override
method config (line 62) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnectorConfig.java
class SpoolDirJsonSourceConnectorConfig (line 22) | class SpoolDirJsonSourceConnectorConfig extends AbstractSpoolDirSourceCo...
method SpoolDirJsonSourceConnectorConfig (line 23) | public SpoolDirJsonSourceConnectorConfig(final boolean isTask, Map<Str...
method schemasRequired (line 27) | @Override
method config (line 32) | public static ConfigDef config() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceTask.java
class SpoolDirJsonSourceTask (line 37) | public class SpoolDirJsonSourceTask extends AbstractSpoolDirSourceTask<S...
method config (line 44) | @Override
method start (line 49) | @Override
method configure (line 55) | @Override
method next (line 78) | JsonNode next() {
method process (line 83) | @Override
method recordOffset (line 123) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnector.java
class SpoolDirLineDelimitedSourceConnector (line 26) | @Title("Line Delimited Source Connector")
method config (line 31) | @Override
method taskClass (line 36) | @Override
method config (line 41) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnectorConfig.java
class SpoolDirLineDelimitedSourceConnectorConfig (line 25) | public class SpoolDirLineDelimitedSourceConnectorConfig extends Abstract...
method SpoolDirLineDelimitedSourceConnectorConfig (line 33) | public SpoolDirLineDelimitedSourceConnectorConfig(Map<?, ?> originals) {
method config (line 38) | public static ConfigDef config() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceTask.java
class SpoolDirLineDelimitedSourceTask (line 29) | public class SpoolDirLineDelimitedSourceTask extends AbstractSourceTask<...
method config (line 32) | @Override
method configure (line 37) | @Override
method process (line 42) | @Override
method recordOffset (line 59) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceConnector.java
class SpoolDirSchemaLessJsonSourceConnector (line 26) | @Title("Schema Less Json Source Connector")
method config (line 35) | @Override
method taskClass (line 40) | @Override
method config (line 45) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceConnectorConfig.java
class SpoolDirSchemaLessJsonSourceConnectorConfig (line 25) | public class SpoolDirSchemaLessJsonSourceConnectorConfig extends Abstrac...
method SpoolDirSchemaLessJsonSourceConnectorConfig (line 33) | public SpoolDirSchemaLessJsonSourceConnectorConfig(Map<?, ?> originals) {
method config (line 38) | public static ConfigDef config() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceTask.java
class SpoolDirSchemaLessJsonSourceTask (line 34) | public class SpoolDirSchemaLessJsonSourceTask extends AbstractSourceTask...
method config (line 37) | @Override
method configure (line 47) | @Override
method process (line 59) | @Override
method recordOffset (line 78) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversion.java
class SchemaConversion (line 28) | public class SchemaConversion {
method SchemaConversion (line 33) | SchemaConversion(Schema valueSchema, List<LogFieldConverter> valueConv...
method convert (line 39) | static SchemaAndValue convert(Schema schema, List<LogFieldConverter> c...
method convert (line 54) | public SchemaAndValue convert(LogEntry entry) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversionBuilder.java
class SchemaConversionBuilder (line 33) | public class SchemaConversionBuilder {
method SchemaConversionBuilder (line 37) | public SchemaConversionBuilder(ElfParser parser) {
method normalizeFieldName (line 41) | static String normalizeFieldName(String fieldName) {
method build (line 51) | public SchemaConversion build() {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnector.java
class SpoolDirELFSourceConnector (line 31) | @Title("Extended Log File Format Source Connector")
method taskConfigs (line 36) | @Override
method stop (line 50) | @Override
method version (line 55) | @Override
method start (line 62) | @Override
method taskClass (line 68) | @Override
method config (line 73) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnectorConfig.java
class SpoolDirELFSourceConnectorConfig (line 23) | class SpoolDirELFSourceConnectorConfig extends AbstractSourceConnectorCo...
method SpoolDirELFSourceConnectorConfig (line 25) | public SpoolDirELFSourceConnectorConfig(Map<String, ?> settings) {
method config (line 29) | public static ConfigDef config(boolean bufferedInputStream) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceTask.java
class SpoolDirELFSourceTask (line 35) | public class SpoolDirELFSourceTask extends AbstractSourceTask<SpoolDirEL...
method config (line 42) | @Override
method start (line 47) | @Override
method configure (line 54) | @Override
method next (line 77) | LogEntry next() throws IOException {
method process (line 82) | @Override
method recordOffset (line 102) | @Override
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LocalDateLogFieldConverter.java
class LocalDateLogFieldConverter (line 25) | public class LocalDateLogFieldConverter extends LogFieldConverter {
method convert (line 28) | @Override
method LocalDateLogFieldConverter (line 35) | public LocalDateLogFieldConverter(String logFieldName, Field field) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LocalTimeLogFieldConverter.java
class LocalTimeLogFieldConverter (line 26) | public class LocalTimeLogFieldConverter extends LogFieldConverter {
method convert (line 29) | @Override
method LocalTimeLogFieldConverter (line 36) | public LocalTimeLogFieldConverter(String logFieldName, Field field) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LogFieldConverter.java
class LogFieldConverter (line 24) | public abstract class LogFieldConverter {
method convert (line 29) | protected abstract Object convert(Object input);
method LogFieldConverter (line 31) | public LogFieldConverter(String logFieldName, Field field) {
method convert (line 36) | public void convert(LogEntry logEntry, Struct struct) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LogFieldConverterFactory.java
class LogFieldConverterFactory (line 28) | public class LogFieldConverterFactory {
method schema (line 30) | static Schema schema(Class<?> logClass, String logFieldName) {
method create (line 55) | public LogFieldConverter create(
method createDateTime (line 122) | public LogFieldConverter createDateTime(SchemaBuilder builder, String ...
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/PrimitiveLogFieldConverter.java
class PrimitiveLogFieldConverter (line 20) | public class PrimitiveLogFieldConverter extends LogFieldConverter {
method convert (line 21) | @Override
method PrimitiveLogFieldConverter (line 26) | public PrimitiveLogFieldConverter(String logFieldName, Field field) {
FILE: src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/TimestampLogFieldConverter.java
class TimestampLogFieldConverter (line 28) | public class TimestampLogFieldConverter extends LogFieldConverter {
method TimestampLogFieldConverter (line 32) | public TimestampLogFieldConverter(Field field, String timeField, Strin...
method convert (line 38) | @Override
method convert (line 43) | @Override
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractCleanUpPolicyTest.java
class AbstractCleanUpPolicyTest (line 21) | public abstract class AbstractCleanUpPolicyTest<T extends AbstractCleanU...
method create (line 30) | protected abstract T create(
method defineInputPathSubDir (line 34) | protected String defineInputPathSubDir() {
method getConnectorConfigMap (line 38) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method before (line 47) | @BeforeEach
method getTargetFilePath (line 71) | protected File getTargetFilePath(File containerPath, InputFile inputFi...
method error (line 76) | @Test
method delete (line 85) | void delete(File file) {
method after (line 95) | @AfterEach
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSchemaGeneratorTest.java
class AbstractSchemaGeneratorTest (line 25) | public class AbstractSchemaGeneratorTest {
method createTempDir (line 32) | @BeforeEach
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnectorTest.java
class AbstractSpoolDirSourceConnectorTest (line 36) | public abstract class AbstractSpoolDirSourceConnectorTest<T extends Abst...
method createConnector (line 45) | protected abstract T createConnector();
method before (line 47) | @BeforeEach
method taskClass (line 52) | @Test
method createTempDir (line 57) | @BeforeEach
method cleanupTempDir (line 75) | @AfterEach
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceTaskTest.java
class AbstractSpoolDirSourceTaskTest (line 51) | public abstract class AbstractSpoolDirSourceTaskTest<T extends AbstractS...
method setup (line 60) | @BeforeEach
method configureIndent (line 71) | @BeforeEach
method createTask (line 76) | protected abstract T createTask();
method settings (line 78) | protected Map<String, String> settings() {
method defineInputPathSubDir (line 88) | protected String defineInputPathSubDir() {
method getTargetFilePath (line 92) | protected File getTargetFilePath(File containerPath, String inputFileN...
method poll (line 99) | protected void poll(final String packageName, TestCase testCase) throw...
method loadTestCases (line 194) | protected List<TestCase> loadTestCases(String packageName) throws IOEx...
method version (line 208) | @Test
method recordOffsetNPE (line 214) | @Test
method after (line 220) | @AfterEach
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/ByNameAbstractTaskPartitionerPredicateTest.java
class ByNameAbstractTaskPartitionerPredicateTest (line 18) | public class ByNameAbstractTaskPartitionerPredicateTest {
method test (line 22) | @TestFactory
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGeneratorTest.java
class CsvSchemaGeneratorTest (line 29) | public class CsvSchemaGeneratorTest extends AbstractSchemaGeneratorTest {
method foo (line 31) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicySubDirsNoRetainTest.java
class DeleteCleanupPolicySubDirsNoRetainTest (line 12) | public class DeleteCleanupPolicySubDirsNoRetainTest extends DeleteCleanu...
method defineInputPathSubDir (line 13) | @Override
method getConnectorConfigMap (line 18) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method success (line 24) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicySubDirsRetainTest.java
class DeleteCleanupPolicySubDirsRetainTest (line 12) | public class DeleteCleanupPolicySubDirsRetainTest extends DeleteCleanupP...
method defineInputPathSubDir (line 13) | @Override
method getConnectorConfigMap (line 18) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method success (line 24) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicyTest.java
class DeleteCleanupPolicyTest (line 11) | public class DeleteCleanupPolicyTest extends AbstractCleanUpPolicyTest<A...
method create (line 12) | @Override
method success (line 17) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DocumentationTest.java
class DocumentationTest (line 20) | public class DocumentationTest extends BaseDocumentationTest {
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparatorTest.java
class FileComparatorTest (line 17) | public class FileComparatorTest {
method before (line 20) | @BeforeEach
method createFile (line 25) | File createFile(String name) throws IOException {
method createFile (line 29) | File createFile(String name, long date) throws IOException {
method createFile (line 33) | File createFile(String name, long date, long length) throws IOException {
method sort (line 49) | List<File> sort(List<File> files, AbstractSourceConnectorConfig.FileAt...
method expected (line 56) | List<File> expected(List<File> files, int... indexes) {
method existingFunctionality (line 64) | @Test
method sortByLastModified (line 77) | @Test
method sortBySize (line 91) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGeneratorTest.java
class JsonSchemaGeneratorTest (line 30) | public class JsonSchemaGeneratorTest extends AbstractSchemaGeneratorTest {
method schema (line 32) | @Test
method schemaWithCustomSchemaName (line 61) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MinimumFileAgePredicateTest.java
class MinimumFileAgePredicateTest (line 16) | public class MinimumFileAgePredicateTest {
method before (line 22) | @BeforeEach
method after (line 27) | @AfterEach
method time (line 34) | Time time(long milliseconds) {
method notOldEnough (line 40) | @Test
method oldEnough (line 52) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicySubDirsNoRetainTest.java
class MoveByDateCleanupPolicySubDirsNoRetainTest (line 12) | public class MoveByDateCleanupPolicySubDirsNoRetainTest extends MoveByDa...
method defineInputPathSubDir (line 13) | @Override
method getConnectorConfigMap (line 18) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method success (line 24) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicySubDirsRetainTest.java
class MoveByDateCleanupPolicySubDirsRetainTest (line 11) | public class MoveByDateCleanupPolicySubDirsRetainTest extends MoveByDate...
method defineInputPathSubDir (line 12) | @Override
method getConnectorConfigMap (line 17) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method success (line 23) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicyTest.java
class MoveByDateCleanupPolicyTest (line 14) | public class MoveByDateCleanupPolicyTest extends AbstractCleanUpPolicyTe...
method create (line 15) | @Override
method success (line 20) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicySubDirsNoRetainTest.java
class MoveCleanupPolicySubDirsNoRetainTest (line 12) | public class MoveCleanupPolicySubDirsNoRetainTest extends MoveCleanupPol...
method defineInputPathSubDir (line 14) | @Override
method getConnectorConfigMap (line 19) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method success (line 25) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicySubDirsRetainTest.java
class MoveCleanupPolicySubDirsRetainTest (line 12) | public class MoveCleanupPolicySubDirsRetainTest extends MoveCleanupPolic...
method defineInputPathSubDir (line 14) | @Override
method getConnectorConfigMap (line 19) | protected ImmutableMap.Builder<String,String> getConnectorConfigMap() {
method success (line 25) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicyTest.java
class MoveCleanupPolicyTest (line 11) | public class MoveCleanupPolicyTest extends AbstractCleanUpPolicyTest<Abs...
method create (line 12) | @Override
method success (line 17) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/NamedTest.java
type NamedTest (line 20) | public interface NamedTest {
method path (line 21) | void path(Path path);
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/NoneCleanupPolicyTest.java
class NoneCleanupPolicyTest (line 10) | public class NoneCleanupPolicyTest extends AbstractCleanUpPolicyTest<Abs...
method create (line 11) | @Override
method success (line 16) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/ProcessingFileExistsPredicateTest.java
class ProcessingFileExistsPredicateTest (line 14) | public class ProcessingFileExistsPredicateTest {
method before (line 20) | @BeforeEach
method after (line 26) | @AfterEach
method test (line 33) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTaskTest.java
class SpoolDirAvroSourceTaskTest (line 29) | public class SpoolDirAvroSourceTaskTest extends AbstractSpoolDirSourceTa...
method createTask (line 32) | @Override
method settings (line 37) | @Override
method foo (line 43) | @Disabled
method poll (line 71) | @TestFactory
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTaskTest.java
class SpoolDirBinaryFileSourceTaskTest (line 16) | public class SpoolDirBinaryFileSourceTaskTest extends AbstractSpoolDirSo...
method createTask (line 19) | @Override
method settings (line 24) | @Override
method poll (line 30) | @TestFactory
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorConfigTest.java
class SpoolDirCsvSourceConnectorConfigTest (line 16) | public class SpoolDirCsvSourceConnectorConfigTest {
method nullFieldSeparator (line 18) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorTest.java
class SpoolDirCsvSourceConnectorTest (line 31) | public class SpoolDirCsvSourceConnectorTest extends AbstractSpoolDirSour...
method createConnector (line 32) | @Override
method startWithoutSchema (line 37) | @Test
method startWithoutSchemaMismatch (line 60) | @Test()
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskSubDirsNoRetainTest.java
class SpoolDirCsvSourceTaskSubDirsNoRetainTest (line 22) | public class SpoolDirCsvSourceTaskSubDirsNoRetainTest extends SpoolDirCs...
method settings (line 25) | @Override
method defineInputPathSubDir (line 35) | @Override
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskSubDirsRetainTest.java
class SpoolDirCsvSourceTaskSubDirsRetainTest (line 22) | public class SpoolDirCsvSourceTaskSubDirsRetainTest extends SpoolDirCsvS...
method settings (line 25) | @Override
method defineInputPathSubDir (line 35) | @Override
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskTest.java
class SpoolDirCsvSourceTaskTest (line 51) | public class SpoolDirCsvSourceTaskTest extends AbstractSpoolDirSourceTas...
method createTask (line 54) | @Override
method settings (line 59) | @Override
method poll (line 68) | @TestFactory
method writeCSV (line 81) | void writeCSV(File outputFile, Schema schema, List<Struct> structs) th...
method rebalance (line 101) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnectorTest.java
class SpoolDirJsonSourceConnectorTest (line 30) | public class SpoolDirJsonSourceConnectorTest extends AbstractSpoolDirSou...
method createConnector (line 31) | @Override
method startWithoutSchema (line 36) | @Test
method startWithoutSchemaMismatch (line 59) | @Test()
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceTaskTest.java
class SpoolDirJsonSourceTaskTest (line 31) | public class SpoolDirJsonSourceTaskTest extends AbstractSpoolDirSourceTa...
method createTask (line 34) | @Override
method settings (line 39) | @Override
method poll (line 48) | @TestFactory
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceTaskTest.java
class SpoolDirLineDelimitedSourceTaskTest (line 11) | public class SpoolDirLineDelimitedSourceTaskTest extends AbstractSpoolDi...
method createTask (line 14) | @Override
method settings (line 19) | @Override
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceTaskTest.java
class SpoolDirSchemaLessJsonSourceTaskTest (line 16) | public class SpoolDirSchemaLessJsonSourceTaskTest extends AbstractSpoolD...
method createTask (line 19) | @Override
method settings (line 24) | @Override
method poll (line 33) | @TestFactory
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/TestCase.java
class TestCase (line 27) | public class TestCase implements NamedTest {
method path (line 36) | @Override
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/TestDataUtils.java
class TestDataUtils (line 36) | public class TestDataUtils {
method metadata (line 39) | @Test
method loadJsonResourceFiles (line 45) | public static <T extends NamedTest> List<T> loadJsonResourceFiles(Stri...
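The combination of TestDataUtils.loadJsonResourceFiles(...) (signature truncated above) and NamedTest.path(Path) suggests a JSON-driven fixture pattern: each .json resource is deserialized into a NamedTest subtype and told where it came from. A hedged single-resource sketch of that pattern follows, assuming Jackson deserialization; the loader name and flow are illustrative, not the repo's actual implementation.

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Paths;

// Assumes this sketch sits in the same test package as NamedTest
// (com.github.jcustenborder.kafka.connect.spooldir).
public class JsonFixtureSketch {
  private static final ObjectMapper MAPPER = new ObjectMapper();

  // HYPOTHETICAL loader: deserialize one fixture and hand it its source path,
  // mirroring what loadJsonResourceFiles appears to do for a whole package.
  static <T extends NamedTest> T loadCase(String resource, Class<T> cls) throws IOException {
    try (InputStream in = JsonFixtureSketch.class.getResourceAsStream(resource)) {
      if (in == null) {
        throw new IOException("resource not found: " + resource);
      }
      T testCase = MAPPER.readValue(in, cls);
      testCase.path(Paths.get(resource)); // NamedTest callback from the index above
      return testCase;
    }
  }
}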
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversionBuilderTest.java
class SchemaConversionBuilderTest (line 40) | public class SchemaConversionBuilderTest {
method normalizeFieldName (line 42) | @TestFactory
method foo (line 72) | @Test
FILE: src/test/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceTaskTest.java
class SpoolDirELFSourceTaskTest (line 33) | public class SpoolDirELFSourceTaskTest extends AbstractSpoolDirSourceTas...
method createTask (line 36) | @Override
method settings (line 41) | @Override
method poll (line 50) | @TestFactory
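Across the task tests above (Avro, binary, CSV, JSON, schemaless JSON, ELF) the same template repeats: createTask() supplies the task under test, settings() supplies the connector config, and poll() is a JUnit 5 @TestFactory that fans the JSON fixtures out into dynamic tests. A hedged sketch of that fan-out follows; the fixture names match the .data/.json resource pairs previewed below, while the test body is a placeholder assumption.

import java.util.stream.Stream;
import org.junit.jupiter.api.DynamicTest;
import org.junit.jupiter.api.TestFactory;

public class SourceTaskPollSketch {
  @TestFactory
  public Stream<DynamicTest> poll() {
    // Fixture names match the .data/.json resource pairs in the previews below.
    return Stream.of("FieldsMatch", "SchemaHasMoreFields", "DataHasMoreFields", "SourceOffset")
        .map(name -> DynamicTest.dynamicTest(name, () -> {
          // ASSUMPTION: the real tests load the fixture (see TestDataUtils)
          // and assert the task's poll() output against the expected records.
        }));
  }
}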
Condensed preview — 140 files, each entry giving path, character count, and a content snippet (full structured content: 2,478K chars).
[
{
"path": ".gitignore",
"chars": 80,
"preview": "target\n*.iml\n.okhttpcache\nELFTesting.properties\n.checkstyle\n.factorypath\n.idea/\n"
},
{
"path": "Jenkinsfile",
"chars": 161,
"preview": "#!groovy\n@Library('jenkins-pipeline') import com.github.jcustenborder.jenkins.pipeline.KafkaConnectPipeline\n\ndef pipe = "
},
{
"path": "LICENSE",
"chars": 11357,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 55988,
"preview": "# Introduction\n[Documentation](https://jcustenborder.github.io/kafka-connect-documentation/projects/kafka-connect-spoold"
},
{
"path": "bin/debug.sh",
"chars": 2289,
"preview": "#!/usr/bin/env bash\n#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache Lic"
},
{
"path": "config/AvroExample.properties",
"chars": 952,
"preview": "#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache License, Version 2.0 (t"
},
{
"path": "config/CSVExample.json",
"chars": 1494,
"preview": "{\n \"name\": \"CsvSpoolDir\",\n \"config\": {\n \"tasks.max\": \"1\",\n \"connector.class\": \"com.github.jcustenborder.kafka.co"
},
{
"path": "config/CSVExample.properties",
"chars": 1904,
"preview": "#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache License, Version 2.0 (t"
},
{
"path": "config/CSVSchemaGenerator.properties",
"chars": 1073,
"preview": "#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache License, Version 2.0 (t"
},
{
"path": "config/JsonExample.properties",
"chars": 1763,
"preview": "#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache License, Version 2.0 (t"
},
{
"path": "config/connect-avro-docker.properties",
"chars": 1454,
"preview": "#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache License, Version 2.0 (t"
},
{
"path": "docker-compose.yml",
"chars": 1599,
"preview": "#\n# Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n#\n# Licensed under the Apache License, Version 2.0 (t"
},
{
"path": "pom.xml",
"chars": 5793,
"preview": "<?xml version=\"1.0\"?>\n<!--\n\n Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n\n Licensed under the A"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractCleanUpPolicy.java",
"chars": 4896,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSchemaGenerator.java",
"chars": 8170,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnector.java",
"chars": 1929,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceConnectorConfig.java",
"chars": 19133,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSourceTask.java",
"chars": 9354,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnector.java",
"chars": 4933,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnectorConfig.java",
"chars": 13550,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceTask.java",
"chars": 3478,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractTaskPartitionerPredicate.java",
"chars": 2392,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGenerator.java",
"chars": 2460,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparator.java",
"chars": 2195,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/InputFile.java",
"chars": 9163,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/InputFileDequeue.java",
"chars": 5405,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGenerator.java",
"chars": 2181,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/Metadata.java",
"chars": 4201,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnector.java",
"chars": 2130,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceConnectorConfig.java",
"chars": 1055,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTask.java",
"chars": 2901,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector.java",
"chars": 2201,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnectorConfig.java",
"chars": 1065,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTask.java",
"chars": 2066,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector.java",
"chars": 3726,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorConfig.java",
"chars": 18101,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTask.java",
"chars": 5819,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnector.java",
"chars": 4000,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnectorConfig.java",
"chars": 1167,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceTask.java",
"chars": 4484,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnector.java",
"chars": 1862,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnectorConfig.java",
"chars": 2059,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceTask.java",
"chars": 2327,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceConnector.java",
"chars": 2238,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceConnectorConfig.java",
"chars": 2061,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceTask.java",
"chars": 2869,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversion.java",
"chars": 2062,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversionBuilder.java",
"chars": 3281,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnector.java",
"chars": 2630,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnectorConfig.java",
"chars": 1184,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceTask.java",
"chars": 3514,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LocalDateLogFieldConverter.java",
"chars": 1300,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LocalTimeLogFieldConverter.java",
"chars": 1356,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LogFieldConverter.java",
"chars": 1645,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/LogFieldConverterFactory.java",
"chars": 5002,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/PrimitiveLogFieldConverter.java",
"chars": 1002,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/elf/converters/TimestampLogFieldConverter.java",
"chars": 1860,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/main/java/com/github/jcustenborder/kafka/connect/spooldir/package-info.java",
"chars": 2776,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractCleanUpPolicyTest.java",
"chars": 3671,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport com.google.common.collect.ImmutableMap;\nimport com.goog"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSchemaGeneratorTest.java",
"chars": 1798,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceConnectorTest.java",
"chars": 3380,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/AbstractSpoolDirSourceTaskTest.java",
"chars": 9638,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/ByNameAbstractTaskPartitionerPredicateTest.java",
"chars": 1265,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.DynamicTest;\nimport org.junit.jup"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/CsvSchemaGeneratorTest.java",
"chars": 2622,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicySubDirsNoRetainTest.java",
"chars": 988,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport com.google.common.c"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicySubDirsRetainTest.java",
"chars": 979,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport com.google.common.c"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DeleteCleanupPolicyTest.java",
"chars": 1208,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport java.io.File;\nimpor"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/DocumentationTest.java",
"chars": 828,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/FileComparatorTest.java",
"chars": 3082,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport com.google.common.collect.ImmutableList;\nimport com.goo"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/JsonSchemaGeneratorTest.java",
"chars": 4228,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MinimumFileAgePredicateTest.java",
"chars": 1820,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.apache.kafka.common.utils.Time;\nimport org.junit.ju"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicySubDirsNoRetainTest.java",
"chars": 997,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport com.google.common.c"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicySubDirsRetainTest.java",
"chars": 987,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport com.google.common.c"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveByDateCleanupPolicyTest.java",
"chars": 1342,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport java.io.File;\nimpor"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicySubDirsNoRetainTest.java",
"chars": 986,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport com.google.common.c"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicySubDirsRetainTest.java",
"chars": 977,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport com.google.common.c"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/MoveCleanupPolicyTest.java",
"chars": 1023,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport java.io.File;\nimpor"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/NamedTest.java",
"chars": 769,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/NoneCleanupPolicyTest.java",
"chars": 732,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\n\nimport java.io.File;\nimpor"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/ProcessingFileExistsPredicateTest.java",
"chars": 1232,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport com.google.common.io.Files;\nimport org.junit.jupiter.ap"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirAvroSourceTaskTest.java",
"chars": 2904,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport com.google.common.io.Files;\nimport io.confluent.connect"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceTaskTest.java",
"chars": 1270,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport com.google.common.io.Files;\nimport org.junit.jupiter.ap"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorConfigTest.java",
"chars": 2510,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\n\nimport com.opencsv.CSVReader;\nimport com.opencsv.CSVReaderBui"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnectorTest.java",
"chars": 2751,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskSubDirsNoRetainTest.java",
"chars": 1375,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskSubDirsRetainTest.java",
"chars": 1370,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceTaskTest.java",
"chars": 5748,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnectorTest.java",
"chars": 2756,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceTaskTest.java",
"chars": 2166,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceTaskTest.java",
"chars": 1013,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport org.junit.jupiter.api.Test;\nimport org.slf4j.Logger;\nim"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirSchemaLessJsonSourceTaskTest.java",
"chars": 1588,
"preview": "package com.github.jcustenborder.kafka.connect.spooldir;\n\nimport com.google.common.io.Files;\nimport org.junit.jupiter.ap"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/TestCase.java",
"chars": 1312,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/TestDataUtils.java",
"chars": 2890,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SchemaConversionBuilderTest.java",
"chars": 4252,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/java/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceTaskTest.java",
"chars": 2322,
"preview": "/**\n * Copyright © 2016 Jeremy Custenborder (jcustenborder@gmail.com)\n *\n * Licensed under the Apache License, Version 2"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolBinaryFileSourceConnector/binary.json",
"chars": 291,
"preview": "{\n \"name\": \"Binary File\",\n \"description\": \"This file will read the entire file and write it to Kafka as a binary file."
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolBinaryFileSourceConnector/fromXML.json",
"chars": 700,
"preview": "{\n \"name\": \"Transform XML Files\",\n \"description\": \"This example will use the FromXml transformation to read the binary"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector/binary.json",
"chars": 291,
"preview": "{\n \"name\": \"Binary File\",\n \"description\": \"This file will read the entire file and write it to Kafka as a binary file."
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirBinaryFileSourceConnector/fromXML.json",
"chars": 700,
"preview": "{\n \"name\": \"Transform XML Files\",\n \"description\": \"This example will use the FromXml transformation to read the binary"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector/schema.json",
"chars": 2138,
"preview": "{\n \"name\": \"CSV with schema\",\n \"description\": \"This example will read csv files and write them to Kafka parsing them t"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector/schemaheaders.json",
"chars": 2415,
"preview": "{\n \"name\": \"CSV with Headers as fields\",\n \"description\": \"This example will use a transformation to copy data from the"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirCsvSourceConnector/tsv.json",
"chars": 352,
"preview": "{\n \"name\": \"TSV input file\",\n \"description\": \"This example will read a tab separated file. This method is very similar"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirJsonSourceConnector/test.json",
"chars": 258,
"preview": "{\n \"name\":\"Json\",\n \"description\":\"This example will read json from the input directory.\",\n \"config\":{\n \"finished.p"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/SpoolDirLineDelimitedSourceConnector/fix.json",
"chars": 43011,
"preview": "{\n \"description\" : \"This example will read files in a directory line by line and parse them using kafka-connect-transfo"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/avro/FieldsMatch.json",
"chars": 119581,
"preview": "{\n \"settings\" : {\n \"avro.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"valueSchema\" : {\n \"name\" : \"com."
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/binary/DataHasMoreFields.data",
"chars": 45,
"preview": "asdifoasodfasdfargasdfasdfasdgfrasdfasdfasdfa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/binary/DataHasMoreFields.json",
"chars": 1516,
"preview": "{\n \"settings\": {},\n \"offset\": {},\n \"expected\": [\n {\n \"sourcePartition\": {\n \"fileName\": \"DataHasMoreFie"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/BlankLines.data",
"chars": 1975,
"preview": "id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack,Garcia,jgarcia0"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/BlankLines.json",
"chars": 130211,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/DataHasMoreFields.data",
"chars": 2050,
"preview": "id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color,column11\n1,Jack,Garcia"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/DataHasMoreFields.json",
"chars": 130631,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.data",
"chars": 1968,
"preview": "id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack,Garcia,jgarcia0"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FieldsMatch.json",
"chars": 130271,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FileModeFieldFieldsMatch.data",
"chars": 1988,
"preview": "id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack,Garcia,jgarcia0"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/FileModeFieldFieldsMatch.json",
"chars": 131147,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/SchemaHasMoreFields.data",
"chars": 1863,
"preview": "id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country\n1,Jack,Garcia,jgarcia0@shop-pro.jp,Ma"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/SchemaHasMoreFields.json",
"chars": 130441,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/SourceOffset.data",
"chars": 1968,
"preview": "id,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack,Garcia,jgarcia0"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/SourceOffset.json",
"chars": 72451,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : {\n \"offset\" : 10\n },\n \"keySchema\" : {\n "
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/WithHeaderSkipLines.data",
"chars": 1984,
"preview": "#skip this line\nid,first_name,last_name,email,gender,ip_address,last_login,account_balance,country,favorite_color\n1,Jack"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/WithHeaderSkipLines.json",
"chars": 130819,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\",\n \"csv.skip.lines\": \"1\",\n \"schema.generation.enabled\": \""
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/WithoutHeader.data",
"chars": 1870,
"preview": "1,Jack,Garcia,jgarcia0@shop-pro.jp,Male,196.56.44.185,2015-09-30T15:29:03Z,347.77,IT,#4a2313\n2,John,Kim,jkim1@miibeian.g"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/csv/WithoutHeader.json",
"chars": 130392,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"false\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.ex"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/elf/SpoolDirELFSourceConnector/example.json",
"chars": 286,
"preview": "{\n \"name\": \"Standard\",\n \"description\": \"This example will read Extended Log Format files and write them to Kafka.\",\n "
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/elf/elf/FieldsMatch.data",
"chars": 683,
"preview": "#Software: xyz 1.2.3.4\n#Version: 1.0\n#Start-Date: 2019-10-23 22:00:00\n#Date: 2019-10-23 22:00:00\n#Fields: date time time"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/elf/elf/FieldsMatch.json",
"chars": 36790,
"preview": "{\n \"settings\" : {\n },\n \"offset\" : { },\n \"expected\" : [ {\n \"sourcePartition\" : {\n \"fileName\" : \"FieldsMatch.e"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/DataHasMoreFields.data",
"chars": 5153,
"preview": "{\n\"id\": 1,\n\"first_name\": \"Jack\",\n\"last_name\": \"Garcia\",\n\"email\": \"jgarcia0@shop-pro.jp\",\n\"gender\": \"Male\",\n\"ip_address\":"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/DataHasMoreFields.json",
"chars": 130689,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.data",
"chars": 4770,
"preview": "{\n\"id\": 1,\n\"first_name\": \"Jack\",\n\"last_name\": \"Garcia\",\n\"email\": \"jgarcia0@shop-pro.jp\",\n\"gender\": \"Male\",\n\"ip_address\":"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FieldsMatch.json",
"chars": 130329,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FileModeFieldFieldsMatch.data",
"chars": 4807,
"preview": "{\n\"id\": 1,\n\"first_name\": \"Jack\",\n\"last_name\": \"Garcia\",\n\"email\": \"jgarcia0@shop-pro.jp\",\n\"gender\": \"Male\",\n\"ip_address\":"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/FileModeFieldFieldsMatch.json",
"chars": 131205,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/SchemaHasMoreFields.data",
"chars": 4480,
"preview": "{\n\"id\": 1,\n\"first_name\": \"Jack\",\n\"last_name\": \"Garcia\",\n\"email\": \"jgarcia0@shop-pro.jp\",\n\"gender\": \"Male\",\n\"ip_address\":"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/SchemaHasMoreFields.json",
"chars": 130499,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : { },\n \"keySchema\" : {\n \"name\" : \"com.exa"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/SourceOffset.data",
"chars": 4770,
"preview": "{\n\"id\": 1,\n\"first_name\": \"Jack\",\n\"last_name\": \"Garcia\",\n\"email\": \"jgarcia0@shop-pro.jp\",\n\"gender\": \"Male\",\n\"ip_address\":"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/json/SourceOffset.json",
"chars": 66020,
"preview": "{\n \"settings\" : {\n \"csv.first.row.as.header\" : \"true\"\n },\n \"offset\" : {\n \"offset\" : 10\n },\n \"keySchema\" : {\n "
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/schemalessjson/DataHasMoreFields.data",
"chars": 5153,
"preview": "{\n\"id\": 1,\n\"first_name\": \"Jack\",\n\"last_name\": \"Garcia\",\n\"email\": \"jgarcia0@shop-pro.jp\",\n\"gender\": \"Male\",\n\"ip_address\":"
},
{
"path": "src/test/resources/com/github/jcustenborder/kafka/connect/spooldir/schemalessjson/DataHasMoreFields.json",
"chars": 31534,
"preview": "{\n \"settings\" : { },\n \"offset\" : { },\n \"expected\" : [ {\n \"sourcePartition\" : {\n \"fileName\" : \"DataHasMoreFiel"
},
{
"path": "src/test/resources/logback.xml",
"chars": 510,
"preview": "<configuration>\n <appender name=\"STDOUT\" class=\"ch.qos.logback.core.ConsoleAppender\">\n <encoder class=\"ch.qos."
}
]
// ... and 1 more file (content truncated)
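For orientation, a minimal connector configuration in the shape of config/CSVExample.properties previewed above. Only the name "CsvSpoolDir", tasks.max, the connector-class package prefix, and csv.first.row.as.header are visible in the previews; every other key and value below is an assumption to verify against the repository README before use.

# Hedged sketch of a CSV spool-dir connector config; keys beyond the first
# three are assumptions -- verify against the README before use.
name=CsvSpoolDir
connector.class=com.github.jcustenborder.kafka.connect.spooldir.SpoolDirCsvSourceConnector
tasks.max=1
# assumed destination topic
topic=spooldir-csv-example
# assumed directories: input to watch, finished/error for processed files
input.path=/tmp/spooldir/input
finished.path=/tmp/spooldir/finished
error.path=/tmp/spooldir/error
# assumed file-selection pattern
input.file.pattern=^.*\.csv$
# key visible in the csv test-resource previews above
csv.first.row.as.header=true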