Showing preview only (2,094K chars total). Download the full file or copy to clipboard to get everything.
Repository: neo4j-contrib/neo4j-spark-connector
Branch: 5.0
Commit: c70d2995c935
Files: 135
Total size: 2.0 MB
Directory structure:
gitextract_girzruyy/
├── .commitlintrc.json
├── .github/
│ ├── CODEOWNERS
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ └── dependabot.yml
├── .gitignore
├── .husky/
│ ├── commit-msg
│ └── pre-commit
├── .mvn/
│ └── wrapper/
│ └── maven-wrapper.properties
├── .teamcity/
│ ├── .editorconfig
│ ├── builds/
│ │ ├── Build.kt
│ │ ├── Common.kt
│ │ ├── Empty.kt
│ │ ├── JavaIntegrationTests.kt
│ │ ├── Maven.kt
│ │ ├── PRCheck.kt
│ │ ├── Package.kt
│ │ ├── PythonIntegrationTests.kt
│ │ ├── Release.kt
│ │ ├── SemgrepCheck.kt
│ │ └── WhiteListCheck.kt
│ ├── pom.xml
│ └── settings.kts
├── LICENSE.txt
├── README.md
├── common/
│ ├── LICENSES.txt
│ ├── NOTICE.txt
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ ├── java/
│ │ │ └── org/
│ │ │ └── neo4j/
│ │ │ └── spark/
│ │ │ └── util/
│ │ │ └── ReflectionUtils.java
│ │ ├── resources/
│ │ │ └── neo4j-spark-connector.properties
│ │ └── scala/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ ├── config/
│ │ │ └── TopN.scala
│ │ ├── converter/
│ │ │ ├── DataConverter.scala
│ │ │ └── TypeConverter.scala
│ │ ├── cypher/
│ │ │ ├── Cypher5Renderer.scala
│ │ │ └── CypherVersionSelector.scala
│ │ ├── reader/
│ │ │ └── BasePartitionReader.scala
│ │ ├── service/
│ │ │ ├── MappingService.scala
│ │ │ ├── Neo4jQueryService.scala
│ │ │ └── SchemaService.scala
│ │ ├── streaming/
│ │ │ └── BaseStreamingPartitionReader.scala
│ │ ├── util/
│ │ │ ├── DriverCache.scala
│ │ │ ├── Neo4jImplicits.scala
│ │ │ ├── Neo4jOptions.scala
│ │ │ ├── Neo4jUtil.scala
│ │ │ ├── ValidationUtil.scala
│ │ │ └── Validations.scala
│ │ └── writer/
│ │ ├── BaseDataWriter.scala
│ │ └── DataWriterMetrics.scala
│ └── test/
│ └── scala/
│ └── org/
│ └── neo4j/
│ └── spark/
│ ├── CommonTestSuiteIT.scala
│ ├── CommonTestSuiteWithApocIT.scala
│ ├── service/
│ │ ├── AuthenticationTest.scala
│ │ ├── Neo4jQueryServiceIT.scala
│ │ ├── Neo4jQueryServiceTest.scala
│ │ ├── SchemaServiceTSE.scala
│ │ ├── SchemaServiceTest.scala
│ │ └── SchemaServiceWithApocTSE.scala
│ └── util/
│ ├── DummyNamedReference.scala
│ ├── Neo4jImplicitsTest.scala
│ ├── Neo4jOptionsIT.scala
│ ├── Neo4jOptionsTest.scala
│ ├── Neo4jUtilTest.scala
│ ├── ValidationsIT.scala
│ └── ValidationsTest.scala
├── dangerfile.mjs
├── examples/
│ ├── neo4j_data_engineering.ipynb
│ └── neo4j_data_science.ipynb
├── jreleaser.yml
├── maven-release.sh
├── mvnw
├── mvnw.cmd
├── package.json
├── pom.xml
├── scripts/
│ ├── python/
│ │ ├── requirements.txt
│ │ └── test_spark.py
│ └── release/
│ └── upload_to_spark_packages.sh
├── spark-3/
│ ├── LICENSES.txt
│ ├── NOTICE.txt
│ ├── pom.xml
│ └── src/
│ ├── jreleaser/
│ │ └── assemblers/
│ │ └── zip/
│ │ └── README.txt.tpl
│ ├── main/
│ │ ├── assemblies/
│ │ │ └── spark-packages-assembly.xml
│ │ ├── distributions/
│ │ │ └── spark-packages.pom
│ │ ├── resources/
│ │ │ ├── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ │ └── neo4j-spark-connector.properties
│ │ └── scala/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ ├── DataSource.scala
│ │ ├── Neo4jTable.scala
│ │ ├── reader/
│ │ │ ├── Neo4jPartitionReader.scala
│ │ │ ├── Neo4jPartitionReaderFactory.scala
│ │ │ ├── Neo4jScan.scala
│ │ │ └── Neo4jScanBuilder.scala
│ │ ├── streaming/
│ │ │ ├── Neo4jMicroBatchReader.scala
│ │ │ ├── Neo4jOffset.scala
│ │ │ ├── Neo4jStreamingDataWriterFactory.scala
│ │ │ ├── Neo4jStreamingPartitionReader.scala
│ │ │ ├── Neo4jStreamingPartitionReaderFactory.scala
│ │ │ └── Neo4jStreamingWriter.scala
│ │ └── writer/
│ │ ├── Neo4jBatchWriter.scala
│ │ ├── Neo4jDataWriter.scala
│ │ ├── Neo4jDataWriterFactory.scala
│ │ └── Neo4jWriterBuilder.scala
│ └── test/
│ ├── java/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ ├── DataSourceReaderTypesTSE.java
│ │ └── SparkConnectorSuiteIT.java
│ ├── resources/
│ │ ├── log4j2.properties
│ │ ├── neo4j-keycloak.jks
│ │ └── neo4j-sso-test-realm.json
│ └── scala/
│ └── org/
│ └── neo4j/
│ └── spark/
│ ├── DataSourceAggregationTSE.scala
│ ├── DataSourceReaderNeo4jTSE.scala
│ ├── DataSourceReaderNeo4jWithApocTSE.scala
│ ├── DataSourceReaderTSE.scala
│ ├── DataSourceReaderWithApocTSE.scala
│ ├── DataSourceSchemaWriterTSE.scala
│ ├── DataSourceStreamingReaderTSE.scala
│ ├── DataSourceStreamingWriterTSE.scala
│ ├── DataSourceWriterNeo4jSkipNullKeysTSE.scala
│ ├── DataSourceWriterNeo4jTSE.scala
│ ├── DataSourceWriterTSE.scala
│ ├── DefaultConfigTSE.scala
│ ├── GraphDataScienceIT.scala
│ ├── ReauthenticationIT.scala
│ ├── SparkConnector30ScalaSuiteIT.scala
│ ├── SparkConnector30ScalaSuiteWithApocIT.scala
│ ├── SparkConnectorAuraTest.scala
│ └── TransactionTimeoutIT.scala
└── test-support/
├── pom.xml
└── src/
├── main/
│ ├── java/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ └── Assert.java
│ ├── resources/
│ │ └── simplelogger.properties
│ └── scala/
│ └── org/
│ └── neo4j/
│ ├── Closeables.scala
│ ├── Neo4jContainerExtension.scala
│ └── spark/
│ ├── RowUtil.scala
│ ├── SparkConnectorScalaBaseTSE.scala
│ ├── SparkConnectorScalaBaseWithApocTSE.scala
│ ├── SparkConnectorScalaSuiteIT.scala
│ ├── SparkConnectorScalaSuiteWithApocIT.scala
│ ├── SparkConnectorScalaSuiteWithGdsBase.scala
│ └── TestUtil.scala
└── test/
└── scala/
└── org/
└── neo4j/
└── spark/
└── VersionTest.scala
================================================
FILE CONTENTS
================================================
================================================
FILE: .commitlintrc.json
================================================
{
"extends": [
"@commitlint/config-conventional"
]
}
================================================
FILE: .github/CODEOWNERS
================================================
* @neo4j/team-connectors
/.github/ @ali-ince @fbiville @venikkin
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
labels: bug
---
## Guidelines
Please note that GitHub issues are only meant for bug reports/feature requests. If you have questions on how to use the Neo4j Connector for Apache Spark,
please ask on [the Neo4j Discussion Forum](https://community.neo4j.com/c/integrations/18) instead of creating an issue here.
## Expected Behavior (Mandatory)
## Actual Behavior (Mandatory)
## How to Reproduce the Problem
### Simple Dataset (where it's possible)
```
// Insert the output of the `df.show()` call
```
### Steps (Mandatory)
1.
1.
1.
## Screenshots (where it's possible)
## Specifications (Mandatory)
Currently used versions
### Versions
- Spark:
- Scala:
- Neo4j:
- Neo4j Connector:
## Additional information
* The code of the Spark job
* the structure of the Dataframe
* did you define the constraints/indexes?
* if you're using any Spark Cloud provider, please specify it (e.g. Databricks)
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
---
## Guidelines
Please note that GitHub issues are only meant for bug reports/feature requests. If you have questions on how to use the Neo4j Connector for Apache Spark,
please ask on [the Neo4j Discussion Forum](https://community.neo4j.com/c/integrations/18) instead of creating an issue here.
## Feature description (Mandatory)
A clear and concise description of what you want to happen. Add any considered drawbacks.
## Considered alternatives
A clear and concise description of any alternative solutions or features you've considered. Maybe there is something in the project that could be reused?
## How can this feature improve the project?
If you can, explain how users will be able to use this and possibly write out a version of the docs.
Maybe a screenshot or design?
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 3
- package-ecosystem: "maven"
directory: "/"
target-branch: "5.0"
schedule:
interval: "daily"
cooldown:
default-days: 3
- package-ecosystem: "maven"
directory: "/"
target-branch: "6.0"
schedule:
interval: "daily"
cooldown:
default-days: 3
================================================
FILE: .gitignore
================================================
neo4j-home
.gradle
gradle/
build/
*~
\#*
target
out
.project
.classpath
.settings
.externalToolBuilders/
.scala_dependencies
.factorypath
.cache
.cache-main
.cache-tests
*.iws
*.ipr
*.iml
.idea
.DS_Store
.shell_history
.mailmap
.java-version
.cache-main
.cache-tests
Thumbs.db
.cache-main
.cache-tests
docs/guides
doc/node
doc/node_modules
doc/package-lock.json
scripts/python/local
node_modules
================================================
FILE: .husky/commit-msg
================================================
#!/usr/bin/env sh
# Husky commit-msg hook: lint the commit message file ($1) against the
# conventional-commit rules configured in .commitlintrc.json.
npx --no -- commitlint --edit "$1"
================================================
FILE: .husky/pre-commit
================================================
#!/usr/bin/env sh
# Husky pre-commit hook: sort POMs and apply Spotless formatting to the
# TeamCity settings project and to the main build, then re-stage whatever
# the formatters touched so the commit picks the changes up.
./mvnw sortpom:sort spotless:apply -f .teamcity
./mvnw sortpom:sort spotless:apply
git update-index --again
================================================
FILE: .mvn/wrapper/maven-wrapper.properties
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
wrapperVersion=3.3.2
distributionType=only-script
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.9.9/apache-maven-3.9.9-bin.zip
================================================
FILE: .teamcity/.editorconfig
================================================
# This .editorconfig section approximates ktfmt's formatting rules. You can include it in an
# existing .editorconfig file or use it standalone by copying it to <project root>/.editorconfig
# and making sure your editor is set to read settings from .editorconfig files.
#
# It includes editor-specific config options for IntelliJ IDEA.
#
# If any option is wrong, PRs are welcome
[*]
max_line_length = unset
[pom.xml]
max_line_length = 180
[{*.kt,*.kts}]
indent_style = space
insert_final_newline = true
max_line_length = 100
indent_size = 2
ij_continuation_indent_size = 4
ij_java_names_count_to_use_import_on_demand = 9999
ij_kotlin_align_in_columns_case_branch = false
ij_kotlin_align_multiline_binary_operation = false
ij_kotlin_align_multiline_extends_list = false
ij_kotlin_align_multiline_method_parentheses = false
ij_kotlin_align_multiline_parameters = true
ij_kotlin_align_multiline_parameters_in_calls = false
ij_kotlin_allow_trailing_comma = true
ij_kotlin_allow_trailing_comma_on_call_site = true
ij_kotlin_assignment_wrap = normal
ij_kotlin_blank_lines_after_class_header = 0
ij_kotlin_blank_lines_around_block_when_branches = 0
ij_kotlin_blank_lines_before_declaration_with_comment_or_annotation_on_separate_line = 1
ij_kotlin_block_comment_at_first_column = true
ij_kotlin_call_parameters_new_line_after_left_paren = true
ij_kotlin_call_parameters_right_paren_on_new_line = false
ij_kotlin_call_parameters_wrap = on_every_item
ij_kotlin_catch_on_new_line = false
ij_kotlin_class_annotation_wrap = split_into_lines
ij_kotlin_code_style_defaults = KOTLIN_OFFICIAL
ij_kotlin_continuation_indent_for_chained_calls = true
ij_kotlin_continuation_indent_for_expression_bodies = true
ij_kotlin_continuation_indent_in_argument_lists = true
ij_kotlin_continuation_indent_in_elvis = false
ij_kotlin_continuation_indent_in_if_conditions = false
ij_kotlin_continuation_indent_in_parameter_lists = false
ij_kotlin_continuation_indent_in_supertype_lists = false
ij_kotlin_else_on_new_line = false
ij_kotlin_enum_constants_wrap = off
ij_kotlin_extends_list_wrap = normal
ij_kotlin_field_annotation_wrap = split_into_lines
ij_kotlin_finally_on_new_line = false
ij_kotlin_if_rparen_on_new_line = false
ij_kotlin_import_nested_classes = false
ij_kotlin_insert_whitespaces_in_simple_one_line_method = true
ij_kotlin_keep_blank_lines_before_right_brace = 2
ij_kotlin_keep_blank_lines_in_code = 2
ij_kotlin_keep_blank_lines_in_declarations = 2
ij_kotlin_keep_first_column_comment = true
ij_kotlin_keep_indents_on_empty_lines = false
ij_kotlin_keep_line_breaks = true
ij_kotlin_lbrace_on_next_line = false
ij_kotlin_line_comment_add_space = false
ij_kotlin_line_comment_at_first_column = true
ij_kotlin_method_annotation_wrap = split_into_lines
ij_kotlin_method_call_chain_wrap = normal
ij_kotlin_method_parameters_new_line_after_left_paren = true
ij_kotlin_method_parameters_right_paren_on_new_line = true
ij_kotlin_method_parameters_wrap = on_every_item
ij_kotlin_name_count_to_use_star_import = 9999
ij_kotlin_name_count_to_use_star_import_for_members = 9999
ij_kotlin_parameter_annotation_wrap = off
ij_kotlin_space_after_comma = true
ij_kotlin_space_after_extend_colon = true
ij_kotlin_space_after_type_colon = true
ij_kotlin_space_before_catch_parentheses = true
ij_kotlin_space_before_comma = false
ij_kotlin_space_before_extend_colon = true
ij_kotlin_space_before_for_parentheses = true
ij_kotlin_space_before_if_parentheses = true
ij_kotlin_space_before_lambda_arrow = true
ij_kotlin_space_before_type_colon = false
ij_kotlin_space_before_when_parentheses = true
ij_kotlin_space_before_while_parentheses = true
ij_kotlin_spaces_around_additive_operators = true
ij_kotlin_spaces_around_assignment_operators = true
ij_kotlin_spaces_around_equality_operators = true
ij_kotlin_spaces_around_function_type_arrow = true
ij_kotlin_spaces_around_logical_operators = true
ij_kotlin_spaces_around_multiplicative_operators = true
ij_kotlin_spaces_around_range = false
ij_kotlin_spaces_around_relational_operators = true
ij_kotlin_spaces_around_unary_operator = false
ij_kotlin_spaces_around_when_arrow = true
ij_kotlin_variable_annotation_wrap = off
ij_kotlin_while_on_new_line = false
ij_kotlin_wrap_elvis_expressions = 1
ij_kotlin_wrap_expression_body_functions = 1
ij_kotlin_wrap_first_method_in_call_chain = false
================================================
FILE: .teamcity/builds/Build.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.Project
import jetbrains.buildServer.configs.kotlin.buildFeatures.notifications
import jetbrains.buildServer.configs.kotlin.sequential
import jetbrains.buildServer.configs.kotlin.toId
/**
 * A TeamCity project that assembles the full build chain for one flavour of
 * the connector build (PR validation, nightly, compatibility, ...).
 *
 * For every (java, scala, neo4j) combination it wires up, in order:
 * package -> compile -> unit tests -> artifact collection -> integration
 * tests (Java plus, where applicable, PySpark), all converging into a single
 * "complete" build. Non-PR, non-compatibility chains additionally end in a
 * release build.
 */
class Build(
    name: String,
    forPullRequests: Boolean,
    javaVersions: Set<JavaVersion>,
    scalaVersions: Set<ScalaVersion>,
    pysparkVersions: Set<PySparkVersion>,
    neo4jVersions: Set<Neo4jVersion>,
    forCompatibility: Boolean = false,
    customizeCompletion: BuildType.() -> Unit = {}
) :
    Project(
        {
          this.id(name.toId())
          this.name = name
          // Sink build type that every branch of the chain feeds into.
          val complete = Empty("${name}-complete", "complete")
          val bts = sequential {
            // PR-only gates: contributor whitelist and the danger-based PR check.
            if (forPullRequests)
                buildType(WhiteListCheck("${name}-whitelist-check", "white-list check"))
            if (forPullRequests) dependentBuildType(PRCheck("${name}-pr-check", "pr check"))
            parallel {
              // One semgrep scan per Scala version.
              scalaVersions.forEach { scala ->
                dependentBuildType(
                    SemgrepCheck(
                        "${name}-semgrep-check-${scala.version}",
                        "semgrep check (${scala.version})",
                        scala))
              }
              // Full pipeline per (java, scala, neo4j) combination.
              javaVersions.cartesianProduct(scalaVersions, neo4jVersions).forEach {
                  (java, scala, neo4j) ->
                sequential {
                  val packaging =
                      Package(
                          "${name}-package-${java.version}-${scala.version}-${neo4j.version}",
                          "package (${java.version}, ${scala.version}, ${neo4j.version})",
                          java,
                          scala,
                      )
                  dependentBuildType(
                      Maven(
                          "${name}-build-${java.version}-${scala.version}-${neo4j.version}",
                          "build (${java.version}, ${scala.version}, ${neo4j.version})",
                          "test-compile",
                          java,
                          scala,
                      ),
                  )
                  dependentBuildType(
                      Maven(
                          "${name}-unit-tests-${java.version}-${scala.version}-${neo4j.version}",
                          "unit tests (${java.version}, ${scala.version}, ${neo4j.version})",
                          "test",
                          java,
                          scala,
                          neo4j,
                      ),
                  )
                  // Publish the packaged jars/zips produced by the packaging build.
                  dependentBuildType(
                      collectArtifacts(
                          packaging,
                      ),
                  )
                  parallel {
                    dependentBuildType(
                        JavaIntegrationTests(
                            "${name}-integration-tests-java-${java.version}-${scala.version}-${neo4j.version}",
                            "java integration tests (${java.version}, ${scala.version}, ${neo4j.version})",
                            java,
                            scala,
                            neo4j,
                        ) {},
                    )
                    // PySpark tests only run for flavours compatible with this
                    // java/scala pair, once per supported Python version.
                    pysparkVersions
                        .filter { it.shouldTestWith(java, scala) }
                        .forEach { pyspark ->
                          pyspark.pythonVersions.forEach { python ->
                            dependentBuildType(
                                PythonIntegrationTests(
                                    "${name}-integration-tests-pyspark-${java.version}-${scala.version}-${neo4j.version}-${python.version}-${pyspark.sparkVersion.version}",
                                    "pyspark integration tests (${java.version}, ${scala.version}, ${neo4j.version}, ${python.version}, ${pyspark.sparkVersion.version})",
                                    java,
                                    python,
                                    scala,
                                    pyspark.sparkVersion,
                                    neo4j,
                                ) {
                                  // Pull the connector jar packaged upstream
                                  // into the python test workspace.
                                  dependencies {
                                    artifacts(packaging) {
                                      artifactRules =
                                          """
                                          +:packages/*.jar => ./scripts/python
                                          """
                                              .trimIndent()
                                    }
                                  }
                                },
                            )
                          }
                        }
                  }
                }
              }
            }
            dependentBuildType(complete)
            // Only mainline (non-PR, non-compatibility) chains get a release step.
            if (!forPullRequests && !forCompatibility)
                dependentBuildType(Release("${name}-release", "release", DEFAULT_JAVA_VERSION))
          }
          // Common wiring applied to every build type in the chain: branch
          // selection, ECR login, disk-space guard, GitHub status/PR features.
          bts.buildTypes().forEach {
            it.thisVcs(if (forPullRequests) "pull/*" else DEFAULT_BRANCH)
            it.features {
              loginToECR()
              requireDiskSpace("5gb")
              if (!forCompatibility) enableCommitStatusPublisher()
              if (forPullRequests) enablePullRequests()
            }
            buildType(it)
          }
          // Slack notifications hang off the completion build only: PR chains
          // require approval for queued builds, mainline chains notify on
          // failure/success/hang instead.
          complete.features {
            notifications {
              branchFilter = buildString {
                appendLine("+:$DEFAULT_BRANCH")
                appendLine("+:refs/heads/$DEFAULT_BRANCH")
                if (forPullRequests) {
                  appendLine("+:pull/*")
                  appendLine("+:refs/heads/pull/*")
                }
              }
              queuedBuildRequiresApproval = forPullRequests
              buildFailedToStart = !forPullRequests
              buildFailed = !forPullRequests
              buildFinishedSuccessfully = !forPullRequests
              buildProbablyHanging = !forPullRequests
              notifierSettings = slackNotifier {
                connection = SLACK_CONNECTION_ID
                sendTo = SLACK_CHANNEL
                messageFormat = simpleMessageFormat()
              }
            }
          }
          // Allow callers to customise the completion build (e.g. add triggers).
          complete.apply(customizeCompletion)
        },
    )
================================================
FILE: .teamcity/builds/Common.kt
================================================
package builds
import builds.Neo4jSparkConnectorVcs.branchSpec
import jetbrains.buildServer.configs.kotlin.BuildFeatures
import jetbrains.buildServer.configs.kotlin.BuildSteps
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.CompoundStage
import jetbrains.buildServer.configs.kotlin.FailureAction
import jetbrains.buildServer.configs.kotlin.Requirements
import jetbrains.buildServer.configs.kotlin.ReuseBuilds
import jetbrains.buildServer.configs.kotlin.buildFeatures.PullRequests
import jetbrains.buildServer.configs.kotlin.buildFeatures.buildCache
import jetbrains.buildServer.configs.kotlin.buildFeatures.commitStatusPublisher
import jetbrains.buildServer.configs.kotlin.buildFeatures.dockerRegistryConnections
import jetbrains.buildServer.configs.kotlin.buildFeatures.freeDiskSpace
import jetbrains.buildServer.configs.kotlin.buildFeatures.pullRequests
import jetbrains.buildServer.configs.kotlin.buildSteps.DockerCommandStep
import jetbrains.buildServer.configs.kotlin.buildSteps.MavenBuildStep
import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
import jetbrains.buildServer.configs.kotlin.buildSteps.dockerCommand
import jetbrains.buildServer.configs.kotlin.buildSteps.maven
import jetbrains.buildServer.configs.kotlin.buildSteps.script
import jetbrains.buildServer.configs.kotlin.vcs.GitVcsRoot
// GitHub coordinates of the repository these build configurations operate on.
const val GITHUB_OWNER = "neo4j"
const val GITHUB_REPOSITORY = "neo4j-spark-connector"
// Branch that non-PR builds run against; also keys build caches and notifications.
const val DEFAULT_BRANCH = "5.0"
// Arguments shared by every Maven invocation: quiet batch output, a
// checkout-local repository (cacheable per build, see buildCache), and HTTP
// retry settings for flaky mirrors.
val MAVEN_DEFAULT_ARGS = buildString {
  append("--no-transfer-progress ")
  append("--batch-mode ")
  append("-Dmaven.repo.local=%teamcity.build.checkoutDir%/.m2/repository ")
  append("-Dmaven.wagon.http.retryHandler.class=standard ")
  append("-Dmaven.wagon.http.retryHandler.timeout=60 ")
  append("-Dmaven.wagon.http.retryHandler.count=3 ")
  append(
      "-Dmaven.wagon.http.retryHandler.nonRetryableClasses=java.io.InterruptedIOException,java.net.UnknownHostException,java.net.ConnectException ")
}
// Pinned semgrep scanner image used by the SemgrepCheck build.
const val SEMGREP_DOCKER_IMAGE = "semgrep/semgrep:1.146.0"
const val FULL_GITHUB_REPOSITORY = "$GITHUB_OWNER/$GITHUB_REPOSITORY"
const val GITHUB_URL = "https://github.com/$FULL_GITHUB_REPOSITORY"
// JDK used for release builds (see Build).
val DEFAULT_JAVA_VERSION = JavaVersion.V_11
// Look into Root Project's settings -> Connections
const val SLACK_CONNECTION_ID = "PROJECT_EXT_83"
const val SLACK_CHANNEL = "#team-connectors-feed"
// Look into Root Project's settings -> Connections
const val ECR_CONNECTION_ID = "PROJECT_EXT_124"
// Agent size classes, matched against the cloud agent name prefix (see runOnLinux).
enum class LinuxSize(val value: String) {
  SMALL("small"),
  LARGE("large")
}
// Supported JDKs and the Temurin docker image each build step runs inside.
enum class JavaVersion(val version: String, val dockerImage: String) {
  V_8(version = "8", dockerImage = "eclipse-temurin:8-jdk"),
  V_11(version = "11", dockerImage = "eclipse-temurin:11-jdk"),
  V_17(version = "17", dockerImage = "eclipse-temurin:17-jdk"),
  V_21(version = "21", dockerImage = "eclipse-temurin:21-jdk"),
}
// Scala binary versions the connector is built and tested against.
enum class ScalaVersion(val version: String) {
  V2_12(version = "2.12"),
  V2_13(version = "2.13"),
}
// Python interpreter versions exercised by the PySpark integration tests.
enum class PythonVersion(val version: String) {
  V3_9(version = "3.9"),
  V3_10(version = "3.10"),
  V3_11(version = "3.11"),
  V3_12(version = "3.12"),
  V3_13(version = "3.13"),
}
// Spark versions under test; `short` is the major version used in the
// packaged jar's `_for_spark_<short>` suffix (see PythonIntegrationTests).
enum class SparkVersion(val short: String, val version: String) {
  V3_4_4(short = "3", version = "3.4.4"),
  V3_5_5(short = "3", version = "3.5.5"),
}
// PySpark flavours under test: each pins a Spark/Scala pair plus the sets of
// Java and Python versions it is exercised with (see shouldTestWith).
enum class PySparkVersion(
    val sparkVersion: SparkVersion,
    val scalaVersion: ScalaVersion,
    val javaVersions: Set<JavaVersion>,
    val pythonVersions: Set<PythonVersion>,
) {
  V3_4(
      SparkVersion.V3_4_4,
      ScalaVersion.V2_12,
      setOf(
          JavaVersion.V_8,
          JavaVersion.V_11,
          JavaVersion.V_17,
      ),
      setOf(
          PythonVersion.V3_9,
          PythonVersion.V3_10,
          PythonVersion.V3_11,
          PythonVersion.V3_12,
      ),
  ),
  V3_5(
      SparkVersion.V3_5_5,
      ScalaVersion.V2_12,
      setOf(
          JavaVersion.V_8,
          JavaVersion.V_11,
          JavaVersion.V_17,
          JavaVersion.V_21,
      ),
      setOf(
          PythonVersion.V3_9,
          PythonVersion.V3_10,
          PythonVersion.V3_11,
          PythonVersion.V3_12,
          PythonVersion.V3_13,
      ),
  ),
}
/**
 * Whether this PySpark flavour should be exercised against the given
 * [javaVersion]/[scalaVersion] pair: the Scala version must match exactly
 * and the Java version must be one the flavour supports.
 */
fun PySparkVersion.shouldTestWith(javaVersion: JavaVersion, scalaVersion: ScalaVersion): Boolean {
  val javaSupported = javaVersion in this.javaVersions
  val scalaMatches = this.scalaVersion == scalaVersion
  return javaSupported && scalaMatches
}
// Neo4j server versions under test. V_NONE is the sentinel for "no Neo4j
// required" and suppresses the docker image pre-pull (see Maven /
// JavaIntegrationTests); *_DEV entries point at nightly images in the
// internal ECR registry.
enum class Neo4jVersion(val version: String, val dockerImage: String) {
  V_NONE("", ""),
  V_4_4("4.4", "neo4j:4.4-enterprise"),
  V_4_4_DEV(
      "4.4-dev",
      "535893049302.dkr.ecr.eu-west-1.amazonaws.com/build-service/neo4j:4.4-enterprise-debian-nightly",
  ),
  V_5("5", "neo4j:5-enterprise"),
  V_5_DEV(
      "5-dev",
      "535893049302.dkr.ecr.eu-west-1.amazonaws.com/build-service/neo4j:5-enterprise-debian-nightly-bundle",
  ),
  V_CALVER("2026", "neo4j:2026-enterprise"),
  V_CALVER_DEV(
      "2026-dev",
      "535893049302.dkr.ecr.eu-west-1.amazonaws.com/build-service/neo4j:2026-enterprise-debian-nightly-bundle",
  ),
}
/**
 * Returns every (s, t, y) combination of this iterable with [other1] and
 * [other2], in source order with the receiver varying slowest.
 */
fun <S, T, Y> Iterable<S>.cartesianProduct(
    other1: Collection<T>,
    other2: Collection<Y>
): Iterable<Triple<S, T, Y>> = buildList {
  for (first in this@cartesianProduct) {
    for (second in other1) {
      for (third in other2) {
        add(Triple(first, second, third))
      }
    }
  }
}
// Git VCS root for the connector repository: tracks all refs/heads/* branches,
// defaults to DEFAULT_BRANCH, and authenticates over SSH with the agent's
// default private key.
object Neo4jSparkConnectorVcs :
    GitVcsRoot(
        {
          id("Connectors_Neo4jSparkConnector_Build")
          name = "neo4j-spark-connector"
          url = "git@github.com:neo4j/neo4j-spark-connector.git"
          branch = "refs/heads/$DEFAULT_BRANCH"
          branchSpec = "refs/heads/*"
          authMethod = defaultPrivateKey { userName = "git" }
        },
    )
/** Requires a Linux cloud agent of the given [size] (small by default). */
fun Requirements.runOnLinux(size: LinuxSize = LinuxSize.SMALL) {
  val agentNamePrefix = "linux-${size.value}"
  startsWith("cloud.amazon.agent-name-prefix", agentNamePrefix)
}
// Attaches the connector VCS root to a build type, restricting it to the
// single branch spec [forBranch] (everything else excluded) and forcing
// clean checkouts.
fun BuildType.thisVcs(forBranch: String) = vcs {
  root(Neo4jSparkConnectorVcs)
  branchSpec = buildString {
    appendLine("-:*")
    appendLine("+:$forBranch")
  }
  cleanCheckout = true
}
// Publishes build statuses to GitHub commits, authenticating with a personal
// token stored as a TeamCity parameter.
fun BuildFeatures.enableCommitStatusPublisher() = commitStatusPublisher {
  vcsRootExtId = Neo4jSparkConnectorVcs.id.toString()
  publisher = github {
    githubUrl = "https://api.github.com"
    authType = personalToken { token = "%github-commit-status-token%" }
  }
}
// Enables GitHub pull-request monitoring: any author may trigger builds, but
// only PRs targeting the default branch are picked up.
fun BuildFeatures.enablePullRequests() = pullRequests {
  vcsRootExtId = Neo4jSparkConnectorVcs.id.toString()
  provider = github {
    authType = token { token = "%github-pull-request-token%" }
    filterAuthorRole = PullRequests.GitHubRoleFilter.EVERYBODY
    filterTargetBranch = buildString {
      appendLine("+:$DEFAULT_BRANCH")
      appendLine("+:refs/heads/$DEFAULT_BRANCH")
    }
  }
}
// Fails the build up-front unless the agent has at least [size] of free disk.
fun BuildFeatures.requireDiskSpace(size: String = "3gb") = freeDiskSpace {
  requiredSpace = size
  failBuild = true
}
// Logs the agent's docker daemon into the internal ECR registry (needed for
// the *_DEV nightly Neo4j images) and cleans up any images pushed during the build.
fun BuildFeatures.loginToECR() = dockerRegistryConnections {
  cleanupPushedImages = true
  loginToRegistry = on { dockerRegistryId = ECR_CONNECTION_ID }
}
// Caches the checkout-local Maven repository (populated via the
// -Dmaven.repo.local path in MAVEN_DEFAULT_ARGS), keyed by branch, Java and
// Scala version so incompatible artifacts do not mix across configurations.
fun BuildFeatures.buildCache(javaVersion: JavaVersion, scalaVersion: ScalaVersion) = buildCache {
  this.name =
      "neo4j-spark-connector-${DEFAULT_BRANCH}-${javaVersion.version}-${scalaVersion.version}"
  publish = true
  use = true
  publishOnlyChanged = true
  rules = ".m2/repository"
}
// Adds [bt] to a stage so it is cancelled when a dependency is cancelled and
// fails-to-start when one fails; previous builds are reused per [reuse]
// (successful builds by default).
fun CompoundStage.dependentBuildType(bt: BuildType, reuse: ReuseBuilds = ReuseBuilds.SUCCESSFUL) =
    buildType(bt) {
      onDependencyCancel = FailureAction.CANCEL
      onDependencyFailure = FailureAction.FAIL_TO_START
      reuseBuilds = reuse
    }
/**
 * Configures [buildType] to publish the packaged connector jars and zip
 * archives under a `packages/` artifact directory, returning it for chaining.
 */
fun collectArtifacts(buildType: BuildType): BuildType {
  val packageRules =
      """
      +:spark-3/target/*_for_spark_*.jar => packages
      +:spark-3/target/*.zip => packages
      """
          .trimIndent()
  buildType.artifactRules = packageRules
  return buildType
}
// Adds a Maven build step that runs inside the JDK docker image for
// [javaVersion], with the host docker socket mounted (so tests can start
// testcontainers), then applies [init] for per-step customisation.
fun BuildSteps.runMaven(javaVersion: JavaVersion, init: MavenBuildStep.() -> Unit): MavenBuildStep {
  val maven =
      this.maven {
        dockerImagePlatform = MavenBuildStep.ImagePlatform.Linux
        dockerImage = javaVersion.dockerImage
        dockerRunParameters = "--volume /var/run/docker.sock:/var/run/docker.sock"
        localRepoScope = MavenBuildStep.RepositoryScope.MAVEN_DEFAULT
      }
  init(maven)
  return maven
}
// Adds a step that rewrites the project POM versions to [version] via the
// Maven versions plugin, without creating backup POMs.
fun BuildSteps.setVersion(name: String, version: String, javaVersion: JavaVersion): MavenBuildStep {
  return this.runMaven(javaVersion) {
    this.name = name
    goals = "versions:set"
    runnerArgs =
        "$MAVEN_DEFAULT_ARGS -Djava.version=${javaVersion.version} -DnewVersion=$version -DgenerateBackupPoms=false"
  }
}
// Adds a script step that commits [includeFiles] with [commitMessage] and
// pushes; skipped when the configuration parameter named by
// [dryRunParameter] is set to "true".
fun BuildSteps.commitAndPush(
    name: String,
    commitMessage: String,
    includeFiles: String = "\\*pom.xml",
    dryRunParameter: String = "dry-run"
): ScriptBuildStep {
  return this.script {
    this.name = name
    scriptContent =
        """
        #!/bin/bash -eu
        git add $includeFiles
        git commit -m "$commitMessage"
        git push
        """
            .trimIndent()
    conditions { doesNotMatch(dryRunParameter, "true") }
  }
}
// Adds a step that pre-pulls the Neo4j docker image for [version], so the
// test run itself does not pay the download cost (or fail on a slow pull).
fun BuildSteps.pullImage(version: Neo4jVersion): DockerCommandStep =
    this.dockerCommand {
      name = "pull neo4j test image"
      commandType = other {
        subCommand = "image"
        commandArgs = "pull ${version.dockerImage}"
      }
    }
================================================
FILE: .teamcity/builds/Empty.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.toId
/**
 * A build configuration with no steps, used as a synchronisation point in a
 * build chain (e.g. the "<name>-complete" build that everything converges on).
 */
class Empty(id: String, name: String) :
    BuildType({
      this.id(id.toId())
      this.name = name
      requirements { runOnLinux(LinuxSize.SMALL) }
    })
================================================
FILE: .teamcity/builds/JavaIntegrationTests.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.toId
/**
 * Runs the Maven `verify` phase (integration tests, unit tests skipped) for
 * one java/scala/neo4j combination and archives the `diagnostics` directory
 * as a build artifact.
 */
class JavaIntegrationTests(
    id: String,
    name: String,
    javaVersion: JavaVersion,
    scalaVersion: ScalaVersion,
    neo4jVersion: Neo4jVersion,
    init: BuildType.() -> Unit
) :
    BuildType(
        {
          this.id(id.toId())
          this.name = name
          init()
          artifactRules =
              """
              +:diagnostics => diagnostics.zip
              """
                  .trimIndent()
          // Tests read the Neo4j image to run against from this env variable.
          params { text("env.NEO4J_TEST_IMAGE", neo4jVersion.dockerImage) }
          steps {
            // Pre-pull the image unless this combination runs without Neo4j.
            if (neo4jVersion != Neo4jVersion.V_NONE) {
              pullImage(neo4jVersion)
            }
            runMaven(javaVersion) {
              this.goals = "verify"
              this.runnerArgs =
                  "$MAVEN_DEFAULT_ARGS -Djava.version=${javaVersion.version} -Dscala-${scalaVersion.version} -DskipUnitTests"
            }
          }
          features { buildCache(javaVersion, scalaVersion) }
          requirements { runOnLinux(LinuxSize.LARGE) }
        },
    )
================================================
FILE: .teamcity/builds/Maven.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.toId
/**
 * Generic Maven build configuration: runs the given [goals] for one
 * java/scala combination, optionally against a Neo4j docker image
 * (pre-pulled when not V_NONE), with extra runner [args] appended.
 */
open class Maven(
    id: String,
    name: String,
    goals: String,
    javaVersion: JavaVersion,
    scalaVersion: ScalaVersion,
    neo4jVersion: Neo4jVersion = Neo4jVersion.V_NONE,
    args: String? = null
) :
    BuildType(
        {
          this.id(id.toId())
          this.name = name
          params {
            text("env.JAVA_VERSION", javaVersion.version)
            text("env.NEO4J_TEST_IMAGE", neo4jVersion.dockerImage)
          }
          steps {
            if (neo4jVersion != Neo4jVersion.V_NONE) {
              pullImage(neo4jVersion)
            }
            runMaven(javaVersion) {
              this.goals = goals
              this.runnerArgs =
                  "$MAVEN_DEFAULT_ARGS -Djava.version=${javaVersion.version} -Dscala-${scalaVersion.version} ${args ?: ""}"
            }
          }
          features { buildCache(javaVersion, scalaVersion) }
          requirements { runOnLinux(LinuxSize.SMALL) }
        },
    )
================================================
FILE: .teamcity/builds/PRCheck.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.buildFeatures.dockerSupport
import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
import jetbrains.buildServer.configs.kotlin.buildSteps.script
import jetbrains.buildServer.configs.kotlin.toId
/**
 * Runs the danger-based pull request checks (npm ci + `npx danger ci`)
 * inside a Node container, failing the build when danger reports errors.
 */
class PRCheck(id: String, name: String) :
    BuildType({
      this.id(id.toId())
      this.name = name
      steps {
        script {
          scriptContent =
              """
              #!/bin/bash
              set -eu
              export DANGER_GITHUB_API_TOKEN=%github-pull-request-token%
              export PULL_REQUEST_URL=https://github.com/$GITHUB_OWNER/$GITHUB_REPOSITORY/%teamcity.build.branch%
              # process pull request
              npm ci
              npx danger ci --verbose --failOnErrors
              """
                  .trimIndent()
          dockerImage = "node:18.4"
          dockerImagePlatform = ScriptBuildStep.ImagePlatform.Linux
        }
      }
      features { dockerSupport {} }
      requirements { runOnLinux(LinuxSize.SMALL) }
    })
================================================
FILE: .teamcity/builds/Package.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
import jetbrains.buildServer.configs.kotlin.buildSteps.script
import jetbrains.buildServer.configs.kotlin.toId
/**
 * Packages the connector for one scala version by invoking
 * `maven-release.sh package` inside the JDK docker image for [javaVersion],
 * with the host docker socket mounted.
 */
class Package(
    id: String,
    name: String,
    javaVersion: JavaVersion,
    scalaVersion: ScalaVersion,
) :
    BuildType({
      this.id(id.toId())
      this.name = name
      params { text("env.JAVA_VERSION", javaVersion.version) }
      steps {
        script {
          scriptContent =
              """
              ./maven-release.sh package ${scalaVersion.version}
              """
                  .trimIndent()
          dockerImagePlatform = ScriptBuildStep.ImagePlatform.Linux
          dockerImage = javaVersion.dockerImage
          dockerRunParameters = "--volume /var/run/docker.sock:/var/run/docker.sock"
        }
      }
      features { buildCache(javaVersion, scalaVersion) }
      requirements { runOnLinux(LinuxSize.SMALL) }
    })
================================================
FILE: .teamcity/builds/PythonIntegrationTests.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
import jetbrains.buildServer.configs.kotlin.buildSteps.script
import jetbrains.buildServer.configs.kotlin.toId
/**
 * End-to-end PySpark test build for one Java/Python/Scala/Spark/Neo4j version
 * combination. Builds the requested Python with pyenv, installs pyspark and
 * test dependencies, resolves the connector jar name from the Maven project
 * version, then runs `scripts/python/test_spark.py` against the Neo4j image.
 *
 * The host Docker socket is mounted so the test can start Neo4j in a
 * testcontainer; the `init` lambda lets callers add extra configuration.
 */
class PythonIntegrationTests(
    id: String,
    name: String,
    javaVersion: JavaVersion,
    pythonVersion: PythonVersion,
    scalaVersion: ScalaVersion,
    sparkVersion: SparkVersion,
    neo4jVersion: Neo4jVersion,
    init: BuildType.() -> Unit
) :
    BuildType(
        {
          this.id(id.toId())
          this.name = name
          init()
          // Keep diagnostics from the run as a downloadable artifact.
          artifactRules =
              """
              +:diagnostics => diagnostics.zip
              """
                  .trimIndent()
          params { text("env.NEO4J_TEST_IMAGE", neo4jVersion.dockerImage) }
          steps {
            if (neo4jVersion != Neo4jVersion.V_NONE) {
              // pullImage is a project helper (outside this file); presumably
              // pre-pulls the Neo4j docker image before the test — TODO confirm.
              pullImage(neo4jVersion)
            }
            script {
              // ${'$'} escapes a literal bash '$' inside this Kotlin template string;
              // plain ${...} interpolations are resolved at DSL-generation time.
              scriptContent =
                  """
                  #!/bin/bash -eu
                  apt-get update
                  apt-get install -o Acquire::Retries=10 --yes build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl git libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev
                  curl -fsSL https://pyenv.run | bash
                  export PYENV_ROOT="${'$'}HOME/.pyenv"
                  export PATH="${'$'}PYENV_ROOT/bin:${'$'}PATH"
                  eval "$(pyenv init - bash)"
                  pyenv install ${pythonVersion.version}
                  pyenv global ${pythonVersion.version}
                  python -m pip install --upgrade pip
                  pip install pyspark==${sparkVersion.version} "testcontainers[neo4j]" six tzlocal==2.1
                  project_version="$(./mvnw help:evaluate -Dexpression="project.version" --quiet -DforceStdout)"
                  jar_name="neo4j-connector-apache-spark_${scalaVersion.version}-${'$'}{project_version}_for_spark_${sparkVersion.short}.jar"
                  cd ./scripts/python
                  python test_spark.py "${'$'}{jar_name}" "${neo4jVersion.dockerImage}"
                  """
                      .trimIndent()
              dockerImagePlatform = ScriptBuildStep.ImagePlatform.Linux
              dockerImage = javaVersion.dockerImage
              dockerRunParameters = "--volume /var/run/docker.sock:/var/run/docker.sock"
            }
          }
          requirements { runOnLinux(LinuxSize.SMALL) }
        },
    )
================================================
FILE: .teamcity/builds/Release.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.AbsoluteId
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.ParameterDisplay
import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
import jetbrains.buildServer.configs.kotlin.buildSteps.script
import jetbrains.buildServer.configs.kotlin.toId
// Name of the build parameter that controls whether this is a dry run.
private const val DRY_RUN = "dry-run"

/**
 * Manually-triggered release build. Steps, in order: set the release version
 * and push it, run JReleaser (assemble + full-release), then set the next
 * snapshot version and push again.
 *
 * With "Dry run?" checked (the default) artifacts are only uploaded to Maven
 * Central staging (`JRELEASER_MAVENCENTRAL_STAGE=UPLOAD`) and the Slack
 * announcement stays disabled; the commit/push steps also receive the dry-run
 * flag via `dryRunParameter`.
 */
class Release(id: String, name: String, javaVersion: JavaVersion) :
    BuildType(
        {
          this.id(id.toId())
          this.name = name
          // Provides the GPG signing key material mounted below as /root/.gnupg.
          templates(AbsoluteId("FetchSigningKey"))
          params {
            // Both versions are prompted for when triggering the build.
            text(
                "releaseVersion",
                "",
                label = "Version to release",
                display = ParameterDisplay.PROMPT,
                allowEmpty = false,
            )
            text(
                "nextSnapshotVersion",
                "",
                label = "Next snapshot version",
                description = "Next snapshot version to set after release",
                display = ParameterDisplay.PROMPT,
                allowEmpty = false,
            )
            // Defaults to true so an accidental trigger publishes nothing.
            checkbox(
                DRY_RUN,
                "true",
                "Dry run?",
                description =
                    "Whether to perform a dry run where nothing is published and released",
                display = ParameterDisplay.PROMPT,
                checked = "true",
                unchecked = "false",
            )
            // Credentials and settings consumed by JReleaser through JRELEASER_* env vars.
            password("env.JRELEASER_GITHUB_TOKEN", "%github-pull-request-token%")
            text("env.JRELEASER_DRY_RUN", "%$DRY_RUN%")
            text("env.JRELEASER_PROJECT_VERSION", "%releaseVersion%")
            // Slack announcing is off by default; the script flips it on for real releases.
            text("env.JRELEASER_ANNOUNCE_SLACK_ACTIVE", "NEVER")
            text("env.JRELEASER_ANNOUNCE_SLACK_TOKEN", "%slack-token%")
            text("env.JRELEASER_ANNOUNCE_SLACK_WEBHOOK", "%slack-webhook%")
            password("env.JRELEASER_GPG_PASSPHRASE", "%signing-key-passphrase%")
            text("env.JRELEASER_MAVENCENTRAL_USERNAME", "%publish-username%")
            password("env.JRELEASER_MAVENCENTRAL_TOKEN", "%publish-password%")
          }
          steps {
            // setVersion / commitAndPush are project helpers (outside this file).
            setVersion("Set release version", "%releaseVersion%", javaVersion)
            commitAndPush(
                "Push release version",
                "build: release version %releaseVersion%",
                dryRunParameter = DRY_RUN,
            )
            script {
              // NOTE(review): "%dry-run%" below hard-codes the value of the DRY_RUN
              // constant; keep them in sync if the parameter is ever renamed.
              scriptContent =
                  """
                  #!/bin/bash
                  set -eux
                  apt-get update
                  apt-get install -o Acquire::Retries=10 --yes build-essential curl git unzip zip
                  # Get the jreleaser downloader
                  curl -sL https://raw.githubusercontent.com/jreleaser/release-action/refs/tags/2.5.0/get_jreleaser.java > get_jreleaser.java
                  # Download JReleaser
                  java get_jreleaser.java 1.22.0
                  if [ "%dry-run%" = "true" ]; then
                    echo "we are on a dry run, only performing upload to maven central"
                    export JRELEASER_MAVENCENTRAL_STAGE=UPLOAD
                    export JRELEASER_ANNOUNCE_SLACK_ACTIVE=NEVER
                  else
                    echo "we will do a full deploy to maven central"
                    export JRELEASER_MAVENCENTRAL_STAGE=FULL
                    export JRELEASER_ANNOUNCE_SLACK_ACTIVE=ALWAYS
                  fi
                  # Execute JReleaser
                  java -jar jreleaser-cli.jar assemble
                  java -jar jreleaser-cli.jar full-release --debug
                  """
                      .trimIndent()
              dockerImagePlatform = ScriptBuildStep.ImagePlatform.Linux
              dockerImage = javaVersion.dockerImage
              // Docker socket for tooling; GPG home populated by FetchSigningKey.
              dockerRunParameters =
                  "--volume /var/run/docker.sock:/var/run/docker.sock --volume %teamcity.build.checkoutDir%/signingkeysandbox:/root/.gnupg"
            }
            setVersion("Set next snapshot version", "%nextSnapshotVersion%", javaVersion)
            commitAndPush(
                "Push next snapshot version",
                "build: update version to %nextSnapshotVersion%",
                dryRunParameter = DRY_RUN,
            )
          }
          // Keep released artifacts and the JReleaser output for inspection.
          artifactRules =
              """
              +:artifacts => artifacts
              +:out/jreleaser => jreleaser
              """
                  .trimIndent()
          dependencies {
            // Release helper tool (rt.jar) produced by the Tools_ReleaseTool build.
            artifacts(AbsoluteId("Tools_ReleaseTool")) {
              buildRule = lastSuccessful()
              artifactRules = "rt.jar => lib"
            }
          }
          requirements { runOnLinux(LinuxSize.SMALL) }
        },
    )
================================================
FILE: .teamcity/builds/SemgrepCheck.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep
/**
 * Semgrep static-analysis build. Extends the Maven base build to first produce
 * the dependency tree (`dependency:tree` into maven_dep_tree.txt), then runs
 * `semgrep ci` in the Semgrep container as an additional step.
 */
class SemgrepCheck(id: String, name: String, scalaVersion: ScalaVersion) :
    Maven(
        id,
        name,
        "dependency:tree",
        JavaVersion.V_17,
        scalaVersion,
        Neo4jVersion.V_NONE,
        "-DoutputFile=maven_dep_tree.txt") {
  init {
    // Repository/branch/commit/job metadata so Semgrep links findings back to the PR.
    params {
      password("env.SEMGREP_APP_TOKEN", "%semgrep-app-token%")
      text("env.SEMGREP_REPO_NAME", FULL_GITHUB_REPOSITORY)
      text("env.SEMGREP_REPO_URL", GITHUB_URL)
      text("env.SEMGREP_BRANCH", "%teamcity.build.branch%")
      text("env.SEMGREP_JOB_URL", "%env.BUILD_URL%")
      text("env.SEMGREP_COMMIT", "%env.BUILD_VCS_NUMBER%")
    }
    // Append the scan step after the Maven step contributed by the parent.
    steps {
      step(
          ScriptBuildStep {
            scriptContent = "semgrep ci --no-git-ignore"
            dockerImagePlatform = ScriptBuildStep.ImagePlatform.Linux
            dockerImage = SEMGREP_DOCKER_IMAGE
            dockerRunParameters =
                "--volume /var/run/docker.sock:/var/run/docker.sock --volume %teamcity.build.checkoutDir%/signingkeysandbox:/root/.gnupg"
          })
    }
  }
}
================================================
FILE: .teamcity/builds/WhiteListCheck.kt
================================================
package builds
import jetbrains.buildServer.configs.kotlin.AbsoluteId
import jetbrains.buildServer.configs.kotlin.BuildType
import jetbrains.buildServer.configs.kotlin.buildSteps.script
import jetbrains.buildServer.configs.kotlin.toId
/**
 * CLA check for pull requests: verifies every committer on the PR appears in
 * the CLA database shipped with the whitelist-check tool. Branches raised by
 * dependabot are exempted and exit successfully without checking.
 */
class WhiteListCheck(id: String, name: String) :
    BuildType({
      this.id(id.toId())
      this.name = name
      dependencies {
        // Unpack the whitelist-check tool (binaries + cla-database.csv) from the
        // latest successful Tools_WhitelistCheck build.
        artifacts(AbsoluteId("Tools_WhitelistCheck")) {
          buildRule = lastSuccessful()
          cleanDestination = true
          artifactRules = "whitelist-check.tar.gz!** => whitelist-check/"
        }
      }
      steps {
        script {
          // ${'$'} escapes a literal bash '$'; GITHUB_OWNER / GITHUB_REPOSITORY are
          // Kotlin constants interpolated when the DSL is generated (defined in
          // Common.kt, outside this file).
          scriptContent =
              """
              #!/bin/bash -eu
              BRANCH=%teamcity.pullRequest.source.branch%
              if [[ "${'$'}BRANCH" =~ dependabot/.* ]]; then
                echo "Raised by dependabot, skipping the white list check"
                exit 0
              fi
              echo "Checking committers on PR %teamcity.build.branch%"
              TOKEN="%github-pull-request-token%"
              # process pull request
              ./whitelist-check/bin/examine-pull-request $GITHUB_OWNER $GITHUB_REPOSITORY "${'$'}{TOKEN}" %teamcity.build.branch% whitelist-check/cla-database.csv
              """
                  .trimIndent()
          // Surface anything the tool writes to stderr as a build error.
          formatStderrAsError = true
        }
      }
      requirements { runOnLinux(LinuxSize.SMALL) }
    })
================================================
FILE: .teamcity/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the TeamCity Kotlin DSL in .teamcity/. Compiles the DSL
     (kotlin-maven-plugin), generates/validates TeamCity configs
     (teamcity-configs-maven-plugin), and enforces formatting (sortpom, spotless/ktfmt). -->
<project>
    <modelVersion>4.0.0</modelVersion>
    <!-- Parent supplied by the TeamCity server; defines kotlin.version and
         teamcity.dsl.version used below. -->
    <parent>
        <groupId>org.jetbrains.teamcity</groupId>
        <artifactId>configs-dsl-kotlin-parent</artifactId>
        <version>1.0-SNAPSHOT</version>
        <relativePath/>
    </parent>
    <groupId>Connectors_Neo4jSparkConnector</groupId>
    <artifactId>teamcity-pipeline</artifactId>
    <version>1.0-SNAPSHOT</version>
    <name>Connectors_Neo4jSparkConnector Config DSL Script</name>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <sortpom-maven-plugin.version>4.0.0</sortpom-maven-plugin.version>
        <spotless-maven-plugin.version>2.40.0</spotless-maven-plugin.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.jetbrains.kotlin</groupId>
            <artifactId>kotlin-script-runtime</artifactId>
            <version>${kotlin.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.jetbrains.kotlin</groupId>
            <artifactId>kotlin-stdlib-jdk8</artifactId>
            <version>${kotlin.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.jetbrains.teamcity</groupId>
            <artifactId>configs-dsl-kotlin-latest</artifactId>
            <version>${teamcity.dsl.version}</version>
            <scope>compile</scope>
        </dependency>
        <!-- Plugin DSL extensions resolved from the TeamCity server repository below. -->
        <dependency>
            <groupId>org.jetbrains.teamcity</groupId>
            <artifactId>configs-dsl-kotlin-plugins-latest</artifactId>
            <version>1.0-SNAPSHOT</version>
            <type>pom</type>
            <scope>compile</scope>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
            <id>jetbrains-all</id>
            <url>https://download.jetbrains.com/teamcity-repository</url>
        </repository>
        <!-- DSL plugin artifacts served by the project's own TeamCity server. -->
        <repository>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
            <id>teamcity-server</id>
            <url>https://live.neo4j-build.io/app/dsl-plugins-repository</url>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>JetBrains</id>
            <url>https://download.jetbrains.com/teamcity-repository</url>
        </pluginRepository>
    </pluginRepositories>
    <build>
        <plugins>
            <plugin>
                <groupId>org.jetbrains.kotlin</groupId>
                <artifactId>kotlin-maven-plugin</artifactId>
                <version>${kotlin.version}</version>
                <configuration/>
                <executions>
                    <execution>
                        <id>compile</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <phase>process-sources</phase>
                    </execution>
                    <execution>
                        <id>test-compile</id>
                        <goals>
                            <goal>test-compile</goal>
                        </goals>
                        <phase>process-test-sources</phase>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.jetbrains.teamcity</groupId>
                <artifactId>teamcity-configs-maven-plugin</artifactId>
                <version>${teamcity.dsl.version}</version>
                <configuration>
                    <format>kotlin</format>
                    <dstDir>target/generated-configs</dstDir>
                </configuration>
            </plugin>
            <!-- Keeps this pom sorted; verifyFail=STOP fails the build on violations. -->
            <plugin>
                <groupId>com.github.ekryd.sortpom</groupId>
                <artifactId>sortpom-maven-plugin</artifactId>
                <version>${sortpom-maven-plugin.version}</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                    <keepBlankLines>false</keepBlankLines>
                    <indentAttribute>schemaLocation</indentAttribute>
                    <nrOfIndentSpace>4</nrOfIndentSpace>
                    <sortProperties>true</sortProperties>
                    <sortDependencies>scope,groupId,artifactId</sortDependencies>
                    <createBackupFile>false</createBackupFile>
                    <expandEmptyElements>false</expandEmptyElements>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>verify</goal>
                        </goals>
                        <phase>validate</phase>
                        <configuration>
                            <verifyFail>STOP</verifyFail>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Checks Kotlin formatting with ktfmt during compile. -->
            <plugin>
                <groupId>com.diffplug.spotless</groupId>
                <artifactId>spotless-maven-plugin</artifactId>
                <version>${spotless-maven-plugin.version}</version>
                <configuration>
                    <kotlin>
                        <!-- These are the defaults, you can override if you want -->
                        <includes>
                            <include>**/*.kt</include>
                            <include>**/*.kts</include>
                        </includes>
                        <ktfmt>
                            <version>0.46</version>
                            <style>DEFAULT</style>
                            <blockIndent>2</blockIndent>
                            <continuationIndent>4</continuationIndent>
                            <removeUnusedImport>true</removeUnusedImport>
                            <maxWidth>100</maxWidth>
                        </ktfmt>
                    </kotlin>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>check</goal>
                        </goals>
                        <phase>compile</phase>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <!-- DSL sources (settings.kts, builds/) live directly in .teamcity/. -->
        <sourceDirectory>${basedir}</sourceDirectory>
    </build>
</project>
================================================
FILE: .teamcity/settings.kts
================================================
import builds.Build
import builds.DEFAULT_BRANCH
import builds.JavaVersion
import builds.Neo4jSparkConnectorVcs
import builds.Neo4jVersion
import builds.PySparkVersion
import builds.ScalaVersion
import jetbrains.buildServer.configs.kotlin.Project
import jetbrains.buildServer.configs.kotlin.failureConditions.BuildFailureOnText
import jetbrains.buildServer.configs.kotlin.failureConditions.failOnText
import jetbrains.buildServer.configs.kotlin.project
import jetbrains.buildServer.configs.kotlin.triggers.schedule
import jetbrains.buildServer.configs.kotlin.triggers.vcs
import jetbrains.buildServer.configs.kotlin.version
// TeamCity DSL version this configuration targets.
version = "2025.11"

project {
  params {
    // Project-level parameters; %...% references resolve against credentials
    // configured on the TeamCity server / parent project.
    text("default-spark-branch", DEFAULT_BRANCH)
    text("osssonatypeorg-username", "%publish-username%")
    password("osssonatypeorg-password", "%publish-password%")
    password("signing-key-passphrase", "%publish-signing-key-password%")
    password("github-commit-status-token", "%github-token%")
    password("github-pull-request-token", "%github-token%")
    password("semgrep-app-token", "%semgrep-token%")
  }
  vcsRoot(Neo4jSparkConnectorVcs)
  // "main" pipeline: full version matrix, triggered by pushes to the default
  // branch. Version-bump commits made by the Release build are excluded from
  // triggering to avoid a release/build loop.
  subProject(
      Build(
          name = "main",
          javaVersions =
              setOf(JavaVersion.V_8, JavaVersion.V_11, JavaVersion.V_17, JavaVersion.V_21),
          scalaVersions = setOf(ScalaVersion.V2_12, ScalaVersion.V2_13),
          pysparkVersions = setOf(PySparkVersion.V3_4, PySparkVersion.V3_5),
          neo4jVersions = setOf(Neo4jVersion.V_4_4, Neo4jVersion.V_5, Neo4jVersion.V_CALVER),
          forPullRequests = false,
      ) {
        triggers {
          vcs {
            this.branchFilter = buildString {
              appendLine("+:$DEFAULT_BRANCH")
              appendLine("+:refs/heads/$DEFAULT_BRANCH")
            }
            this.triggerRules =
                """
                -:comment=^build.*release version.*:**
                -:comment=^build.*update version.*:**
                """
                    .trimIndent()
          }
        }
      },
  )
  // "pull-request" pipeline: reduced matrix, triggered by PR branches.
  subProject(
      Build(
          name = "pull-request",
          javaVersions = setOf(JavaVersion.V_8, JavaVersion.V_11, JavaVersion.V_17),
          scalaVersions = setOf(ScalaVersion.V2_12, ScalaVersion.V2_13),
          pysparkVersions = setOf(PySparkVersion.V3_5),
          neo4jVersions = setOf(Neo4jVersion.V_4_4, Neo4jVersion.V_5, Neo4jVersion.V_CALVER),
          forPullRequests = true,
      ) {
        triggers {
          vcs {
            this.branchFilter = buildString {
              appendLine("+:pull/*")
              appendLine("+:refs/heads/pull/*")
            }
          }
        }
        // When a PR gets closed, TeamCity falls back to the main branch to run
        // the pipeline, which we don't want: fail fast on the tell-tale log text.
        failureConditions {
          failOnText {
            conditionType = BuildFailureOnText.ConditionType.CONTAINS
            pattern = "which does not correspond to any branch monitored by the build VCS roots"
            failureMessage = "Error: The branch %teamcity.build.branch% does not exist"
            reverse = false
            stopBuildOnFailure = true
          }
        }
      },
  )
  // "compatibility": one sub-project per Neo4j version, run daily at 07:00 on
  // the default branch with clean checkout, even when there are no pending changes.
  subProject(
      Project {
        this.id("compatibility")
        name = "compatibility"
        Neo4jVersion.entries.minus(Neo4jVersion.V_NONE).forEach { neo4j ->
          subProject(
              Build(
                  name = neo4j.version,
                  javaVersions =
                      setOf(JavaVersion.V_8, JavaVersion.V_11, JavaVersion.V_17, JavaVersion.V_21),
                  scalaVersions = setOf(ScalaVersion.V2_12, ScalaVersion.V2_13),
                  pysparkVersions = setOf(PySparkVersion.V3_4, PySparkVersion.V3_5),
                  neo4jVersions = setOf(neo4j),
                  forPullRequests = false,
                  forCompatibility = true,
              ) {
                triggers {
                  // No VCS trigger for compatibility runs; schedule only.
                  vcs { enabled = false }
                  schedule {
                    branchFilter = buildString {
                      appendLine("+:$DEFAULT_BRANCH")
                      appendLine("+:refs/heads/$DEFAULT_BRANCH")
                    }
                    schedulingPolicy = daily {
                      hour = 7
                      minute = 0
                    }
                    triggerBuild = always()
                    withPendingChangesOnly = false
                    enforceCleanCheckout = true
                    enforceCleanCheckoutForDependencies = true
                  }
                }
              },
          )
        }
      },
  )
}
================================================
FILE: LICENSE.txt
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# Neo4j Connector for Apache Spark
This repository contains the Neo4j Connector for Apache Spark.
## License
The Neo4j Connector for Apache Spark is licensed under the Apache License, Version 2.0.
## Documentation
The documentation for the Neo4j Connector for Apache Spark lives in the https://github.com/neo4j/docs-spark repository.
## Building for Spark 3
You can build for Spark 3.x with both Scala 2.12 and Scala 2.13
```
./maven-release.sh package 2.12
./maven-release.sh package 2.13
```
These commands will generate the corresponding targets
* `spark-3/target/neo4j-connector-apache-spark_2.12-<version>_for_spark_3.jar`
* `spark-3/target/neo4j-connector-apache-spark_2.13-<version>_for_spark_3.jar`
## Integration with Apache Spark Applications
**spark-shell, pyspark, or spark-submit**
`$SPARK_HOME/bin/spark-shell --jars neo4j-connector-apache-spark_2.12-<version>_for_spark_3.jar`
`$SPARK_HOME/bin/spark-shell --packages org.neo4j:neo4j-connector-apache-spark_2.12:<version>_for_spark_3`
**sbt**
If you use the [sbt-spark-package plugin](https://github.com/databricks/sbt-spark-package), in your sbt build file, add:
```scala
resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
libraryDependencies += "org.neo4j" % "neo4j-connector-apache-spark_2.12" % "<version>_for_spark_3"
```
**maven**
In your pom.xml, add:
```xml
<dependencies>
<!-- list of dependencies -->
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-connector-apache-spark_2.12</artifactId>
<version>[version]_for_spark_3</version>
</dependency>
</dependencies>
```
For more information about the available versions, visit https://neo4j.com/developer/spark/overview/#_compatibility
================================================
FILE: common/LICENSES.txt
================================================
This file contains the full license text of the included third party
libraries. For an overview of the licenses see the NOTICE.txt file.
------------------------------------------------------------------------------
Apache Software License, Version 2.0
IntelliJ IDEA Annotations
Kotlin Stdlib
Netty/Buffer
Netty/Codec
Netty/Common
Netty/Handler
Netty/Resolver
Netty/TomcatNative [OpenSSL - Classes]
Netty/Transport
Netty/Transport/Native/Unix/Common
Non-Blocking Reactive Foundation for the JVM
org.apiguardian:apiguardian-api
------------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
------------------------------------------------------------------------------
MIT License
SLF4J API Module
------------------------------------------------------------------------------
The MIT License
Copyright (c) <year> <copyright holders>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
------------------------------------------------------------------------------
MIT No Attribution License
reactive-streams
------------------------------------------------------------------------------
MIT No Attribution
Copyright <year> <copyright holders>
Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: common/NOTICE.txt
================================================
Copyright (c) "Neo4j"
Neo4j Sweden AB [https://neo4j.com]
This file is part of Neo4j.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Full license texts are found in LICENSES.txt.
Third-party licenses
--------------------
Apache Software License, Version 2.0
IntelliJ IDEA Annotations
Kotlin Stdlib
Netty/Buffer
Netty/Codec
Netty/Common
Netty/Handler
Netty/Resolver
Netty/TomcatNative [OpenSSL - Classes]
Netty/Transport
Netty/Transport/Native/Unix/Common
Non-Blocking Reactive Foundation for the JVM
org.apiguardian:apiguardian-api
MIT License
SLF4J API Module
MIT No Attribution License
reactive-streams
================================================
FILE: common/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-connector-apache-spark_parent</artifactId>
<version>5.4.3-SNAPSHOT</version>
</parent>
<artifactId>neo4j-connector-apache-spark_common</artifactId>
<packaging>jar</packaging>
<name>neo4j-connector-apache-spark-common</name>
<description>Common Services for Neo4j Connector for Apache Spark using the binary Bolt Driver</description>
<dependencies>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>caniuse-core</artifactId>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>caniuse-neo4j-detection</artifactId>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-cypher-dsl</artifactId>
</dependency>
<dependency>
<groupId>org.neo4j.connectors</groupId>
<artifactId>commons-authn-spi</artifactId>
</dependency>
<dependency>
<groupId>org.neo4j.connectors</groupId>
<artifactId>commons-reauth-driver</artifactId>
</dependency>
<dependency>
<groupId>org.neo4j.driver</groupId>
<artifactId>neo4j-java-driver-slim</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.neo4j.connectors</groupId>
<artifactId>commons-authn-provided</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-connector-apache-spark_test-support</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatestplus</groupId>
<artifactId>junit-4-13_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>pl.pragmatists</groupId>
<artifactId>JUnitParams</artifactId>
<version>1.1.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<filtering>true</filtering>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
================================================
FILE: common/src/main/java/org/neo4j/spark/util/ReflectionUtils.java
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.util;
import org.apache.spark.sql.connector.expressions.Expression;
import org.apache.spark.sql.connector.expressions.NamedReference;
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.Optional;
import java.util.stream.Stream;
/**
 * Reflection shim over Spark's {@code Aggregation} group-by accessor, whose name and
 * return type differ across Spark releases: some versions expose
 * {@code groupByColumns(): NamedReference[]} while others expose
 * {@code groupByExpressions(): Expression[]}. Both are resolved once, up front, via
 * method handles, and {@link #groupByCols(Aggregation)} invokes whichever is available.
 */
public class ReflectionUtils {

    private static final MethodHandles.Lookup LOOKUP = MethodHandles.lookup();

    /** Shared empty result used when no accessor is available or the invocation fails. */
    private static final Expression[] NO_GROUPING = new Expression[0];

    /**
     * Resolves the {@code groupByColumns()} accessor, adapting its return type so the
     * handle has the same {@code (Aggregation) -> Expression[]} shape as the other
     * accessor (required for {@code invokeExact}).
     */
    private static Optional<MethodHandle> resolveGroupByColumns() {
        try {
            MethodHandle raw = LOOKUP.findVirtual(
                    Aggregation.class, "groupByColumns", MethodType.methodType(NamedReference[].class));
            return Optional.of(raw.asType(MethodType.methodType(Expression[].class, Aggregation.class)));
        } catch (Exception e) {
            // Accessor absent on this Spark version.
            return Optional.empty();
        }
    }

    /** Resolves the {@code groupByExpressions()} accessor, if this Spark version has it. */
    private static Optional<MethodHandle> resolveGroupByExpressions() {
        try {
            return Optional.of(LOOKUP.findVirtual(
                    Aggregation.class, "groupByExpressions", MethodType.methodType(Expression[].class)));
        } catch (Exception e) {
            return Optional.empty();
        }
    }

    private static final Optional<MethodHandle> GROUP_BY_COLUMNS = resolveGroupByColumns();
    private static final Optional<MethodHandle> GROUP_BY_EXPRESSIONS = resolveGroupByExpressions();

    /**
     * Returns the grouping expressions of the given aggregation, preferring the
     * {@code groupByExpressions} accessor and falling back to {@code groupByColumns}.
     * Returns an empty array when neither accessor exists or the invocation fails.
     */
    public static Expression[] groupByCols(Aggregation agg) {
        Optional<MethodHandle> accessor =
                GROUP_BY_EXPRESSIONS.isPresent() ? GROUP_BY_EXPRESSIONS : GROUP_BY_COLUMNS;
        if (!accessor.isPresent()) {
            return NO_GROUPING;
        }
        try {
            return (Expression[]) accessor.get().invokeExact(agg);
        } catch (Throwable t) {
            return NO_GROUPING;
        }
    }
}
================================================
FILE: common/src/main/resources/neo4j-spark-connector.properties
================================================
version=${project.version}
================================================
FILE: common/src/main/scala/org/neo4j/spark/config/TopN.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.config
import org.apache.spark.sql.connector.expressions.SortOrder
/**
 * Pairs a row `limit` with the sort `orders` it applies under; an empty `orders`
 * means a plain limit with no explicit ordering.
 */
case class TopN(limit: Long, orders: Array[SortOrder] = Array.empty)
================================================
FILE: common/src/main/scala/org/neo4j/spark/converter/DataConverter.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.converter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.util.MapData
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.neo4j.driver.Value
import org.neo4j.driver.Values
import org.neo4j.driver.internal._
import org.neo4j.driver.types.IsoDuration
import org.neo4j.driver.types.Node
import org.neo4j.driver.types.Relationship
import org.neo4j.spark.service.SchemaService
import org.neo4j.spark.util.Neo4jOptions
import org.neo4j.spark.util.Neo4jUtil
import java.time._
import java.time.format.DateTimeFormatter
import scala.annotation.tailrec
import scala.collection.JavaConverters._
/**
 * Base contract for converting values into a target representation `T`.
 */
trait DataConverter[T] {

  /** Converts `value`; `dataType` (may be null) describes the expected Spark type. */
  def convert(value: Any, dataType: DataType = null): T

  /**
   * Unwraps nested map/array types until a [[StructType]] is reached.
   *
   * @throws UnsupportedOperationException when no struct type can be found
   */
  @tailrec
  final private[converter] def extractStructType(dataType: DataType): StructType = dataType match {
    case asStruct: StructType => asStruct
    case asMap: MapType       => extractStructType(asMap.valueType)
    case asArray: ArrayType   => extractStructType(asArray.elementType)
    case _                    => throw new UnsupportedOperationException(s"$dataType not supported")
  }
}
/**
 * Factory and interval-conversion helpers for [[SparkToNeo4jDataConverter]].
 */
object SparkToNeo4jDataConverter {

  /** Builds a converter bound to the given connector options. */
  def apply(options: Neo4jOptions): SparkToNeo4jDataConverter = new SparkToNeo4jDataConverter(options)

  /**
   * Turns a Catalyst day-time interval (a microsecond count) into a Neo4j
   * duration split into whole days, seconds and nanoseconds.
   */
  private def dayTimeMicrosToNeo4jDuration(micros: Long): Value = {
    val microsPerSecond = 1000000L
    val microsPerDay = 24L * 3600L * microsPerSecond
    // floorDiv/floorMod keep the split consistent for negative intervals as well.
    val wholeDays = Math.floorDiv(micros, microsPerDay)
    val microsWithinDay = Math.floorMod(micros, microsPerDay)
    val wholeSeconds = Math.floorDiv(microsWithinDay, microsPerSecond)
    val nanos = Math.floorMod(microsWithinDay, microsPerSecond) * 1000
    Values.isoDuration(0L, wholeDays, wholeSeconds, nanos.toInt)
  }

  // while Neo4j supports years, this driver version's API does not expose it.
  /** Turns a Catalyst year-month interval (a month count) into a months-only Neo4j duration. */
  private def yearMonthIntervalToNeo4jDuration(months: Int): Value =
    Values.isoDuration(months.toLong, 0L, 0L, 0)
}
/**
 * Converts Spark values (external JVM types as well as Catalyst internal
 * representations such as UnsafeRow, ArrayData and MapData) into Neo4j driver
 * [[Value]]s. Several temporal conversions branch on
 * `options.legacyTypeConversionEnabled`.
 */
class SparkToNeo4jDataConverter(options: Neo4jOptions) extends DataConverter[Value] {

  override def convert(value: Any, dataType: DataType): Value = {
    value match {
      case date: java.sql.Date => convert(date.toLocalDate, dataType)
      case timestamp: java.sql.Timestamp =>
        if (options.legacyTypeConversionEnabled) {
          // Legacy mode: store as a zone-less local date-time.
          convert(timestamp.toLocalDateTime, dataType)
        } else {
          // Otherwise keep the instant, pinned to UTC.
          convert(timestamp.toInstant.atZone(ZoneOffset.UTC), dataType)
        }
      // Catalyst encodes DateType as days since the epoch (Int).
      case intValue: Int if dataType == DataTypes.DateType =>
        convert(
          DateTimeUtils
            .toJavaDate(intValue),
          dataType
        )
      // Catalyst encodes year-month intervals as a month count (Int).
      case intValue: Int if dataType.isInstanceOf[YearMonthIntervalType] && !options.legacyTypeConversionEnabled =>
        SparkToNeo4jDataConverter.yearMonthIntervalToNeo4jDuration(intValue)
      // Catalyst encodes TimestampType as microseconds since the epoch (Long).
      case longValue: Long if dataType == DataTypes.TimestampType =>
        convert(DateTimeUtils.toJavaTimestamp(longValue), dataType)
      case longValue: Long if dataType == DataTypes.TimestampNTZType && !options.legacyTypeConversionEnabled =>
        convert(DateTimeUtils.microsToLocalDateTime(longValue), dataType)
      // Catalyst encodes day-time intervals as microseconds (Long).
      case longValue: Long if dataType.isInstanceOf[DayTimeIntervalType] && !options.legacyTypeConversionEnabled =>
        SparkToNeo4jDataConverter.dayTimeMicrosToNeo4jDuration(longValue)
      // UnsafeRow carries no schema of its own: materialise it against the expected
      // struct type, then recurse through the GenericRow branch below.
      case unsafeRow: UnsafeRow => {
        val structType = extractStructType(dataType)
        val row = new GenericRowWithSchema(unsafeRow.toSeq(structType).toArray, structType)
        convert(row)
      }
      case struct: GenericRow => {
        // Fallback: serialise the whole row as a string-keyed map value.
        def toMap(struct: GenericRow): Value = {
          Values.value(
            struct.schema.fields.map(f => f.name -> convert(struct.getAs(f.name), f.dataType)).toMap.asJava
          )
        }
        try {
          // Structs produced by this connector carry a "type" discriminator field
          // (see the point/duration/time struct schemas); anything else becomes a map.
          struct.getAs[UTF8String]("type").toString match {
            case SchemaService.POINT_TYPE_2D => Values.point(
                struct.getAs[Number]("srid").intValue(),
                struct.getAs[Number]("x").doubleValue(),
                struct.getAs[Number]("y").doubleValue()
              )
            case SchemaService.POINT_TYPE_3D => Values.point(
                struct.getAs[Number]("srid").intValue(),
                struct.getAs[Number]("x").doubleValue(),
                struct.getAs[Number]("y").doubleValue(),
                struct.getAs[Number]("z").doubleValue()
              )
            case SchemaService.DURATION_TYPE => Values.isoDuration(
                struct.getAs[Number]("months").longValue(),
                struct.getAs[Number]("days").longValue(),
                struct.getAs[Number]("seconds").longValue(),
                struct.getAs[Number]("nanoseconds").intValue()
              )
            case SchemaService.TIME_TYPE_OFFSET =>
              Values.value(OffsetTime.parse(struct.getAs[UTF8String]("value").toString))
            case SchemaService.TIME_TYPE_LOCAL =>
              Values.value(LocalTime.parse(struct.getAs[UTF8String]("value").toString))
            case _ => toMap(struct)
          }
        } catch {
          // Missing/unreadable "type" field: treat the struct as a plain map.
          case _: Throwable => toMap(struct)
        }
      }
      case unsafeArray: ArrayData => {
        val sparkType = dataType match {
          case arrayType: ArrayType => arrayType.elementType
          case _                    => dataType
        }
        if (sparkType == DataTypes.ByteType && !options.legacyTypeConversionEnabled) {
          // Byte arrays map to a single byte-array value rather than a list of integers.
          Values.value(unsafeArray.toByteArray)
        } else {
          val javaList = unsafeArray.toSeq[AnyRef](sparkType)
            .map(elem => convert(elem, sparkType))
            .asJava
          Values.value(javaList)
        }
      }
      case unsafeMapData: MapData => { // Neo4j only supports Map[String, AnyRef]
        val mapType = dataType.asInstanceOf[MapType]
        val map: Map[String, AnyRef] = (0 until unsafeMapData.numElements())
          .map(i =>
            (unsafeMapData.keyArray().getUTF8String(i).toString, unsafeMapData.valueArray().get(i, mapType.valueType))
          )
          .toMap[String, AnyRef]
          .mapValues(innerValue => convert(innerValue, mapType.valueType))
          .toMap[String, AnyRef]
        Values.value(map.asJava)
      }
      case string: UTF8String => convert(string.toString)
      // Decimals are stored via their string representation to avoid precision loss.
      case decimal: Decimal if dataType.isInstanceOf[DecimalType] => Values.value(decimal.toString)
      // Anything else: let the driver perform its default conversion.
      case _ => Values.value(value)
    }
  }
}
/** Factory for [[Neo4jToSparkDataConverter]]. */
object Neo4jToSparkDataConverter {

  /** Builds a converter bound to the given connector options. */
  def apply(options: Neo4jOptions): Neo4jToSparkDataConverter = new Neo4jToSparkDataConverter(options)
}
/**
 * Converts Neo4j driver values (nodes, relationships, temporal/spatial types,
 * collections) into Spark Catalyst internal representations.
 */
class Neo4jToSparkDataConverter(options: Neo4jOptions) extends DataConverter[Any] {

  override def convert(value: Any, dataType: DataType): Any = {
    // A non-string value aimed at a StringType column is JSON-serialised first.
    if (dataType != null && dataType == DataTypes.StringType && value != null && !value.isInstanceOf[String]) {
      convert(Neo4jUtil.mapper.writeValueAsString(value), dataType)
    } else {
      value match {
        // Nodes become rows of (id, labels, schema property fields); the internal
        // id/labels columns are stripped from the schema before mapping properties.
        case node: Node => {
          val map = node.asMap()
          val structType = extractStructType(dataType)
          val fields = structType
            .filter(field =>
              field.name != Neo4jUtil.INTERNAL_ID_FIELD && field.name != Neo4jUtil.INTERNAL_LABELS_FIELD
            )
            .map(field => convert(map.get(field.name), field.dataType))
          InternalRow.fromSeq(Seq(convert(node.id()), convert(node.labels())) ++ fields)
        }
        // Relationships become rows of (rel id, type, source id, target id, properties).
        case rel: Relationship => {
          val map = rel.asMap()
          val structType = extractStructType(dataType)
          val fields = structType
            .filter(field =>
              field.name != Neo4jUtil.INTERNAL_REL_ID_FIELD
                && field.name != Neo4jUtil.INTERNAL_REL_TYPE_FIELD
                && field.name != Neo4jUtil.INTERNAL_REL_SOURCE_ID_FIELD
                && field.name != Neo4jUtil.INTERNAL_REL_TARGET_ID_FIELD
            )
            .map(field => convert(map.get(field.name), field.dataType))
          InternalRow.fromSeq(Seq(
            convert(rel.id()),
            convert(rel.`type`()),
            convert(rel.startNodeId()),
            convert(rel.endNodeId())
          ) ++ fields)
        }
        // Durations are surfaced as the connector's duration struct:
        // (type, months, days, seconds, nanoseconds, value).
        case d: IsoDuration => {
          val months = d.months()
          val days = d.days()
          val nanoseconds: Integer = d.nanoseconds()
          val seconds = d.seconds()
          InternalRow.fromSeq(Seq(
            UTF8String.fromString(SchemaService.DURATION_TYPE),
            months,
            days,
            seconds,
            nanoseconds,
            UTF8String.fromString(d.toString)
          ))
        }
        // Catalyst represents timestamps as microseconds since the epoch.
        case zt: ZonedDateTime => DateTimeUtils.instantToMicros(zt.toInstant)
        case dt: LocalDateTime => {
          if (options.legacyTypeConversionEnabled) {
            // Legacy mode treated local date-times as UTC instants.
            DateTimeUtils.instantToMicros(dt.toInstant(ZoneOffset.UTC))
          } else {
            DateTimeUtils.localDateTimeToMicros(dt)
          }
        }
        // Catalyst represents dates as days since the epoch (Int).
        case d: LocalDate => d.toEpochDay.toInt
        // Times are surfaced as (type, ISO-formatted value) structs.
        case lt: LocalTime => {
          InternalRow.fromSeq(Seq(
            UTF8String.fromString(SchemaService.TIME_TYPE_LOCAL),
            UTF8String.fromString(lt.format(DateTimeFormatter.ISO_TIME))
          ))
        }
        case t: OffsetTime => {
          InternalRow.fromSeq(Seq(
            UTF8String.fromString(SchemaService.TIME_TYPE_OFFSET),
            UTF8String.fromString(t.format(DateTimeFormatter.ISO_TIME))
          ))
        }
        // Points are surfaced as (type, srid, x, y, z) structs; z is null for 2D points.
        case p: InternalPoint2D => {
          val srid: Integer = p.srid()
          InternalRow.fromSeq(Seq(UTF8String.fromString(SchemaService.POINT_TYPE_2D), srid, p.x(), p.y(), null))
        }
        case p: InternalPoint3D => {
          val srid: Integer = p.srid()
          InternalRow.fromSeq(Seq(UTF8String.fromString(SchemaService.POINT_TYPE_3D), srid, p.x(), p.y(), p.z()))
        }
        // Lists recurse on the element type (null when no schema is available).
        case l: java.util.List[_] => {
          val elementType = if (dataType != null) dataType.asInstanceOf[ArrayType].elementType else null
          ArrayData.toArrayData(l.asScala.map(e => convert(e, elementType)).toArray)
        }
        // Maps recurse on key/value types when a MapType schema is available.
        case map: java.util.Map[_, _] => {
          if (dataType != null) {
            val mapType = dataType.asInstanceOf[MapType]
            ArrayBasedMapData(map.asScala.map(t => (convert(t._1, mapType.keyType), convert(t._2, mapType.valueType))))
          } else {
            ArrayBasedMapData(map.asScala.map(t => (convert(t._1), convert(t._2))))
          }
        }
        case s: String => UTF8String.fromString(s)
        // Anything else is assumed to already match its Catalyst representation.
        case _ => value
      }
    }
  }
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/converter/TypeConverter.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.converter
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.types.DataTypes
import org.apache.spark.sql.types.DayTimeIntervalType
import org.apache.spark.sql.types.DecimalType
import org.apache.spark.sql.types.YearMonthIntervalType
import org.neo4j.driver.types.Entity
import org.neo4j.spark.converter.CypherToSparkTypeConverter.cleanTerms
import org.neo4j.spark.converter.CypherToSparkTypeConverter.durationType
import org.neo4j.spark.converter.CypherToSparkTypeConverter.pointType
import org.neo4j.spark.converter.CypherToSparkTypeConverter.timeType
import org.neo4j.spark.converter.SparkToCypherTypeConverter.mapping
import org.neo4j.spark.service.SchemaService.normalizedClassName
import org.neo4j.spark.util.Neo4jImplicits.EntityImplicits
import org.neo4j.spark.util.Neo4jOptions
import scala.collection.JavaConverters._
/** Base contract for mapping a source type description to a destination type. */
trait TypeConverter[SOURCE_TYPE, DESTINATION_TYPE] {

  /** Converts `sourceType`; implementations may inspect the optional sample `value`. */
  def convert(sourceType: SOURCE_TYPE, value: Any = null): DESTINATION_TYPE
}
/** Companion holding the shared Spark struct schemas used for Neo4j temporal/spatial types. */
object CypherToSparkTypeConverter {

  /** Builds a converter bound to the given connector options. */
  def apply(options: Neo4jOptions): CypherToSparkTypeConverter = new CypherToSparkTypeConverter(options)

  // Driver implementation-detail terms stripped from type names before matching
  // (e.g. "InternalPoint2D" -> "Point", "OffsetTime" -> "Time").
  private val cleanTerms: String = "Unmodifiable|Internal|Iso|2D|3D|Offset"

  // Struct used to surface Neo4j duration values; "value" holds the string form.
  val durationType: DataType = DataTypes.createStructType(Array(
    DataTypes.createStructField("type", DataTypes.StringType, false),
    DataTypes.createStructField("months", DataTypes.LongType, false),
    DataTypes.createStructField("days", DataTypes.LongType, false),
    DataTypes.createStructField("seconds", DataTypes.LongType, false),
    DataTypes.createStructField("nanoseconds", DataTypes.IntegerType, false),
    DataTypes.createStructField("value", DataTypes.StringType, false)
  ))

  // Struct used to surface Neo4j point values; "z" is nullable because 2D points have none.
  val pointType: DataType = DataTypes.createStructType(Array(
    DataTypes.createStructField("type", DataTypes.StringType, false),
    DataTypes.createStructField("srid", DataTypes.IntegerType, false),
    DataTypes.createStructField("x", DataTypes.DoubleType, false),
    DataTypes.createStructField("y", DataTypes.DoubleType, false),
    DataTypes.createStructField("z", DataTypes.DoubleType, true)
  ))

  // Struct used to surface Neo4j time values; "value" holds the ISO-formatted time.
  val timeType: DataType = DataTypes.createStructType(Array(
    DataTypes.createStructField("type", DataTypes.StringType, false),
    DataTypes.createStructField("value", DataTypes.StringType, false)
  ))
}
/**
 * Maps Cypher/driver type names (e.g. "Long", "ZonedDateTime", "PointArray")
 * to the Spark SQL [[DataType]] used to surface them.
 */
class CypherToSparkTypeConverter(options: Neo4jOptions) extends TypeConverter[String, DataType] {

  override def convert(sourceType: String, value: Any = null): DataType = {
    // Strip driver implementation-detail terms (Internal, Iso, 2D, ...).
    var cleanedSourceType = sourceType.replaceAll(cleanTerms, "")
    if (options.legacyTypeConversionEnabled) {
      // Legacy mode collapsed Local/Zoned variants into a single type name.
      cleanedSourceType = cleanedSourceType.replaceAll("Local|Zoned", "")
    }
    cleanedSourceType match {
      // Entity types need a sample value to derive a property schema from.
      case "Node" | "Relationship" =>
        if (value != null) value.asInstanceOf[Entity].toStruct(options) else DataTypes.NullType
      case "NodeArray" | "RelationshipArray" =>
        if (value != null) DataTypes.createArrayType(value.asInstanceOf[Entity].toStruct(options))
        else DataTypes.NullType
      case "Boolean" => DataTypes.BooleanType
      case "Long" => DataTypes.LongType
      case "Double" => DataTypes.DoubleType
      case "Point" => pointType
      case "DateTime" | "ZonedDateTime" => DataTypes.TimestampType
      case "LocalDateTime" =>
        if (options.legacyTypeConversionEnabled) {
          DataTypes.TimestampType
        } else {
          DataTypes.TimestampNTZType
        }
      case "Time" | "LocalTime" => timeType
      case "Date" | "LocalDate" => DataTypes.DateType
      case "Duration" => durationType
      case "ByteArray" => DataTypes.BinaryType
      case "Map" => {
        // Homogeneous maps keep their value type; mixed maps degrade to String values.
        val valueType = if (value == null) {
          DataTypes.NullType
        } else {
          val map = value.asInstanceOf[java.util.Map[String, AnyRef]].asScala
          val types = map.values
            .map(value => normalizedClassName(value, options))
            .toSet
          if (types.size == 1) convert(types.head, map.values.head) else DataTypes.StringType
        }
        DataTypes.createMapType(DataTypes.StringType, valueType)
      }
      case "Array" => {
        // Homogeneous lists keep their element type; mixed lists degrade to String elements.
        val valueType = if (value == null) {
          DataTypes.NullType
        } else {
          val list = value.asInstanceOf[java.util.List[AnyRef]].asScala
          val types = list
            .map(value => normalizedClassName(value, options))
            .toSet
          if (types.size == 1) convert(types.head, list.head) else DataTypes.StringType
        }
        DataTypes.createArrayType(valueType)
      }
      // These are from APOC
      case "StringArray" => DataTypes.createArrayType(DataTypes.StringType)
      case "LongArray" => DataTypes.createArrayType(DataTypes.LongType)
      case "DoubleArray" => DataTypes.createArrayType(DataTypes.DoubleType)
      case "BooleanArray" => DataTypes.createArrayType(DataTypes.BooleanType)
      case "PointArray" => DataTypes.createArrayType(pointType)
      case "DateTimeArray" | "ZonedDateTimeArray" => DataTypes.createArrayType(DataTypes.TimestampType)
      case "TimeArray" | "LocalTimeArray" => DataTypes.createArrayType(timeType)
      case "DateArray" | "LocalDateArray" => DataTypes.createArrayType(DataTypes.DateType)
      case "DurationArray" => DataTypes.createArrayType(durationType)
      // Default is String
      case _ => DataTypes.StringType
    }
  }
}
object SparkToCypherTypeConverter {

  def apply(options: Neo4jOptions): SparkToCypherTypeConverter = new SparkToCypherTypeConverter(options)

  // Spark-to-Cypher type names that do not depend on connector configuration.
  private val baseMappings: Map[DataType, String] = Map(
    DataTypes.BooleanType -> "BOOLEAN",
    DataTypes.StringType -> "STRING",
    DecimalType.SYSTEM_DEFAULT -> "STRING",
    DataTypes.ByteType -> "INTEGER",
    DataTypes.ShortType -> "INTEGER",
    DataTypes.IntegerType -> "INTEGER",
    DataTypes.LongType -> "INTEGER",
    DataTypes.FloatType -> "FLOAT",
    DataTypes.DoubleType -> "FLOAT",
    DataTypes.DateType -> "DATE",
    durationType -> "DURATION",
    pointType -> "POINT",
    // Cypher graph entities do not allow null values in arrays
    DataTypes.createArrayType(DataTypes.BooleanType, false) -> "LIST<BOOLEAN NOT NULL>",
    DataTypes.createArrayType(DataTypes.StringType, false) -> "LIST<STRING NOT NULL>",
    DataTypes.createArrayType(DecimalType.SYSTEM_DEFAULT, false) -> "LIST<STRING NOT NULL>",
    DataTypes.createArrayType(DataTypes.ShortType, false) -> "LIST<INTEGER NOT NULL>",
    DataTypes.createArrayType(DataTypes.IntegerType, false) -> "LIST<INTEGER NOT NULL>",
    DataTypes.createArrayType(DataTypes.LongType, false) -> "LIST<INTEGER NOT NULL>",
    DataTypes.createArrayType(DataTypes.FloatType, false) -> "LIST<FLOAT NOT NULL>",
    DataTypes.createArrayType(DataTypes.DoubleType, false) -> "LIST<FLOAT NOT NULL>",
    DataTypes.createArrayType(DataTypes.DateType, false) -> "LIST<DATE NOT NULL>",
    DataTypes.createArrayType(durationType, false) -> "LIST<DURATION NOT NULL>",
    DataTypes.createArrayType(pointType, false) -> "LIST<POINT NOT NULL>"
  )

  /**
   * Resolves the Cypher type name for the given Spark type.
   * Throws `NoSuchElementException` when no mapping exists for `sourceType` (plain `Map` lookup).
   */
  private def mapping(sourceType: DataType, options: Neo4jOptions): String =
    sourceTypeMappings(options)(sourceType)

  /**
   * Builds the full mapping table: the configuration-independent base mappings plus the
   * temporal/interval mappings, which differ depending on whether legacy type conversion is enabled.
   */
  private def sourceTypeMappings(options: Neo4jOptions): Map[DataType, String] = {
    val conversionDependentMappings: Map[DataType, String] =
      if (options.legacyTypeConversionEnabled) {
        // Legacy mode: Spark timestamps become local (zone-less) datetimes.
        Map(
          DataTypes.TimestampType -> "LOCAL DATETIME",
          DataTypes.createArrayType(DataTypes.TimestampType, false) -> "LIST<LOCAL DATETIME NOT NULL>",
          DataTypes.createArrayType(DataTypes.TimestampType, true) -> "LIST<LOCAL DATETIME NOT NULL>"
        )
      } else {
        // Modern mode: timestamps keep their zone, NTZ maps to local datetime,
        // and Spark interval types map to Neo4j durations.
        // NOTE(review): the ByteType-array value is "ByteArray" (not a LIST<...> form) — presumably
        // intentional for Neo4j byte arrays; confirm against the consumer of these names.
        Map(
          DataTypes.TimestampType -> "ZONED DATETIME",
          DataTypes.TimestampNTZType -> "LOCAL DATETIME",
          DayTimeIntervalType() -> "DURATION",
          YearMonthIntervalType() -> "DURATION",
          DataTypes.createArrayType(DataTypes.ByteType, false) -> "ByteArray",
          DataTypes.createArrayType(DataTypes.TimestampType, false) -> "LIST<ZONED DATETIME NOT NULL>",
          DataTypes.createArrayType(DataTypes.TimestampNTZType, false) -> "LIST<LOCAL DATETIME NOT NULL>",
          DataTypes.createArrayType(DayTimeIntervalType(), false) -> "LIST<DURATION NOT NULL>",
          DataTypes.createArrayType(DayTimeIntervalType(), true) -> "LIST<DURATION NOT NULL>",
          DataTypes.createArrayType(YearMonthIntervalType(), false) -> "LIST<DURATION NOT NULL>",
          DataTypes.createArrayType(YearMonthIntervalType(), true) -> "LIST<DURATION NOT NULL>"
        )
      }
    baseMappings ++ conversionDependentMappings
  }
}
/**
 * Maps a Spark SQL `DataType` to the corresponding Cypher type name (e.g. "INTEGER", "LIST<FLOAT NOT NULL>").
 * The runtime `value` argument is ignored: the result depends only on the declared type and the options.
 */
class SparkToCypherTypeConverter(options: Neo4jOptions) extends TypeConverter[DataType, String] {
  override def convert(sourceType: DataType, value: Any): String = mapping(sourceType, options)
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/cypher/Cypher5Renderer.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.cypher
import org.neo4j.caniuse.Neo4j
import org.neo4j.caniuse.Neo4jVersion
import org.neo4j.cypherdsl.core.Statement
import org.neo4j.cypherdsl.core.renderer.Configuration
import org.neo4j.cypherdsl.core.renderer.Dialect
import org.neo4j.cypherdsl.core.renderer.Renderer
import org.neo4j.spark.cypher.Cypher5Renderer.Neo4jV5
import org.neo4j.spark.cypher.CypherVersionSelector.selectCypherVersionClause
/**
 * Renderer that emits Cypher suitable for the connected server: it picks the Cypher-DSL dialect
 * from the server version and prefixes rendered statements with "CYPHER 5 " when the server
 * supports explicit Cypher version selection.
 */
class Cypher5Renderer(neo4j: Neo4j) extends Renderer {

  // Servers older than 5.0 do not understand the NEO4J_5 dialect; fall back to the default one.
  private val dialect: Dialect =
    if (neo4j.getVersion.compareTo(Neo4jV5) < 0) Dialect.DEFAULT
    else Dialect.NEO4J_5

  private val delegate: Renderer =
    Renderer.getRenderer(Configuration.newConfig().withDialect(dialect).build())

  override def render(statement: Statement): String =
    s"${selectCypherVersionClause(neo4j)}${delegate.render(statement)}"
}
private object Cypher5Renderer {
  // First server version that supports the NEO4J_5 Cypher-DSL dialect.
  private val Neo4jV5 = new Neo4jVersion(5, 0, 0)
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/cypher/CypherVersionSelector.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.cypher
import org.neo4j.caniuse.CanIUse.INSTANCE.canIUse
import org.neo4j.caniuse.Cypher.{INSTANCE => Cypher}
import org.neo4j.caniuse.Neo4j
object CypherVersionSelector {

  /**
   * Returns the statement prefix that pins the Cypher language version.
   * Yields "CYPHER 5 " (note the trailing space) when the target server supports explicit
   * Cypher 5 selection, or the empty string otherwise.
   */
  def selectCypherVersionClause(neo4j: Neo4j): String = {
    val supportsExplicitSelection = canIUse(Cypher.explicitCypher5Selection()).withNeo4j(neo4j)
    if (supportsExplicitSelection) "CYPHER 5 " else ""
  }
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/reader/BasePartitionReader.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.reader
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.expressions.aggregate.AggregateFunc
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType
import org.neo4j.caniuse.Neo4j
import org.neo4j.driver.Record
import org.neo4j.driver.Session
import org.neo4j.driver.Transaction
import org.neo4j.driver.Values
import org.neo4j.spark.service.MappingService
import org.neo4j.spark.service.Neo4jQueryReadStrategy
import org.neo4j.spark.service.Neo4jQueryService
import org.neo4j.spark.service.Neo4jQueryStrategy
import org.neo4j.spark.service.Neo4jReadMappingStrategy
import org.neo4j.spark.service.PartitionPagination
import org.neo4j.spark.util.DriverCache
import org.neo4j.spark.util.Neo4jOptions
import org.neo4j.spark.util.Neo4jUtil
import org.neo4j.spark.util.QueryType
import java.io.IOException
import java.time.Duration
import java.util
import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.locks.LockSupport
import scala.collection.JavaConverters._
/**
 * Base class for partition readers that stream records from Neo4j into Spark `InternalRow`s.
 *
 * The query is executed lazily on the first call to [[next]]; transient failures are retried up to
 * `options.transactionSettings.retries` times with a fixed back-off, re-running the query from scratch.
 */
abstract class BasePartitionReader(
  private val neo4j: Neo4j,
  private val options: Neo4jOptions,
  private val filters: Array[Filter],
  private val schema: StructType,
  private val jobId: String,
  private val partitionSkipLimit: PartitionPagination,
  private val scriptResult: java.util.List[java.util.Map[String, AnyRef]],
  private val requiredColumns: StructType,
  private val aggregateColumns: Array[AggregateFunc]
) extends Logging {

  // Lazily-initialised query state; `result == null` means "query not yet executed".
  private var result: Iterator[Record] = _
  private var session: Session = _
  private var transaction: Transaction = _

  // Reader name used for identification; includes the partition number when the read is partitioned.
  protected val name: String =
    if (partitionSkipLimit.partitionNumber > 0) s"$jobId-${partitionSkipLimit.partitionNumber}" else jobId

  protected val driverCache: DriverCache = new DriverCache(options.connection)

  // Row produced by the last successful nextHandler() call; returned by get.
  private var nextRow: InternalRow = _

  // Query parameters: the setup-script result, values extracted from pushed-down filters,
  // and — for GDS queries — the configured procedure parameters.
  private lazy val values = {
    val params = new java.util.HashMap[String, Any]()
    params.put(Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT, scriptResult)
    Neo4jUtil.paramsFromFilters(filters)
      .foreach(p => params.put(p._1, p._2))
    if (options.query.queryType == QueryType.GDS) {
      params.putAll(options.gdsMetadata.parameters)
    }
    params
  }

  private val mappingService = new MappingService(new Neo4jReadMappingStrategy(options, requiredColumns), options)

  // Set once a non-retryable error occurs; exposed via hasError().
  @volatile
  private var error: Boolean = false

  // Remaining retry budget for transient failures.
  private val retries = new AtomicInteger(options.transactionSettings.retries)

  /**
   * Advances to the next record, returning true when one is available.
   *
   * Errors matching the configured failure conditions are fatal immediately; retryable driver
   * exceptions trigger a close-and-retry cycle (re-running the query) while the retry budget lasts.
   * @throws IOException wrapping the underlying cause on fatal errors
   */
  @throws(classOf[IOException])
  def next: Boolean =
    try {
      nextHandler()
    } catch {
      case t: Throwable =>
        if (options.transactionSettings.shouldFailOn(t)) {
          error = true
          logError("Error while invoking next due to explicitly configured failure condition:", t)
          throw new IOException(t)
        }
        if (Neo4jUtil.isRetryableException(t) && retries.get() > 0) {
          val currentRetry = retries.decrementAndGet
          logInfo(
            s"encountered a transient exception while reading, retrying ${options.transactionSettings.retries - currentRetry} time(s)",
            t
          )
          close()
          result = null // Reset result to force new query
          // Wait before retry
          LockSupport.parkNanos(Duration.ofMillis(options.transactionSettings.retryTimeout).toNanos)
          next
        } else {
          error = true
          logError("Error while invoking next:", t)
          throw new IOException(t)
        }
    }

  // Runs the query on first use, then pulls and converts one record per call.
  private def nextHandler(): Boolean = {
    if (result == null) {
      session = driverCache.getOrCreate().session(options.session.toNeo4jSession())
      transaction = session.beginTransaction(options.toNeo4jTransactionConfig)
      val queryText = query()
      val queryParams = queryParameters
      logInfo(s"Running the following query on Neo4j: $queryText")
      logDebug(s"with parameters $queryParams")
      result = transaction.run(queryText, Values.value(queryParams))
        .asScala
    }
    if (result.hasNext) {
      nextRow = mappingService.convert(result.next(), schema)
      true
    } else {
      false
    }
  }

  /** Returns the row produced by the last successful [[next]] call. */
  def get: InternalRow = nextRow

  /** Releases transaction, session and the cached driver reference; safe to call repeatedly. */
  def close(): Unit = {
    Neo4jUtil.closeSafely(transaction, log)
    Neo4jUtil.closeSafely(session, log)
    driverCache.close()
  }

  /** True when a fatal (non-retried) error occurred during reading. */
  def hasError(): Boolean = error

  // Builds the read query via the query service, applying filters, pagination and aggregations.
  protected def query(): String = {
    new Neo4jQueryService(
      options,
      new Neo4jQueryReadStrategy(
        neo4j,
        filters,
        partitionSkipLimit,
        requiredColumns.fieldNames,
        aggregateColumns,
        jobId
      )
    )
      .createQuery()
  }

  protected def queryParameters: util.Map[String, Any] = values
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/service/MappingService.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.service
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.StructType
import org.neo4j.driver.Record
import org.neo4j.driver.Value
import org.neo4j.driver.Values
import org.neo4j.driver.internal.value.MapValue
import org.neo4j.driver.types.Node
import org.neo4j.spark.converter.Neo4jToSparkDataConverter
import org.neo4j.spark.converter.SparkToNeo4jDataConverter
import org.neo4j.spark.service.Neo4jWriteMappingStrategy.KEYS
import org.neo4j.spark.service.Neo4jWriteMappingStrategy.PROPERTIES
import org.neo4j.spark.util.Neo4jImplicits._
import org.neo4j.spark.util.Neo4jNodeMetadata
import org.neo4j.spark.util.Neo4jOptions
import org.neo4j.spark.util.Neo4jUtil
import org.neo4j.spark.util.QueryType
import org.neo4j.spark.util.RelationshipSaveStrategy
import java.util
import java.util.function
import java.util.function.BiConsumer
import scala.collection.JavaConverters._
import scala.collection.mutable
/**
 * Maps Spark `InternalRow`s to the event maps consumed by the write queries.
 *
 * Each strategy method returns `Some(map)` with the event payload, or `None` when the row must be
 * skipped (null values in key columns with skip-null-keys enabled).
 */
class Neo4jWriteMappingStrategy(private val options: Neo4jOptions)
  extends Neo4jMappingStrategy[InternalRow, Option[java.util.Map[String, AnyRef]]]
  with Logging {

  private val dataConverter = SparkToNeo4jDataConverter(options)

  /**
   * Builds a node event of shape { keys: {...}, properties: {...} }: columns listed in
   * `nodeMetadata.nodeKeys` go under "keys" (renamed per the mapping), everything else under "properties".
   */
  override def node(row: InternalRow, schema: StructType): Option[java.util.Map[String, AnyRef]] = {
    val rowMap: java.util.Map[String, Object] = new java.util.HashMap[String, Object]
    val keys: java.util.Map[String, Object] = new java.util.HashMap[String, Object]
    val properties: java.util.Map[String, Object] = new java.util.HashMap[String, Object]
    rowMap.put(KEYS, keys)
    rowMap.put(PROPERTIES, properties)
    query(row, schema)
      .get
      .forEach(new BiConsumer[String, AnyRef] {
        override def accept(key: String, value: AnyRef): Unit = if (options.nodeMetadata.nodeKeys.contains(key)) {
          keys.put(options.nodeMetadata.nodeKeys.getOrElse(key, key), value)
        } else {
          properties.put(options.nodeMetadata.properties.getOrElse(key, key), value)
        }
      })
    if (options.nodeMetadata.skipNullKeys && containsNull(keys)) {
      logSkipping("node keys", options.nodeMetadata.nodeKeys.values)
      None
    } else {
      Some(rowMap)
    }
  }

  /**
   * NATIVE strategy: columns are routed by their alias prefix ("rel.", "source.", "target.")
   * and the prefix is stripped before insertion.
   * NOTE(review): the nodeKeys.contains(key) checks below use the still-aliased column name
   * (e.g. "source.id") — presumably the configured nodeKeys include the alias; confirm.
   */
  private def nativeStrategyConsumer(): MappingBiConsumer = new MappingBiConsumer {
    override def accept(key: String, value: AnyRef): Unit = {
      if (key.startsWith(Neo4jUtil.RELATIONSHIP_ALIAS.concat("."))) {
        relMap.get(PROPERTIES).put(key.removeAlias(), value)
      } else if (key.startsWith(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS.concat("."))) {
        if (options.relationshipMetadata.source.nodeKeys.contains(key)) {
          sourceNodeMap.get(KEYS).put(key.removeAlias(), value)
        } else {
          sourceNodeMap.get(PROPERTIES).put(key.removeAlias(), value)
        }
      } else if (key.startsWith(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS.concat("."))) {
        if (options.relationshipMetadata.target.nodeKeys.contains(key)) {
          targetNodeMap.get(KEYS).put(key.removeAlias(), value)
        } else {
          targetNodeMap.get(PROPERTIES).put(key.removeAlias(), value)
        }
      }
    }
  }

  // Routes a column into a node's "keys" and/or "properties" sub-map, applying the configured renames.
  private def addToNodeMap(
    nodeMap: util.Map[String, util.Map[String, AnyRef]],
    nodeMetadata: Neo4jNodeMetadata,
    key: String,
    value: AnyRef
  ): Unit = {
    if (nodeMetadata.nodeKeys.contains(key)) {
      nodeMap.get(KEYS).put(nodeMetadata.nodeKeys.getOrElse(key, key), value)
    }
    if (nodeMetadata.properties.contains(key)) {
      nodeMap.get(PROPERTIES).put(nodeMetadata.properties.getOrElse(key, key), value)
    }
  }

  /**
   * KEYS strategy: columns are matched by name against the configured source/target node mappings
   * and the relationship key/property mappings. A column not claimed by any mapping becomes a
   * relationship property, unless explicit relationship properties were configured.
   */
  private def keysStrategyConsumer(): MappingBiConsumer = new MappingBiConsumer {
    override def accept(key: String, value: AnyRef): Unit = {
      val source = options.relationshipMetadata.source
      val target = options.relationshipMetadata.target
      addToNodeMap(sourceNodeMap, source, key, value)
      addToNodeMap(targetNodeMap, target, key, value)
      if (options.relationshipMetadata.relationshipKeys.contains(key)) {
        relMap.get(KEYS).put(options.relationshipMetadata.relationshipKeys.getOrElse(key, key), value)
      } else {
        val propertyKey = options.relationshipMetadata.properties match {
          case Some(relProperties) => relProperties.get(key)
          case None =>
            if (!source.includesProperty(key) && !target.includesProperty(key)) {
              Some(key)
            } else {
              None
            }
        }
        propertyKey.foreach(k => relMap.get(PROPERTIES).put(k, value))
      }
    }
  }

  /**
   * Builds a relationship event of shape { rel: {...}, source: {...}, target: {...} } using the
   * configured save strategy; each sub-map contains "keys" and "properties".
   * Rejects NATIVE writes when no aliased column matched at all (the schema lacks the expected shape).
   */
  override def relationship(row: InternalRow, schema: StructType): Option[java.util.Map[String, AnyRef]] = {
    val rowMap: java.util.Map[String, AnyRef] = new java.util.HashMap[String, AnyRef]
    val consumer = options.relationshipMetadata.saveStrategy match {
      case RelationshipSaveStrategy.NATIVE => nativeStrategyConsumer()
      case RelationshipSaveStrategy.KEYS => keysStrategyConsumer()
    }
    query(row, schema).get.forEach(consumer)
    if (
      options.relationshipMetadata.saveStrategy.equals(RelationshipSaveStrategy.NATIVE)
      && consumer.relMap.get(PROPERTIES).isEmpty
      && consumer.sourceNodeMap.get(PROPERTIES).isEmpty && consumer.sourceNodeMap.get(KEYS).isEmpty
      && consumer.targetNodeMap.get(PROPERTIES).isEmpty && consumer.targetNodeMap.get(KEYS).isEmpty
    ) {
      throw new IllegalArgumentException(
        "NATIVE write strategy requires a schema like: rel.[props], source.[props], target.[props]. " +
          "All of these columns are empty in the current schema."
      )
    }
    if (options.relationshipMetadata.skipNullKeys && containsNull(consumer.relMap, KEYS)) {
      logSkipping("relationship keys", options.relationshipMetadata.relationshipKeys.values)
      None
    } else if (options.relationshipMetadata.source.skipNullKeys && containsNull(consumer.sourceNodeMap, KEYS)) {
      logSkipping("source node keys", options.relationshipMetadata.source.nodeKeys.values)
      None
    } else if (options.relationshipMetadata.target.skipNullKeys && containsNull(consumer.targetNodeMap, KEYS)) {
      logSkipping("target node keys", options.relationshipMetadata.target.nodeKeys.values)
      None
    } else {
      rowMap.put(Neo4jUtil.RELATIONSHIP_ALIAS, consumer.relMap)
      rowMap.put(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS, consumer.sourceNodeMap)
      rowMap.put(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS, consumer.targetNodeMap)
      Some(rowMap)
    }
  }

  /**
   * Converts a row to a flat column-name -> Neo4j-value map. Map-typed columns are flattened into
   * dotted keys (honouring the duplicate-key grouping option); other columns map one-to-one.
   */
  override def query(row: InternalRow, schema: StructType): Option[java.util.Map[String, AnyRef]] = {
    val seq = row.toSeq(schema)
    Some(
      schema.indices
        .flatMap(i => {
          val field = schema(i)
          val neo4jValue = dataConverter.convert(seq(i), field.dataType)
          neo4jValue match {
            case map: MapValue =>
              map.asMap().asScala.toMap
                .flattenMap(field.name, options.schemaMetadata.mapGroupDuplicateKeys)
                .mapValues(value => Values.value(value).asInstanceOf[AnyRef])
                .toSeq
            case _ => Seq((field.name, neo4jValue))
          }
        })
        .toMap
        .asJava
    )
  }

  // Helper methods

  // True when any value in the map is the Neo4j NULL value.
  private def containsNull(map: java.util.Map[String, Object]): Boolean = {
    map.containsValue(Values.NULL)
  }

  // True when the named sub-map (e.g. KEYS) contains a Neo4j NULL value.
  private def containsNull(map: java.util.Map[String, java.util.Map[String, AnyRef]], key: String): Boolean = {
    map.get(key).containsValue(Values.NULL)
  }

  private def logSkipping(keyType: String, keys: Iterable[String]): Unit = {
    logTrace(s"Skipping row because it contains null value for one of the $keyType: [${keys.mkString(", ")}]")
  }
}
/**
 * Maps Neo4j driver `Record`s to Spark `InternalRow`s for node, relationship and query reads.
 * When `requiredColumns` is non-empty (column pruning pushed down), the generic query mapping is
 * used because the server already returned exactly the requested columns.
 */
class Neo4jReadMappingStrategy(private val options: Neo4jOptions, requiredColumns: StructType)
  extends Neo4jMappingStrategy[Record, InternalRow] {

  private val dataConverter = Neo4jToSparkDataConverter(options)

  /** Converts a node record, adding the synthetic internal id/labels fields to its property map. */
  override def node(record: Record, schema: StructType): InternalRow = {
    if (requiredColumns.nonEmpty) {
      query(record, schema)
    } else {
      val node = record.get(Neo4jUtil.NODE_ALIAS).asNode()
      val nodeMap = new util.HashMap[String, Any](node.asMap())
      nodeMap.put(Neo4jUtil.INTERNAL_ID_FIELD, node.id())
      nodeMap.put(Neo4jUtil.INTERNAL_LABELS_FIELD, node.labels())
      mapToInternalRow(nodeMap, schema)
    }
  }

  // Builds an InternalRow by converting, in schema order, the value stored under each field name.
  // Missing keys yield null via map.get and are handled by the data converter.
  private def mapToInternalRow(map: util.Map[String, Any], schema: StructType) = InternalRow
    .fromSeq(
      schema.map(field => dataConverter.convert(map.get(field.name), field.dataType))
    )

  // "Flat" node representation: every property becomes "<alias>.<prop>", plus
  // "<alias.id>" and "<alias.labels>" synthetic entries.
  // NOTE(review): the character class "[<|>]" also strips '|'; harmless as long as the internal
  // field names only use angle brackets.
  private def flatRelNodeMapping(node: Node, alias: String): mutable.Map[String, Any] = {
    val nodeMap: mutable.Map[String, Any] = node.asMap().asScala
      .map(t => (s"$alias.${t._1}", t._2))
    nodeMap.put(
      s"<$alias.${
        Neo4jUtil.INTERNAL_ID_FIELD
          .replaceAll("[<|>]", "")
      }>",
      node.id()
    )
    nodeMap.put(
      s"<$alias.${
        Neo4jUtil.INTERNAL_LABELS_FIELD
          .replaceAll("[<|>]", "")
      }>",
      node.labels()
    )
    nodeMap
  }

  // "Map" node representation: a single "<alias>" entry holding a string->string map of
  // stringified properties plus JSON-encoded internal id/labels.
  private def mapRelNodeMapping(node: Node, alias: String): Map[String, util.Map[String, String]] = {
    val nodeMap: util.Map[String, String] =
      new util.HashMap[String, String](node.asMap(new function.Function[Value, String] {
        override def apply(t: Value): String = t.toString
      }))
    nodeMap.put(Neo4jUtil.INTERNAL_ID_FIELD, Neo4jUtil.mapper.writeValueAsString(node.id()))
    nodeMap.put(Neo4jUtil.INTERNAL_LABELS_FIELD, Neo4jUtil.mapper.writeValueAsString(node.labels()))
    Map(s"<$alias>" -> nodeMap)
  }

  /**
   * Converts a relationship record: relationship properties are prefixed with "rel.", internal
   * rel id/type are added, and source/target nodes are merged in using either the flat or the
   * map representation depending on `relationshipMetadata.nodeMap`.
   */
  override def relationship(record: Record, schema: StructType): InternalRow = {
    if (requiredColumns.nonEmpty) {
      query(record, schema)
    } else {
      val rel = record.get(Neo4jUtil.RELATIONSHIP_ALIAS).asRelationship()
      val relMap = new util.HashMap[String, Any](rel.asMap())
        .asScala
        .map(t => (s"rel.${t._1}", t._2))
        .asJava
      relMap.put(Neo4jUtil.INTERNAL_REL_ID_FIELD, rel.id())
      relMap.put(Neo4jUtil.INTERNAL_REL_TYPE_FIELD, rel.`type`())
      val source = record.get(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS).asNode()
      val target = record.get(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS).asNode()
      val (sourceMap, targetMap) = if (options.relationshipMetadata.nodeMap) {
        (
          mapRelNodeMapping(source, Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS),
          mapRelNodeMapping(target, Neo4jUtil.RELATIONSHIP_TARGET_ALIAS)
        )
      } else {
        (
          flatRelNodeMapping(source, Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS),
          flatRelNodeMapping(target, Neo4jUtil.RELATIONSHIP_TARGET_ALIAS)
        )
      }
      relMap.putAll(sourceMap.toMap.asJava)
      relMap.putAll(targetMap.toMap.asJava)
      mapToInternalRow(relMap, schema)
    }
  }

  /** Generic mapping: the record's columns are taken as-is (driver values unboxed to plain objects). */
  override def query(elem: Record, schema: StructType): InternalRow = mapToInternalRow(
    elem.asMap(new function.Function[Value, Any] {
      override def apply(t: Value): Any = t.asObject()
    }),
    schema
  )
}
/**
 * Contract for converting between a transport representation `IN` and a Spark-side
 * representation `OUT`, with one callback per query type (node/labels, relationship, query).
 * Serializable because instances are shipped to Spark executors.
 */
abstract class Neo4jMappingStrategy[IN, OUT] extends Serializable {
  def node(elem: IN, schema: StructType): OUT
  def relationship(elem: IN, schema: StructType): OUT
  def query(elem: IN, schema: StructType): OUT
}
/**
 * Dispatches record conversion to the strategy callback matching the configured query type.
 * QUERY and GDS reads share the same generic query mapping.
 */
class MappingService[IN, OUT](private val strategy: Neo4jMappingStrategy[IN, OUT], private val options: Neo4jOptions)
  extends Serializable {

  def convert(record: IN, schema: StructType): OUT = options.query.queryType match {
    case QueryType.LABELS => strategy.node(record, schema)
    case QueryType.RELATIONSHIP => strategy.relationship(record, schema)
    case QueryType.QUERY | QueryType.GDS => strategy.query(record, schema)
  }
}
object Neo4jWriteMappingStrategy {
  // Sub-map names used inside write events: matching/merge keys vs. plain properties.
  val KEYS = "keys"
  val PROPERTIES = "properties"
}
/**
 * Accumulator shared by the relationship write strategies: one map per entity
 * (relationship, source node, target node), each pre-seeded with empty
 * KEYS and PROPERTIES sub-maps.
 */
abstract private class MappingBiConsumer extends BiConsumer[String, AnyRef] {

  val relMap = newEntityMap()
  val sourceNodeMap = newEntityMap()
  val targetNodeMap = newEntityMap()

  // Builds an entity accumulator with empty "keys" and "properties" sub-maps.
  private def newEntityMap(): util.HashMap[String, util.Map[String, AnyRef]] = {
    val entityMap = new util.HashMap[String, util.Map[String, AnyRef]]()
    entityMap.put(KEYS, new util.HashMap[String, AnyRef]())
    entityMap.put(PROPERTIES, new util.HashMap[String, AnyRef]())
    entityMap
  }
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/service/Neo4jQueryService.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.service
import org.apache.commons.lang3.StringUtils
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.connector.expressions.SortDirection
import org.apache.spark.sql.connector.expressions.SortOrder
import org.apache.spark.sql.connector.expressions.aggregate._
import org.apache.spark.sql.sources.And
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.sources.Or
import org.neo4j.caniuse.Neo4j
import org.neo4j.cypherdsl.core._
import org.neo4j.cypherdsl.core.renderer.Renderer
import org.neo4j.spark.cypher.Cypher5Renderer
import org.neo4j.spark.cypher.CypherVersionSelector.selectCypherVersionClause
import org.neo4j.spark.util.Neo4jImplicits._
import org.neo4j.spark.util.Neo4jOptions
import org.neo4j.spark.util.Neo4jUtil
import org.neo4j.spark.util.NodeSaveMode
import org.neo4j.spark.util.QueryType
import scala.collection.JavaConverters._
/**
 * Builds the Cypher statements used for writing: custom query, node upserts and relationship
 * upserts. The generated queries consume an `$events` batch parameter; each event is the map
 * produced by [[Neo4jWriteMappingStrategy]].
 */
class Neo4jQueryWriteStrategy(private val neo4j: Neo4j, private val saveMode: SaveMode) extends Neo4jQueryStrategy {

  /** Wraps the user-provided query so it sees `scriptResult` and one `event` per input row. */
  override def createStatementForQuery(options: Neo4jOptions): String =
    s"""WITH ${"$"}scriptResult AS ${Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT}
       |UNWIND ${"$"}events AS ${Neo4jQueryStrategy.VARIABLE_EVENT}
       |${options.query.value}
       |""".stripMargin

  // Renders "prop: event.<prefix>.prop" pairs for a MERGE/MATCH key map.
  // NOTE(review): only the mapped-to name (key._2) is used on both sides — the event maps are
  // built with the renamed keys, so this is presumably intentional; confirm against the mapping strategy.
  private def createPropsList(props: Map[String, String], prefix: String): String = {
    props
      .map(key => {
        s"${key._2.quote()}: ${Neo4jQueryStrategy.VARIABLE_EVENT}.$prefix.${key._2.quote()}"
      }).mkString(", ")
  }

  // Translates a Spark/Node save mode into the Cypher clause keyword that implements it.
  private def keywordFromSaveMode(saveMode: Any): String = {
    saveMode match {
      case NodeSaveMode.Overwrite | SaveMode.Overwrite => "MERGE"
      case NodeSaveMode.ErrorIfExists | SaveMode.ErrorIfExists | SaveMode.Append | NodeSaveMode.Append => "CREATE"
      case NodeSaveMode.Match => "MATCH"
      case _ => throw new UnsupportedOperationException(s"SaveMode $saveMode not supported")
    }
  }

  // Renders one node clause, e.g. `MERGE (source:Label {k: event...}) SET source += event...`.
  // MATCH clauses get no SET: matched nodes must not be modified.
  private def createQueryPart(keyword: String, labels: String, keys: String, alias: String): String = {
    val setStatement = if (!keyword.equals("MATCH"))
      s" SET $alias += ${Neo4jQueryStrategy.VARIABLE_EVENT}.$alias.${Neo4jWriteMappingStrategy.PROPERTIES}"
    else ""
    s"""$keyword ($alias${if (labels.isEmpty) "" else s":$labels"} ${
        if (keys.isEmpty) ""
        else s"{$keys}"
      })$setStatement""".stripMargin
  }

  /**
   * Builds the relationship upsert: source clause, target clause, then the relationship clause.
   * A `WITH source, event` is interposed when a non-MATCH source precedes a MATCH target, so the
   * MATCH does not merge into the preceding clause's scope.
   */
  override def createStatementForRelationships(options: Neo4jOptions): String = {
    val relationshipKeyword = keywordFromSaveMode(saveMode)
    val sourceKeyword = keywordFromSaveMode(options.relationshipMetadata.sourceSaveMode)
    val targetKeyword = keywordFromSaveMode(options.relationshipMetadata.targetSaveMode)
    val relationship = options.relationshipMetadata.relationshipType.quote()
    val sourceLabels = options.relationshipMetadata.source.labels
      .map(_.quote())
      .mkString(":")
    val targetLabels = options.relationshipMetadata.target.labels
      .map(_.quote())
      .mkString(":")
    val sourceKeys = createPropsList(
      options.relationshipMetadata.source.nodeKeys,
      s"source.${Neo4jWriteMappingStrategy.KEYS}"
    )
    val targetKeys = createPropsList(
      options.relationshipMetadata.target.nodeKeys,
      s"target.${Neo4jWriteMappingStrategy.KEYS}"
    )
    val sourceQueryPart = createQueryPart(sourceKeyword, sourceLabels, sourceKeys, Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS)
    val targetQueryPart = createQueryPart(targetKeyword, targetLabels, targetKeys, Neo4jUtil.RELATIONSHIP_TARGET_ALIAS)
    val withQueryPart = if (sourceKeyword != "MATCH" && targetKeyword == "MATCH")
      "\nWITH source, event"
    else {
      ""
    }
    val relKeys = if (options.relationshipMetadata.relationshipKeys.nonEmpty) {
      options.relationshipMetadata.relationshipKeys
        .map(t =>
          s"${t._2}: ${Neo4jQueryStrategy.VARIABLE_EVENT}.${Neo4jUtil.RELATIONSHIP_ALIAS}.${Neo4jWriteMappingStrategy.KEYS}.${t._1}"
        )
        .mkString("{", ", ", "}")
    } else {
      ""
    }
    s"""${selectCypherVersionClause(neo4j)}UNWIND ${"$"}events AS ${Neo4jQueryStrategy.VARIABLE_EVENT}
       |$sourceQueryPart$withQueryPart
       |$targetQueryPart
       |$relationshipKeyword (${Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS})-[${Neo4jUtil.RELATIONSHIP_ALIAS}:$relationship$relKeys]->(${Neo4jUtil.RELATIONSHIP_TARGET_ALIAS})
       |SET ${Neo4jUtil.RELATIONSHIP_ALIAS} += ${Neo4jQueryStrategy.VARIABLE_EVENT}.${Neo4jUtil.RELATIONSHIP_ALIAS}.${Neo4jWriteMappingStrategy.PROPERTIES}
       |""".stripMargin
  }

  /** Builds the node upsert: CREATE/MERGE on the configured keys, then SET of the remaining properties. */
  override def createStatementForNodes(options: Neo4jOptions): String = {
    val keyword = keywordFromSaveMode(saveMode)
    val labels = options.nodeMetadata.labels
      .map(_.quote())
      .mkString(":")
    val keys = createPropsList(
      options.nodeMetadata.nodeKeys,
      Neo4jWriteMappingStrategy.KEYS
    )
    s"""${selectCypherVersionClause(neo4j)}UNWIND ${"$"}events AS ${Neo4jQueryStrategy.VARIABLE_EVENT}
       |$keyword (node${if (labels.isEmpty) "" else s":$labels"} ${if (keys.isEmpty) "" else s"{$keys}"})
       |SET node += ${Neo4jQueryStrategy.VARIABLE_EVENT}.${Neo4jWriteMappingStrategy.PROPERTIES}
       |""".stripMargin
  }

  override def createStatementForGDS(options: Neo4jOptions): String =
    throw new UnsupportedOperationException("Write operations with GDS are currently not supported")
}
class Neo4jQueryReadStrategy(
neo4j: Neo4j,
filters: Array[Filter] = Array.empty[Filter],
partitionPagination: PartitionPagination = PartitionPagination.EMPTY,
requiredColumns: Seq[String] = Seq.empty,
aggregateColumns: Array[AggregateFunc] = Array.empty,
jobId: String = ""
) extends Neo4jQueryStrategy with Logging {
  // Renders Cypher-DSL statements, prefixing "CYPHER 5 " when the server supports explicit version selection.
  private val renderer: Renderer = new Cypher5Renderer(neo4j)
  // True only when both a skip offset and a limit were pushed down for this partition (-1 means unset).
  private val hasSkipLimit: Boolean = partitionPagination.skip != -1 && partitionPagination.topN.limit != -1
override def createStatementForQuery(options: Neo4jOptions): String = {
if (partitionPagination.topN.orders.nonEmpty) {
logWarning(
s"""Top N push-down optimizations with aggregations are not supported for custom queries.
|\tThese aggregations are going to be ignored.
|\tPlease specify the aggregations in the custom query directly""".stripMargin
)
}
val limitedQuery = if (hasSkipLimit) {
s"${options.query.value} SKIP ${partitionPagination.skip} LIMIT ${partitionPagination.topN.limit}"
} else {
s"${options.query.value}"
}
s"WITH $$scriptResult AS scriptResult $limitedQuery"
}
  /**
   * Builds the relationship read statement with Cypher-DSL: MATCH on (source)-[rel]->(target)
   * with pushed-down filters, then either a plain RETURN or an aggregation (with pagination)
   * depending on whether aggregate columns were pushed down.
   */
  override def createStatementForRelationships(options: Neo4jOptions): String = {
    val sourceNode = createNode(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS, options.relationshipMetadata.source.labels)
    val targetNode = createNode(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS, options.relationshipMetadata.target.labels)
    val relationship = sourceNode.relationshipTo(targetNode, options.relationshipMetadata.relationshipType)
      .named(Neo4jUtil.RELATIONSHIP_ALIAS)
    val matchQuery: StatementBuilder.OngoingReadingWithoutWhere =
      filterRelationship(sourceNode, targetNode, relationship)
    val returnExpressions: Seq[Expression] = buildReturnExpression(sourceNode, targetNode, relationship)
    val stmt = if (aggregateColumns.isEmpty) {
      val query = matchQuery.returning(returnExpressions: _*)
      buildStatement(options, query, relationship)
    } else {
      buildStatementAggregation(options, matchQuery, relationship, returnExpressions)
    }
    renderer.render(stmt)
  }
private def convertSort(entity: PropertyContainer, order: SortOrder): SortItem = {
val sortExpression = order.expression().describe()
val container: Option[PropertyContainer] = entity match {
case relationship: Relationship =>
if (sortExpression.contains(s"${Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS}.")) {
Some(relationship.getLeft)
} else if (sortExpression.contains(s"${Neo4jUtil.RELATIONSHIP_TARGET_ALIAS}.")) {
Some(relationship.getRight)
} else if (sortExpression.contains(s"${Neo4jUtil.RELATIONSHIP_ALIAS}.")) {
Some(relationship)
} else {
None
}
case _ => Some(entity)
}
val direction =
if (order.direction() == SortDirection.ASCENDING) SortItem.Direction.ASC else SortItem.Direction.DESC
Cypher.sort(
container
.map(_.property(sortExpression.removeAlias()))
.getOrElse(Cypher.name(sortExpression.unquote())),
direction
)
}
/**
 * Builds the RETURN expressions for a relationship read.
 *
 * Without a pushed-down projection the whole relationship plus both end nodes
 * are returned. Otherwise each required column is mapped either to the owning
 * entity itself (when the column is just the alias) or to a property/internal
 * field resolved via [[getCorrectProperty]].
 */
private def buildReturnExpression(sourceNode: Node, targetNode: Node, relationship: Relationship): Seq[Expression] = {
  if (requiredColumns.isEmpty) {
    Seq(
      relationship.getRequiredSymbolicName,
      sourceNode.as(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS),
      targetNode.as(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS)
    )
  } else {
    requiredColumns.map { column =>
      val parts = column.split('.')
      val alias = parts.head
      // resolve the owning entity from the column's alias prefix;
      // null means "no known entity" and is handled by getCorrectProperty
      val owner: PropertyContainer =
        if (alias.contains(Neo4jUtil.RELATIONSHIP_ALIAS)) relationship
        else if (alias.contains(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS)) sourceNode
        else if (alias.contains(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS)) targetNode
        else null
      (owner, parts.length) match {
        // bare alias: return the entity itself
        case (n: Node, 1) => n.as(alias.quote())
        case (r: Relationship, 1) => r.getRequiredSymbolicName
        // alias.property (or unknown alias): delegate to the property resolver
        case _ => getCorrectProperty(column, owner)
      }
    }
  }
}
/**
 * Finalizes a statement whose projection contains aggregation functions.
 *
 * With skip/limit pagination active, rows are first ordered by the internal id
 * of the matched entity, paginated, and only then projected (Spark does not
 * push down limit/top-N together with aggregations, so the pagination must be
 * applied before the aggregating RETURN). Without pagination, an optional
 * streaming order-by property is applied instead.
 *
 * @param options the connector options
 * @param query   the already-built MATCH (+ WHERE) part
 * @param entity  the node or relationship the query revolves around
 * @param fields  the RETURN expressions, including the aggregations
 * @return the built Cypher statement
 */
private def buildStatementAggregation(
    options: Neo4jOptions,
    query: StatementBuilder.OngoingReadingWithoutWhere,
    entity: PropertyContainer,
    fields: Seq[Expression]
): Statement = {
  val ret = if (hasSkipLimit) {
    // internal id gives a stable ordering so partitions read disjoint slices
    val id = entity match {
      case node: Node => Functions.id(node)
      case rel: Relationship => Functions.id(rel)
    }
    query
      .`with`(entity)
      // Spark does not push down limits/top N when aggregation is involved
      .orderBy(id)
      .skip(partitionPagination.skip)
      .limit(partitionPagination.topN.limit)
      .returning(fields: _*)
  } else {
    val orderByProp = options.streamingOrderBy
    if (StringUtils.isBlank(orderByProp)) {
      query.returning(fields: _*)
    } else {
      // streaming reads stay deterministic by ordering on the configured property
      query
        .`with`(entity)
        .orderBy(entity.property(orderByProp))
        .ascending()
        .returning(fields: _*)
    }
  }
  ret.build()
}
/**
 * Finalizes a non-aggregated statement, applying ordering and skip/limit
 * pagination when pushed down.
 *
 * Ordering rules when pagination is active:
 *  - a single partition, or a pushed-down TopN that carries its own sort
 *    orders, uses those orders (possibly none);
 *  - otherwise rows are ordered by the entity's internal id so every partition
 *    reads a stable, non-overlapping slice.
 * Without pagination, an optional streaming order-by property is applied.
 *
 * @param options   the connector options
 * @param returning the builder already carrying MATCH/WHERE/RETURN
 * @param entity    the matched node or relationship; null for entity-less
 *                  statements (e.g. count queries), in which case only the
 *                  skip/limit is applied
 * @return the built Cypher statement
 */
private def buildStatement(
  options: Neo4jOptions,
  returning: StatementBuilder.TerminalExposesSkip
    with StatementBuilder.TerminalExposesLimit
    with StatementBuilder.TerminalExposesOrderBy
    with StatementBuilder.BuildableStatement[_],
  entity: PropertyContainer = null
): Statement = {
  // SKIP 0 is omitted from the rendered query
  def addSkipLimit(ret: StatementBuilder.TerminalExposesSkip
    with StatementBuilder.TerminalExposesLimit
    with StatementBuilder.BuildableStatement[_]) = {
    if (partitionPagination.skip == 0) {
      ret.limit(partitionPagination.topN.limit)
    } else {
      ret.skip(partitionPagination.skip)
        .limit(partitionPagination.topN.limit)
    }
  }
  val ret = if (entity == null) {
    if (hasSkipLimit) addSkipLimit(returning) else returning
  } else {
    if (hasSkipLimit) {
      if (options.partitions == 1 || partitionPagination.topN.orders.nonEmpty) {
        // honour the pushed-down TopN ordering (empty orders → no ORDER BY)
        addSkipLimit(returning.orderBy(partitionPagination.topN.orders.map(order => convertSort(entity, order)): _*))
      } else {
        // multiple partitions without explicit ordering: order by internal id
        // so the skip/limit windows don't overlap between partitions
        val id = entity match {
          case node: Node => Functions.id(node)
          case rel: Relationship => Functions.id(rel)
        }
        addSkipLimit(returning.orderBy(id))
      }
    } else {
      val orderByProp = options.streamingOrderBy
      if (StringUtils.isBlank(orderByProp)) returning else returning.orderBy(entity.property(orderByProp))
    }
  }
  ret.build()
}
/**
 * Builds the MATCH part for a relationship read and attaches the pushed-down
 * Spark filters as a WHERE clause.
 *
 * NOTE(review): the result of [[assembleConditionQuery]] is deliberately
 * discarded — this relies on the Cypher DSL builder being mutated in place so
 * the WHERE clause sticks to the returned builder; confirm against the
 * cypher-dsl version in use before restructuring.
 */
private def filterRelationship(sourceNode: Node, targetNode: Node, relationship: Relationship) = {
  val reading = Cypher.`match`(sourceNode).`match`(targetNode).`match`(relationship)

  // resolves which entity a filter's attribute prefix refers to
  def containerFor(filter: Filter): PropertyContainer =
    if (filter.isAttribute(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS)) sourceNode
    else if (filter.isAttribute(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS)) targetNode
    else if (filter.isAttribute(Neo4jUtil.RELATIONSHIP_ALIAS)) relationship
    else throw new IllegalArgumentException(s"Attribute '${filter.getAttribute.get}' is not valid")

  // recursively translates a Spark filter tree into a Cypher condition
  def toCondition(filter: Filter): Condition = filter match {
    case and: And => toCondition(and.left).and(toCondition(and.right))
    case or: Or => toCondition(or.left).or(toCondition(or.right))
    case leaf: Filter =>
      Neo4jUtil.mapSparkFiltersToCypher(leaf, containerFor(leaf), leaf.getAttributeWithoutEntityName)
  }

  if (filters.nonEmpty) {
    assembleConditionQuery(reading, filters.map(toCondition))
  }
  reading
}
/**
 * Maps a required column name to the Cypher expression that produces it.
 *
 * Internal fields (&lt;id&gt;, &lt;labels&gt;, relationship id/type/source/target
 * variants) become the corresponding id()/type()/labels() function calls on the
 * entity; "*" becomes an asterisk projection; any other name is either one of
 * the pushed-down aggregations (count/min/max/sum, with distinct variants) or
 * a plain property/symbolic-name access aliased back to the column name.
 *
 * @param column the Spark column name being projected
 * @param entity the owning node/relationship; may be null, in which case bare
 *               symbolic names are used instead of property accesses
 */
private def getCorrectProperty(column: String, entity: PropertyContainer): Expression = {
  // property access when an entity is known, bare name otherwise (e.g. GDS yields)
  def propertyOrSymbolicName(col: String) = {
    if (entity != null) entity.property(col) else Cypher.name(col)
  }
  column match {
    case Neo4jUtil.INTERNAL_ID_FIELD => Functions.id(entity.asInstanceOf[Node]).as(Neo4jUtil.INTERNAL_ID_FIELD)
    case Neo4jUtil.INTERNAL_REL_ID_FIELD =>
      Functions.id(entity.asInstanceOf[Relationship]).as(Neo4jUtil.INTERNAL_REL_ID_FIELD)
    case Neo4jUtil.INTERNAL_REL_SOURCE_ID_FIELD =>
      Functions.id(entity.asInstanceOf[Node]).as(Neo4jUtil.INTERNAL_REL_SOURCE_ID_FIELD)
    case Neo4jUtil.INTERNAL_REL_TARGET_ID_FIELD =>
      Functions.id(entity.asInstanceOf[Node]).as(Neo4jUtil.INTERNAL_REL_TARGET_ID_FIELD)
    case Neo4jUtil.INTERNAL_REL_TYPE_FIELD =>
      Functions.`type`(entity.asInstanceOf[Relationship]).as(Neo4jUtil.INTERNAL_REL_TYPE_FIELD)
    case Neo4jUtil.INTERNAL_LABELS_FIELD =>
      Functions.labels(entity.asInstanceOf[Node]).as(Neo4jUtil.INTERNAL_LABELS_FIELD)
    case Neo4jUtil.INTERNAL_REL_SOURCE_LABELS_FIELD =>
      Functions.labels(entity.asInstanceOf[Node]).as(Neo4jUtil.INTERNAL_REL_SOURCE_LABELS_FIELD)
    case Neo4jUtil.INTERNAL_REL_TARGET_LABELS_FIELD =>
      Functions.labels(entity.asInstanceOf[Node]).as(Neo4jUtil.INTERNAL_REL_TARGET_LABELS_FIELD)
    case "*" => Asterisk.INSTANCE
    case name => {
      val cleanedName = name.removeAlias()
      // match the column against a pushed-down aggregation by its rendered name
      aggregateColumns.find(_.toString == name)
        .map {
          case count: Count => {
            val col = count.column().describe().unquote().removeAlias()
            val prop = propertyOrSymbolicName(col)
            if (count.isDistinct) {
              Functions.countDistinct(prop).as(name)
            } else {
              Functions.count(prop).as(name)
            }
          }
          case countStar: CountStar => Functions.count(Asterisk.INSTANCE).as(name)
          case max: Max =>
            val col = max.column().describe().unquote().removeAlias()
            val prop = propertyOrSymbolicName(col)
            Functions.max(prop).as(name)
          case min: Min =>
            val col = min.column().describe().unquote().removeAlias()
            val prop = propertyOrSymbolicName(col)
            Functions.min(prop).as(name)
          case sum: Sum => {
            val col = sum.column().describe().unquote().removeAlias()
            val prop = propertyOrSymbolicName(col)
            if (sum.isDistinct) {
              Functions.sumDistinct(prop).as(name)
            } else {
              Functions.sum(prop).as(name)
            }
          }
        }
        // not an aggregation: plain property (or bare name) aliased to the column
        .getOrElse(propertyOrSymbolicName(cleanedName).as(name))
        .asInstanceOf[Expression]
    }
  }
}
/**
 * Renders the Cypher statement that reads nodes with the configured labels,
 * applying pushed-down filters, projections, aggregations and pagination.
 *
 * @param options the connector options carrying the node metadata
 * @return the rendered Cypher statement
 */
override def createStatementForNodes(options: Neo4jOptions): String = {
  val node = createNode(Neo4jUtil.NODE_ALIAS, options.nodeMetadata.labels)
  val reading = filterNode(node)
  val projection = requiredColumns.map(getCorrectProperty(_, node))
  val statement =
    if (aggregateColumns.nonEmpty) {
      buildStatementAggregation(options, reading, node, projection)
    } else {
      // no projection pushed down → return the whole node
      val returning =
        if (requiredColumns.isEmpty) reading.returning(node)
        else reading.returning(projection: _*)
      buildStatement(options, returning, node)
    }
  renderer.render(statement)
}
/**
 * Builds the MATCH part for a node read and attaches the pushed-down Spark
 * filters as a WHERE clause.
 *
 * NOTE(review): the result of [[assembleConditionQuery]] is deliberately
 * discarded — this relies on the Cypher DSL builder being mutated in place;
 * confirm against the cypher-dsl version in use before restructuring.
 */
private def filterNode(node: Node) = {
  val reading = Cypher.`match`(node)

  // recursively translates a Spark filter tree into a Cypher condition
  def toCondition(filter: Filter): Condition = filter match {
    case and: And => toCondition(and.left).and(toCondition(and.right))
    case or: Or => toCondition(or.left).or(toCondition(or.right))
    case leaf: Filter => Neo4jUtil.mapSparkFiltersToCypher(leaf, node)
  }

  if (filters.nonEmpty) {
    assembleConditionQuery(reading, filters.map(toCondition))
  }
  reading
}
/** Renders a filtered node-count query returning a single `count` column. */
def createStatementForNodeCount(options: Neo4jOptions): String = {
  val node = createNode(Neo4jUtil.NODE_ALIAS, options.nodeMetadata.labels)
  val counting = filterNode(node).returning(Functions.count(node).as("count"))
  renderer.render(buildStatement(options, counting))
}
/** Renders a filtered relationship-count query returning a single `count` column. */
def createStatementForRelationshipCount(options: Neo4jOptions): String = {
  val relMeta = options.relationshipMetadata
  val source = createNode(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS, relMeta.source.labels)
  val target = createNode(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS, relMeta.target.labels)
  val rel = source.relationshipTo(target, relMeta.relationshipType).named(Neo4jUtil.RELATIONSHIP_ALIAS)
  val reading: StatementBuilder.OngoingReadingWithoutWhere = filterRelationship(source, target, rel)
  renderer.render(buildStatement(options, reading.returning(Functions.count(source).as("count"))))
}
/**
 * AND-combines the given conditions and attaches them to the MATCH builder as
 * a single WHERE clause. An empty array yields a no-op condition.
 */
private def assembleConditionQuery(
    matchQuery: StatementBuilder.OngoingReadingWithoutWhere,
    filters: Array[Condition]
): StatementBuilder.OngoingReadingWithWhere = {
  val combined = filters.foldLeft(Conditions.noCondition())((acc, cond) => acc.and(cond))
  matchQuery.where(combined)
}
/**
 * Creates a Cypher DSL node pattern named `name` for the given labels; with no
 * labels an unlabelled `anyNode` pattern is produced.
 *
 * Bug fix: `labels.head`/`labels.tail` were previously evaluated before the
 * `labels.isEmpty` check, so an empty label list threw NoSuchElementException
 * instead of falling back to `Cypher.anyNode(name)`. The head/tail access is
 * now confined to the non-empty branch.
 *
 * @param name   the variable name the pattern is bound to
 * @param labels the node labels; may be empty
 */
private def createNode(name: String, labels: Seq[String]) = {
  if (labels.isEmpty) {
    Cypher.anyNode(name)
  } else {
    // first label is the primary one; the rest are additional labels
    Cypher.node(labels.head, labels.tail.asJava).named(name)
  }
}
/**
 * Renders a `CALL <gds procedure>(...) YIELD ... RETURN ...` statement for the
 * configured GDS procedure, passing only the arguments the user supplied (plus
 * every mandatory one) and yielding the fields the procedure declares.
 */
override def createStatementForGDS(options: Neo4jOptions): String = {
  val projection = requiredColumns.map(getCorrectProperty(_, null))
  // we need it in order to parse the field YIELD by the GDS procedure...
  val (yieldFields, procArgs) = Neo4jUtil.callSchemaService(
    neo4j,
    options,
    jobId,
    filters,
    { ss => (ss.struct().fieldNames, ss.inputForGDSProc(options.query.value)) }
  )
  // keep every mandatory argument; keep an optional one only when the user
  // configured a value for it
  val callParams = procArgs
    .collect {
      case (argName, isOptional) if !isOptional || options.gdsMetadata.parameters.containsKey(argName) =>
        Cypher.parameter(argName)
    }
  val statement = Cypher.call(options.query.value)
    .withArgs(callParams: _*)
    .`yield`(yieldFields: _*)
    .returning(projection: _*)
    .build()
  renderer.render(statement)
}
}
/**
 * Names of the Cypher variables the connector injects into user-provided
 * queries (see e.g. the `WITH $scriptResult AS scriptResult` prefix in
 * createStatementForQuery and the prefixes stripped by SchemaService.cleanQuery).
 */
object Neo4jQueryStrategy {
  // single event bound into streaming sink queries
  val VARIABLE_EVENT = "event"
  // batch of events bound into sink queries
  val VARIABLE_EVENTS = "events"
  // result of the user's setup script, prepended to read queries
  val VARIABLE_SCRIPT_RESULT = "scriptResult"
  // streaming-related variable name
  val VARIABLE_STREAM = "stream"
}
/**
 * Strategy contract: each implementation renders the Cypher statement for one
 * of the supported query types (user query, relationships, nodes, GDS call).
 */
abstract class Neo4jQueryStrategy {

  /** Renders the statement for a user-provided Cypher query. */
  def createStatementForQuery(options: Neo4jOptions): String

  /** Renders the statement for relationship-based access. */
  def createStatementForRelationships(options: Neo4jOptions): String

  /** Renders the statement for label-based node access. */
  def createStatementForNodes(options: Neo4jOptions): String

  /** Renders the statement for a GDS procedure call. */
  def createStatementForGDS(options: Neo4jOptions): String
}
/**
 * Thin dispatcher that delegates query rendering to the configured strategy
 * based on the query type in the options.
 */
class Neo4jQueryService(private val options: Neo4jOptions, val strategy: Neo4jQueryStrategy) extends Serializable {

  /** Renders the Cypher statement matching the configured query type. */
  def createQuery(): String = {
    val queryType = options.query.queryType
    queryType match {
      case QueryType.LABELS => strategy.createStatementForNodes(options)
      case QueryType.RELATIONSHIP => strategy.createStatementForRelationships(options)
      case QueryType.QUERY => strategy.createStatementForQuery(options)
      case QueryType.GDS => strategy.createStatementForGDS(options)
      case _ => throw new UnsupportedOperationException(
          s"""Query Type not supported.
             |You provided $queryType,
             |supported types: ${QueryType.values.mkString(",")}""".stripMargin
        )
    }
  }
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/service/SchemaService.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.service
import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.spark.internal.Logging
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.DataTypes
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.neo4j.caniuse.Neo4j
import org.neo4j.driver.Record
import org.neo4j.driver.Session
import org.neo4j.driver.Transaction
import org.neo4j.driver.TransactionWork
import org.neo4j.driver.Value
import org.neo4j.driver.Values
import org.neo4j.driver.exceptions.ClientException
import org.neo4j.driver.summary
import org.neo4j.spark.config.TopN
import org.neo4j.spark.converter.CypherToSparkTypeConverter
import org.neo4j.spark.converter.SparkToCypherTypeConverter
import org.neo4j.spark.cypher.CypherVersionSelector.selectCypherVersionClause
import org.neo4j.spark.service.SchemaService.normalizedClassName
import org.neo4j.spark.service.SchemaService.normalizedClassNameFromGraphEntity
import org.neo4j.spark.util.Neo4jImplicits.CypherImplicits
import org.neo4j.spark.util.Neo4jImplicits.ValueImplicits
import org.neo4j.spark.util.OptimizationType
import org.neo4j.spark.util._
import java.util
import java.util.Collections
import java.util.function
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
object PartitionPagination {

  // Sentinel used when no pagination is pushed down: skip = -1 and TopN(-1)
  // are treated as "not set" by the query builders (see hasSkipLimit usages).
  val EMPTY: PartitionPagination = PartitionPagination(0, -1, TopN(-1))
}

// Describes the slice of data one Spark partition reads: `partitionNumber` is
// the partition index, `skip` the number of rows to skip before the slice, and
// `topN` the row limit (with optional ordering) of the slice.
case class PartitionPagination(partitionNumber: Int, skip: Long, topN: TopN)
class SchemaService(
private val neo4j: Neo4j,
private val options: Neo4jOptions,
private val driverCache: DriverCache,
private val filters: Array[Filter] = Array.empty
) extends AutoCloseable with Logging {
private val queryReadStrategy = new Neo4jQueryReadStrategy(neo4j, filters)
private val session: Session = driverCache.getOrCreate().session(options.session.toNeo4jSession())
private val sessionTransactionConfig = options.toNeo4jTransactionConfig
private val cypherToSparkTypeConverter = CypherToSparkTypeConverter(options)
private val sparkToCypherTypeConverter = SparkToCypherTypeConverter(options)
/**
 * Infers the Spark schema for nodes with the given labels.
 *
 * Tries `apoc.meta.nodeTypeProperties` first; when APOC is unavailable or
 * fails with a ClientException, falls back to sampling up to `flattenLimit`
 * random nodes and inferring the schema from their properties. The internal
 * `<labels>` and `<id>` fields are appended and, via the final reverse, end up
 * first in the resulting schema.
 *
 * @param labels the node labels to inspect (defaults to the configured ones)
 */
private def structForNode(labels: Seq[String] = options.nodeMetadata.labels) = {
  val structFields: mutable.Buffer[StructField] = (try {
    val query =
      s"""${selectCypherVersionClause(neo4j)}CALL apoc.meta.nodeTypeProperties($$config)
         |YIELD propertyName, propertyTypes
         |WITH DISTINCT propertyName, propertyTypes
         |WITH propertyName, collect(propertyTypes) AS propertyTypes
         |RETURN propertyName, reduce(acc = [], elem IN propertyTypes | acc + elem) AS propertyTypes
         |""".stripMargin
    // merge any user-provided APOC procedure config with the label restriction
    val apocConfig = options.apocConfig.procedureConfigMap
      .getOrElse("apoc.meta.nodeTypeProperties", Map.empty[String, AnyRef])
      .asInstanceOf[Map[String, AnyRef]] ++ Map[String, AnyRef]("includeLabels" -> labels.asJava)
    retrieveSchemaFromApoc(query, Collections.singletonMap("config", apocConfig.asJava))
  } catch {
    case e: ClientException =>
      logResolutionChange("Switching to query schema resolution", e)
      // TODO get back to Cypher DSL when rand function will be available
      val query =
        s"""${selectCypherVersionClause(neo4j)}MATCH (${Neo4jUtil.NODE_ALIAS}:${labels.map(_.quote()).mkString(":")})
           |RETURN ${Neo4jUtil.NODE_ALIAS}
           |ORDER BY rand()
           |LIMIT ${options.schemaMetadata.flattenLimit}
           |""".stripMargin
      val params = Collections.emptyMap[String, AnyRef]()
      retrieveSchema(query, params, { record => record.get(Neo4jUtil.NODE_ALIAS).asNode.asMap.asScala.toMap })
  })
    .sortBy(t => t.name)
  structFields += StructField(
    Neo4jUtil.INTERNAL_LABELS_FIELD,
    DataTypes.createArrayType(DataTypes.StringType),
    nullable = true
  )
  structFields += StructField(Neo4jUtil.INTERNAL_ID_FIELD, DataTypes.LongType, nullable = false)
  // reverse puts <id> and <labels> first, followed by the name-sorted properties
  StructType(structFields.reverse.toSeq)
}
/**
 * Runs an `apoc.meta.*TypeProperties` query and converts each returned
 * property descriptor into a Spark StructField.
 *
 * A property reported with more than one Cypher type is coerced to String
 * (with a warning). An empty result is signalled as a ClientException so the
 * caller can fall back to query-based schema sampling.
 *
 * @param query  the APOC metadata query to run
 * @param params its parameters
 * @throws ClientException when APOC returned no usable fields
 */
private def retrieveSchemaFromApoc(
    query: String,
    params: java.util.Map[String, AnyRef]
): mutable.Buffer[StructField] = {
  val fields = session.run(query, params, sessionTransactionConfig)
    .list
    .asScala
    // drop rows without a usable property name
    .filter(record => !record.get("propertyName").isNull && !record.get("propertyName").isEmpty)
    .map(record => {
      val fieldTypesList = record.get("propertyTypes")
        .asList(new function.Function[Value, String]() {
          override def apply(v: Value): String = v.asString()
        })
        .asScala
      val fieldType: String = if (fieldTypesList.size > 1) {
        // conflicting types across nodes/relationships → coerce to String
        log.warn(
          s"""
             |The field ${record.get("propertyName")} has different types: $fieldTypesList
             |Every value will be casted to string.
             |""".stripMargin
        )
        "String"
      } else {
        fieldTypesList.head
      }
      StructField(record.get("propertyName").asString, cypherToSparkTypeConverter.convert(fieldType))
    })
  if (fields.isEmpty) {
    // triggers the caller's fallback to query-based schema resolution
    throw new ClientException("Unable to compute the resulting schema from APOC")
  }
  fields
}
/**
 * Samples the given query's results and infers one StructField per distinct
 * property key.
 *
 * Each record is flattened to a property map via `extractFunction`; values are
 * grouped by key and, under the SAMPLE strategy, their normalized class names
 * are compared — keys whose sampled values disagree on type are coerced to
 * String (with a warning). Under the STRING strategy every key becomes String.
 *
 * @param query           the sampling query
 * @param params          its parameters
 * @param extractFunction flattens one driver Record into a property map
 */
private def retrieveSchema(
    query: String,
    params: java.util.Map[String, AnyRef],
    extractFunction: Record => Map[String, AnyRef]
): mutable.Buffer[StructField] = {
  session.run(query, params, sessionTransactionConfig).list.asScala
    .flatMap(extractFunction)
    .groupBy(_._1)
    // keep only the values for each property key
    .mapValues(_.map(_._2))
    .map(t =>
      options.schemaMetadata.strategy match {
        case SchemaStrategy.SAMPLE => {
          val types = t._2.map(value => {
            // user queries and graph entities need different normalization
            if (options.query.queryType == QueryType.QUERY) {
              normalizedClassName(value, options)
            } else {
              normalizedClassNameFromGraphEntity(value, options)
            }
          }).toSet
          if (types.size > 1) {
            log.warn(
              s"""
                 |The field ${t._1} has different types: ${types.toString}
                 |Every value will be casted to string.
                 |""".stripMargin
            )
            StructField(t._1, DataTypes.StringType)
          } else {
            val value = t._2.head
            StructField(t._1, cypherToSparkTypeConverter.convert(types.head, value))
          }
        }
        case SchemaStrategy.STRING => StructField(t._1, DataTypes.StringType)
      }
    )
    .toBuffer
}
/**
 * Prefixes a node field name with its alias (e.g. "source"/"target"); internal
 * fields such as `<id>` and `<labels>` keep their angle brackets around the
 * whole prefixed name (e.g. `<source.id>`).
 */
private def mapStructField(alias: String, field: StructField): StructField = {
  val isInternal =
    field.name == Neo4jUtil.INTERNAL_ID_FIELD || field.name == Neo4jUtil.INTERNAL_LABELS_FIELD
  val prefixed =
    if (isInternal) s"<$alias.${field.name.replaceAll("[<|>]", "")}>"
    else s"$alias.${field.name}"
  // only the name changes; type, nullability and metadata are preserved
  field.copy(name = prefixed)
}
/**
 * Infers the Spark schema for the configured relationship type.
 *
 * Always starts with the internal `<rel.id>`/`<rel.type>` fields; the source
 * and target nodes are represented either as two map columns (when
 * `relationship.nodes.map` is enabled) or as flattened, alias-prefixed node
 * schemas. Relationship properties come from `apoc.meta.relTypeProperties`
 * when available, otherwise from sampling up to `flattenLimit` random
 * relationships.
 */
private def structForRelationship() = {
  val structFields: mutable.Buffer[StructField] = ArrayBuffer(
    StructField(Neo4jUtil.INTERNAL_REL_ID_FIELD, DataTypes.LongType, false),
    StructField(Neo4jUtil.INTERNAL_REL_TYPE_FIELD, DataTypes.StringType, false)
  )
  if (options.relationshipMetadata.nodeMap) {
    // compact representation: each end node is a single map column
    structFields += StructField(
      s"<${Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS}>",
      DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType),
      false
    )
    structFields += StructField(
      s"<${Neo4jUtil.RELATIONSHIP_TARGET_ALIAS}>",
      DataTypes.createMapType(DataTypes.StringType, DataTypes.StringType),
      false
    )
  } else {
    // flattened representation: node schemas prefixed with source./target.
    structFields ++= structForNode(options.relationshipMetadata.source.labels)
      .map(field => mapStructField(Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS, field))
    structFields ++= structForNode(options.relationshipMetadata.target.labels)
      .map(field => mapStructField(Neo4jUtil.RELATIONSHIP_TARGET_ALIAS, field))
  }
  structFields ++= (try {
    val query =
      s"""${selectCypherVersionClause(
          neo4j
        )}CALL apoc.meta.relTypeProperties($$config) YIELD sourceNodeLabels, targetNodeLabels,
         | propertyName, propertyTypes
         |WITH *
         |WHERE sourceNodeLabels = $$sourceLabels AND targetNodeLabels = $$targetLabels
         |RETURN *
         |""".stripMargin
    val apocConfig = options.apocConfig.procedureConfigMap
      .getOrElse("apoc.meta.relTypeProperties", Map.empty[String, AnyRef])
      .asInstanceOf[Map[String, AnyRef]]
    val config = apocConfig ++ Map("includeRels" -> Seq(options.relationshipMetadata.relationshipType).asJava)
    val params = Map[String, AnyRef](
      "config" -> config.asJava,
      "sourceLabels" -> options.relationshipMetadata.source.labels.asJava,
      "targetLabels" -> options.relationshipMetadata.target.labels.asJava
    )
      .asJava
    retrieveSchemaFromApoc(query, params)
  } catch {
    case e: ClientException =>
      logResolutionChange("Switching to query schema resolution", e)
      // TODO get back to Cypher DSL when rand function will be available
      val query =
        s"""${selectCypherVersionClause(
            neo4j
          )}MATCH (${Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS}:${options.relationshipMetadata.source.labels.map(
            _.quote()
          ).mkString(":")})
           |MATCH (${Neo4jUtil.RELATIONSHIP_TARGET_ALIAS}:${options.relationshipMetadata.target.labels.map(
            _.quote()
          ).mkString(":")})
           |MATCH (${Neo4jUtil.RELATIONSHIP_SOURCE_ALIAS})-[${Neo4jUtil.RELATIONSHIP_ALIAS}:${options.relationshipMetadata.relationshipType}]->(${Neo4jUtil.RELATIONSHIP_TARGET_ALIAS})
           |RETURN ${Neo4jUtil.RELATIONSHIP_ALIAS}
           |ORDER BY rand()
           |LIMIT ${options.schemaMetadata.flattenLimit}
           |""".stripMargin
      val params = Collections.emptyMap[String, AnyRef]()
      retrieveSchema(
        query,
        params,
        { record => record.get(Neo4jUtil.RELATIONSHIP_ALIAS).asRelationship.asMap.asScala.toMap }
      )
  })
    // relationship properties are exposed under the rel. prefix
    .map(field => StructField(s"rel.${field.name}", field.dataType, field.nullable, field.metadata))
    .sortBy(t => t.name)
  StructType(structFields.toSeq)
}
/**
 * Infers the Spark schema for a user-provided query by sampling its results.
 *
 * The query is wrapped with `ORDER BY rand() LIMIT flattenLimit` (directly
 * when syntactically valid, otherwise through a `CALL { ... }` subquery) and
 * its sampled rows drive type inference. The EXPLAIN-reported column names are
 * used to order the fields; when the sample is empty but columns are known,
 * an all-String schema over those columns is returned.
 */
private def structForQuery(): StructType = {
  val query = queryReadStrategy.createStatementForQuery(options)
  if (!isValidQuery(query, summary.QueryType.READ_ONLY)) {
    // invalid or non-read-only query: no schema can be inferred
    return new StructType()
  }
  // dummy bindings so the injected connector variables resolve during sampling
  val params = Map[String, AnyRef](
    Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT -> Collections.emptyList(),
    Neo4jQueryStrategy.VARIABLE_STREAM -> Collections.emptyMap()
  )
    .asJava
  val randLimitedQueryForSchema =
    s"""
       |$query
       |ORDER BY rand()
       |LIMIT ${options.schemaMetadata.flattenLimit}
       |""".stripMargin
  // fallback shape for queries that cannot take a trailing ORDER BY directly
  val randCallLimitedQueryForSchema =
    s"""
       |CALL {
       |  $query
       |} RETURN *
       |ORDER BY rand()
       |LIMIT ${options.schemaMetadata.flattenLimit}
       |""".stripMargin
  val limitedQuery =
    if (isValidQuery(randLimitedQueryForSchema)) randLimitedQueryForSchema else randCallLimitedQueryForSchema
  val structFields = retrieveSchema(limitedQuery, params, { record => record.asMap.asScala.toMap })
  val columns = getReturnedColumns(query)
  if (columns.isEmpty && structFields.isEmpty) {
    throw new ClientException(
      "Unable to compute the resulting schema; this may mean your result set is empty or your version of Neo4j does not permit schema inference for empty sets"
    )
  }
  if (columns.isEmpty) {
    return StructType(structFields.toSeq)
  }
  val sortedStructFields = if (structFields.isEmpty) {
    // df: we arrived here because there are no data returned by the query
    // so we want to return an empty dataset which schema is equals to the columns
    // specified by the RETURN statement
    columns.map(StructField(_, DataTypes.StringType))
  } else {
    try {
      // align the inferred fields with the RETURN column order
      columns.map(column => structFields.find(_.name.quote() == column.quote()).orNull).filter(_ != null)
    } catch {
      case _: Throwable => structFields.toArray
    }
  }
  StructType(sortedStructFields)
}
/**
 * Infers the Spark schema for a GDS procedure call by parsing the output side
 * of the signature reported by `gds.list()`.
 *
 * The query splits the signature's return declaration into `name :: TYPE`
 * pairs; each Neo4j type token is mapped to the matching converter type. MAP
 * and LIST-OF-MAP are only partially supported and are coerced to string-keyed
 * maps (with a warning); unknown types raise IllegalArgumentException.
 */
private def structForGDS() = {
  val query =
    s"""
       |${selectCypherVersionClause(neo4j)}CALL gds.list() YIELD name, signature, type
       |WHERE name = $$procName AND type = 'procedure'
       |WITH split(signature, ') :: (')[1] AS fields
       |WITH substring(fields, 0, size(fields) - 1) AS fields
       |WITH split(fields, ',') AS fields
       |WITH [field IN fields | split(field, ' :: ')] AS fields
       |UNWIND fields AS field
       |WITH field
       |RETURN *
       |""".stripMargin
  val map: util.Map[String, AnyRef] = Map[String, AnyRef]("procName" -> options.query.value).asJava
  val fields = session.run(query, map, sessionTransactionConfig).list.asScala
    .map(r => r.get("field").asList((t: Value) => t.asString()).asScala)
    .map(r =>
      (
        r.head.trim,
        // r(1) is the declared Neo4j type; '?' nullability markers are dropped
        r(1).replaceAll("\\?", "") match {
          case "STRING" => ("String", null)
          case "INTEGER" => ("Long", null)
          case "FLOAT" | "NUMBER" => ("Double", null)
          case "DATETIME" => ("DateTime", null)
          case "BOOLEAN" => ("Boolean", null)
          case "LOCALTIME" => ("LocalTime", null)
          case "LIST OF INTEGER" | "LIST<INTEGER>" | "LIST<INTEGER NOT NULL>" => ("LongArray", null)
          case "LIST OF FLOAT" | "LIST<FLOAT>" | "LIST<FLOAT NOT NULL>" => ("DoubleArray", null)
          case "LIST OF STRING" | "LIST<STRING>" | "LIST<STRING NOT NULL>" => ("StringArray", null)
          case "MAP" =>
            logWarning(
              s"""
                 |For procedure ${options.query.value}
                 |Neo4j return type MAP? of field ${r.head.trim} not fully supported.
                 |We'll coerce it to a Map<String, String>
                 |""".stripMargin
            )
            ("Map", Map("key" -> "").asJava) // dummy value
          case "LIST OF MAP" | "LIST<MAP>" | "LIST<MAP NOT NULL>" =>
            logWarning(
              s"""
                 |For procedure ${options.query.value}
                 |Neo4j return type LIST? OF MAP? of field ${r.head.trim} not fully supported.
                 |We'll coerce it to a [Map<String, String>]
                 |""".stripMargin
            )
            ("MapArray", Seq(Map("key" -> "").asJava).asJava) // dummy value
          case "PATH" => ("Path", null)
          case _ => throw new IllegalArgumentException(s"Neo4j type ${r(1)} not supported")
        }
      )
    )
    .map(r => StructField(r._1, cypherToSparkTypeConverter.convert(r._2._1, r._2._2)))
    .toSeq
  StructType(fields)
}
/**
 * Parses the input side of a GDS procedure signature (via `gds.list()`) and
 * returns, for each declared argument, its name and whether it is optional
 * (i.e. declares a default with ` = `).
 *
 * @param procName the fully-qualified GDS procedure name
 */
def inputForGDSProc(procName: String): Seq[(String, Boolean)] = {
  val query =
    """
      |WITH $procName AS procName
      |CALL gds.list() YIELD name, signature, type
      |WHERE name = procName AND type = 'procedure'
      |WITH replace(signature, procName + '(', '') AS signature
      |WITH split(signature, ') :: (')[0] AS fields
      |WITH substring(fields, 0, size(fields) - 1) AS fields
      |WITH split(fields, ',') AS fields
      |WITH [field IN fields | split(field, ' :: ')] AS fields
      |UNWIND fields AS field
      |WITH trim(split(field[0], ' = ')[0]) AS fieldName, field[0] contains ' = ' AS optional
      |RETURN *
      |""".stripMargin
  val params: util.Map[String, AnyRef] = Map[String, AnyRef]("procName" -> procName).asJava
  val rows = session.run(query, params, sessionTransactionConfig).list.asScala
  rows
    .map(row => (row.get("fieldName").asString(), row.get("optional").asBoolean()))
    .toSeq
}
/** Returns the query's result column names via EXPLAIN (no execution). */
private def getReturnedColumns(query: String): Array[String] = {
  val explained = session.run("EXPLAIN " + query, sessionTransactionConfig)
  explained.keys().asScala.toArray
}
/** Infers the Spark schema for the configured query type. */
def struct(): StructType = options.query.queryType match {
  case QueryType.LABELS => structForNode()
  case QueryType.RELATIONSHIP => structForRelationship()
  case QueryType.QUERY => structForQuery()
  case QueryType.GDS => structForGDS()
}
/**
 * Counts nodes by running an actual counting query.
 *
 * Without filters, one counting sub-query per configured label is combined
 * with UNION ALL and the minimum count is taken; with filters, the read
 * strategy renders a filtered count statement. Null counts are treated as 0.
 */
def countForNodeWithQuery(filters: Array[Filter]): Long = {
  val query =
    if (filters.nonEmpty) {
      queryReadStrategy.createStatementForNodeCount(options)
    } else {
      options.nodeMetadata.labels
        .map(_.quote())
        .map(label =>
          s"""
             |MATCH (:$label)
             |RETURN count(*) AS count""".stripMargin
        )
        .mkString(" UNION ALL ")
    }
  log.info(s"Executing the following counting query on Neo4j: $query")
  session.readTransaction(
    tx =>
      tx.run(query, Values.value(Neo4jUtil.paramsFromFilters(filters).asJava))
        .list()
        .asScala
        .map(row => {
          val count = row.get("count")
          if (count.isNull) 0L else count.asLong()
        })
        .min,
    sessionTransactionConfig
  )
}
/**
 * Counts relationships by running an actual counting query.
 *
 * Without filters, a counting sub-query per source label and per target label
 * is combined with UNION ALL and the minimum count is taken; with filters, the
 * read strategy renders a filtered count statement. Null counts are treated
 * as 0.
 *
 * NOTE(review): unlike countForNodeWithQuery this runs outside an explicit
 * read transaction and passes no filter parameters — confirm the rendered
 * relationship-count statement never requires bound parameters.
 */
def countForRelationshipWithQuery(filters: Array[Filter]): Long = {
  val query =
    if (filters.nonEmpty) {
      queryReadStrategy.createStatementForRelationshipCount(options)
    } else {
      val relAlias = Neo4jUtil.RELATIONSHIP_ALIAS
      val relType = options.relationshipMetadata.relationshipType.quote()
      val sourceQueries = options.relationshipMetadata.source.labels
        .map(_.quote())
        .map(label =>
          s"""MATCH (:$label)-[$relAlias:$relType]->()
             |RETURN count($relAlias) AS count
             |""".stripMargin
        )
      val targetQueries = options.relationshipMetadata.target.labels
        .map(_.quote())
        .map(label =>
          s"""MATCH ()-[$relAlias:$relType]->(:$label)
             |RETURN count($relAlias) AS count
             |""".stripMargin
        )
      (sourceQueries ++ targetQueries).mkString(" UNION ALL ")
    }
  log.info(s"Executing the following counting query on Neo4j: $query")
  session.run(query, sessionTransactionConfig)
    .list()
    .asScala
    .map(row => {
      val count = row.get("count")
      if (count.isNull) 0L else count.asLong()
    })
    .min
}
/**
 * Counts the nodes to read, preferring the APOC count store for speed.
 *
 * Without filters, `apoc.meta.stats()` label counts are used and the minimum
 * over the configured labels is returned. A ClientException (e.g. APOC not
 * installed) falls back to a real counting query; any other failure is logged
 * and reported as -1.
 */
def countForNode(filters: Array[Filter]): Long =
  try {
    /*
     * we try to leverage the count store in order to have the faster response possible
     * https://neo4j.com/developer/kb/fast-counts-using-the-count-store/
     * so in this scenario we have some limitations given the fact that we get the min
     * for the sequence of counts returned
     */
    if (filters.isEmpty) {
      val query = "CALL apoc.meta.stats() yield labels RETURN labels"
      val map = session.run(query, sessionTransactionConfig).single()
        .asMap()
        .asScala
        .get("labels")
        .getOrElse(Collections.emptyMap())
        .asInstanceOf[util.Map[String, Long]].asScala
      // minimum across the configured labels (a node must carry all of them)
      map.filterKeys(k => options.nodeMetadata.labels.contains(k))
        .values.min
    } else {
      countForNodeWithQuery(filters)
    }
  } catch {
    case e: ClientException => {
      // APOC unavailable or call failed: fall back to a real counting query
      logResolutionChange("Switching to query count resolution", e)
      countForNodeWithQuery(filters)
    }
    case e: Throwable => logExceptionForCount(e)
  }
/**
 * Counts the relationships to read, preferring the APOC count store.
 *
 * Without filters, `apoc.meta.stats()` relationship-type counts are probed per
 * source label and per target label; missing keys contribute Long.MaxValue so
 * they never win, and the overall minimum is returned. A ClientException falls
 * back to a real counting query; any other failure is logged and reported
 * as -1.
 */
def countForRelationship(filters: Array[Filter]): Long =
  try {
    if (filters.isEmpty) {
      val query = "CALL apoc.meta.stats() yield relTypes RETURN relTypes"
      val map = session.run(query, sessionTransactionConfig).single()
        .asMap()
        .asScala
        .get("relTypes")
        .getOrElse(Collections.emptyMap())
        .asInstanceOf[util.Map[String, Long]]
        .asScala
      // stats keys look like "(:Label)-[:TYPE]->()" / "()-[:TYPE]->(:Label)"
      val minFromSource = options.relationshipMetadata.source.labels
        .map(_.quote())
        .map(label =>
          map.get(s"(:$label)-[:${options.relationshipMetadata.relationshipType}]->()").getOrElse(Long.MaxValue)
        )
        .min
      val minFromTarget = options.relationshipMetadata.target.labels
        .map(_.quote())
        .map(label =>
          map.get(s"()-[:${options.relationshipMetadata.relationshipType}]->(:$label)").getOrElse(Long.MaxValue)
        )
        .min
      Math.min(minFromSource, minFromTarget)
    } else {
      countForRelationshipWithQuery(filters)
    }
  } catch {
    case e: ClientException => {
      // APOC unavailable or call failed: fall back to a real counting query
      logResolutionChange("Switching to query count resolution", e)
      countForRelationshipWithQuery(filters)
    }
    case e: Throwable => logExceptionForCount(e)
  }
/**
 * Logs a count failure and returns -1, the sentinel callers use for
 * "count unavailable".
 */
private def logExceptionForCount(e: Throwable): Long = {
  log.error("Cannot compute the count because the following exception:", e)
  -1
}
/**
 * Computes the skip/limit slice each Spark partition should read.
 *
 * A single partition honours a pushed-down TopN when present, otherwise reads
 * everything. With multiple partitions the total count is split into evenly
 * sized slices (the last one may be shorter); a non-positive count yields the
 * EMPTY sentinel.
 *
 * @param topN optional pushed-down limit/ordering (single-partition case only)
 */
def skipLimitFromPartition(topN: Option[TopN]): Seq[PartitionPagination] =
  if (options.partitions == 1) {
    Seq(topN.fold(PartitionPagination.EMPTY)(top => PartitionPagination(0, 0, top)))
  } else {
    val total: Long = this.count()
    if (total <= 0) {
      Seq(PartitionPagination.EMPTY)
    } else {
      val sliceSize = math.ceil(total.toDouble / options.partitions).toLong
      (0 until options.partitions)
        .map(i => PartitionPagination(i, i * sliceSize, TopN(sliceSize)))
    }
  }
/**
 * Counts the rows the configured query would produce.
 *
 * Fix: the match was non-exhaustive — QueryType.GDS (and any future type) fell
 * through to an opaque scala.MatchError. Counting is genuinely unsupported for
 * those types, so they now raise a descriptive UnsupportedOperationException,
 * mirroring the error style of Neo4jQueryService.createQuery. Behavior for
 * LABELS/RELATIONSHIP/QUERY is unchanged.
 *
 * @param filters pushed-down Spark filters (defaults to the instance's)
 */
def count(filters: Array[Filter] = this.filters): Long = options.query.queryType match {
  case QueryType.LABELS => countForNode(filters)
  case QueryType.RELATIONSHIP => countForRelationship(filters)
  case QueryType.QUERY => countForQuery()
  case queryType => throw new UnsupportedOperationException(
      s"Count is not supported for the query type $queryType"
    )
}
/**
 * Counts the rows of a user-provided query.
 *
 * If the `query.count` option is a literal number it is used directly; if it
 * is a non-empty query it is executed as-is; otherwise the main query is
 * wrapped in a `CALL { ... } RETURN count(*)` statement. The executed query
 * must return a single `count` column.
 */
private def countForQuery(): Long = {
  val configuredCount: String = options.queryMetadata.queryCount
  if (Neo4jUtil.isLong(configuredCount)) {
    configuredCount.trim.toLong
  } else {
    val countQuery =
      if (configuredCount.nonEmpty) configuredCount
      else
        s"""CALL { ${options.query.value} }
           |RETURN count(*) AS count
           |""".stripMargin
    session.run(countQuery, sessionTransactionConfig).single().get("count").asLong()
  }
}
/** Returns true when `procName` matches exactly one GDS procedure. */
def isGdsProcedure(procName: String): Boolean = {
  val params: util.Map[String, AnyRef] = Map[String, AnyRef]("procName" -> procName).asJava
  val result = session.run(
    """
      |CALL gds.list() YIELD name, type
      |WHERE name = $procName AND type = 'procedure'
      |RETURN count(*) = 1
      |""".stripMargin,
    params,
    sessionTransactionConfig
  )
  result.single().get(0).asBoolean()
}
/**
 * EXPLAIN-compiles the query and checks its type against the expected ones.
 *
 * @return an empty string on success, otherwise a human-readable reason
 *         (wrong query type or compilation failure)
 */
def validateQuery(query: String, expectedQueryTypes: org.neo4j.driver.summary.QueryType*): String =
  try {
    val actualType = session.run(s"EXPLAIN $query", sessionTransactionConfig).consume().queryType()
    // an empty expectation list means "any type is fine"
    if (expectedQueryTypes.isEmpty || expectedQueryTypes.contains(actualType)) {
      ""
    } else {
      s"Invalid query `${cleanQuery(query)}` because the accepted types are [${expectedQueryTypes.mkString(", ")}], but the actual type is $actualType"
    }
  } catch {
    case e: Throwable => s"Query not compiled for the following exception: ${ExceptionUtils.getMessage(e)}"
  }
/**
 * Strips the connector-injected `WITH ... AS event/scriptResult` prefixes from
 * a query so error messages show only the user-written part.
 */
private def cleanQuery(query: String) = {
  val injectedPrefixes = Seq(
    s"WITH {} AS ${Neo4jQueryStrategy.VARIABLE_EVENT}, [] as ${Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT}",
    s"WITH [] as ${Neo4jQueryStrategy.VARIABLE_SCRIPT_RESULT}",
    s"WITH {} AS ${Neo4jQueryStrategy.VARIABLE_EVENT}"
  )
  injectedPrefixes
    .foldLeft(query)((cleaned, prefix) => cleaned.replace(prefix, ""))
    .trim
}
/**
 * Validates a user-provided count query: it must EXPLAIN-compile, be read-only
 * (or schema-write), and return exactly one identifier named `count`.
 *
 * @return an empty string on success, otherwise a human-readable reason
 */
def validateQueryCount(query: String): String =
  try {
    val explained = session.run(s"EXPLAIN $query", sessionTransactionConfig).consume()
    val actualType = explained.queryType()
    val expectedQueryTypes =
      Set(org.neo4j.driver.summary.QueryType.READ_ONLY, org.neo4j.driver.summary.QueryType.SCHEMA_WRITE)
    val typeIsAccepted = expectedQueryTypes.contains(actualType)
    val yieldsSingleCount = explained.plan().identifiers().asScala.toSet == Set("count")
    if (typeIsAccepted && yieldsSingleCount) {
      ""
    } else {
      s"Invalid query `${cleanQuery(query)}` because the expected type should be [${expectedQueryTypes.mkString(", ")}], but the actual type is $actualType"
    }
  } catch {
    case e: Throwable => s"Query count not compiled for the following exception: ${ExceptionUtils.getMessage(e)}"
  }
/**
 * Boolean variant of query validation: EXPLAINs the query and checks its type.
 *
 * @param query              the Cypher query to validate
 * @param expectedQueryTypes accepted query types; empty accepts any type
 * @return true when the query compiles and its type is acceptable
 */
def isValidQuery(query: String, expectedQueryTypes: org.neo4j.driver.summary.QueryType*): Boolean =
  try {
    val actualType = session.run(s"EXPLAIN $query", sessionTransactionConfig).consume().queryType()
    expectedQueryTypes.isEmpty || expectedQueryTypes.contains(actualType)
  } catch {
    case e: Throwable =>
      // Compilation failures are expected during probing: log at debug and report invalid
      if (log.isDebugEnabled) {
        log.debug("Query not compiled because of the following exception:", e)
      }
      false
  }
// Legacy optimization path: creates an index or a node constraint for the given
// label/properties unless an equivalent one already exists.
// Kept only for the pre-`createEntityConstraint` option surface.
@deprecated("use createEntityConstraint instead")
private def createIndexOrConstraint(action: OptimizationType.Value, label: String, props: Seq[String]): Unit =
  action match {
    case OptimizationType.NONE => log.info("No optimization type provided")
    case _ => {
      try {
        val quotedLabel = label.quote()
        val quotedProps = props
          .map(prop => s"${Neo4jUtil.NODE_ALIAS}.${prop.quote()}")
          .mkString(", ")
        // SHOW INDEXES exposes uniqueness differently across major versions:
        // Neo4j 4 has a string `uniqueness` column, later versions have
        // `owningConstraint` (null when the index is not constraint-backed).
        val isNeo4j4 = neo4j.getVersion.getMajor == 4
        val uniqueFieldName = if (!isNeo4j4) "owningConstraint" else "uniqueness"
        val dashSeparatedProps = props.mkString("-")
        // Build, per action, both the CREATE suffix and the WHERE condition
        // used to detect a pre-existing equivalent index/constraint.
        val (querySuffix, uniqueCondition) = action match {
          case OptimizationType.INDEX => (
            s"FOR (${Neo4jUtil.NODE_ALIAS}:$quotedLabel) ON ($quotedProps)",
            if (!isNeo4j4) s"$uniqueFieldName IS NULL" else s"$uniqueFieldName = 'NONUNIQUE'"
          )
          case OptimizationType.NODE_CONSTRAINTS => {
            // Composite keys need NODE KEY; a single property only needs UNIQUE
            val assertType = if (props.size > 1) "NODE KEY" else "UNIQUE"
            (
              s"FOR (${Neo4jUtil.NODE_ALIAS}:$quotedLabel) REQUIRE ($quotedProps) IS $assertType",
              if (!isNeo4j4) s"$uniqueFieldName IS NOT NULL" else s"$uniqueFieldName = 'UNIQUE'"
            )
          }
        }
        val actionName = s"spark_${action.toString}_${label}_$dashSeparatedProps".quote()
        val queryPrefix = action match {
          case OptimizationType.INDEX => s"CREATE INDEX $actionName"
          case OptimizationType.NODE_CONSTRAINTS => s"CREATE CONSTRAINT $actionName"
        }
        // Existence probe: matches on labels, properties and uniqueness flavor
        val queryCheck =
          s"""SHOW INDEXES YIELD labelsOrTypes, properties, $uniqueFieldName
             |WHERE labelsOrTypes = ${'$'}labels
             |AND properties = ${'$'}properties
             |AND $uniqueCondition
             |RETURN count(*) > 0 AS isPresent""".stripMargin
        val params: util.Map[String, AnyRef] = Map(
          "labels" -> Seq(label).asJava,
          "properties" -> props.asJava
        ).asJava.asInstanceOf[util.Map[String, AnyRef]]
        val isPresent = session.run(queryCheck, params, sessionTransactionConfig)
          .single()
          .get("isPresent")
          .asBoolean()
        // Only create when nothing equivalent exists already (idempotent behavior)
        val status = if (isPresent) {
          "KEPT"
        } else {
          val query = s"$queryPrefix $querySuffix"
          log.info(s"Performing the following schema query: $query")
          session.run(query, sessionTransactionConfig)
          "CREATED"
        }
        log.info(s"Status for $action named with label $quotedLabel and props $quotedProps is: $status")
      } catch {
        // Best-effort: optimizations must never fail the write job, so we only log
        case e: Throwable => log.info("Cannot perform the optimization query because of the following exception:", e)
      }
    }
  }
/**
 * Creates a uniqueness or key constraint for a node label or relationship type.
 *
 * @param entityType                  "NODE" or "RELATIONSHIP"
 * @param entityIdentifier            label or relationship type the constraint targets
 * @param constraintsOptimizationType UNIQUE or KEY flavor requested by the options
 * @param keys                        source-column -> Neo4j-property mapping of the key properties
 */
private def createEntityConstraint(
    entityType: String,
    entityIdentifier: String,
    constraintsOptimizationType: ConstraintsOptimizationType.Value,
    keys: Map[String, String]
): Unit = {
  // UNIQUE for plain uniqueness, otherwise e.g. "NODE KEY" / "RELATIONSHIP KEY"
  val constraintType =
    if (constraintsOptimizationType == ConstraintsOptimizationType.UNIQUE) "UNIQUE"
    else s"$entityType KEY"
  val dashSeparatedProps = keys.values.mkString("-")
  // e.g. spark_NODE_UNIQUE-CONSTRAINT_<label>_<prop1>-<prop2>, quoted for safety
  val shortConstraintType = constraintType.replace(s"$entityType ", "")
  val constraintName =
    s"spark_${entityType}_$shortConstraintType-CONSTRAINT_${entityIdentifier}_$dashSeparatedProps".quote()
  val props = keys.values.map(prop => s"e.${prop.quote()}").mkString(", ")
  val asciiRepresentation: String = createCypherPattern(entityType, entityIdentifier)
  session.writeTransaction(
    tx =>
      tx.run(
        s"CREATE CONSTRAINT $constraintName IF NOT EXISTS FOR $asciiRepresentation REQUIRE ($props) IS $constraintType"
      ),
    sessionTransactionConfig
  )
}
// Builds the Cypher pattern used in constraint DDL, binding the entity to `e`:
// a node pattern for NODE, a directed relationship pattern for RELATIONSHIP.
private def createCypherPattern(entityType: String, entityIdentifier: String) =
  entityType match {
    case "NODE" => s"(e:${entityIdentifier.quote()})"
    case "RELATIONSHIP" => s"()-[e:${entityIdentifier.quote()}]->()"
    case unsupported => throw new IllegalArgumentException(s"$unsupported not supported")
  }
/**
 * Creates property TYPE and/or NOT NULL (EXISTS) constraints for every mapped
 * property that is present in the Spark schema.
 *
 * @param entityType       "NODE" or "RELATIONSHIP"
 * @param entityIdentifier label or relationship type the constraints target
 * @param properties       source-field-name -> Neo4j-property-name mapping
 * @param struct           Spark schema providing each field's data type and nullability
 * @param constraints      which schema constraint flavors (TYPE/EXISTS) to create
 */
private def createEntityTypeConstraint(
    entityType: String,
    entityIdentifier: String,
    properties: Map[String, String],
    struct: StructType,
    constraints: Set[SchemaConstraintsOptimizationType.Value]
): Unit = {
  val asciiRepresentation: String = createCypherPattern(entityType, entityIdentifier)
  session.writeTransaction(
    tx => {
      // Only properties backed by a field in the Spark schema are considered
      for {
        (fieldName, propertyName) <- properties
        field <- struct.find(_.name == fieldName)
      } {
        val prop = propertyName.quote()
        val cypherType = sparkToCypherTypeConverter.convert(field.dataType)
        if (constraints.contains(SchemaConstraintsOptimizationType.TYPE)) {
          val typeConstraintName = s"spark_$entityType-TYPE-CONSTRAINT-$entityIdentifier-$prop".quote()
          tx.run(
            s"CREATE CONSTRAINT $typeConstraintName IF NOT EXISTS FOR $asciiRepresentation REQUIRE e.$prop IS :: $cypherType"
          ).consume()
        }
        // NOT NULL constraints only make sense for non-nullable Spark fields
        if (constraints.contains(SchemaConstraintsOptimizationType.EXISTS) && !field.nullable) {
          val notNullConstraintName = s"spark_$entityType-NOT_NULL-CONSTRAINT-$entityIdentifier-$prop".quote()
          tx.run(
            s"CREATE CONSTRAINT $notNullConstraintName IF NOT EXISTS FOR $asciiRepresentation REQUIRE e.$prop IS NOT NULL"
          ).consume()
        }
      }
    },
    sessionTransactionConfig
  )
}
// Creates schema optimizations (constraints/indexes) for a node write.
// The new option surface (node constraint + schema constraints) takes
// precedence; the legacy `optimizationType` path is only used when no new
// option was set.
private def createOptimizationsForNode(struct: StructType): Unit = {
  val schemaMetadata = options.schemaMetadata.optimization
  if (
    schemaMetadata.nodeConstraint != ConstraintsOptimizationType.NONE
    || schemaMetadata.schemaConstraints != Set(SchemaConstraintsOptimizationType.NONE)
  ) {
    // New behavior: key/unique constraint on the node keys, if requested
    if (schemaMetadata.nodeConstraint != ConstraintsOptimizationType.NONE) {
      createEntityConstraint(
        "NODE",
        options.nodeMetadata.labels.head,
        schemaMetadata.nodeConstraint,
        options.nodeMetadata.nodeKeys
      )
    }
    // Property TYPE / NOT NULL constraints, derived from the Spark schema
    if (schemaMetadata.schemaConstraints.nonEmpty) {
      // Identity mapping for struct fields; explicit metadata mappings win
      // because they are added last (++ overwrites on key collision)
      val propsFromStruct: Map[String, String] = struct
        .map(f => (f.name, f.name))
        .toMap
      val propsFromMeta: Map[String, String] = options.nodeMetadata.nodeKeys ++ options.nodeMetadata.properties
      createEntityTypeConstraint(
        "NODE",
        options.nodeMetadata.labels.head,
        propsFromStruct ++ propsFromMeta,
        struct,
        schemaMetadata.schemaConstraints
      )
    }
  } else { // TODO old behaviour, remove it in the future
    options.schemaMetadata.optimizationType match {
      case OptimizationType.INDEX | OptimizationType.NODE_CONSTRAINTS => {
        createIndexOrConstraint(
          options.schemaMetadata.optimizationType,
          options.nodeMetadata.labels.head,
          options.nodeMetadata.nodeKeys.values.toSeq
        )
      }
      case _ => // do nothing
    }
  }
}
// Creates schema optimizations for a relationship write: constraints for the
// source node, the target node, and the relationship itself. As with nodes,
// the new option surface takes precedence over the legacy `optimizationType`.
private def createOptimizationsForRelationship(struct: StructType): Unit = {
  val schemaMetadata = options.schemaMetadata.optimization
  if (
    schemaMetadata.nodeConstraint != ConstraintsOptimizationType.NONE
    || schemaMetadata.relConstraint != ConstraintsOptimizationType.NONE
    || schemaMetadata.schemaConstraints != Set(SchemaConstraintsOptimizationType.NONE)
  ) {
    // Key/unique constraints on both endpoint node labels
    if (schemaMetadata.nodeConstraint != ConstraintsOptimizationType.NONE) {
      createEntityConstraint(
        "NODE",
        options.relationshipMetadata.source.labels.head,
        schemaMetadata.nodeConstraint,
        options.relationshipMetadata.source.nodeKeys
      )
      createEntityConstraint(
        "NODE",
        options.relationshipMetadata.target.labels.head,
        schemaMetadata.nodeConstraint,
        options.relationshipMetadata.target.nodeKeys
      )
    }
    // Key/unique constraint on the relationship type itself
    if (schemaMetadata.relConstraint != ConstraintsOptimizationType.NONE) {
      createEntityConstraint(
        "RELATIONSHIP",
        options.relationshipMetadata.relationshipType,
        schemaMetadata.relConstraint,
        options.relationshipMetadata.relationshipKeys
      )
    }
    // Property TYPE / NOT NULL constraints: struct fields mapped to the source
    // or target nodes are excluded from the relationship's own properties
    if (schemaMetadata.schemaConstraints.nonEmpty) {
      val sourceNodeProps: Map[String, String] =
        options.relationshipMetadata.source.nodeKeys ++ options.relationshipMetadata.source.properties
      val targetNodeProps: Map[String, String] =
        options.relationshipMetadata.target.nodeKeys ++ options.relationshipMetadata.target.properties
      val allNodeProps: Map[String, String] = sourceNodeProps ++ targetNodeProps
      // Remaining struct fields belong to the relationship
      val relStruct: StructType = StructType(struct.filterNot(f => allNodeProps.contains(f.name)))
      val propsFromRelStruct: Map[String, String] = relStruct
        .map(f => (f.name, f.name))
        .toMap
      val propsFromMeta: Map[String, String] =
        options.relationshipMetadata.relationshipKeys ++ options.relationshipMetadata.properties.getOrElse(Map.empty)
      createEntityTypeConstraint(
        "RELATIONSHIP",
        options.relationshipMetadata.relationshipType,
        propsFromRelStruct ++ propsFromMeta,
        struct,
        schemaMetadata.schemaConstraints
      )
      createEntityTypeConstraint(
        "NODE",
        options.relationshipMetadata.source.labels.head,
        sourceNodeProps,
        struct,
        schemaMetadata.schemaConstraints
      )
      createEntityTypeConstraint(
        "NODE",
        options.relationshipMetadata.target.labels.head,
        targetNodeProps,
        struct,
        schemaMetadata.schemaConstraints
      )
    }
  } else { // TODO old behaviour, remove it in the future
    options.schemaMetadata.optimizationType match {
      case OptimizationType.INDEX | OptimizationType.NODE_CONSTRAINTS => {
        // Legacy path: index/constraint on both endpoint labels only
        createIndexOrConstraint(
          options.schemaMetadata.optimizationType,
          options.relationshipMetadata.source.labels.head,
          options.relationshipMetadata.source.nodeKeys.values.toSeq
        )
        createIndexOrConstraint(
          options.schemaMetadata.optimizationType,
          options.relationshipMetadata.target.labels.head,
          options.relationshipMetadata.target.nodeKeys.values.toSeq
        )
      }
      case _ => // do nothing
    }
  }
}
/**
 * Entry point for schema optimizations: validates the options against the
 * Spark schema, then dispatches to the node or relationship flavor.
 * Other query types (e.g. plain queries) get no optimizations.
 */
def createOptimizations(struct: StructType): Unit = {
  Validations.validate(ValidateSchemaOptions(options, struct))
  val queryType = options.query.queryType
  if (queryType == QueryType.LABELS) {
    createOptimizationsForNode(struct)
  } else if (queryType == QueryType.RELATIONSHIP) {
    createOptimizationsForRelationship(struct)
  }
}
/**
 * Executes a batch of queries, separating schema changes from data work:
 * SCHEMA_WRITE queries run first in auto-commit mode (they cannot share a
 * transaction with data queries), then all remaining queries run inside a
 * single write transaction.
 *
 * @param queries the queries to execute, classified via EXPLAIN
 * @return the records of the LAST non-schema query (earlier ones run for
 *         side effects only); empty list when all queries were schema writes
 */
def execute(queries: Seq[String]): util.List[util.Map[String, AnyRef]] = {
  val queryMap = queries
    .map(query => {
      (session.run(s"EXPLAIN $query", sessionTransactionConfig).consume().queryType(), query)
    })
    .groupBy(_._1)
    .mapValues(_.map(_._2))
  val schemaQueries = queryMap.getOrElse(org.neo4j.driver.summary.QueryType.SCHEMA_WRITE, Seq.empty[String])
  schemaQueries.foreach(session.run(_, sessionTransactionConfig))
  val others = queryMap
    .filterKeys(key => key != org.neo4j.driver.summary.QueryType.SCHEMA_WRITE)
    .values
    .flatten
    .toSeq
  if (others.isEmpty) {
    Collections.emptyList()
  } else {
    session
      .writeTransaction(
        new TransactionWork[util.List[java.util.Map[String, AnyRef]]] {
          override def execute(transaction: Transaction): util.List[util.Map[String, AnyRef]] = {
            // Run all but the last query for their side effects.
            // BUG FIX: the previous code sliced with `queries.size - 1`
            // (the size of the UNFILTERED list), which re-ran the last
            // query twice whenever schema queries had been filtered out.
            others.init.foreach(transaction.run)
            // Only the last query's records are returned to the caller
            transaction.run(others.last).list()
              .asScala
              .map(_.asMap())
              .asJava
          }
        },
        sessionTransactionConfig
      )
  }
}
// Resolves the latest streaming offset (max value of the configured offset
// property) for the configured source.
// NOTE(review): the match is non-exhaustive — any other QueryType raises a
// MatchError; confirm streaming is only ever invoked with these three types.
def lastOffset(): Option[Long] = options.query.queryType match {
  case QueryType.LABELS => lastOffsetForNode()
  case QueryType.RELATIONSHIP => lastOffsetForRelationship()
  case QueryType.QUERY => lastOffsetForQuery()
}
// Latest offset for a node source: max of the offset property over all nodes
// with the configured label.
private def lastOffsetForNode(): Option[Long] = {
  // Quote the label for consistency with lastOffsetForRelationship and to
  // support labels containing special characters or spaces.
  val label = options.nodeMetadata.labels.head.quote()
  session.run(
    s"""MATCH (n:$label)
       |RETURN max(n.${options.streamingOptions.propertyName}) AS ${options.streamingOptions.propertyName}""".stripMargin,
    sessionTransactionConfig
  )
    .single()
    .get(options.streamingOptions.propertyName)
    .asOptionalLong()
}
// Latest offset for a relationship source: max of the offset property over
// all relationships matching the configured (source)-[rel]->(target) pattern.
private def lastOffsetForRelationship(): Option[Long] = {
  val source = options.relationshipMetadata.source.labels.head.quote()
  val target = options.relationshipMetadata.target.labels.head.quote()
  val relationship = options.relationshipMetadata.relationshipType.quote()
  val offsetProperty = options.streamingOptions.propertyName
  session.run(
    s"""MATCH (s:$source)-[r:$relationship]->(t:$target)
       |RETURN max(r.$offsetProperty) AS $offsetProperty""".stripMargin,
    sessionTransactionConfig
  )
    .single()
    .get(offsetProperty)
    .asOptionalLong()
}
// Latest offset for a query source: the user-provided offset query is expected
// to return a single record whose first column is the offset value.
private def lastOffsetForQuery(): Option[Long] = {
  val record = session
    .run(options.streamingOptions.queryOffset, sessionTransactionConfig)
    .single()
  record.get(0).asOptionalLong()
}
// Logs a schema-resolution fallback. The exception details are only logged
// when the cause is NOT a missing procedure (that case is expected and noisy).
private def logResolutionChange(message: String, e: ClientException): Unit = {
  log.warn(message)
  val isProcedureMissing = e.code().equals("Neo.ClientError.Procedure.ProcedureNotFound")
  if (!isProcedureMissing) {
    log.warn(s"For the following exception", e)
  }
}
// Releases the underlying driver session.
// NOTE(review): closeSafely appears to guard against close-time failures
// (it takes the logger) — confirm against Neo4jUtil.
override def close(): Unit = {
  Neo4jUtil.closeSafely(session, log)
}
}
object SchemaService {

  // Marker strings used when inferring Spark types from sampled Neo4j values
  val POINT_TYPE_2D = "point-2d"
  val POINT_TYPE_3D = "point-3d"
  val TIME_TYPE_OFFSET = "offset-time"
  val TIME_TYPE_LOCAL = "local-time"
  val DURATION_TYPE = "duration"

  // Normalizes a sampled value into a type-name token for schema inference.
  // Case order matters: the null case must come after the type patterns
  // (Scala type patterns never match null), and byte arrays need special
  // handling because they would otherwise look like plain lists.
  // null values default to "String" as the most permissive type.
  def normalizedClassName(value: AnyRef, options: Neo4jOptions): String = value match {
    case binary: Array[Byte] => if (options.legacyTypeConversionEnabled) value.getClass.getSimpleName else "ByteArray"
    case list: java.util.List[_] => "Array"
    case map: java.util.Map[String, _] => "Map"
    case null => "String"
    case _ => value.getClass.getSimpleName
  }

  // from nodes and relationships we cannot have maps as properties and elements in lists are the same type
  // special treatment for ByteArray required (pattern matching on Array != List)
  // NOTE(review): `list.get(0)` assumes sampled list properties are non-empty —
  // an empty list would throw IndexOutOfBoundsException; confirm callers filter
  // empty lists out before sampling.
  def normalizedClassNameFromGraphEntity(value: AnyRef, options: Neo4jOptions): String = value match {
    case binary: Array[Byte] => if (options.legacyTypeConversionEnabled) value.getClass.getSimpleName else "ByteArray"
    case list: java.util.List[_] => s"${list.get(0).getClass.getSimpleName}Array"
    case null => "String"
    case _ => value.getClass.getSimpleName
  }
}
================================================
FILE: common/src/main/scala/org/neo4j/spark/streaming/BaseStreamingPartitionReader.scala
================================================
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [https://neo4j.com]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.neo4j.spark.streaming
import org.apache.spark.sql.connector.expressions.aggregate.AggregateFunc
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.sources.GreaterThan
import org.apache.spark.sql.sources.LessThanOrEqual
import org.apache.spark.sql.types.StructType
import org.neo4j.caniuse.Neo4j
import org.neo4j.cypherdsl.core.Cypher
import org.neo4j.spark.reader.BasePartitionReader
import org.neo4j.spark.service.Neo4jQueryStrategy
import org.neo4j.spark.service.PartitionPagination
import org.neo4j.spark.streaming.BaseStreamingPartitionReader.offsetUsagePatterns
import org.neo4j.spark.util.Neo4jImplicits._
import org.neo4j.spark.util.Neo4jOptions
import org.neo4j.spark.util.Neo4jUtil
import org.neo4j.spark.util.QueryType._
import org.neo4j.spark.util.StreamingFrom
import java.util
import java.util.function.Predicate
import java.util.regex.Pattern
import scala.collection.JavaConverters.mapAsJavaMapConverter
class BaseStreamingPartitionReader(
private val neo4j: Neo4j,
private val options: Neo4jOptions,
private val filters: Array[Filter],
private val schema: StructType,
private val jobId: String,
private val partitionSkipLimit: PartitionPagination,
private val scriptResult: java.util.List[java.util.Map[String, AnyRef]],
private val requiredColum
gitextract_girzruyy/
├── .commitlintrc.json
├── .github/
│ ├── CODEOWNERS
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ └── dependabot.yml
├── .gitignore
├── .husky/
│ ├── commit-msg
│ └── pre-commit
├── .mvn/
│ └── wrapper/
│ └── maven-wrapper.properties
├── .teamcity/
│ ├── .editorconfig
│ ├── builds/
│ │ ├── Build.kt
│ │ ├── Common.kt
│ │ ├── Empty.kt
│ │ ├── JavaIntegrationTests.kt
│ │ ├── Maven.kt
│ │ ├── PRCheck.kt
│ │ ├── Package.kt
│ │ ├── PythonIntegrationTests.kt
│ │ ├── Release.kt
│ │ ├── SemgrepCheck.kt
│ │ └── WhiteListCheck.kt
│ ├── pom.xml
│ └── settings.kts
├── LICENSE.txt
├── README.md
├── common/
│ ├── LICENSES.txt
│ ├── NOTICE.txt
│ ├── pom.xml
│ └── src/
│ ├── main/
│ │ ├── java/
│ │ │ └── org/
│ │ │ └── neo4j/
│ │ │ └── spark/
│ │ │ └── util/
│ │ │ └── ReflectionUtils.java
│ │ ├── resources/
│ │ │ └── neo4j-spark-connector.properties
│ │ └── scala/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ ├── config/
│ │ │ └── TopN.scala
│ │ ├── converter/
│ │ │ ├── DataConverter.scala
│ │ │ └── TypeConverter.scala
│ │ ├── cypher/
│ │ │ ├── Cypher5Renderer.scala
│ │ │ └── CypherVersionSelector.scala
│ │ ├── reader/
│ │ │ └── BasePartitionReader.scala
│ │ ├── service/
│ │ │ ├── MappingService.scala
│ │ │ ├── Neo4jQueryService.scala
│ │ │ └── SchemaService.scala
│ │ ├── streaming/
│ │ │ └── BaseStreamingPartitionReader.scala
│ │ ├── util/
│ │ │ ├── DriverCache.scala
│ │ │ ├── Neo4jImplicits.scala
│ │ │ ├── Neo4jOptions.scala
│ │ │ ├── Neo4jUtil.scala
│ │ │ ├── ValidationUtil.scala
│ │ │ └── Validations.scala
│ │ └── writer/
│ │ ├── BaseDataWriter.scala
│ │ └── DataWriterMetrics.scala
│ └── test/
│ └── scala/
│ └── org/
│ └── neo4j/
│ └── spark/
│ ├── CommonTestSuiteIT.scala
│ ├── CommonTestSuiteWithApocIT.scala
│ ├── service/
│ │ ├── AuthenticationTest.scala
│ │ ├── Neo4jQueryServiceIT.scala
│ │ ├── Neo4jQueryServiceTest.scala
│ │ ├── SchemaServiceTSE.scala
│ │ ├── SchemaServiceTest.scala
│ │ └── SchemaServiceWithApocTSE.scala
│ └── util/
│ ├── DummyNamedReference.scala
│ ├── Neo4jImplicitsTest.scala
│ ├── Neo4jOptionsIT.scala
│ ├── Neo4jOptionsTest.scala
│ ├── Neo4jUtilTest.scala
│ ├── ValidationsIT.scala
│ └── ValidationsTest.scala
├── dangerfile.mjs
├── examples/
│ ├── neo4j_data_engineering.ipynb
│ └── neo4j_data_science.ipynb
├── jreleaser.yml
├── maven-release.sh
├── mvnw
├── mvnw.cmd
├── package.json
├── pom.xml
├── scripts/
│ ├── python/
│ │ ├── requirements.txt
│ │ └── test_spark.py
│ └── release/
│ └── upload_to_spark_packages.sh
├── spark-3/
│ ├── LICENSES.txt
│ ├── NOTICE.txt
│ ├── pom.xml
│ └── src/
│ ├── jreleaser/
│ │ └── assemblers/
│ │ └── zip/
│ │ └── README.txt.tpl
│ ├── main/
│ │ ├── assemblies/
│ │ │ └── spark-packages-assembly.xml
│ │ ├── distributions/
│ │ │ └── spark-packages.pom
│ │ ├── resources/
│ │ │ ├── META-INF/
│ │ │ │ └── services/
│ │ │ │ └── org.apache.spark.sql.sources.DataSourceRegister
│ │ │ └── neo4j-spark-connector.properties
│ │ └── scala/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ ├── DataSource.scala
│ │ ├── Neo4jTable.scala
│ │ ├── reader/
│ │ │ ├── Neo4jPartitionReader.scala
│ │ │ ├── Neo4jPartitionReaderFactory.scala
│ │ │ ├── Neo4jScan.scala
│ │ │ └── Neo4jScanBuilder.scala
│ │ ├── streaming/
│ │ │ ├── Neo4jMicroBatchReader.scala
│ │ │ ├── Neo4jOffset.scala
│ │ │ ├── Neo4jStreamingDataWriterFactory.scala
│ │ │ ├── Neo4jStreamingPartitionReader.scala
│ │ │ ├── Neo4jStreamingPartitionReaderFactory.scala
│ │ │ └── Neo4jStreamingWriter.scala
│ │ └── writer/
│ │ ├── Neo4jBatchWriter.scala
│ │ ├── Neo4jDataWriter.scala
│ │ ├── Neo4jDataWriterFactory.scala
│ │ └── Neo4jWriterBuilder.scala
│ └── test/
│ ├── java/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ ├── DataSourceReaderTypesTSE.java
│ │ └── SparkConnectorSuiteIT.java
│ ├── resources/
│ │ ├── log4j2.properties
│ │ ├── neo4j-keycloak.jks
│ │ └── neo4j-sso-test-realm.json
│ └── scala/
│ └── org/
│ └── neo4j/
│ └── spark/
│ ├── DataSourceAggregationTSE.scala
│ ├── DataSourceReaderNeo4jTSE.scala
│ ├── DataSourceReaderNeo4jWithApocTSE.scala
│ ├── DataSourceReaderTSE.scala
│ ├── DataSourceReaderWithApocTSE.scala
│ ├── DataSourceSchemaWriterTSE.scala
│ ├── DataSourceStreamingReaderTSE.scala
│ ├── DataSourceStreamingWriterTSE.scala
│ ├── DataSourceWriterNeo4jSkipNullKeysTSE.scala
│ ├── DataSourceWriterNeo4jTSE.scala
│ ├── DataSourceWriterTSE.scala
│ ├── DefaultConfigTSE.scala
│ ├── GraphDataScienceIT.scala
│ ├── ReauthenticationIT.scala
│ ├── SparkConnector30ScalaSuiteIT.scala
│ ├── SparkConnector30ScalaSuiteWithApocIT.scala
│ ├── SparkConnectorAuraTest.scala
│ └── TransactionTimeoutIT.scala
└── test-support/
├── pom.xml
└── src/
├── main/
│ ├── java/
│ │ └── org/
│ │ └── neo4j/
│ │ └── spark/
│ │ └── Assert.java
│ ├── resources/
│ │ └── simplelogger.properties
│ └── scala/
│ └── org/
│ └── neo4j/
│ ├── Closeables.scala
│ ├── Neo4jContainerExtension.scala
│ └── spark/
│ ├── RowUtil.scala
│ ├── SparkConnectorScalaBaseTSE.scala
│ ├── SparkConnectorScalaBaseWithApocTSE.scala
│ ├── SparkConnectorScalaSuiteIT.scala
│ ├── SparkConnectorScalaSuiteWithApocIT.scala
│ ├── SparkConnectorScalaSuiteWithGdsBase.scala
│ └── TestUtil.scala
└── test/
└── scala/
└── org/
└── neo4j/
└── spark/
└── VersionTest.scala
SYMBOL INDEX (74 symbols across 5 files)
FILE: common/src/main/java/org/neo4j/spark/util/ReflectionUtils.java
class ReflectionUtils (line 29) | public class ReflectionUtils {
method getGroupByColumns (line 33) | private static Optional<MethodHandle> getGroupByColumns() {
method getGroupByExpressions (line 43) | private static Optional<MethodHandle> getGroupByExpressions() {
method groupByCols (line 57) | public static Expression[] groupByCols(Aggregation agg) {
FILE: scripts/python/test_spark.py
class SparkTest (line 13) | class SparkTest(unittest.TestCase):
method tearDown (line 18) | def tearDown(self):
method init_test (line 22) | def init_test(self, query, parameters=None):
method test_string (line 36) | def test_string(self):
method test_int (line 42) | def test_int(self):
method test_double (line 48) | def test_double(self):
method test_boolean (line 54) | def test_boolean(self):
method test_time (line 59) | def test_time(self):
method test_datetime (line 71) | def test_datetime(self):
method test_date (line 82) | def test_date(self):
method test_point (line 90) | def test_point(self):
method test_point3d (line 99) | def test_point3d(self):
method test_geopoint (line 111) | def test_geopoint(self):
method test_duration (line 122) | def test_duration(self):
method test_binary (line 134) | def test_binary(self):
method test_string_array (line 140) | def test_string_array(self):
method test_int_array (line 147) | def test_int_array(self):
method test_double_array (line 154) | def test_double_array(self):
method test_boolean_array (line 161) | def test_boolean_array(self):
method test_time_array (line 168) | def test_time_array(self):
method test_datetime_array (line 189) | def test_datetime_array(self):
method test_date_array (line 201) | def test_date_array(self):
method test_point_array (line 213) | def test_point_array(self):
method test_point3d_array (line 229) | def test_point3d_array(self):
method test_geopoint_array (line 247) | def test_geopoint_array(self):
method test_duration_array (line 263) | def test_duration_array(self):
method test_unexisting_property (line 281) | def test_unexisting_property(self):
method test_gds (line 296) | def test_gds(self):
FILE: spark-3/src/test/java/org/neo4j/spark/DataSourceReaderTypesTSE.java
class DataSourceReaderTypesTSE (line 35) | public class DataSourceReaderTypesTSE extends SparkConnectorScalaBaseTSE {
method testReadNodeWithString (line 37) | @Test
method testReadNodeWithLong (line 45) | @Test
method testReadNodeWithDouble (line 53) | @Test
method testReadNodeWithLocalTime (line 61) | @Test
method testReadNodeWithTime (line 71) | @Test
method testReadNodeWithLocalDateTime (line 85) | @Test
method testReadNodeWithZonedDateTime (line 95) | @Test
method testReadNodeWithPoint (line 104) | @Test
method testReadNodeWithGeoPoint (line 115) | @Test
method testReadNodeWithPoint3D (line 126) | @Test
method testReadNodeWithDate (line 138) | @Test
method testReadNodeWithDuration (line 146) | @Test
method testReadNodeWithStringArray (line 159) | @Test
method testReadNodeWithLongArray (line 168) | @Test
method testReadNodeWithDoubleArray (line 177) | @Test
method testReadNodeWithLocalTimeArray (line 186) | @Test
method testReadNodeWithBooleanArray (line 197) | @Test
method testReadNodeWithArrayDate (line 206) | @Test
method testReadNodeWithArrayZonedDateTime (line 215) | @Test
method testReadNodeWithArrayDurations (line 226) | @Test
method testReadNodeWithPointArray (line 246) | @Test
method testReadNodeWithGeoPointArray (line 262) | @Test
method testReadNodeWithPoint3DArray (line 278) | @Test
method testReadNodeWithMap (line 294) | @Test
method initTest (line 309) | Dataset<Row> initTest(String query) {
FILE: spark-3/src/test/java/org/neo4j/spark/SparkConnectorSuiteIT.java
class SparkConnectorSuiteIT (line 22) | @RunWith(Suite.class)
FILE: test-support/src/main/java/org/neo4j/spark/Assert.java
class Assert (line 33) | public final class Assert {
method Assert (line 34) | private Assert() {
type ThrowingSupplier (line 37) | public interface ThrowingSupplier<T, E extends Exception> {
method get (line 38) | T get() throws E;
method throwingSupplier (line 40) | static <TYPE> ThrowingSupplier<TYPE, RuntimeException> throwingSuppl...
type ThrowingAction (line 53) | public interface ThrowingAction<E extends Exception> {
method apply (line 54) | void apply() throws E;
method noop (line 56) | static <E extends Exception> ThrowingAction<E> noop() {
method assertException (line 62) | public static <E extends Exception> void assertException(ThrowingActio...
method assertException (line 66) | public static <E extends Exception> void assertException(ThrowingActio...
method assertEventually (line 82) | public static <T, E extends Exception> void assertEventually(ThrowingS...
method assertEventually (line 88) | public static <T, E extends Exception> void assertEventually(String re...
method assertEventually (line 94) | public static <T, E extends Exception> void assertEventually(Function<...
method newAssertionError (line 116) | private static AssertionError newAssertionError(String message, Object...
method prettyPrint (line 120) | private static String prettyPrint(Object o) {
Condensed preview — 135 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,140K chars).
[
{
"path": ".commitlintrc.json",
"chars": 61,
"preview": "{\n \"extends\": [\n \"@commitlint/config-conventional\"\n ]\n}\n"
},
{
"path": ".github/CODEOWNERS",
"chars": 74,
"preview": "* @neo4j/team-connectors\n\n/.github/ @ali-ince @fbiville @venikkin\n"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 974,
"preview": "---\nname: Bug report\nabout: Create a report to help us improve\nlabels: bug\n---\n\n## Guidelines\n\nPlease note that GitHub i"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 847,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\n\n---\n\n## Guidelines\n\nPlease note that GitHub issues ar"
},
{
"path": ".github/dependabot.yml",
"chars": 453,
"preview": "version: 2\nupdates:\n - package-ecosystem: \"github-actions\"\n directory: \"/\"\n schedule:\n interval: \"daily\"\n "
},
{
"path": ".gitignore",
"chars": 396,
"preview": "neo4j-home\n.gradle\ngradle/\nbuild/\n*~\n\\#*\ntarget\nout\n.project\n.classpath\n.settings\n.externalToolBuilders/\n.scala_dependen"
},
{
"path": ".husky/commit-msg",
"chars": 54,
"preview": "#!/usr/bin/env sh\n\nnpx --no -- commitlint --edit \"$1\"\n"
},
{
"path": ".husky/pre-commit",
"chars": 128,
"preview": "#!/usr/bin/env sh\n\n./mvnw sortpom:sort spotless:apply -f .teamcity\n./mvnw sortpom:sort spotless:apply\ngit update-index -"
},
{
"path": ".mvn/wrapper/maven-wrapper.properties",
"chars": 951,
"preview": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE f"
},
{
"path": ".teamcity/.editorconfig",
"chars": 4310,
"preview": "# This .editorconfig section approximates ktfmt's formatting rules. You can include it in an\n# existing .editorconfig fi"
},
{
"path": ".teamcity/builds/Build.kt",
"chars": 6409,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.Projec"
},
{
"path": ".teamcity/builds/Common.kt",
"chars": 9277,
"preview": "package builds\n\nimport builds.Neo4jSparkConnectorVcs.branchSpec\nimport jetbrains.buildServer.configs.kotlin.BuildFeature"
},
{
"path": ".teamcity/builds/Empty.kt",
"chars": 283,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.toId\n\n"
},
{
"path": ".teamcity/builds/JavaIntegrationTests.kt",
"chars": 1147,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.toId\n\n"
},
{
"path": ".teamcity/builds/Maven.kt",
"chars": 1087,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.toId\n\n"
},
{
"path": ".teamcity/builds/PRCheck.kt",
"chars": 1185,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.buildF"
},
{
"path": ".teamcity/builds/Package.kt",
"chars": 1028,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.buildS"
},
{
"path": ".teamcity/builds/PythonIntegrationTests.kt",
"chars": 2575,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.BuildType\nimport jetbrains.buildServer.configs.kotlin.buildS"
},
{
"path": ".teamcity/builds/Release.kt",
"chars": 4817,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.AbsoluteId\nimport jetbrains.buildServer.configs.kotlin.Build"
},
{
"path": ".teamcity/builds/SemgrepCheck.kt",
"chars": 1128,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.buildSteps.ScriptBuildStep\n\nclass SemgrepCheck(id: String, n"
},
{
"path": ".teamcity/builds/WhiteListCheck.kt",
"chars": 1487,
"preview": "package builds\n\nimport jetbrains.buildServer.configs.kotlin.AbsoluteId\nimport jetbrains.buildServer.configs.kotlin.Build"
},
{
"path": ".teamcity/pom.xml",
"chars": 6510,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project>\n <modelVersion>4.0.0</modelVersion>\n <parent>\n <groupId>or"
},
{
"path": ".teamcity/settings.kts",
"chars": 4587,
"preview": "import builds.Build\nimport builds.DEFAULT_BRANCH\nimport builds.JavaVersion\nimport builds.Neo4jSparkConnectorVcs\nimport b"
},
{
"path": "LICENSE.txt",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 1695,
"preview": "# Neo4j Connector for Apache Spark\n\nThis repository contains the Neo4j Connector for Apache Spark.\n\n## License\n\nThis neo"
},
{
"path": "common/LICENSES.txt",
"chars": 14393,
"preview": "This file contains the full license text of the included third party\nlibraries. For an overview of the licenses see the "
},
{
"path": "common/NOTICE.txt",
"chars": 1031,
"preview": "Copyright (c) \"Neo4j\"\nNeo4j Sweden AB [https://neo4j.com]\n\nThis file is part of Neo4j.\n\nLicensed under the Apache Licens"
},
{
"path": "common/pom.xml",
"chars": 4169,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2"
},
{
"path": "common/src/main/java/org/neo4j/spark/util/ReflectionUtils.java",
"chars": 2648,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/resources/neo4j-spark-connector.properties",
"chars": 26,
"preview": "version=${project.version}"
},
{
"path": "common/src/main/scala/org/neo4j/spark/config/TopN.scala",
"chars": 791,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/converter/DataConverter.scala",
"chars": 11682,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/converter/TypeConverter.scala",
"chars": 9727,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/cypher/Cypher5Renderer.scala",
"chars": 1645,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/cypher/CypherVersionSelector.scala",
"chars": 1009,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/reader/BasePartitionReader.scala",
"chars": 5421,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/service/MappingService.scala",
"chars": 13227,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/service/Neo4jQueryService.scala",
"chars": 21768,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/service/SchemaService.scala",
"chars": 41945,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/streaming/BaseStreamingPartitionReader.scala",
"chars": 4997,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/util/DriverCache.scala",
"chars": 1522,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/util/Neo4jImplicits.scala",
"chars": 16806,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/util/Neo4jOptions.scala",
"chars": 27586,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/util/Neo4jUtil.scala",
"chars": 9321,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/util/ValidationUtil.scala",
"chars": 1620,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/util/Validations.scala",
"chars": 21432,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/writer/BaseDataWriter.scala",
"chars": 6549,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/main/scala/org/neo4j/spark/writer/DataWriterMetrics.scala",
"chars": 6430,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/CommonTestSuiteIT.scala",
"chars": 911,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/CommonTestSuiteWithApocIT.scala",
"chars": 943,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/service/AuthenticationTest.scala",
"chars": 3013,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/service/Neo4jQueryServiceIT.scala",
"chars": 3344,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/service/Neo4jQueryServiceTest.scala",
"chars": 39332,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/service/SchemaServiceTSE.scala",
"chars": 9461,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/service/SchemaServiceTest.scala",
"chars": 2311,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/service/SchemaServiceWithApocTSE.scala",
"chars": 9573,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/DummyNamedReference.scala",
"chars": 971,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/Neo4jImplicitsTest.scala",
"chars": 8011,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/Neo4jOptionsIT.scala",
"chars": 2110,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/Neo4jOptionsTest.scala",
"chars": 10231,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/Neo4jUtilTest.scala",
"chars": 1341,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/ValidationsIT.scala",
"chars": 5880,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "common/src/test/scala/org/neo4j/spark/util/ValidationsTest.scala",
"chars": 2751,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "dangerfile.mjs",
"chars": 2745,
"preview": "import load from '@commitlint/load';\nimport lint from '@commitlint/lint';\n\nconst minPRDescriptionLength = 10;\n\n// Utilit"
},
{
"path": "examples/neo4j_data_engineering.ipynb",
"chars": 147872,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"source\": [\n \"Open this notebook in Google Colab <a tar"
},
{
"path": "examples/neo4j_data_science.ipynb",
"chars": 909999,
"preview": "{\n \"nbformat\": 4,\n \"nbformat_minor\": 0,\n \"metadata\": {\n \"colab\": {\n \"provenance\": [],\n \"toc_visible\": tr"
},
{
"path": "jreleaser.yml",
"chars": 3041,
"preview": "# Generated with JReleaser 1.17.0 at 2025-04-28T13:27:24.943485+01:00\nproject:\n name: neo4j-spark-connector\n descripti"
},
{
"path": "maven-release.sh",
"chars": 3385,
"preview": "#!/bin/bash\n\nset -eEuxo pipefail\n\nif [[ $# -lt 2 ]] ; then\n echo \"Usage ./maven-release.sh <GOAL> <SCALA-VERSION> [<A"
},
{
"path": "mvnw",
"chars": 10665,
"preview": "#!/bin/sh\n# ----------------------------------------------------------------------------\n# Licensed to the Apache Softwa"
},
{
"path": "mvnw.cmd",
"chars": 7061,
"preview": "<# : batch portion\r\n@REM ----------------------------------------------------------------------------\r\n@REM Licensed to "
},
{
"path": "package.json",
"chars": 275,
"preview": "{\n \"devDependencies\": {\n \"@commitlint/cli\": \"^20.4.2\",\n \"@commitlint/config-conventional\": \"^20.4.2\",\n \"@commi"
},
{
"path": "pom.xml",
"chars": 29395,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2"
},
{
"path": "scripts/python/requirements.txt",
"chars": 53,
"preview": "pyspark==3.5.5\ntestcontainers[neo4j]\nsix\ntzlocal==2.1"
},
{
"path": "scripts/python/test_spark.py",
"chars": 14121,
"preview": "#!/usr/bin/env python3\n\nimport unittest\nimport sys\nimport datetime\n\nfrom tzlocal import get_localzone\nfrom testcontainer"
},
{
"path": "scripts/release/upload_to_spark_packages.sh",
"chars": 758,
"preview": "#!/bin/bash\n\nset -eEuo pipefail\n\nif [[ $# -lt 5 ]] ; then\n echo \"Usage ./upload_to_spark_packages.sh <USER> <TOKEN> <"
},
{
"path": "spark-3/LICENSES.txt",
"chars": 14394,
"preview": "This file contains the full license text of the included third party\nlibraries. For an overview of the licenses see the "
},
{
"path": "spark-3/NOTICE.txt",
"chars": 1032,
"preview": "Copyright (c) \"Neo4j\"\nNeo4j Sweden AB [https://neo4j.com]\n\nThis file is part of Neo4j.\n\nLicensed under the Apache Licens"
},
{
"path": "spark-3/pom.xml",
"chars": 5140,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2"
},
{
"path": "spark-3/src/jreleaser/assemblers/zip/README.txt.tpl",
"chars": 692,
"preview": "Neo4j Connector for Apache Spark {{projectVersion}}\n\nThis archive contains release materials for the Neo4j Connector for"
},
{
"path": "spark-3/src/main/assemblies/spark-packages-assembly.xml",
"chars": 915,
"preview": "<assembly xmlns=\"http://maven.apache.org/ASSEMBLY/2.2.0\"\n xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
},
{
"path": "spark-3/src/main/distributions/spark-packages.pom",
"chars": 216,
"preview": "<project>\n <modelVersion>4.0.0</modelVersion>\n <groupId>${spark-packages.groupId}</groupId>\n <artifactId>${spar"
},
{
"path": "spark-3/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister",
"chars": 26,
"preview": "org.neo4j.spark.DataSource"
},
{
"path": "spark-3/src/main/resources/neo4j-spark-connector.properties",
"chars": 26,
"preview": "version=${project.version}"
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/DataSource.scala",
"chars": 3474,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/Neo4jTable.scala",
"chars": 2707,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/reader/Neo4jPartitionReader.scala",
"chars": 1712,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/reader/Neo4jPartitionReaderFactory.scala",
"chars": 1899,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/reader/Neo4jScan.scala",
"chars": 3320,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/reader/Neo4jScanBuilder.scala",
"chars": 5868,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/streaming/Neo4jMicroBatchReader.scala",
"chars": 3568,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/streaming/Neo4jOffset.scala",
"chars": 829,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/streaming/Neo4jStreamingDataWriterFactory.scala",
"chars": 1537,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/streaming/Neo4jStreamingPartitionReader.scala",
"chars": 1824,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/streaming/Neo4jStreamingPartitionReaderFactory.scala",
"chars": 2137,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/streaming/Neo4jStreamingWriter.scala",
"chars": 2938,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/writer/Neo4jBatchWriter.scala",
"chars": 2279,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriter.scala",
"chars": 1256,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/writer/Neo4jDataWriterFactory.scala",
"chars": 1486,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/main/scala/org/neo4j/spark/writer/Neo4jWriterBuilder.scala",
"chars": 3689,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/java/org/neo4j/spark/DataSourceReaderTypesTSE.java",
"chars": 12557,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/java/org/neo4j/spark/SparkConnectorSuiteIT.java",
"chars": 877,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/resources/log4j2.properties",
"chars": 389,
"preview": "appender.console.type=Console\nappender.console.name=STDOUT_TXT\nappender.console.target=SYSTEM_OUT\nappender.console.layou"
},
{
"path": "spark-3/src/test/resources/neo4j-sso-test-realm.json",
"chars": 81292,
"preview": "{\n \"id\": \"7d10aebd-a60d-45a9-bce3-bb3a0a372ae9\",\n \"realm\": \"neo4j-sso-test\",\n \"notBefore\": 0,\n \"defaultSignatureAlgo"
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceAggregationTSE.scala",
"chars": 5310,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4jTSE.scala",
"chars": 15339,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceReaderNeo4jWithApocTSE.scala",
"chars": 2987,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceReaderTSE.scala",
"chars": 64616,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceReaderWithApocTSE.scala",
"chars": 32851,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceSchemaWriterTSE.scala",
"chars": 39618,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceStreamingReaderTSE.scala",
"chars": 21575,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceStreamingWriterTSE.scala",
"chars": 11320,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceWriterNeo4jSkipNullKeysTSE.scala",
"chars": 19722,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceWriterNeo4jTSE.scala",
"chars": 26022,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DataSourceWriterTSE.scala",
"chars": 78595,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/DefaultConfigTSE.scala",
"chars": 1482,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/GraphDataScienceIT.scala",
"chars": 16307,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/ReauthenticationIT.scala",
"chars": 6535,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/SparkConnector30ScalaSuiteIT.scala",
"chars": 1175,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/SparkConnector30ScalaSuiteWithApocIT.scala",
"chars": 989,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/SparkConnectorAuraTest.scala",
"chars": 3547,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "spark-3/src/test/scala/org/neo4j/spark/TransactionTimeoutIT.scala",
"chars": 5156,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/pom.xml",
"chars": 4354,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2"
},
{
"path": "test-support/src/main/java/org/neo4j/spark/Assert.java",
"chars": 6018,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/resources/simplelogger.properties",
"chars": 150,
"preview": "org.slf4j.simpleLogger.defaultLogLevel=error\norg.slf4j.simpleLogger.showDateTime=true\norg.slf4j.simpleLogger.dateTimeFor"
},
{
"path": "test-support/src/main/scala/org/neo4j/Closeables.scala",
"chars": 800,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/Neo4jContainerExtension.scala",
"chars": 4174,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/RowUtil.scala",
"chars": 789,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/SparkConnectorScalaBaseTSE.scala",
"chars": 1982,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/SparkConnectorScalaBaseWithApocTSE.scala",
"chars": 1945,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/SparkConnectorScalaSuiteIT.scala",
"chars": 2579,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/SparkConnectorScalaSuiteWithApocIT.scala",
"chars": 2508,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/SparkConnectorScalaSuiteWithGdsBase.scala",
"chars": 2993,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/main/scala/org/neo4j/spark/TestUtil.scala",
"chars": 3253,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
},
{
"path": "test-support/src/test/scala/org/neo4j/spark/VersionTest.scala",
"chars": 921,
"preview": "/*\n * Copyright (c) \"Neo4j\"\n * Neo4j Sweden AB [https://neo4j.com]\n *\n * Licensed under the Apache License, Version 2.0 "
}
]
// ... and 1 more file (download for full content)
About this extraction
This page contains the full source code of the neo4j-contrib/neo4j-spark-connector GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 135 files (2.0 MB), approximately 911.8k tokens, and a symbol index with 74 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.