Repository: jaegertracing/spark-dependencies Branch: main Commit: 225c99c2f602 Files: 85 Total size: 246.4 KB Directory structure: gitextract_0_ynlnqz/ ├── .github/ │ └── workflows/ │ ├── ci-cd.yml │ └── stale.yml ├── .gitignore ├── .mvn/ │ └── wrapper/ │ ├── maven-wrapper.jar │ └── maven-wrapper.properties ├── AGENTS.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── RELEASES.md ├── entrypoint.sh ├── header.txt ├── jaeger-spark-dependencies/ │ ├── pom.xml │ └── src/ │ └── main/ │ ├── java/ │ │ └── io/ │ │ └── jaegertracing/ │ │ └── spark/ │ │ └── dependencies/ │ │ └── DependenciesSparkJob.java │ └── resources/ │ └── log4j.properties ├── jaeger-spark-dependencies-cassandra/ │ ├── pom.xml │ └── src/ │ ├── main/ │ │ ├── java/ │ │ │ └── io/ │ │ │ └── jaegertracing/ │ │ │ └── spark/ │ │ │ └── dependencies/ │ │ │ └── cassandra/ │ │ │ ├── CassandraDependenciesJob.java │ │ │ └── CassandraSpan.java │ │ └── resources/ │ │ ├── log4j2.component.properties │ │ └── log4j2.properties │ └── test/ │ ├── java/ │ │ └── io/ │ │ └── jaegertracing/ │ │ └── spark/ │ │ └── dependencies/ │ │ └── cassandra/ │ │ ├── CassandraDependenciesDockerJobTest.java │ │ ├── CassandraDependenciesJobTest.java │ │ └── JaegerTestDriverContainer.java │ └── resources/ │ ├── jaeger-v2-config-cassandra.yaml │ └── log4j.properties ├── jaeger-spark-dependencies-common/ │ ├── pom.xml │ └── src/ │ ├── main/ │ │ └── java/ │ │ └── io/ │ │ └── jaegertracing/ │ │ └── spark/ │ │ └── dependencies/ │ │ ├── DependenciesSparkHelper.java │ │ ├── SpansToDependencyLinks.java │ │ ├── Utils.java │ │ ├── json/ │ │ │ ├── JsonHelper.java │ │ │ ├── KeyValueDeserializer.java │ │ │ ├── KeyValueMixin.java │ │ │ ├── ReferenceDeserializer.java │ │ │ ├── ReferenceMixin.java │ │ │ ├── SpanDeserializer.java │ │ │ └── SpanMixin.java │ │ └── model/ │ │ ├── Dependency.java │ │ ├── KeyValue.java │ │ ├── Process.java │ │ ├── Reference.java │ │ └── Span.java │ └── test/ │ └── java/ │ └── io/ │ └── jaegertracing/ │ └── spark/ │ └── 
dependencies/ │ ├── SpansToDependencyLinksTest.java │ └── model/ │ └── SpanTest.java ├── jaeger-spark-dependencies-elasticsearch/ │ ├── pom.xml │ └── src/ │ ├── main/ │ │ ├── java/ │ │ │ └── io/ │ │ │ └── jaegertracing/ │ │ │ └── spark/ │ │ │ └── dependencies/ │ │ │ └── elastic/ │ │ │ ├── ElasticTupleToSpan.java │ │ │ └── ElasticsearchDependenciesJob.java │ │ └── resources/ │ │ ├── log4j2.component.properties │ │ └── log4j2.properties │ └── test/ │ ├── java/ │ │ └── io/ │ │ └── jaegertracing/ │ │ └── spark/ │ │ └── dependencies/ │ │ └── elastic/ │ │ ├── ElasticsearchDependenciesDockerJobTest.java │ │ ├── ElasticsearchDependenciesJobTest.java │ │ ├── ElasticsearchDependenciesTagFieldsJobTest.java │ │ └── JaegerElasticsearchEnvironment.java │ └── resources/ │ └── jaeger-v2-config-elasticsearch.yaml ├── jaeger-spark-dependencies-opensearch/ │ ├── pom.xml │ └── src/ │ ├── main/ │ │ ├── java/ │ │ │ └── io/ │ │ │ └── jaegertracing/ │ │ │ └── spark/ │ │ │ └── dependencies/ │ │ │ └── opensearch/ │ │ │ ├── OpenSearchDependenciesJob.java │ │ │ └── OpenSearchTupleToSpan.java │ │ └── resources/ │ │ ├── log4j.properties │ │ ├── log4j2.component.properties │ │ └── log4j2.properties │ └── test/ │ ├── java/ │ │ └── io/ │ │ └── jaegertracing/ │ │ └── spark/ │ │ └── dependencies/ │ │ └── opensearch/ │ │ ├── JaegerOpenSearchEnvironment.java │ │ ├── OpenSearchDependenciesDockerJobTest.java │ │ └── OpenSearchDependenciesJobTest.java │ └── resources/ │ └── jaeger-v2-config-opensearch.yaml ├── jaeger-spark-dependencies-test/ │ ├── pom.xml │ └── src/ │ ├── main/ │ │ └── java/ │ │ └── io/ │ │ └── jaegertracing/ │ │ └── spark/ │ │ └── dependencies/ │ │ ├── LogToConsolePrinter.java │ │ └── test/ │ │ ├── DependenciesTest.java │ │ ├── DependencyLinkDerivator.java │ │ ├── TracersGenerator.java │ │ ├── rest/ │ │ │ ├── DependencyLink.java │ │ │ ├── JsonHelper.java │ │ │ └── RestResult.java │ │ └── tree/ │ │ ├── Node.java │ │ ├── TracingWrapper.java │ │ ├── Traversals.java │ │ └── 
TreeGenerator.java │ └── test/ │ └── java/ │ └── io/ │ └── jaegertracing/ │ └── spark/ │ └── dependencies/ │ └── test/ │ ├── DependencyLinksDerivatorTest.java │ ├── MockTracingWrapper.java │ ├── rest/ │ │ └── DeserializationTest.java │ └── tree/ │ ├── TraversalsTest.java │ └── TreeGeneratorTest.java ├── mvnw ├── mvnw.cmd ├── pom.xml └── renovate.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/ci-cd.yml ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # name: "CI/CD Pipeline" on: push: branches: [ main ] pull_request: branches: [ main ] release: types: [ published ] workflow_dispatch: inputs: tag: description: "Tag to publish in addition to `latest`" required: true type: string jobs: # Define the matrix once for all jobs setup: runs-on: ubuntu-24.04 outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5 with: distribution: "temurin" java-version: "21" cache: "maven" - name: Resolve all dependencies # go-offline ensures the .m2 folder is full before the matrix starts run: | mvn dependency:go-offline -B --no-transfer-progress \ -Dmaven.wagon.http.retryHandler.count=3 \ -Dmaven.wagon.http.connectionTimeout=120000 \ -Dmaven.wagon.http.readTimeout=120000 \ -Dhttp.retryHandler.count=5 - id: set-matrix run: | # Define configuration for all storage variants STRATEGY='{ "include": [ { "variant": "cassandra", "storage": "cassandra", "es_version": "", "image_tag": "test-cassandra", "es_test_ver": "" }, { "variant": "elasticsearch7", "storage": "elasticsearch", "es_version": "7.17.29", "image_tag": "test-es7", "es_test_ver": "7.3.0" }, { "variant": "elasticsearch8", "storage": "elasticsearch", "es_version": 
"8.13.4", "image_tag": "test-es8", "es_test_ver": "8.13.4" }, { "variant": "elasticsearch9", "storage": "elasticsearch", "es_version": "9.1.3", "image_tag": "test-es9", "es_test_ver": "9.1.3" }, { "variant": "opensearch", "storage": "opensearch", "es_version": "", "image_tag": "test-opensearch", "es_test_ver": "2.14.0" } ] }' # Convert to a single line and output echo "matrix=$(echo $STRATEGY | jq -c .)" >> $GITHUB_OUTPUT build-jars: name: Build JAR - ${{ matrix.variant }} runs-on: ubuntu-24.04 needs: setup strategy: fail-fast: false matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up JDK 21 uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5 with: distribution: "temurin" java-version: "21" cache: "maven" - name: Build JAR env: MAVEN_ES_PROP: ${{ matrix.es_version != '' && format('-Dversion.elasticsearch.spark={0}', matrix.es_version) || '' }} run: | ./mvnw clean package --batch-mode --no-transfer-progress -Dlicense.skip=true -DskipTests \ -pl jaeger-spark-dependencies-${{ matrix.storage }} -am \ $MAVEN_ES_PROP - name: Prepare artifact run: | mkdir -p artifact-target cp jaeger-spark-dependencies-${{ matrix.storage }}/target/jaeger-spark-dependencies-${{ matrix.storage }}-0.0.1-SNAPSHOT.jar \ artifact-target/ - name: Upload JAR artifact uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: jar-${{ matrix.variant }} path: artifact-target/*.jar retention-days: 1 e2e-tests: name: E2E Tests - ${{ matrix.variant }} runs-on: ubuntu-24.04 needs: [setup, build-jars] strategy: fail-fast: false matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up JDK 21 uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5 with: distribution: "temurin" java-version: "21" cache: "maven" - name: Download JAR artifact uses: 
actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: jar-${{ matrix.variant }} path: artifact-target/ - name: Build local Docker image uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 with: context: . load: true push: false tags: ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:${{ matrix.image_tag }} build-args: | VARIANT=${{ matrix.variant }} - name: Run integration tests env: SPARK_DEPENDENCIES_JOB_IMAGE_TAG: ${{ matrix.image_tag }} ELASTICSEARCH_VERSION: ${{ matrix.es_test_ver }} OPENSEARCH_VERSION: ${{ matrix.es_test_ver }} # The es_spark version is only needed when testing Elasticsearch variants MAVEN_ES_PROP: ${{ matrix.es_version != '' && format('-Dversion.elasticsearch.spark={0}', matrix.es_version) || '' }} run: | for attempt in 1 2 3; do echo "Integration test attempt $attempt of 3" ./mvnw --batch-mode --no-transfer-progress test -am \ -pl jaeger-spark-dependencies-${{ matrix.storage }} \ $MAVEN_ES_PROP && break if [ $attempt -lt 3 ]; then echo "Attempt $attempt failed, retrying after 15 seconds..." 
sleep 15 else echo "All attempts failed" exit 1 fi done publish: name: Publish - ${{ matrix.variant }} runs-on: ubuntu-24.04 needs: [setup, e2e-tests] strategy: fail-fast: false matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Download JAR artifact uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 with: name: jar-${{ matrix.variant }} path: artifact-target/ - name: Set up QEMU uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0 - name: Set up Docker Buildx uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 with: install: true platforms: | linux/amd64 linux/arm64 - name: Compute Tags id: compute-tags run: | prefix="ghcr.io/jaegertracing/spark-dependencies/spark-dependencies" variant="${{ matrix.variant }}" # For main releases, use variant suffix if [[ "${{ github.event_name }}" == "release" ]] && [[ "${{ github.ref }}" == refs/tags/* ]]; then release=$(echo ${{ github.ref }} | sed 's/refs\/tags\///g') tags="$prefix:$release-$variant" # elasticsearch9 gets the 'latest' tag as it supports the newest ES version if [[ "$variant" == "elasticsearch9" ]]; then tags="$tags,$prefix:latest" fi elif [[ -n "${{ inputs.tag }}" ]]; then tags="$prefix:${{ inputs.tag }}-$variant" else # For main branch builds, use main-variant tags="$prefix:main-$variant" fi echo "Computed tags for publication ($variant): $tags" echo "tags=$tags" >> $GITHUB_OUTPUT - name: Login to GitHub Package Registry uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0 if: github.event_name != 'pull_request' with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push multi-arch images uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 with: context: . 
push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.compute-tags.outputs.tags }} platforms: | linux/amd64 linux/arm64 build-args: | VARIANT=${{ matrix.variant }} ================================================ FILE: .github/workflows/stale.yml ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # name: 'Close stale issues and PRs' on: schedule: # Run every Monday at 1:30 AM UTC - cron: '30 1 * * 1' workflow_dispatch: permissions: issues: write pull-requests: write jobs: stale: runs-on: ubuntu-latest steps: - name: Harden Runner uses: step-security/harden-runner@e3f713f2d8f53843e71c69a996d56f51aa9adfb9 with: egress-policy: audit - uses: actions/stale@dcd2b9469d2220b7e8d08aedc00c105d277fd46b with: # Issues configuration days-before-issue-stale: 90 days-before-issue-close: 14 stale-issue-message: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. To keep it open either add a comment or the label `do-not-expire`. close-issue-message: > This issue has been automatically closed due to inactivity. stale-issue-label: 'stale' exempt-issue-labels: 'do-not-expire,help-wanted' only-issue-labels: 'question' # Pull requests configuration days-before-pr-stale: 60 days-before-pr-close: 14 stale-pr-message: > This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. You may re-open it if you need more time. close-pr-message: > This pull request has been automatically closed due to inactivity. You may re-open it if you need more time. We really appreciate your contribution and we are sorry that this has not been completed. 
stale-pr-label: 'stale' exempt-pr-labels: 'do-not-expire' # General configuration operations-per-run: 100 remove-stale-when-updated: true ================================================ FILE: .gitignore ================================================ target/ pom.xml.tag pom.xml.releaseBackup pom.xml.versionsBackup pom.xml.next release.properties dependency-reduced-pom.xml buildNumber.properties .mvn/timing.properties .*.swp *.iml .idea # Avoid ignoring Maven wrapper jar file (.jar files are usually ignored) !/.mvn/wrapper/maven-wrapper.jar .testcontainers-tmp-* .vscode/ artifact-target/ ================================================ FILE: .mvn/wrapper/maven-wrapper.properties ================================================ distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.9.12/apache-maven-3.9.12-bin.zip ================================================ FILE: AGENTS.md ================================================ # Integration Tests Guide This guide provides instructions for running integration tests for the Jaeger Spark Dependencies project. For detailed information about integration tests, including prerequisites, troubleshooting, and environment variables, see the [Running Integration Tests](README.md#running-integration-tests) section in the README. ## Quick Start The project includes make targets for running integration tests against different storage backends: ```bash make e2e-cassandra # Run Cassandra 4.x integration tests make e2e-es7 # Run Elasticsearch 7 integration tests make e2e-es8 # Run Elasticsearch 8 integration tests make e2e-es9 # Run Elasticsearch 9 integration tests ``` Each target builds the appropriate Docker image and runs the corresponding integration test suite. For more details, see the [Running Integration Tests](README.md#running-integration-tests) section in the README. 
================================================ FILE: Dockerfile ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Simple runtime image that receives a pre-built JAR from the host FROM eclipse-temurin:21.0.9_10-jre@sha256:b0f6befb3f2af49704998c4425cb6313c1da505648a8e78cee731531996f735d LABEL org.opencontainers.image.authors="The Jaeger Authors " # Build argument to specify the variant type # Supported values: cassandra, elasticsearch7, elasticsearch8, elasticsearch9, opensearch ARG VARIANT=elasticsearch9 ENV APP_HOME=/app/ ENV VARIANT_TYPE=${VARIANT} # The JAR is provided by the GHA runner into the artifact-target folder COPY artifact-target/jaeger-spark-dependencies*.jar $APP_HOME/app.jar WORKDIR $APP_HOME COPY entrypoint.sh / RUN chgrp root /etc/passwd && chmod g+rw /etc/passwd USER 185 ENTRYPOINT ["/entrypoint.sh"] ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: Makefile ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # .PHONY: e2e-cassandra e2e-es7 e2e-es8 e2e-es9 help help: @echo "Available targets:" @echo " e2e-cassandra - Run Cassandra integration tests" @echo " e2e-es7 - Run Elasticsearch 7 integration tests" @echo " e2e-es8 - Run Elasticsearch 8 integration tests" @echo " e2e-es9 - Run Elasticsearch 9 integration tests" e2e-cassandra: @echo "Building Docker image for Cassandra variant..." docker build \ --build-arg VARIANT=cassandra \ -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-cassandra \ . @echo "Running Cassandra integration tests..." SPARK_DEPENDENCIES_JOB_TAG=test-cassandra \ ./mvnw --batch-mode clean test -am -pl jaeger-spark-dependencies-cassandra e2e-es7: @echo "Building Docker image for ES7 variant..." docker build \ --build-arg VARIANT=elasticsearch7 \ --build-arg ELASTICSEARCH_SPARK_VERSION=7.17.10 \ -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-es7 \ . @echo "Running ES7 integration tests..." SPARK_DEPENDENCIES_JOB_TAG=test-es7 \ ELASTICSEARCH_VERSION=7.3.0 \ ./mvnw --batch-mode clean test -am \ -pl jaeger-spark-dependencies-elasticsearch \ -Dversion.elasticsearch.spark=7.17.10 e2e-es8: @echo "Building Docker image for ES8 variant..." 
docker build \ --build-arg VARIANT=elasticsearch8 \ --build-arg ELASTICSEARCH_SPARK_VERSION=8.13.4 \ -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-es8 \ . @echo "Running ES8 integration tests..." SPARK_DEPENDENCIES_JOB_TAG=test-es8 \ ELASTICSEARCH_VERSION=8.3.1 \ ./mvnw --batch-mode clean test -am \ -pl jaeger-spark-dependencies-elasticsearch \ -Dversion.elasticsearch.spark=8.13.4 e2e-es9: @echo "Building Docker image for ES9 variant (unified/mega-jar)..." docker build \ --build-arg VARIANT=unified \ --build-arg ELASTICSEARCH_SPARK_VERSION=9.1.3 \ -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-es9 \ . @echo "Running ES9 integration tests..." SPARK_DEPENDENCIES_JOB_TAG=test-es9 \ ELASTICSEARCH_VERSION=9.1.3 \ ./mvnw --batch-mode clean test -am \ -pl jaeger-spark-dependencies-elasticsearch \ -Dversion.elasticsearch.spark=9.1.3 ================================================ FILE: README.md ================================================ [![Latest image](https://ghcr-badge.egpl.dev/jaegertracing/spark-dependencies/spark-dependencies/latest_tag?trim=major&label=latest)](https://github.com/jaegertracing/spark-dependencies/pkgs/container/spark-dependencies%2Fspark-dependencies) # Jaeger Spark dependencies This is a Spark job that collects spans from storage, analyzes links between services, and stores them for later presentation in the UI. Note that it is needed for production deployments. The `all-in-one` distribution does not need this job. This job parses all traces on a given day, based on UTC. By default, it processes the current day, but other days can be explicitly specified. ## Quick-start The Spark job can be run as a Docker container or as a Java executable: ### Container Image Variants Starting with version 0.6.x, Docker images are published with variant-specific tags.
**Each variant automatically uses the appropriate storage backend, so the `STORAGE` environment variable is no longer needed.** The images are named `ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:{VERSION}-{VARIANT}`: - **`VERSION-cassandra`**: For Cassandra storage (uses CassandraDependenciesJob directly) - **`VERSION-elasticsearch7`**: For Elasticsearch 7.12-7.16 (uses ElasticsearchDependenciesJob with ES connector 7.17.29) - **`VERSION-elasticsearch8`**: For Elasticsearch 7.17+ and 8.x (uses ElasticsearchDependenciesJob with ES connector 8.13.4) - **`VERSION-elasticsearch9`**: For Elasticsearch 9.x (uses ElasticsearchDependenciesJob with ES connector 9.1.3) - also tagged as `:latest` - **`VERSION-opensearch`**: For OpenSearch 2.x and 3.x (uses OpenSearchDependenciesJob with OpenSearch Java client) Example for Cassandra: ```bash $ docker run \ --env CASSANDRA_CONTACT_POINTS=host1,host2 \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-cassandra ``` Example for Elasticsearch 8.x: ```bash $ docker run \ --env ES_NODES=http://elasticsearch:9200 \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-elasticsearch8 ``` Example for OpenSearch: ```bash $ docker run \ --env OS_NODES=http://opensearch:9200 \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch ``` #### Advanced Configuration Use `--env JAVA_OPTS` to pass additional Java options such as memory settings, SSL trust store, or other JVM properties: ```bash # Example: Configure SSL trust store $ docker run \ --env ES_NODES=https://elasticsearch:9200 \ --env JAVA_OPTS="-Djavax.net.ssl.trustStore=/path/to/truststore -Djavax.net.ssl.trustStorePassword=changeit" \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-elasticsearch8 # Example: Increase JVM heap size $ docker run \ --env OS_NODES=http://opensearch:9200 \ --env JAVA_OPTS="-Xmx2g -Xms1g" \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch 
``` Use `--env LOG4J_STATUS_LOGGER_LEVEL` to control Log4j2 internal status messages (defaults to `OFF`): ```bash # Example: Enable Log4j2 debug logging for troubleshooting $ docker run \ --env OS_NODES=http://opensearch:9200 \ --env LOG4J_STATUS_LOGGER_LEVEL=DEBUG \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch ``` Note: the latest versions are hosted on `ghcr.io`, not on Docker Hub. As jar file: ```bash STORAGE=cassandra java -jar jaeger-spark-dependencies.jar ``` ## Usage By default, this job parses all traces since midnight UTC. You can parse traces for a different day via an argument in YYYY-mm-dd format, like 2016-07-16 or specify the date via an env property. ```bash # ex to run the job to process yesterday's traces on OS/X $ STORAGE=cassandra java -jar jaeger-spark-dependencies.jar `date -uv-1d +%F` # or on Linux $ STORAGE=cassandra java -jar jaeger-spark-dependencies.jar `date -u -d '1 day ago' +%F` ``` ### Configuration `jaeger-spark-dependencies` applies configuration parameters through environment variables. The following variables are common to all storage layers: * `SPARK_MASTER`: Spark master to submit the job to; Defaults to `local[*]` * `DATE`: Date in YYYY-mm-dd format. Denotes a day for which dependency links will be created. * `PEER_SERVICE_TAG`: Tag name used to identify peer service in spans. Defaults to `peer.service` * `JAVA_OPTS`: Additional Java options to pass to the JVM. Use this to configure memory, SSL properties, or other JVM settings. Example: `JAVA_OPTS="-Xmx2g -Djavax.net.ssl.trustStore=/path/to/truststore"`. Note: The required `--add-opens` flags for Spark on Java 21+ are already included in the container image. * `LOG4J_STATUS_LOGGER_LEVEL`: Log4j2 StatusLogger level. Defaults to `OFF` to suppress internal Log4j2 status messages. Set to `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, or `FATAL` if you need to debug logging configuration issues. ### Cassandra Cassandra is used when `STORAGE=cassandra`. 
* `CASSANDRA_KEYSPACE`: The keyspace to use. Defaults to "jaeger_v1_dc1". * `CASSANDRA_CONTACT_POINTS`: Comma separated list of hosts / ip addresses part of Cassandra cluster. Defaults to localhost * `CASSANDRA_LOCAL_DC`: The local DC to connect to (other nodes will be ignored) * `CASSANDRA_USERNAME` and `CASSANDRA_PASSWORD`: Cassandra authentication. Will throw an exception on startup if authentication fails * `CASSANDRA_USE_SSL`: Requires `javax.net.ssl.trustStore` and `javax.net.ssl.trustStorePassword`, Defaults to false. * `CASSANDRA_CLIENT_AUTH_ENABLED`: If set enables client authentication on SSL connections. Requires `javax.net.ssl.keyStore` and `javax.net.ssl.keyStorePassword`, defaults to false. Example usage: ```bash $ STORAGE=cassandra CASSANDRA_CONTACT_POINTS=localhost:9042 java -jar jaeger-spark-dependencies.jar ``` ### Elasticsearch Elasticsearch is used when `STORAGE=elasticsearch`. **Important**: Use the appropriate Docker image variant for your Elasticsearch version: - ES 7.12-7.16: Use `:VERSION-elasticsearch7` tag - ES 7.17-8.x: Use `:VERSION-elasticsearch8` tag - ES 9.x: Use `:VERSION-elasticsearch9` tag (or `:latest`) #### Configuration * `ES_NODES`: A comma separated list of elasticsearch hosts advertising http. Defaults to 127.0.0.1. Add port section if not listening on port 9200. Only one of these hosts needs to be available to fetch the remaining nodes in the cluster. It is recommended to set this to all the master nodes of the cluster. Use url format for SSL. For example, "https://yourhost:8888" * `ES_NODES_WAN_ONLY`: Set to true to only use the values set in ES_NODES, for example if your elasticsearch cluster is in Docker. If you're using a cloudprovider such as AWS Elasticsearch, set this to true. Defaults to false * `ES_USERNAME` and `ES_PASSWORD`: Elasticsearch basic authentication. Use when X-Pack security (formerly Shield) is in place. By default no username or password is provided to elasticsearch. 
* `ES_CLIENT_NODE_ONLY`: Set to true to disable elasticsearch cluster nodes.discovery and enable nodes.client.only. If your elasticsearch cluster's data nodes only listen on loopback ip, set this to true. Defaults to false. * `ES_INDEX_PREFIX`: index prefix of Jaeger indices. By default unset. * `ES_INDEX_DATE_SEPARATOR`: index date separator of Jaeger indices. The default value is `-`. For example `.` will find index "jaeger-span-2020.11.25". * `ES_TIME_RANGE`: How far in the past the job should look to for spans, the maximum and default is `24h`. Any value accepted by [date-math](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math) can be used here, but the anchor is always `now`. * `ES_USE_ALIASES`: Set to true to use index alias names to read from and write to. Usually required when using rollover indices. Example usage: ```bash $ STORAGE=elasticsearch ES_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies.jar ``` ### OpenSearch OpenSearch is used when `STORAGE=opensearch`. **Important**: Use the `:VERSION-opensearch` Docker image variant. #### Configuration * `OS_NODES`: A comma separated list of OpenSearch hosts advertising http. Defaults to 127.0.0.1. Add port section if not listening on port 9200. Only one of these hosts needs to be available to fetch the remaining nodes in the cluster. It is recommended to set this to all the master nodes of the cluster. Use url format for SSL. For example, "https://yourhost:8888" * `OS_NODES_WAN_ONLY`: Set to true to only use the values set in OS_NODES, for example if your OpenSearch cluster is in Docker. If you're using a cloudprovider such as AWS OpenSearch, set this to true. Defaults to false. * `OS_USERNAME` and `OS_PASSWORD`: OpenSearch basic authentication. By default no username or password is provided. * `OS_INDEX_PREFIX`: index prefix of Jaeger indices. By default unset. * `OS_INDEX_DATE_SEPARATOR`: index date separator of Jaeger indices. 
The default value is `-`. For example `.` will find index "jaeger-span-2020.11.25". * `OS_TIME_RANGE`: How far in the past the job should look to for spans, the maximum and default is `24h`. Any value accepted by [date-math](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math) can be used here, but the anchor is always `now`. Example usage: ```bash $ docker run \ --env OS_NODES=http://opensearch:9200 \ ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch ``` ## Design At a high-level, this job does the following: * read lots of spans from a time period * group them by traceId * construct a graph using parent-child relationships expressed in span references * for each edge `(parent span, child span)` output `(parent service, child service, count)` * write the results to the database (e.g. `dependencies_v2` table in [Cassandra](https://github.com/jaegertracing/jaeger/blob/12e44faabf10cdd866391b78933eec5d6ac50fa9/plugin/storage/cassandra/schema/v004.cql.tmpl#L186)) ## Building locally To build the job locally and run tests: ```bash ./mvnw clean install # if failed add SPARK_LOCAL_IP=127.0.0.1 ``` To run the unified jar (includes all): ```bash STORAGE=cassandra java -jar jaeger-spark-dependencies/target/jaeger-spark-dependencies-0.0.1-SNAPSHOT.jar #or STORAGE=elasticsearch ES_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies/target/jaeger-spark-dependencies-0.0.1-SNAPSHOT.jar #or STORAGE=opensearch OS_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies/target/jaeger-spark-dependencies-0.0.1-SNAPSHOT.jar ``` To run storage-specific jars directly (without STORAGE variable): ```bash # Cassandra java -jar jaeger-spark-dependencies-cassandra/target/jaeger-spark-dependencies-cassandra-0.0.1-SNAPSHOT.jar # Elasticsearch ES_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies-elasticsearch/target/jaeger-spark-dependencies-elasticsearch-0.0.1-SNAPSHOT.jar # OpenSearch 
OS_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies-opensearch/target/jaeger-spark-dependencies-opensearch-0.0.1-SNAPSHOT.jar ``` To build Docker image: **Note:** The Dockerfile now requires a pre-built JAR. First build the JAR using Maven, then build the Docker image. For Cassandra: ```bash ./mvnw clean package --batch-mode -Dlicense.skip=true -DskipTests -pl jaeger-spark-dependencies-cassandra -am mkdir -p artifact-target cp jaeger-spark-dependencies-cassandra/target/jaeger-spark-dependencies-cassandra-0.0.1-SNAPSHOT.jar artifact-target/ docker build --build-arg VARIANT=cassandra -t jaegertracing/spark-dependencies:cassandra . ``` For Elasticsearch 9: ```bash ./mvnw clean package --batch-mode -Dlicense.skip=true -DskipTests -Dversion.elasticsearch.spark=9.1.3 -pl jaeger-spark-dependencies-elasticsearch -am mkdir -p artifact-target cp jaeger-spark-dependencies-elasticsearch/target/jaeger-spark-dependencies-elasticsearch-0.0.1-SNAPSHOT.jar artifact-target/ docker build --build-arg VARIANT=elasticsearch9 -t jaegertracing/spark-dependencies:elasticsearch9 . ``` In tests it's possible to specify version of Jaeger images by env variable `JAEGER_VERSION` or system property `jaeger.version`. By default tests are using latest images. 
## Running Integration Tests The integration tests validate the Spark dependencies job against different storage backends: - Cassandra 4.x - Elasticsearch 7 - Elasticsearch 8 - Elasticsearch 9 ### Prerequisites Before running integration tests, ensure you have the following installed: - **Java 21** (Temurin distribution recommended) - **Docker** (for building images and running testcontainers) - **Maven** (included via `./mvnw` wrapper) ### Quick Start Use the following make targets to run integration tests: ```bash make e2e-cassandra # Run Cassandra integration tests make e2e-es7 # Run Elasticsearch 7 integration tests make e2e-es8 # Run Elasticsearch 8 integration tests make e2e-es9 # Run Elasticsearch 9 integration tests ``` ### What Each Target Does Each test suite performs two steps: 1. Builds a Docker image with the appropriate storage variant 2. Runs tests using testcontainers against that variant ### Environment Variables The following environment variables are used in integration tests: - `SPARK_DEPENDENCIES_JOB_IMAGE_TAG`: Specifies the Docker image tag to use in tests (e.g., `test-cassandra`, `test-es7`, `test-es8`, `test-es9`) - `ELASTICSEARCH_VERSION`: Specifies the Elasticsearch version for testcontainers to use - `JAEGER_VERSION`: (Optional) Specifies the version of Jaeger images to use in tests. Defaults to latest. You can also set this as a system property: ```bash ./mvnw test -Djaeger.version=2.14.0 ``` ### Troubleshooting #### Docker Permission Issues If you encounter Docker permission issues, ensure your user is in the `docker` group: ```bash sudo usermod -aG docker $USER ``` Then log out and log back in. #### Testcontainers Issues If testcontainers fail to start, ensure: 1. Docker is running and accessible 2. The Ryuk image is pulled: `docker pull testcontainersofficial/ryuk:latest` 3. You have sufficient disk space for Docker images #### Build Failures If you encounter build failures: 1. Ensure you have Java 21 installed 2. 
Clean the Maven cache: `./mvnw clean` 3. Try running with the `-U` flag to force update dependencies: `./mvnw -U clean install` #### Port Conflicts If tests fail due to port conflicts, ensure no other services are running on the ports used by testcontainers (typically ephemeral ports, but sometimes standard ports like 9042 for Cassandra or 9200 for Elasticsearch). ## CI/CD Pipeline The project uses a unified CI/CD pipeline (`.github/workflows/ci-cd.yml`) that implements a **Host-Build Matrix Pattern**: 1. **Setup & Dependency Download** - Downloads all Maven dependencies once and warms the cache for subsequent jobs 2. **Build JARs** - Builds storage-specific JARs on the GitHub runner (parallel for all variants) 3. **E2E Tests** - Tests each variant using Docker containers with pre-built JARs 4. **Publish** - Publishes multi-arch Docker images (linux/amd64, linux/arm64) to GitHub Container Registry The pipeline supports four variants: - `cassandra` - For Cassandra storage - `elasticsearch7` - For Elasticsearch 7.12-7.16 (ES connector 7.17.29) - `elasticsearch8` - For Elasticsearch 7.17+ and 8.x (ES connector 8.13.4) - `elasticsearch9` - For Elasticsearch 9.x (ES connector 9.1.3) This approach eliminates Maven downloads inside Docker builds and parallelizes builds across all storage variants. ## License [Apache 2.0 License](./LICENSE). ================================================ FILE: RELEASES.md ================================================ # Release process 1. Create a new GitHub release with a new tag 2. Use "generate release notes" button 3. 
#!/bin/sh
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Taken from https://github.com/radanalyticsio/openshift-spark/blob/2.4/modules/common/added/scripts/entrypoint#L50
# OpenShift passes a random UID and Spark requires it to be present in /etc/passwd.
# patch_uid appends a passwd entry for the current UID when none exists and the
# file is writable; otherwise it only prints a notice and lets startup continue.
patch_uid() {
    myuid=$(id -u)
    mygid=$(id -g)
    uidentry=$(getent passwd $myuid)

    # The container UID already has a passwd entry: nothing to patch.
    if [ -n "$uidentry" ]; then
        return 0
    fi

    # Cannot patch a read-only passwd file; report it and carry on.
    if [ ! -w /etc/passwd ]; then
        echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
        return 0
    fi

    echo "$myuid:x:$myuid:$mygid:anonymous uid:${PWD}:/bin/false" >> /etc/passwd
}

patch_uid

# The image ships a single JAR under a fixed name.
JAR_PATH="$APP_HOME/app.jar"

# Pick the main class from VARIANT_TYPE. Any value starting with "elasticsearch"
# selects the Elasticsearch job; an unset or unknown value falls back to the
# unified job (backward compatibility / local builds).
case "$VARIANT_TYPE" in
    cassandra)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.cassandra.CassandraDependenciesJob"
        ;;
    elasticsearch*)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJob"
        ;;
    opensearch)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.opensearch.OpenSearchDependenciesJob"
        ;;
    *)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.DependenciesSparkJob"
        ;;
esac

# Default the Log4j2 StatusLogger level to OFF unless the user provided one.
# This suppresses Log4j2 StatusLogger errors triggered by OpenSearch's
# programmatic logging configuration.
: "${LOG4J_STATUS_LOGGER_LEVEL:=OFF}"

# Java module options required for Spark to work with Java 21+:
# the --add-opens flags let Spark access internal Java APIs.
SPARK_JAVA_OPTS="--add-opens=java.base/java.lang=ALL-UNNAMED \
  --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
  --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
  --add-opens=java.base/java.io=ALL-UNNAMED \
  --add-opens=java.base/java.net=ALL-UNNAMED \
  --add-opens=java.base/java.nio=ALL-UNNAMED \
  --add-opens=java.base/java.util=ALL-UNNAMED \
  --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
  --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
  --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
  --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
  --add-opens=java.base/sun.security.action=ALL-UNNAMED \
  --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
  -Djdk.reflect.useDirectMethodHandle=false"

# Replace the shell with the JVM. Option order: SPARK_JAVA_OPTS (required for
# Spark), then JAVA_OPTS (user customizations), then the Log4j setting.
# The two *_OPTS variables are intentionally unquoted so they split into
# separate JVM arguments.
exec java ${SPARK_JAVA_OPTS} ${JAVA_OPTS} \
    -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=${LOG4J_STATUS_LOGGER_LEVEL} \
    -cp "$JAR_PATH" "$MAIN_CLASS" "$@"
io.netty:* ** org.slf4j:* ** org.scala-lang:* ** org.apache.spark:* ** org.lz4:* ** org.elasticsearch:elasticsearch-spark-* ** commons-httpclient:commons-httpclient ** xerces:xercesImpl ** com.squareup.okhttp3:* ** com.squareup.okio:* ** *:* META-INF/*.SF META-INF/*.DSA META-INF/*.RSA false ================================================ FILE: jaeger-spark-dependencies/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkJob.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies; import io.jaegertracing.spark.dependencies.cassandra.CassandraDependenciesJob; import io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJob; import java.io.UnsupportedEncodingException; import java.time.LocalDate; public final class DependenciesSparkJob { public static void main(String[] args) throws UnsupportedEncodingException { String storage = System.getenv("STORAGE"); if (storage == null) { throw new IllegalArgumentException("Missing environmental variable STORAGE"); } LocalDate date = LocalDate.now(); if (args.length == 1) { date = parseZonedDateTime(args[0]); } else if (System.getenv("DATE") != null) { date = parseZonedDateTime(System.getenv("DATE")); } run(storage, date); } private static void run(String storage, LocalDate localDate) throws UnsupportedEncodingException { String peerServiceTag = System.getenv("PEER_SERVICE_TAG"); if (peerServiceTag == null){ peerServiceTag = "peer.service"; } String jarPath = Utils.pathToUberJar(DependenciesSparkJob.class); if ("elasticsearch".equalsIgnoreCase(storage)) { ElasticsearchDependenciesJob.builder() .jars(jarPath) .day(localDate) .build() .run(peerServiceTag); } else if ("cassandra".equalsIgnoreCase(storage)) { CassandraDependenciesJob.builder() .jars(jarPath) .day(localDate) .build() .run(peerServiceTag); } else { throw new IllegalArgumentException("Unsupported storage: " + storage); 
} } static LocalDate parseZonedDateTime(String date) { return LocalDate.parse(date); } } ================================================ FILE: jaeger-spark-dependencies/src/main/resources/log4j.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Set everything to be logged to the console log4j.rootCategory=WARN, console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err log4j.appender.console.layout=org.apache.log4j.PatternLayout log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n # Settings to quiet third party logs that are too verbose log4j.logger.org.spark-project.jetty=WARN log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO log4j.logger.io.jaegertracing.spark=INFO # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR ================================================ FILE: jaeger-spark-dependencies-cassandra/pom.xml ================================================ 4.0.0 io.jaegertracing.dependencies jaeger-spark-dependencies-parent 0.0.1-SNAPSHOT jaeger-spark-dependencies-cassandra 3.4.1 ${project.groupId} jaeger-spark-dependencies-common org.apache.spark spark-core_${version.scala.binary} com.fasterxml.jackson.core jackson-annotations com.datastax.spark spark-cassandra-connector_${version.scala.binary} ${spark-cassandra-connector.version} org.apache.commons commons-compress 1.26.0 org.eclipse.jetty jetty-xml 10.0.26 ${project.groupId} jaeger-spark-dependencies-test test org.testcontainers testcontainers test org.testcontainers cassandra test 
com.google.guava guava commons-lang commons-lang 2.6 test maven-shade-plugin ${version.maven-shade-plugin} package shade reference.conf io.jaegertracing.spark.dependencies.cassandra.CassandraDependenciesJob true org.apache.hadoop:hadoop-common ** com.datastax.oss:* ** com.datastax.spark:* ** log4j:log4j org/apache/log4j/spi/LoggingEvent.class org.apache.logging.log4j:log4j-* ** io.netty:* ** org.slf4j:* ** org.scala-lang:* ** org.apache.spark:* ** org.lz4:* ** xerces:xercesImpl ** com.squareup.okhttp3:* ** com.squareup.okio:* ** *:* META-INF/*.SF META-INF/*.DSA META-INF/*.RSA false ================================================ FILE: jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJob.java ================================================ /** * Copyright (c) The Jaeger Authors * Copyright 2016-2017 The OpenZipkin Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.cassandra; import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions; import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapRowTo; import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapToRow; import com.google.common.base.Joiner; import com.google.common.net.HostAndPort; import io.jaegertracing.spark.dependencies.DependenciesSparkHelper; import io.jaegertracing.spark.dependencies.Utils; import io.jaegertracing.spark.dependencies.model.Dependency; import io.jaegertracing.spark.dependencies.model.Span; import java.io.Serializable; import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.Period; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.temporal.ChronoUnit; import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TimeZone; import 
org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import scala.Tuple2; /** * @author OpenZipkin authors * @author Pavol Loffay */ public final class CassandraDependenciesJob { private static final Logger log = LoggerFactory.getLogger(CassandraDependenciesJob.class); public static Builder builder() { return new Builder(); } public static final class Builder { String keyspace = Utils.getEnv("CASSANDRA_KEYSPACE", "jaeger_v1_dc1"); String contactPoints = Utils.getEnv("CASSANDRA_CONTACT_POINTS", "localhost"); String localDc = Utils.getEnv("CASSANDRA_LOCAL_DC", null); // local[*] master lets us run & test the job locally without setting a Spark cluster String sparkMaster = Utils.getEnv("SPARK_MASTER", "local[*]"); String username = Utils.getEnv("CASSANDRA_USERNAME", ""); String password = Utils.getEnv("CASSANDRA_PASSWORD", ""); // needed when not in local mode String[] jars; // By default the job only works on traces whose first timestamp is today ZonedDateTime day = ZonedDateTime.of(LocalDate.now().atStartOfDay(), ZoneOffset.UTC); final Map sparkProperties = new LinkedHashMap<>(); Builder() { sparkProperties.put("spark.ui.enabled", "false"); sparkProperties.put("spark.cassandra.connection.ssl.enabled", Utils.getEnv("CASSANDRA_USE_SSL", "false")); sparkProperties.put("spark.cassandra.connection.ssl.trustStore.password", System.getProperty("javax.net.ssl.trustStorePassword", "")); sparkProperties.put("spark.cassandra.connection.ssl.trustStore.path", System.getProperty("javax.net.ssl.trustStore", "")); sparkProperties.put("spark.cassandra.connection.ssl.clientAuth.enabled", Utils.getEnv("CASSANDRA_CLIENT_AUTH_ENABLED", "false")); sparkProperties.put("spark.cassandra.connection.ssl.keyStore.path", System.getProperty("javax.net.ssl.keyStore", "")); sparkProperties.put("spark.cassandra.connection.ssl.keyStore.password", 
System.getProperty("javax.net.ssl.keyStorePassword", "")); } /** When set, this indicates which jars to distribute to the cluster. */ public Builder jars(String... jars) { this.jars = jars; return this; } /** Keyspace to store dependency rowsToLinks. Defaults to "jaeger_v1_test" */ public Builder keyspace(String keyspace) { Utils.checkNoTNull("keyspace", keyspace); this.keyspace = keyspace; return this; } /** Cassandra username. */ public Builder username(String username) { Utils.checkNoTNull("username", username); this.username = username; return this; } /** Cassandra username. */ public Builder password(String password) { Utils.checkNoTNull("password", password); this.password = password; return this; } /** Day to process dependencies for. Defaults to today. */ public Builder day(LocalDate day) { this.day = day.atStartOfDay(ZoneOffset.UTC); return this; } /** Comma separated list of hosts / IPs part of Cassandra cluster. Defaults to localhost */ public Builder contactPoints(String contactPoints) { this.contactPoints = contactPoints; return this; } /** The local DC to connect to (other nodes will be ignored) */ public Builder localDc(String localDc) { this.localDc = localDc; return this; } public CassandraDependenciesJob build() { return new CassandraDependenciesJob(this); } } private final String keyspace; private final ZonedDateTime day; private final SparkConf conf; CassandraDependenciesJob(Builder builder) { this.keyspace = builder.keyspace; this.day = builder.day; SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd"); df.setTimeZone(TimeZone.getTimeZone("UTC")); this.conf = new SparkConf(true) .setMaster(builder.sparkMaster) .setAppName(getClass().getName()); conf.set("spark.cassandra.connection.host", parseHosts(builder.contactPoints)); conf.set("spark.cassandra.connection.port", parsePort(builder.contactPoints)); conf.set("spark.cassandra.auth.username", builder.username); conf.set("spark.cassandra.auth.password", builder.password); if (builder.localDc 
!= null) { conf.set("connection.local_dc", builder.localDc); } if (builder.jars != null) { conf.setJars(builder.jars); } for (Map.Entry entry : builder.sparkProperties.entrySet()) { conf.set(entry.getKey(), entry.getValue()); } } public void run(String peerServiceTag) { long microsLower = day.toInstant().toEpochMilli() * 1000; long microsUpper = day.plus(Period.ofDays(1)).toInstant().toEpochMilli() * 1000 - 1; log.info("Running Dependencies job for {}: {} ≤ Span.timestamp {}", day, microsLower, microsUpper); JavaSparkContext sc = new JavaSparkContext(conf); try { JavaPairRDD> traces = javaFunctions(sc) .cassandraTable(keyspace, "traces", mapRowTo(CassandraSpan.class)) .where("start_time < ? AND start_time > ?", microsUpper, microsLower) .mapToPair(span -> new Tuple2<>(span.getTraceId(), span)) .mapValues(span -> (Span) span) .groupByKey(); List dependencyLinks = DependenciesSparkHelper.derive(traces,peerServiceTag); store(sc, dependencyLinks); log.info("Done, {} dependency objects created", dependencyLinks.size()); } finally { sc.stop(); } } private void store(JavaSparkContext sc, List links) { String table = dependenciesTable(sc); log.info("Storing dependencies into {}", table); if ("dependencies_v2".equals(table)) { CassandraDependenciesV2 dependencies = new CassandraDependenciesV2(links, day); javaFunctions(sc.parallelize(Collections.singletonList(dependencies))) .writerBuilder(keyspace, table, mapToRow(CassandraDependenciesV2.class)) .saveToCassandra(); } else { CassandraDependencies dependencies = new CassandraDependencies(links, day); javaFunctions(sc.parallelize(Collections.singletonList(dependencies))) .writerBuilder(keyspace, table, mapToRow(CassandraDependencies.class)) .saveToCassandra(); } } static String parseHosts(String contactPoints) { List result = new LinkedList<>(); for (String contactPoint : contactPoints.split(",")) { HostAndPort parsed = HostAndPort.fromString(contactPoint); result.add(parsed.getHost()); } return Joiner.on(',').join(result); } 
/** Returns the consistent port across all contact points or 9042 */ static String parsePort(String contactPoints) { Set ports = new HashSet<>(); for (String contactPoint: contactPoints.split(",")) { HostAndPort parsed = HostAndPort.fromString(contactPoint); ports.add(parsed.getPortOrDefault(9042)); } return ports.size() == 1 ? String.valueOf(ports.iterator().next()) : "9042"; } private String dependenciesTable(JavaSparkContext sc) { try { javaFunctions(sc) .cassandraTable(keyspace, "dependencies_v2") .limit(1L).collect(); } catch (Exception ex) { return "dependencies"; } return "dependencies_v2"; } /** * DTO object used to store dependencies to Cassandra, see {@link com.datastax.spark.connector.mapper.JavaBeanColumnMapper} */ public final static class CassandraDependencies implements Serializable { private static final long serialVersionUID = 0L; private List dependencies; private ZonedDateTime zonedDateTime; public CassandraDependencies(List dependencies, ZonedDateTime ts) { this.dependencies = dependencies; this.zonedDateTime = ts; } public List getDependencies() { return dependencies; } public Long getTs() { return zonedDateTime.toInstant().toEpochMilli(); } public Long getTsIndex() { return zonedDateTime.toInstant().toEpochMilli(); } } /** * DTO object used to store dependencies to Cassandra, see {@link com.datastax.spark.connector.mapper.JavaBeanColumnMapper} */ public final static class CassandraDependenciesV2 implements Serializable { private static final long serialVersionUID = 0L; private List dependencies; private ZonedDateTime zonedDateTime; public CassandraDependenciesV2(List dependencies, ZonedDateTime ts) { this.dependencies = dependencies; this.zonedDateTime = ts; } public List getDependencies() { return dependencies; } public Long getTs() { return zonedDateTime.toInstant().toEpochMilli(); } public Long getTsBucket() { return zonedDateTime.toInstant().truncatedTo(ChronoUnit.DAYS).toEpochMilli(); } } /** * Entry point for running 
CassandraDependenciesJob directly. * This is used when the Docker image variant is cassandra-specific. */ public static void main(String[] args) throws java.io.UnsupportedEncodingException { LocalDate date = LocalDate.now(); if (args.length == 1) { date = LocalDate.parse(args[0]); } else if (System.getenv("DATE") != null) { date = LocalDate.parse(System.getenv("DATE")); } String peerServiceTag = System.getenv("PEER_SERVICE_TAG"); if (peerServiceTag == null) { peerServiceTag = "peer.service"; } String jarPath = Utils.pathToUberJar(CassandraDependenciesJob.class); CassandraDependenciesJob.builder() .jars(jarPath) .day(date) .build() .run(peerServiceTag); } } ================================================ FILE: jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraSpan.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.cassandra; import io.jaegertracing.spark.dependencies.model.Reference; import io.jaegertracing.spark.dependencies.model.Span; import java.util.ArrayList; import java.util.List; /** * Jaeger > 1.5 does not store parentId. All references are stored in references table. * This class is used to maintain compatibility with older Jaeger deployments. 
* * @author Pavol Loffay */ public class CassandraSpan extends Span { private Long parentId; public Long getParentId() { return parentId; } public void setParentId(Long parentId) { this.parentId = parentId; } @Override public List getRefs() { ArrayList references = new ArrayList<>(super.getRefs()); Reference legacyParent = new Reference(); legacyParent.setSpanId(parentId); references.add(legacyParent); return references; } } ================================================ FILE: jaeger-spark-dependencies-cassandra/src/main/resources/log4j2.component.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Disable Log4j status logger console output log4j2.StatusLogger.level = OFF ================================================ FILE: jaeger-spark-dependencies-cassandra/src/main/resources/log4j2.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Set root logger level to WARN and use console appender rootLogger.level = WARN rootLogger.appenderRef.console.ref = console # Console appender configuration appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n # Settings to quiet third party logs that are too verbose logger.jetty.name = org.spark-project.jetty logger.jetty.level = WARN logger.jettyLifecycle.name = org.spark-project.jetty.util.component.AbstractLifeCycle logger.jettyLifecycle.level = ERROR logger.sparkReplTyper.name = org.apache.spark.repl.SparkIMain$exprTyper logger.sparkReplTyper.level = INFO logger.sparkReplInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter logger.sparkReplInterpreter.level = INFO logger.jaegertracing.name = io.jaegertracing.spark logger.jaegertracing.level = INFO # SPARK-9183: Settings to 
avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support logger.hiveMetastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler logger.hiveMetastore.level = FATAL logger.hiveFunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry logger.hiveFunctionRegistry.level = ERROR ================================================ FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesDockerJobTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.cassandra; import io.jaegertracing.spark.dependencies.LogToConsolePrinter; import org.testcontainers.containers.GenericContainer; import org.testcontainers.utility.DockerImageName; import java.util.concurrent.TimeUnit; import static org.awaitility.Awaitility.await; public class CassandraDependenciesDockerJobTest extends CassandraDependenciesJobTest { private static String dependenciesJobTag() { String tag = System.getenv("SPARK_DEPENDENCIES_JOB_IMAGE_TAG"); if (tag == null || tag.isEmpty()) { throw new IllegalStateException( "SPARK_DEPENDENCIES_JOB_IMAGE_TAG environment variable is required but not set. 
" + "This variable must be set to ensure tests use the locally built Docker image."); } return tag.trim(); } @Override protected void deriveDependencies() { System.out.println("::group::🚧 🚧 🚧 CassandraDependenciesDockerJob logs"); try (GenericContainer sparkDependenciesJob = new GenericContainer<>( DockerImageName.parse("ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:" + dependenciesJobTag())) .withNetwork(network) .withLogConsumer(new LogToConsolePrinter("[spark-dependencies] ")) .withEnv("CASSANDRA_KEYSPACE", "jaeger_v1_dc1") .withEnv("CASSANDRA_CONTACT_POINTS", "cassandra") // This should be an address within the docker network .withEnv("CASSANDRA_LOCAL_DC", cassandra.getLocalDatacenter()) .withEnv("CASSANDRA_USERNAME", cassandra.getUsername()) .withEnv("CASSANDRA_PASSWORD", cassandra.getPassword()) .dependsOn(cassandra, jaegerCassandraSchema);) { sparkDependenciesJob.start(); await("spark-dependencies-job execution") .atMost(3, TimeUnit.MINUTES) .until(() -> !sparkDependenciesJob.isRunning()); } finally { System.out.println("::endgroup::"); } } } ================================================ FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJobTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.cassandra; import static org.awaitility.Awaitility.await; import com.datastax.oss.driver.api.core.CqlSession; import io.jaegertracing.spark.dependencies.LogToConsolePrinter; import io.jaegertracing.spark.dependencies.test.DependenciesTest; import java.time.LocalDate; import java.util.Collections; import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import org.junit.After; import org.junit.AfterClass; import org.junit.BeforeClass; import org.testcontainers.cassandra.CassandraContainer; import 
org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; /** * @author Pavol Loffay */ public class CassandraDependenciesJobTest extends DependenciesTest { protected static Network network; protected static CassandraContainer cassandra; protected static GenericContainer jaegerAll; protected static GenericContainer jaegerCassandraSchema; private static int cassandraPort; @BeforeClass public static void beforeClass() { System.out.println("=== Starting CassandraDependenciesJobTest setup ==="); network = Network.newNetwork(); System.out.println("Created network: " + network.getId()); System.out.println("Starting Cassandra container (cassandra:4.1)..."); cassandra = new CassandraContainer("cassandra:4.1") .withNetwork(network) .withNetworkAliases("cassandra") .withExposedPorts(9042); cassandra.start(); cassandraPort = cassandra.getMappedPort(9042); System.out.println("Cassandra started. Mapped port: " + cassandraPort); System.out.println("Starting Jaeger Cassandra schema container (jaegertracing/jaeger-cassandra-schema:" + jaegerVersion() + ")..."); jaegerCassandraSchema = new GenericContainer<>("jaegertracing/jaeger-cassandra-schema:" + jaegerVersion()) .withLogConsumer(new LogToConsolePrinter("[jaeger-cassandra-schema] ")) .withNetwork(network); jaegerCassandraSchema.start(); System.out.println("Jaeger Cassandra schema container started, waiting for schema creation..."); /** * Wait until schema is created */ await().until(() -> !jaegerCassandraSchema.isRunning()); System.out.println("Jaeger Cassandra schema creation completed"); System.out.println("Starting Jaeger v2 unified container (jaegertracing/jaeger:" + jaegerVersion() + ")..."); jaegerAll = new GenericContainer<>("jaegertracing/jaeger:" + jaegerVersion()) .withNetwork(network) .withClasspathResourceMapping("jaeger-v2-config-cassandra.yaml", "/etc/jaeger/config.yaml", 
org.testcontainers.containers.BindMode.READ_ONLY) .withCommand("--config", "/etc/jaeger/config.yaml") .waitingFor(new BoundPortHttpWaitStrategy(16687) .forStatusCodeMatching(statusCode -> statusCode >= 200 && statusCode < 300)) .withExposedPorts(16687, 16686, 4317, 4318, 14268, 9411); jaegerAll.start(); System.out.println("Jaeger v2 container started"); queryUrl = String.format("http://127.0.0.1:%d", jaegerAll.getMappedPort(16686)); collectorUrl = String.format("http://127.0.0.1:%d", jaegerAll.getMappedPort(4317)); System.out.println("=== Container setup complete ==="); System.out.println("Query URL: " + queryUrl); System.out.println("Collector URL: " + collectorUrl); System.out.println("Health check port: " + jaegerAll.getMappedPort(16687)); } @AfterClass public static void afterClass() { Optional.of(cassandra).ifPresent(GenericContainer::close); Optional.of(jaegerAll).ifPresent(GenericContainer::close); Optional.of(jaegerCassandraSchema).ifPresent(GenericContainer::close); } @After public void after() { try (CqlSession session = CqlSession.builder() .addContactPoint(cassandra.getContactPoint()) .withLocalDatacenter(cassandra.getLocalDatacenter()) .build()) { session.execute("TRUNCATE jaeger_v1_dc1.traces"); session.execute(String.format("TRUNCATE jaeger_v1_dc1.%s", dependenciesTable(session))); } } private String dependenciesTable(CqlSession session) { try { session.execute("SELECT ts from jaeger_v1_dc1.dependencies_v2 limit 1;"); } catch (Exception ex) { return "dependencies"; } return "dependencies_v2"; } @Override protected void deriveDependencies() { System.out.println("::group::🚧 🚧 🚧 CassandraDependenciesJob logs"); try { CassandraDependenciesJob.builder() .contactPoints("localhost:" + cassandraPort) .day(LocalDate.now()) .keyspace("jaeger_v1_dc1") .username(cassandra.getUsername()) .password(cassandra.getPassword()) .build() .run("peer.service"); } finally { System.out.println("::endgroup::"); } } @Override protected void waitBetweenTraces() throws 
InterruptedException { // TODO otherwise it sometimes fails TimeUnit.SECONDS.sleep(1); } public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy { private final int port; public BoundPortHttpWaitStrategy(int port) { this.port = port; } @Override protected Set getLivenessCheckPorts() { int mapptedPort = this.waitStrategyTarget.getMappedPort(port); return Collections.singleton(mapptedPort); } } } ================================================ FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/JaegerTestDriverContainer.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.cassandra; import java.net.ConnectException; import java.time.Duration; import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; import org.rnorth.ducttape.unreliables.Unreliables; import org.testcontainers.containers.ContainerLaunchException; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.traits.LinkableContainer; /** * @author Pavol Loffay */ public class JaegerTestDriverContainer extends GenericContainer implements LinkableContainer { protected final OkHttpClient okHttpClient = new OkHttpClient.Builder().build(); protected final Duration waitUntilReady; public JaegerTestDriverContainer(String dockerImageName) { this(dockerImageName, Duration.ofMinutes(1)); } public JaegerTestDriverContainer(String dockerImageName, Duration waitUntilReady) { super(dockerImageName); this.waitUntilReady = waitUntilReady; } @Override protected void waitUntilContainerStarted() { String statusUrl = String.format("http://localhost:%d/", this.getMappedPort(8080)); Unreliables.retryUntilTrue((int)waitUntilReady.toMillis(), TimeUnit.MILLISECONDS, containerStartedCondition(statusUrl)); } protected 
Callable containerStartedCondition(String statusUrl) { return () -> { if (!isRunning()) { throw new ContainerLaunchException("Container failed to start"); } Request request = new Request.Builder() .url(statusUrl) .head() .build(); try (Response response = okHttpClient.newCall(request).execute()) { return response.code() == 200; } catch (ConnectException ex) { return false; } }; } } ================================================ FILE: jaeger-spark-dependencies-cassandra/src/test/resources/jaeger-v2-config-cassandra.yaml ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # service: extensions: [jaeger_storage, jaeger_query, healthcheckv2] pipelines: traces: receivers: [otlp, jaeger, zipkin] processors: [filter/jaeger, batch] exporters: [jaeger_storage_exporter] telemetry: resource: service.name: jaeger-backend metrics: level: detailed readers: - pull: exporter: prometheus: host: 0.0.0.0 port: 8888 logs: level: info traces: level: none extensions: healthcheckv2: use_v2: true http: endpoint: "0.0.0.0:16687" status: enabled: true path: "/" jaeger_query: storage: traces: some_storage jaeger_storage: backends: some_storage: cassandra: schema: keyspace: "jaeger_v1_dc1" connection: servers: ["cassandra:9042"] tls: insecure: true receivers: otlp: protocols: grpc: endpoint: "0.0.0.0:4317" http: endpoint: "0.0.0.0:4318" jaeger: protocols: grpc: thrift_binary: thrift_compact: thrift_http: endpoint: "0.0.0.0:14268" zipkin: endpoint: "0.0.0.0:9411" processors: filter/jaeger: error_mode: ignore traces: span: - 'resource.attributes["service.name"] == "jaeger"' batch: exporters: jaeger_storage_exporter: trace_storage: some_storage ================================================ FILE: jaeger-spark-dependencies-cassandra/src/test/resources/log4j.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Set everything to be logged 
to the console log4j.rootCategory=WARN, console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err log4j.appender.console.layout=org.apache.log4j.PatternLayout log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n # Settings to quiet third party logs that are too verbose log4j.logger.org.spark-project.jetty=WARN log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR log4j.logger.io.jaegertracing=WARN log4j.logger.org.testcontainers=INFO log4j.logger.org.apache.http=WARN log4j.logger.com.github.dockerjava=WARN log4j.logger.org.zeroturnaround.exec=WARN ================================================ FILE: jaeger-spark-dependencies-common/pom.xml ================================================ 4.0.0 jaeger-spark-dependencies-parent io.jaegertracing.dependencies 0.0.1-SNAPSHOT jaeger-spark-dependencies-common io.opentracing opentracing-api ${version.io.opentracing} ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkHelper.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies; import io.jaegertracing.spark.dependencies.model.Dependency; import io.jaegertracing.spark.dependencies.model.Span; import java.util.List; import org.apache.spark.api.java.JavaPairRDD; import scala.Tuple2; /** * @author Pavol Loffay */ public class DependenciesSparkHelper { private 
DependenciesSparkHelper() {} /** * Derives dependency links based on supplied spans (e.g. multiple traces). If there is a link A->B * in multiple traces it will return just one {@link Dependency} link with a correct {@link Dependency#callCount}. * Note that RDDs are grouped on traceId so if a span contains multiple references from different traces * the job does not produce correct result. * * @param traceIdSpans {@link org.apache.spark.api.java.JavaRDD} with trace id and a collection of * spans with that traceId. * @return Aggregated dependency links for all traces. */ public static List derive(JavaPairRDD> traceIdSpans,String peerServiceTag) { return traceIdSpans.flatMapValues(new SpansToDependencyLinks(peerServiceTag)) .values() .mapToPair(dependency -> new Tuple2<>(new Tuple2<>(dependency.getParent(), dependency.getChild()), dependency)) .reduceByKey((v1, v2) -> new Dependency(v1.getParent(), v1.getChild(), v1.getCallCount() + v2.getCallCount())) .values() .collect(); } } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinks.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies; import io.jaegertracing.spark.dependencies.model.Dependency; import io.jaegertracing.spark.dependencies.model.KeyValue; import io.jaegertracing.spark.dependencies.model.Reference; import io.jaegertracing.spark.dependencies.model.Span; import io.opentracing.tag.Tags; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import org.apache.spark.api.java.function.FlatMapFunction; /** * @author Pavol Loffay */ public class SpansToDependencyLinks implements FlatMapFunction, Dependency>{ /** * Derives dependency links based on supplied 
spans. * * @param trace trace * @return collection of dependency links, note that it contains duplicates * @throws Exception */ public String peerServiceTag = ""; public SpansToDependencyLinks(String peerServiceTag){ this.peerServiceTag = peerServiceTag; } @Override public java.util.Iterator call(Iterable trace) { Set uniqueSpans = new LinkedHashSet<>(); for (Span span : trace) { uniqueSpans.add(span); } Map> spanMap = new LinkedHashMap<>(); Map> spanChildrenMap = new LinkedHashMap<>(); for (Span span : uniqueSpans) { // Map of children for (Reference ref: span.getRefs()){ Set children = spanChildrenMap.get(ref.getSpanId()); if (children == null){ children = new LinkedHashSet<>(); spanChildrenMap.put(ref.getSpanId(), children); } children.add(span); } // Map of parents Set sharedSpans = spanMap.get(span.getSpanId()); if (sharedSpans == null) { sharedSpans = new LinkedHashSet<>(); spanMap.put(span.getSpanId(), sharedSpans); } sharedSpans.add(span); } // Let's start with zipkin shared spans List result = sharedSpanDependencies(spanMap); for (Span span : uniqueSpans) { if (span.getRefs() == null || span.getRefs().isEmpty() || span.getProcess() == null || span.getProcess().getServiceName() == null) { continue; } // if the current span is shared and not a client span we skip it // because the link from this span to parent should be from client span if (spanMap.get(span.getSpanId()).size() > 1 && !isClientSpan(span)) { continue; } for (Reference reference: span.getRefs()) { Set parents = spanMap.get(reference.getSpanId()); if (parents != null) { if (parents.size() > 1) { serverSpan(parents) .ifPresent(parent -> result.add(new Dependency(parent.getProcess().getServiceName(), span.getProcess().getServiceName())) ); } else { // this is jaeger span or zipkin native (not shared!) 
Span parent = parents.iterator().next(); if (parent.getProcess() == null || parent.getProcess().getServiceName() == null) { continue; } result.add(new Dependency(parent.getProcess().getServiceName(), span.getProcess().getServiceName())); } } } // We are on a leaf so we try to add a dependency for calls to components that calls remote components not instrumented if (spanChildrenMap.get(span.getSpanId()) == null ){ String targetName = span.getTag(peerServiceTag); if (targetName != null) { result.add(new Dependency(span.getProcess().getServiceName(), targetName)); } } } return result.iterator(); } static Optional serverSpan(Set sharedSpans) { for (Span span: sharedSpans) { if (isServerSpan(span)) { return Optional.of(span); } } return Optional.empty(); } static boolean isClientSpan(Span span) { return Tags.SPAN_KIND_CLIENT.equals(span.getTag(Tags.SPAN_KIND.getKey())); } static boolean isServerSpan(Span span) { return Tags.SPAN_KIND_SERVER.equals(span.getTag(Tags.SPAN_KIND.getKey())); } private List sharedSpanDependencies(Map> spanMap) { List dependencies = new ArrayList<>(); // create links between shared spans for (Set sharedSpans: spanMap.values()) { sharedSpanDependency(sharedSpans) .ifPresent(dependencies::add); } return dependencies; } protected Optional sharedSpanDependency(Set sharedSpans) { String clientService = null; String serverService = null; for (Span span: sharedSpans) { for (KeyValue tag: span.getTags()) { if (Tags.SPAN_KIND_CLIENT.equals(tag.getValueString()) || Tags.SPAN_KIND_PRODUCER.equals(tag.getValueString())) { clientService = span.getProcess().getServiceName(); } else if (Tags.SPAN_KIND_SERVER.equals(tag.getValueString()) || Tags.SPAN_KIND_CONSUMER.equals(tag.getValueString())) { serverService = span.getProcess().getServiceName(); } if (clientService != null && serverService != null) { return Optional.of(new Dependency(clientService, serverService)); } } } return Optional.empty(); } } ================================================ FILE: 
/**
 * Small static helpers shared by the dependency jobs.
 *
 * @author Pavol Loffay
 */
public class Utils {
  private Utils() {}

  /**
   * Reads an environment variable.
   *
   * @param key name of the environment variable
   * @param defaultValue value returned when the variable is not set
   * @return the variable's value, or {@code defaultValue} when it is not set
   */
  public static String getEnv(String key, String defaultValue) {
    String result = System.getenv(key);
    return result != null ? result : defaultValue;
  }

  /**
   * Fails when {@code object} is {@code null}.
   *
   * @param msg name used in the exception message ("{@code <msg> is null}")
   * @param object value to check
   * @throws NullPointerException when {@code object} is {@code null}
   */
  public static void checkNoTNull(String msg, Object object) {
    if (object == null) {
      throw new NullPointerException(String.format("%s is null", msg));
    }
  }

  /**
   * Returns the path to the uber jar containing the calling class.
   * This is used to distribute the jar to Spark workers.
   *
   * @param clazz class whose code source location is looked up
   * @return percent-decoded path of the location the class was loaded from
   * @throws IllegalStateException when the class has no code source location
   *     (for example classes loaded by the bootstrap class loader)
   * @throws UnsupportedEncodingException declared by {@link URLDecoder#decode(String, String)}
   */
  public static String pathToUberJar(Class<?> clazz) throws UnsupportedEncodingException {
    checkNoTNull("clazz", clazz);
    java.security.CodeSource codeSource = clazz.getProtectionDomain().getCodeSource();
    URL jarFile = codeSource != null ? codeSource.getLocation() : null;
    if (jarFile == null) {
      throw new IllegalStateException(
          String.format("Cannot determine jar location of %s: class has no code source", clazz.getName()));
    }
    // URLDecoder implements form decoding and would turn a literal '+' in the path into a
    // space; escape it first so only percent-escapes are decoded.
    return URLDecoder.decode(jarFile.getPath().replace("+", "%2B"), "UTF-8");
  }
}
objectMapper.addMixIn(Reference.class, ReferenceMixin.class); objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); return objectMapper; } } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueDeserializer.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.json; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import io.jaegertracing.spark.dependencies.model.KeyValue; import java.io.IOException; /** * @author Pavol Loffay * @author Danish Siddiqui */ public class KeyValueDeserializer extends StdDeserializer { // TODO Spark incorrectly serializes object mapper, therefore reinitializing // here private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper()); public KeyValueDeserializer() { super(KeyValue.class); } @Override public KeyValue deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException { JsonNode node = objectMapper.getFactory().setCodec(objectMapper).getCodec().readTree(jp); String key = node.get("key").asText(); String type = node.get("type").asText(); KeyValue keyValue = new KeyValue(); keyValue.setKey(key); keyValue.setValueType(type); if ("string".equalsIgnoreCase(type)) { JsonNode valueNode = node.get("value"); if (valueNode != null) { keyValue.setValueString(valueNode.asText()); } } else { // TODO: KeyValue model only supports string value for now, other types are // ignored } return keyValue; } } ================================================ FILE: 
jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueMixin.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.json; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * @author Pavol Loffay * @author Danish Siddiqui */ @JsonDeserialize(using = KeyValueDeserializer.class) public class KeyValueMixin { } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceDeserializer.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.json; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import io.jaegertracing.spark.dependencies.model.Reference; import java.io.IOException; import java.math.BigInteger; /** * @author Pavol Loffay * @author Danish Siddiqui */ public class ReferenceDeserializer extends StdDeserializer { private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper()); protected ReferenceDeserializer() { super(Reference.class); } @Override public Reference deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException { JsonNode node = objectMapper.getFactory().setCodec(objectMapper).getCodec().readTree(jp); String spanIdHex = node.get("spanID").asText(); Reference reference = new Reference(); reference.setSpanId(new BigInteger(spanIdHex, 16).longValue()); return reference; } } ================================================ FILE: 
jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceMixin.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.json; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * @author Pavol Loffay * @author Danish Siddiqui */ @JsonDeserialize(using = ReferenceDeserializer.class) public class ReferenceMixin { } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanDeserializer.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.json; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationContext; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import io.jaegertracing.spark.dependencies.model.KeyValue; import io.jaegertracing.spark.dependencies.model.Process; import io.jaegertracing.spark.dependencies.model.Reference; import io.jaegertracing.spark.dependencies.model.Span; import java.io.IOException; import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; /** * @author Pavol Loffay * @author Danish Siddiqui */ public class SpanDeserializer extends StdDeserializer { // TODO Spark incorrectly serializes object mapper, therefore reinitializing // here private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper()); public SpanDeserializer() { super(Span.class); } @Override public Span deserialize(JsonParser jp, DeserializationContext ctxt) 
throws IOException, JsonProcessingException { JsonNode node = objectMapper.getFactory().setCodec(objectMapper).getCodec().readTree(jp); JsonNode spanIdNode = node.get("spanID"); JsonNode traceIdNode = node.get("traceID"); JsonNode startTimeNode = node.get("startTime"); if (spanIdNode == null || traceIdNode == null) { throw new JsonProcessingException("Missing required fields: spanID or traceID") { }; } String spanIdHex = spanIdNode.asText(); String traceIdHex = traceIdNode.asText(); String startTimeStr = startTimeNode != null ? startTimeNode.asText() : null; JsonNode processNode = node.get("process"); Process process = objectMapper.treeToValue(processNode, Process.class); JsonNode tagsNode = node.get("tags"); List tags = Arrays.asList(objectMapper.treeToValue(tagsNode, KeyValue[].class)); JsonNode tagFieldsNode = node.get("tag"); if (tagFieldsNode != null) { Map tagFields = objectMapper.treeToValue(tagFieldsNode, Map.class); tags = addTagFields(tags, tagFields); } Span span = new Span(); span.setSpanId(new BigInteger(spanIdHex, 16).longValue()); span.setTraceId(traceIdHex); span.setRefs(deserializeReferences(node)); span.setStartTime(startTimeStr != null ? 
Long.parseLong(startTimeStr) : null); span.setProcess(process); span.setTags(tags); return span; } private List addTagFields(List tags, Map tagFields) { ArrayList result = new ArrayList<>(tags.size() + tagFields.size()); result.addAll(tags); List collect = tagFields.entrySet().stream().map(stringObjectEntry -> { KeyValue kv = new KeyValue(); kv.setKey(stringObjectEntry.getKey()); kv.setValueString(stringObjectEntry.getValue().toString()); return kv; }).collect(Collectors.toList()); result.addAll(collect); return result; } private List deserializeReferences(JsonNode node) throws JsonProcessingException { List references = new ArrayList<>(); JsonNode parentSpanID = node.get("parentSpanID"); if (parentSpanID != null) { BigInteger bigInteger = new BigInteger(parentSpanID.asText(), 16); Reference reference = new Reference(); reference.setSpanId(bigInteger.longValue()); references.add(reference); } JsonNode referencesNode = node.get("references"); if (!referencesNode.isNull()) { Reference[] referencesArr = objectMapper.treeToValue(referencesNode, Reference[].class); references.addAll(Arrays.asList(referencesArr)); } return references; } } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanMixin.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.json; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; /** * @author Pavol Loffay * @author Danish Siddiqui */ @JsonDeserialize(using = SpanDeserializer.class) public class SpanMixin { } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Dependency.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package 
/**
 * Immutable link between a parent and a child service together with the number of calls
 * observed for it.
 *
 * @author Pavol Loffay
 */
public class Dependency implements Serializable {
  private static final long serialVersionUID = 0L;

  private final String parent;
  private final String child;
  private final long callCount;

  /**
   * Creates a link with a call count of one.
   *
   * @param parent name of the calling service
   * @param child name of the called service
   */
  public Dependency(String parent, String child) {
    this(parent, child, 1);
  }

  /**
   * @param parent name of the calling service
   * @param child name of the called service
   * @param callCount number of calls from {@code parent} to {@code child}
   */
  public Dependency(String parent, String child, long callCount) {
    this.parent = parent;
    this.child = child;
    this.callCount = callCount;
  }

  public String getParent() {
    return parent;
  }

  public String getChild() {
    return child;
  }

  public long getCallCount() {
    return callCount;
  }

  /**
   * Two dependencies are equal when parent, child and call count all match.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof Dependency)) {
      return false;
    }
    Dependency that = (Dependency) o;
    return this.parent.equals(that.parent)
        && this.child.equals(that.child)
        && this.callCount == that.callCount;
  }

  /** @return always the constant {@code "jaeger"} */
  public String getSource() {
    return "jaeger";
  }

  /**
   * Intentionally ignores its argument; {@link #getSource()} is constant.
   * NOTE(review): presumably kept so bean-style mappers see a writable "source" property - confirm.
   */
  public void setSource(String source) {
  }

  /**
   * Hash over parent, child and call count, consistent with {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    int h = 1;
    h *= 1000003;
    h ^= parent.hashCode();
    h *= 1000003;
    h ^= child.hashCode();
    h *= 1000003;
    // Fold the long into an int. XOR-ing h into the operand here would cancel everything
    // accumulated so far and leave a hash that depends on callCount only.
    h ^= (int) ((callCount >>> 32) ^ callCount);
    return h;
  }

  @Override
  public String toString() {
    return "Dependency{" +
        "parent='" + parent + '\'' +
        ", child='" + child + '\'' +
        ", callCount=" + callCount +
        '}';
  }
}
/**
 * Process that emitted a span; carries only the service name.
 *
 * @author Pavol Loffay
 */
public class Process implements Serializable {
  private static final long serialVersionUID = 0L;

  private String serviceName;

  public String getServiceName() {
    return serviceName;
  }

  public void setServiceName(String serviceName) {
    this.serviceName = serviceName;
  }

  /** Processes are equal when their service names are equal (both may be null). */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o instanceof Process) {
      Process other = (Process) o;
      if (serviceName == null) {
        return other.serviceName == null;
      }
      return serviceName.equals(other.serviceName);
    }
    return false;
  }

  /** Hash of the service name, or 0 when no name is set. */
  @Override
  public int hashCode() {
    if (serviceName == null) {
      return 0;
    }
    return serviceName.hashCode();
  }
}
spanId.hashCode() : 0; } } ================================================ FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Span.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.model; import java.io.Serializable; import java.util.List; /** * @author Pavol Loffay */ public class Span implements Serializable { private static final long serialVersionUID = 0L; private String traceId; private Long spanId; private Long startTime; private Process process; private List tags; private List refs; public String getTraceId() { return traceId; } public void setTraceId(String traceId) { this.traceId = traceId; } public Long getSpanId() { return spanId; } public void setSpanId(Long spanId) { this.spanId = spanId; } public long getStartTime() { return startTime; } public void setStartTime(Long startTime) { this.startTime = startTime; } public Process getProcess() { return process; } public void setProcess(Process process) { this.process = process; } public List getTags() { return tags; } public String getTag(String key){ for (KeyValue kv : tags){ if (kv.getKey().equals(key)){ return kv.getValueString(); } } return null; } public void setTags(List tags) { this.tags = tags; } public List getRefs() { return refs; } public void setRefs(List refs) { this.refs = refs; } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } Span span = (Span) o; if (traceId != null ? !traceId.equals(span.traceId) : span.traceId != null) { return false; } if (spanId != null ? !spanId.equals(span.spanId) : span.spanId != null) { return false; } if (startTime != null ? !startTime.equals(span.startTime) : span.startTime != null) { return false; } if (process != null ? 
!process.equals(span.process) : span.process != null) { return false; } if (tags != null ? !tags.equals(span.tags) : span.tags != null) { return false; } return refs != null ? refs.equals(span.refs) : span.refs == null; } @Override public int hashCode() { int result = traceId != null ? traceId.hashCode() : 0; result = 31 * result + (spanId != null ? spanId.hashCode() : 0); result = 31 * result + (startTime != null ? startTime.hashCode() : 0); result = 31 * result + (process != null ? process.hashCode() : 0); result = 31 * result + (tags != null ? tags.hashCode() : 0); result = 31 * result + (refs != null ? refs.hashCode() : 0); return result; } } ================================================ FILE: jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinksTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import io.jaegertracing.spark.dependencies.model.Dependency; import io.jaegertracing.spark.dependencies.model.KeyValue; import io.jaegertracing.spark.dependencies.model.Process; import io.jaegertracing.spark.dependencies.model.Span; import io.opentracing.tag.Tags; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; import org.junit.Test; public class SpansToDependencyLinksTest { @Test public void shouldReturnDependencyWithClientAndServerSpans() { SpansToDependencyLinks spansToDependencyLinks = new SpansToDependencyLinks(""); Set sharedSpans = new HashSet<>(); sharedSpans.add(createSpan("clientName", Tags.SPAN_KIND_CLIENT)); sharedSpans.add(createSpan("serverName", Tags.SPAN_KIND_SERVER)); Optional result = spansToDependencyLinks.sharedSpanDependency(sharedSpans); 
assertTrue(result.isPresent()); assertEquals(new Dependency("clientName", "serverName"), result.get()); } @Test public void shouldReturnDependencyWithConsumerAndProducer() { SpansToDependencyLinks spansToDependencyLinks = new SpansToDependencyLinks(""); Set sharedSpans = new HashSet<>(); sharedSpans.add(createSpan("consumerName", Tags.SPAN_KIND_CONSUMER)); sharedSpans.add(createSpan("producerName", Tags.SPAN_KIND_PRODUCER)); Optional result = spansToDependencyLinks.sharedSpanDependency(sharedSpans); assertTrue(result.isPresent()); assertEquals(new Dependency("producerName", "consumerName"), result.get()); } @Test public void shouldReturnEmptyDependencyForSpansWithoutSpanKindDefinition() { SpansToDependencyLinks spansToDependencyLinks = new SpansToDependencyLinks(""); Set sharedSpans = new HashSet<>(); sharedSpans.add(createSpan("consumerName", "tag")); sharedSpans.add(createSpan("producerName", "tag")); Optional result = spansToDependencyLinks.sharedSpanDependency(sharedSpans); assertFalse(result.isPresent()); } private Span createSpan(String serviceName, String tag) { List tags = new ArrayList<>(); KeyValue keyValue = new KeyValue(); keyValue.setKey("span.kind"); keyValue.setValueString(tag); tags.add(keyValue); Span span = new Span(); Process process = new Process(); process.setServiceName(serviceName); span.setProcess(process); span.setTags(tags); return span; } } ================================================ FILE: jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/model/SpanTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.model; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; import java.util.ArrayList; import java.util.List; import org.junit.Test; public class SpanTest { @Test public void testEquals() { Span span1 = new Span(); 
span1.setTraceId("trace1"); span1.setSpanId(1L); span1.setProcess(createProcess("service1")); Span span2 = new Span(); span2.setTraceId("trace1"); span2.setSpanId(1L); span2.setProcess(createProcess("service1")); assertEquals(span1, span2); // Different service name Span span3 = new Span(); span3.setTraceId("trace1"); span3.setSpanId(1L); span3.setProcess(createProcess("service2")); assertNotEquals(span1, span3); // Different tags List tags1 = new ArrayList<>(); KeyValue kv1 = new KeyValue(); kv1.setKey("key"); kv1.setValueString("value1"); tags1.add(kv1); span1.setTags(tags1); List tags2 = new ArrayList<>(); KeyValue kv2 = new KeyValue(); kv2.setKey("key"); kv2.setValueString("value1"); tags2.add(kv2); span2.setTags(tags2); assertEquals(span1, span2); tags2.get(0).setValueString("value2"); assertNotEquals(span1, span2); } private Process createProcess(String serviceName) { Process process = new Process(); process.setServiceName(serviceName); return process; } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/pom.xml ================================================ 4.0.0 jaeger-spark-dependencies-parent io.jaegertracing.dependencies 0.0.1-SNAPSHOT jaeger-spark-dependencies-elasticsearch ${project.groupId} jaeger-spark-dependencies-common com.fasterxml.jackson.core jackson-annotations org.elasticsearch elasticsearch-spark-30_${version.scala.binary} ${version.elasticsearch.spark} ${project.groupId} jaeger-spark-dependencies-test test org.testcontainers testcontainers test org.awaitility awaitility ${version.org.awaitility-awaitility} test io.opentelemetry opentelemetry-api ${version.io.opentelemetry} test maven-shade-plugin ${version.maven-shade-plugin} package shade reference.conf io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJob true org.apache.hadoop:hadoop-common ** log4j:log4j org/apache/log4j/spi/LoggingEvent.class org.apache.logging.log4j:log4j-* ** io.netty:* ** org.slf4j:* ** 
org.scala-lang:* ** org.apache.spark:* ** org.lz4:* ** org.elasticsearch:elasticsearch-spark-* ** commons-httpclient:commons-httpclient ** xerces:xercesImpl ** com.squareup.okhttp3:* ** com.squareup.okio:* ** *:* META-INF/*.SF META-INF/*.DSA META-INF/*.RSA false ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticTupleToSpan.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.elastic; import com.fasterxml.jackson.databind.ObjectMapper; import io.jaegertracing.spark.dependencies.json.JsonHelper; import io.jaegertracing.spark.dependencies.model.Span; import org.apache.spark.api.java.function.Function; import scala.Tuple2; /** * @author Pavol Loffay */ public class ElasticTupleToSpan implements Function, Span> { private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper()); @Override public Span call(Tuple2 tuple) throws Exception { Span span = objectMapper.readValue(tuple._2(), Span.class); String originalTraceId = span.getTraceId(); span.setTraceId(normalizeTraceId(originalTraceId)); if (span.getTags() != null) { span.getTags().sort((o1, o2) -> o1.getKey().compareTo(o2.getKey())); } if (span.getRefs() != null) { span.getRefs().sort((o1, o2) -> o1.getSpanId().compareTo(o2.getSpanId())); } return span; } private String normalizeTraceId(String traceId) { if (traceId != null && traceId.length() < 32) { return String.format("%32s", traceId).replace(' ', '0'); } return traceId; } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJob.java ================================================ /** * Copyright (c) The Jaeger Authors * Copyright 2016-2017 The OpenZipkin Authors * SPDX-License-Identifier: 
Apache-2.0 */ package io.jaegertracing.spark.dependencies.elastic; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import io.jaegertracing.spark.dependencies.DependenciesSparkHelper; import io.jaegertracing.spark.dependencies.Utils; import io.jaegertracing.spark.dependencies.model.Dependency; import io.jaegertracing.spark.dependencies.model.Span; import java.time.LocalDate; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.elasticsearch.hadoop.rest.RestClient; import org.elasticsearch.hadoop.util.EsMajorVersion; import org.elasticsearch.spark.cfg.SparkSettings; import org.elasticsearch.spark.rdd.api.java.JavaEsSpark; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author OpenZipkin authors * @author Pavol Loffay */ public class ElasticsearchDependenciesJob { private static final Logger log = LoggerFactory.getLogger(ElasticsearchDependenciesJob.class); private static final Pattern PORT_PATTERN = Pattern.compile(":\\d+"); public static Builder builder() { return new Builder(); } public static final class Builder { String hosts = Utils.getEnv("ES_NODES", "127.0.0.1"); String username = Utils.getEnv("ES_USERNAME", null); String password = Utils.getEnv("ES_PASSWORD", null); Boolean clientNodeOnly = Boolean.parseBoolean(Utils.getEnv("ES_CLIENT_NODE_ONLY", "false")); Boolean nodesWanOnly = Boolean.parseBoolean(Utils.getEnv("ES_NODES_WAN_ONLY", "false")); String indexPrefix = Utils.getEnv("ES_INDEX_PREFIX", null); String indexDatePattern = datePattern(Utils.getEnv("ES_INDEX_DATE_SEPARATOR", "-")); String spanRange = Utils.getEnv("ES_TIME_RANGE", "24h"); Boolean 
useAliases = Boolean.parseBoolean(Utils.getEnv("ES_USE_ALIASES", "false")); final Map sparkProperties = new LinkedHashMap<>(); Builder() { sparkProperties.put("spark.ui.enabled", "false"); // don't die if there are no spans sparkProperties.put("es.index.read.missing.as.empty", "true"); sparkProperties.put("es.net.ssl.keystore.location", getSystemPropertyAsFileResource("javax.net.ssl.keyStore")); sparkProperties.put("es.net.ssl.keystore.pass", System.getProperty("javax.net.ssl.keyStorePassword", "")); sparkProperties.put("es.net.ssl.truststore.location", getSystemPropertyAsFileResource("javax.net.ssl.trustStore")); sparkProperties.put("es.net.ssl.truststore.pass", System.getProperty("javax.net.ssl.trustStorePassword", "")); } // local[*] master lets us run & test the job locally without setting a Spark cluster String sparkMaster = Utils.getEnv("SPARK_MASTER", "local[*]"); // needed when not in local mode String[] jars; // By default the job only works on traces whose first timestamp is today ZonedDateTime day = ZonedDateTime.of(LocalDate.now().atStartOfDay(), ZoneOffset.UTC); /** When set, this indicates which jars to distribute to the cluster. */ public Builder jars(String... jars) { this.jars = jars; return this; } /** es.nodes separated by ',' */ public Builder nodes(String hosts) { Utils.checkNoTNull(hosts, "nodes"); this.hosts = hosts; this.nodesWanOnly = true; return this; } /** username used for basic auth. Needed when Shield or X-Pack security is enabled */ public Builder username(String username) { this.username = username; return this; } /** password used for basic auth. Needed when Shield or X-Pack security is enabled */ public Builder password(String password) { this.password = password; return this; } /** index prefix for Jaeger indices. By default empty */ public Builder indexPrefix(String indexPrefix) { this.indexPrefix = indexPrefix; return this; } /** index date pattern for Jaeger indices. 
By default yyyy-MM-dd */ public Builder indexDatePattern(String indexDatePattern) { this.indexDatePattern = indexDatePattern; return this; } /** span range for Jaeger indices. By default 24h */ public Builder spanRange(String spanRange) { this.spanRange = spanRange; return this; } /** Day to process dependencies for. Defaults to today. */ public Builder day(LocalDate day) { this.day = day.atStartOfDay(ZoneOffset.UTC); return this; } /** Whether the connector is used against an Elasticsearch instance in a cloud/restricted * environment over the WAN, such as Amazon Web Services. In this mode, the * connector disables discovery and only connects through the declared es.nodes during all operations, * including reads and writes. Note that in this mode, performance is highly affected. */ public Builder nodesWanOnly(boolean wanOnly) { this.nodesWanOnly = wanOnly; return this; } private static void logIfNoPort(String hosts) { if (!PORT_PATTERN.matcher(hosts).find()) { log.warn("Port is not specified, default port 9200 will be used"); } } public ElasticsearchDependenciesJob build() { String hosts = System.getenv("ES_NODES"); String wanOnly = System.getenv("ES_NODES_WAN_ONLY"); // Optimize user configuration - nodes specified but wan only not if (hosts != null && wanOnly == null) { this.nodesWanOnly = true; } logIfNoPort(this.hosts); return new ElasticsearchDependenciesJob(this); } } private static String getSystemPropertyAsFileResource(String key) { String prop = System.getProperty(key, ""); return prop != null && !prop.isEmpty() ? 
"file:" + prop : prop; } private final ZonedDateTime day; private final SparkConf conf; private final String indexPrefix; private final String indexDatePattern; private final String spanRange; private final Boolean useAliases; ElasticsearchDependenciesJob(Builder builder) { this.day = builder.day; this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName()); if (builder.jars != null) { conf.setJars(builder.jars); } if (builder.username != null) { conf.set("es.net.http.auth.user", builder.username); } if (builder.password != null) { conf.set("es.net.http.auth.pass", builder.password); } conf.set("es.nodes", builder.hosts); if (builder.hosts.indexOf("https") != -1) { conf.set("es.net.ssl", "true"); } if (builder.nodesWanOnly) { conf.set("es.nodes.wan.only", "true"); } if (builder.clientNodeOnly) { conf.set("es.nodes.discovery", "0"); conf.set("es.nodes.client.only", "1"); } for (Map.Entry entry : builder.sparkProperties.entrySet()) { conf.set(entry.getKey(), entry.getValue()); } this.indexPrefix = builder.indexPrefix; this.indexDatePattern = builder.indexDatePattern; this.spanRange = builder.spanRange; this.useAliases = builder.useAliases; } /** * https://github.com/jaegertracing/jaeger/blob/master/CHANGELOG.md#190-2019-01-21 */ private static String prefixBefore19(String prefix) { return prefix != null ? String.format("%s:", prefix) : ""; } private static String prefix(String prefix) { return prefix != null ? String.format("%s-", prefix) : ""; } private static String datePattern(String separator) { if (separator.equals("")) { return "yyyyMMdd"; } // ' is escape character in date format, we should double it here. 
if (separator.contains("'")) { separator = separator.replace("'", "''"); } return String.format("yyyy'%s'MM'%s'dd", separator, separator); } public void run(String peerServiceTag) { String[] readIndices; String[] writeIndex; // use alias indices common when using index rollover if (this.useAliases) { readIndices = new String[]{prefix(indexPrefix) + "jaeger-span-read", prefixBefore19(indexPrefix) + "jaeger-span-read"}; writeIndex = new String[] {prefix(indexPrefix) + "jaeger-dependencies-write", prefixBefore19(indexPrefix) + "jaeger-dependencies-write"}; } else { readIndices = indexDate("jaeger-span"); writeIndex = indexDate("jaeger-dependencies"); } run(readIndices, writeIndex, peerServiceTag); } String[] indexDate(String index) { String date = day.toLocalDate().format(DateTimeFormatter.ofPattern(indexDatePattern)); if (indexPrefix != null && indexPrefix.length() > 0) { return new String[]{String.format("%s%s-%s", prefix(indexPrefix), index, date), String.format("%s%s-%s", prefixBefore19(indexPrefix), index, date)}; } // if there is no prefix we read and write only to one index return new String[]{String.format("%s-%s", index, date)}; } void run(String[] spanIndices, String[] depIndices,String peerServiceTag) { JavaSparkContext sc = new JavaSparkContext(conf); try { for (int i = 0; i < spanIndices.length; i++) { String spanIndex = spanIndices[i]; String depIndex = depIndices[i]; log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, spanIndex, depIndex); // Send raw query to ES to select only the docs / spans we want to consider for this job // This doesn't change the default behavior as the daily indexes only contain up to 24h of data String esQuery = String.format("{\"range\": {\"startTimeMillis\": { \"gte\": \"now-%s\" }}}", spanRange); JavaPairRDD> traces = JavaEsSpark.esJsonRDD(sc, spanIndex, esQuery) .map(new ElasticTupleToSpan()) .groupBy(Span::getTraceId); List dependencyLinks = 
DependenciesSparkHelper.derive(traces,peerServiceTag); EsMajorVersion esMajorVersion = getEsVersion(); // Add type for ES < 7 // WARN log is produced for older ES versions, however it's produced by spark-es library and not ES itself, it cannot be disabled // WARN Resource: Detected type name in resource [jaeger-dependencies-2019-08-14/dependencies]. Type names are deprecated and will be removed in a later release. if (esMajorVersion.before(EsMajorVersion.V_7_X)) { depIndex = depIndex + "/dependencies"; } store(sc, dependencyLinks, depIndex); log.info("Done, {} dependency objects created", dependencyLinks.size()); if (dependencyLinks.size() > 0) { // we do not derive dependencies for old prefix "prefix:" if new prefix "prefix-" contains data break; } } } finally { sc.stop(); } } private EsMajorVersion getEsVersion() { RestClient client = new RestClient(new SparkSettings(conf)); try { return client.mainInfo().getMajorVersion(); } finally { client.close(); } } private void store(JavaSparkContext javaSparkContext, List dependencyLinks, String resource) { if (dependencyLinks.isEmpty()) { return; } String json; try { ObjectMapper objectMapper = new ObjectMapper(); json = objectMapper.writeValueAsString(new ElasticsearchDependencies(dependencyLinks, day)); } catch (JsonProcessingException e) { throw new IllegalStateException("Could not serialize dependencies", e); } JavaEsSpark.saveJsonToEs(javaSparkContext.parallelize(Collections.singletonList(json)), resource); } /** * Helper class used to serialize dependencies to JSON. 
*/ public static final class ElasticsearchDependencies { private List dependencies; private ZonedDateTime ts; public ElasticsearchDependencies(List dependencies, ZonedDateTime ts) { this.dependencies = dependencies; this.ts = ts; } public List getDependencies() { return dependencies; } public String getTimestamp() { // Jaeger ES dependency storage uses RFC3339Nano for timestamp return ts.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX")); } } /** * Entry point for running ElasticsearchDependenciesJob directly. * This is used when the Docker image variant is elasticsearch-specific. */ public static void main(String[] args) throws java.io.UnsupportedEncodingException { LocalDate date = LocalDate.now(); if (args.length == 1) { date = LocalDate.parse(args[0]); } else if (System.getenv("DATE") != null) { date = LocalDate.parse(System.getenv("DATE")); } String peerServiceTag = System.getenv("PEER_SERVICE_TAG"); if (peerServiceTag == null) { peerServiceTag = "peer.service"; } String jarPath = Utils.pathToUberJar(ElasticsearchDependenciesJob.class); ElasticsearchDependenciesJob.builder() .jars(jarPath) .day(date) .build() .run(peerServiceTag); } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/main/resources/log4j2.component.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Disable Log4j status logger console output log4j2.StatusLogger.level = OFF ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/main/resources/log4j2.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Set root logger level to WARN and use console appender rootLogger.level = WARN rootLogger.appenderRef.console.ref = console # Console appender configuration appender.console.type = Console appender.console.name = console 
appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n # Settings to quiet third party logs that are too verbose logger.jetty.name = org.spark-project.jetty logger.jetty.level = WARN logger.jettyLifecycle.name = org.spark-project.jetty.util.component.AbstractLifeCycle logger.jettyLifecycle.level = ERROR logger.sparkReplTyper.name = org.apache.spark.repl.SparkIMain$exprTyper logger.sparkReplTyper.level = INFO logger.sparkReplInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter logger.sparkReplInterpreter.level = INFO logger.jaegertracing.name = io.jaegertracing.spark logger.jaegertracing.level = INFO # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support logger.hiveMetastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler logger.hiveMetastore.level = FATAL logger.hiveFunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry logger.hiveFunctionRegistry.level = ERROR ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesDockerJobTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.elastic; import io.jaegertracing.spark.dependencies.LogToConsolePrinter; import org.testcontainers.containers.GenericContainer; import org.testcontainers.utility.DockerImageName; import java.util.concurrent.TimeUnit; import static org.awaitility.Awaitility.await; public class ElasticsearchDependenciesDockerJobTest extends ElasticsearchDependenciesJobTest { private static String dependenciesJobTag() { String tag = System.getenv("SPARK_DEPENDENCIES_JOB_IMAGE_TAG"); if (tag == null || tag.isEmpty()) { throw new IllegalStateException( 
"SPARK_DEPENDENCIES_JOB_IMAGE_TAG environment variable is required but not set. " + "This variable must be set to ensure tests use the locally built Docker image."); } return tag.trim(); } @Override protected void deriveDependencies() { // Create the dependenciesJob instance so that after() method can call // indexDate() on it dependenciesJob = ElasticsearchDependenciesJob.builder() .nodes("http://" + jaegerElasticsearchEnvironment.getElasticsearchIPPort()) .day(java.time.LocalDate.now()) .build(); try { jaegerElasticsearchEnvironment.refresh(); // Wait a bit to ensure all spans are fully indexed and visible Thread.sleep(2000); } catch (java.io.IOException e) { throw new RuntimeException("Could not refresh Elasticsearch", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException("Interrupted while waiting", e); } // Use the same date as the test - format it as ISO-8601 date string for the // DATE env var String dateStr = java.time.LocalDate.now().toString(); System.out .println("Running Docker spark-dependencies job with DATE=" + dateStr + ", ES_NODES=http://elasticsearch:9200"); System.out.println("::group::🚧 🚧 🚧 ElasticsearchDependenciesDockerJob logs"); try (GenericContainer sparkDependenciesJob = new GenericContainer<>( DockerImageName.parse("ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:" + dependenciesJobTag())) .withNetwork(jaegerElasticsearchEnvironment.network) .withLogConsumer(new LogToConsolePrinter("[spark-dependencies] ")) .withEnv("STORAGE", "elasticsearch") .withEnv("ES_NODES", "http://elasticsearch:9200") .withEnv("DATE", dateStr) .dependsOn(jaegerElasticsearchEnvironment.elasticsearch, jaegerElasticsearchEnvironment.jaegerAll)) { sparkDependenciesJob.start(); await("spark-dependencies-job execution") .atMost(3, TimeUnit.MINUTES) .until(() -> !sparkDependenciesJob.isRunning()); } finally { System.out.println("::endgroup::"); } try { jaegerElasticsearchEnvironment.refresh(); } catch 
(java.io.IOException e) { throw new RuntimeException("Could not refresh Elasticsearch", e); } } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJobTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.elastic; import io.opentelemetry.api.trace.Span; import io.opentelemetry.api.trace.Tracer; import io.jaegertracing.spark.dependencies.test.DependenciesTest; import io.jaegertracing.spark.dependencies.test.TracersGenerator; import java.io.IOException; import java.time.LocalDate; import java.util.Collections; import java.util.HashMap; import java.util.Set; import java.util.UUID; import java.util.concurrent.TimeUnit; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; /** * @author Pavol Loffay */ public class ElasticsearchDependenciesJobTest extends DependenciesTest { protected ElasticsearchDependenciesJob dependenciesJob; static JaegerElasticsearchEnvironment jaegerElasticsearchEnvironment; @BeforeClass public static void beforeClass() { jaegerElasticsearchEnvironment = new JaegerElasticsearchEnvironment(); jaegerElasticsearchEnvironment.start(new HashMap<>(), jaegerVersion(), JaegerElasticsearchEnvironment.elasticsearchVersion()); collectorUrl = jaegerElasticsearchEnvironment.getCollectorUrl(); queryUrl = jaegerElasticsearchEnvironment.getQueryUrl(); } @Before public void before() { String serviceName = UUID.randomUUID().toString(); String operationName = UUID.randomUUID().toString(); TracersGenerator.Tuple tuple = TracersGenerator.createJaeger(serviceName, collectorUrl); Tracer initStorageTracer = tuple.getA(); Span span = initStorageTracer.spanBuilder(operationName).startSpan(); 
span.setAttribute("foo", "bar"); span.end(); tuple.getB().flush(); try { // Give extra time for spans to be exported and indexed TimeUnit.SECONDS.sleep(2); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } waitJaegerQueryContains(serviceName, "foo"); } @After public void after() throws IOException { if (dependenciesJob != null) { jaegerElasticsearchEnvironment.cleanUp(dependenciesJob.indexDate("jaeger-span"), dependenciesJob.indexDate("jaeger-dependencies")); } } @AfterClass public static void afterClass() { jaegerElasticsearchEnvironment.stop(); } @Override protected void deriveDependencies() { dependenciesJob = ElasticsearchDependenciesJob.builder() .nodes("http://" + jaegerElasticsearchEnvironment.getElasticsearchIPPort()) .day(LocalDate.now()) .build(); try { jaegerElasticsearchEnvironment.refresh(); } catch (IOException e) { throw new RuntimeException("Could not refresh Elasticsearch", e); } dependenciesJob.run("peer.service"); try { jaegerElasticsearchEnvironment.refresh(); } catch (IOException e) { throw new RuntimeException("Could not refresh Elasticsearch", e); } } @Override protected void waitBetweenTraces() throws InterruptedException { try { jaegerElasticsearchEnvironment.refresh(); } catch (IOException e) { throw new RuntimeException("Could not refresh Elasticsearch", e); } } public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy { private final int port; public BoundPortHttpWaitStrategy(int port) { this.port = port; } @Override protected Set getLivenessCheckPorts() { int mapptedPort = this.waitStrategyTarget.getMappedPort(port); return Collections.singleton(mapptedPort); } } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesTagFieldsJobTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package 
io.jaegertracing.spark.dependencies.elastic; import java.util.HashMap; import org.junit.BeforeClass; /** * @author Pavol Loffay */ public class ElasticsearchDependenciesTagFieldsJobTest extends ElasticsearchDependenciesJobTest { @BeforeClass public static void beforeClass() { jaegerElasticsearchEnvironment = new JaegerElasticsearchEnvironment(); HashMap jaegerEnvSetting = new HashMap<>(); jaegerEnvSetting.put("ES_TAGS__AS_FIELDS_ALL", "true"); jaegerElasticsearchEnvironment.start(jaegerEnvSetting, jaegerVersion(), JaegerElasticsearchEnvironment.elasticsearchVersion()); collectorUrl = jaegerElasticsearchEnvironment.getCollectorUrl(); queryUrl = jaegerElasticsearchEnvironment.getQueryUrl(); } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/JaegerElasticsearchEnvironment.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.elastic; import static io.jaegertracing.spark.dependencies.test.DependenciesTest.jaegerVersion; import io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJobTest.BoundPortHttpWaitStrategy; import java.io.IOException; import java.util.Map; import java.util.Optional; import okhttp3.MediaType; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.RequestBody; import okhttp3.Response; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; /** * @author Pavol Loffay */ public class JaegerElasticsearchEnvironment { private OkHttpClient okHttpClient = new OkHttpClient(); Network network; GenericContainer elasticsearch; GenericContainer jaegerAll; /** * Set these in subclasses */ private String queryUrl; private String collectorUrl; public static String elasticsearchVersion() { String version = System.getProperty("elasticsearch.version", 
System.getenv("ELASTICSEARCH_VERSION")); return version != null ? version : "7.17.10"; } public void start(Map jaegerEnvs, String jaegerVersion, String elasticsearchVersion) { network = Network.newNetwork(); elasticsearch = new GenericContainer<>(String.format("docker.elastic.co/elasticsearch/elasticsearch:%s", elasticsearchVersion)) .withNetwork(network) .withNetworkAliases("elasticsearch") .waitingFor(new BoundPortHttpWaitStrategy(9200).forStatusCode(200)) .withExposedPorts(9200, 9300) .withEnv("xpack.security.enabled", "false") .withEnv("discovery.type", "single-node") .withEnv("network.bind_host", "elasticsearch") .withEnv("network.host", "_site_") .withEnv("network.publish_host", "_local_"); elasticsearch.start(); jaegerAll = new GenericContainer<>("jaegertracing/jaeger:" + jaegerVersion) .withNetwork(network) .withClasspathResourceMapping("jaeger-v2-config-elasticsearch.yaml", "/etc/jaeger/config.yaml", org.testcontainers.containers.BindMode.READ_ONLY) .withCommand("--config", "/etc/jaeger/config.yaml") .withEnv(jaegerEnvs) .waitingFor(new BoundPortHttpWaitStrategy(16687).forStatusCodeMatching(statusCode -> statusCode >= 200 && statusCode < 300)) .withExposedPorts(16687, 16686, 4317, 4318, 14268, 9411); jaegerAll.start(); collectorUrl = String.format("http://%s:%d", jaegerAll.getContainerIpAddress(), jaegerAll.getMappedPort(4317)); queryUrl = String.format("http://%s:%d", jaegerAll.getContainerIpAddress(), jaegerAll.getMappedPort(16686)); } public void cleanUp(String[] spanIndex, String[] dependenciesIndex) throws IOException { String matchAllQuery = "{\"query\": {\"match_all\":{} }}"; Request request = new Request.Builder() .url(String.format("http://%s:%d/%s,%s/_delete_by_query?conflicts=proceed", elasticsearch.getContainerIpAddress(), elasticsearch.getMappedPort(9200), // we don't use index prefix spanIndex[0], dependenciesIndex[0])) .post( RequestBody.create(MediaType.parse("application/json; charset=utf-8"), matchAllQuery)) .build(); try (Response 
response = okHttpClient.newCall(request).execute()) { if (!response.isSuccessful()) { String body = response.body().string(); throw new IllegalStateException(String.format("Could not remove data from ES: %s, %s", response, body)); } } } /** * In Elasticsearch, the _refresh endpoint is used to make recently indexed, * updated, or deleted documents visible to search, as otherwise they might * be still sitting in a memory buffer. */ public void refresh() throws IOException { Request request = new Request.Builder() .url(String.format("http://%s:%d/_refresh", elasticsearch.getContainerIpAddress(), elasticsearch.getMappedPort(9200))) .post(RequestBody.create(MediaType.parse("application/json; charset=utf-8"), "")) .build(); try (Response response = okHttpClient.newCall(request).execute()) { if (!response.isSuccessful()) { String body = response.body().string(); throw new IllegalStateException(String.format("Could not refresh ES: %s, %s", response, body)); } } } public void stop() { Optional.of(jaegerAll).ifPresent(GenericContainer::close); Optional.of(elasticsearch).ifPresent(GenericContainer::close); Optional.of(network).ifPresent(network1 -> { try { network1.close(); } catch (Exception e) { e.printStackTrace(); } }); } public String getQueryUrl() { return queryUrl; } public String getCollectorUrl() { return collectorUrl; } public String getElasticsearchIPPort() { return String.format("%s:%d", elasticsearch.getContainerIpAddress(), elasticsearch.getMappedPort(9200)); } } ================================================ FILE: jaeger-spark-dependencies-elasticsearch/src/test/resources/jaeger-v2-config-elasticsearch.yaml ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # service: extensions: [jaeger_storage, jaeger_query, healthcheckv2] pipelines: traces: receivers: [otlp, jaeger, zipkin] processors: [filter/jaeger, batch] exporters: [jaeger_storage_exporter] telemetry: resource: service.name: 
jaeger-backend metrics: level: detailed readers: - pull: exporter: prometheus: host: 0.0.0.0 port: 8888 logs: level: info traces: level: none extensions: healthcheckv2: use_v2: true http: endpoint: "0.0.0.0:16687" status: enabled: true path: "/" jaeger_query: storage: traces: some_storage jaeger_storage: backends: some_storage: elasticsearch: server_urls: - http://elasticsearch:9200 service_cache_ttl: 1s receivers: otlp: protocols: grpc: endpoint: "0.0.0.0:4317" http: endpoint: "0.0.0.0:4318" jaeger: protocols: grpc: thrift_binary: thrift_compact: thrift_http: endpoint: "0.0.0.0:14268" zipkin: endpoint: "0.0.0.0:9411" processors: filter/jaeger: error_mode: ignore traces: span: - 'resource.attributes["service.name"] == "jaeger"' batch: exporters: jaeger_storage_exporter: trace_storage: some_storage ================================================ FILE: jaeger-spark-dependencies-opensearch/pom.xml ================================================ 4.0.0 jaeger-spark-dependencies-parent io.jaegertracing.dependencies 0.0.1-SNAPSHOT jaeger-spark-dependencies-opensearch ${project.groupId} jaeger-spark-dependencies-common com.fasterxml.jackson.core jackson-annotations org.opensearch.client opensearch-spark-30_${version.scala.binary} 1.3.0 org.apache.spark spark-core_2.12 org.apache.spark spark-sql_2.12 org.opensearch.client opensearch-rest-high-level-client 2.18.0 ${project.groupId} jaeger-spark-dependencies-test test org.testcontainers testcontainers test org.opensearch opensearch-testcontainers 2.1.4 test org.awaitility awaitility ${version.org.awaitility-awaitility} test io.opentelemetry opentelemetry-api ${version.io.opentelemetry} test io.jaegertracing jaeger-client ${version.io.jaegertracing} test maven-shade-plugin ${version.maven-shade-plugin} package shade reference.conf io.jaegertracing.spark.dependencies.opensearch.OpenSearchDependenciesJob false *:* META-INF/*.SF META-INF/*.DSA META-INF/*.RSA false ================================================ FILE: 
jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJob.java ================================================ /** * Copyright 2017 The Jaeger Authors * Copyright 2016-2017 The OpenZipkin Authors * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package io.jaegertracing.spark.dependencies.opensearch; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import io.jaegertracing.spark.dependencies.DependenciesSparkHelper; import io.jaegertracing.spark.dependencies.Utils; import io.jaegertracing.spark.dependencies.model.Dependency; import io.jaegertracing.spark.dependencies.model.Span; import java.time.LocalDate; import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.opensearch.spark.rdd.api.java.JavaOpenSearchSpark; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author OpenZipkin authors * @author Pavol Loffay * @author Danish Siddiqui */ public class OpenSearchDependenciesJob { private static final Logger log = LoggerFactory.getLogger(OpenSearchDependenciesJob.class); private static final Pattern PORT_PATTERN = 
Pattern.compile(":\\d+");

  /** Returns a new {@link Builder} holding the defaults declared on its fields. */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * Mutable settings holder for {@link OpenSearchDependenciesJob}.
   *
   * <p>Each field is initialised from {@code Utils.getEnv(name, fallback)} with an
   * {@code OS_*} variable name (presumably the environment value, or the fallback
   * when it is unset; confirm in {@code Utils}).
   */
  public static final class Builder {

    String hosts = Utils.getEnv("OS_NODES", "127.0.0.1");
    String username = Utils.getEnv("OS_USERNAME", null);
    String password = Utils.getEnv("OS_PASSWORD", null);
    Boolean clientNodeOnly = Boolean.parseBoolean(Utils.getEnv("OS_CLIENT_NODE_ONLY", "false"));
    Boolean nodesWanOnly = Boolean.parseBoolean(Utils.getEnv("OS_NODES_WAN_ONLY", "false"));
    String indexPrefix = Utils.getEnv("OS_INDEX_PREFIX", null);
    // The separator is turned into a full date pattern by datePattern(...).
    String indexDatePattern = datePattern(Utils.getEnv("OS_INDEX_DATE_SEPARATOR", "-"));
    String spanRange = Utils.getEnv("OS_TIME_RANGE", "24h");
    Boolean useAliases = Boolean.parseBoolean(Utils.getEnv("OS_USE_ALIASES", "false"));
    Boolean allowSelfSigned = Boolean.parseBoolean(Utils.getEnv("OS_SSL_CERT_ALLOW_SELF_SIGNED", "false"));

    // Extra properties that the job constructor copies into the SparkConf.
    final Map sparkProperties = new LinkedHashMap<>();

    /**
     * Fills {@code sparkProperties}: Spark UI off, missing indices read as
     * empty, and key store / trust store locations and passwords taken from
     * the {@code javax.net.ssl.*} system properties.
     */
    Builder() {
      sparkProperties.put("spark.ui.enabled", "false");
      // don't die if there are no spans
      sparkProperties.put("opensearch.index.read.missing.as.empty", "true");
      sparkProperties.put("opensearch.net.ssl.keystore.location",
          getSystemPropertyAsFileResource("javax.net.ssl.keyStore"));
      sparkProperties.put("opensearch.net.ssl.keystore.pass",
          System.getProperty("javax.net.ssl.keyStorePassword", ""));
      sparkProperties.put("opensearch.net.ssl.truststore.location",
          getSystemPropertyAsFileResource("javax.net.ssl.trustStore"));
      sparkProperties.put("opensearch.net.ssl.truststore.pass",
          System.getProperty("javax.net.ssl.trustStorePassword", ""));
      // Only set when OS_SSL_CERT_ALLOW_SELF_SIGNED parsed as true above.
      if (allowSelfSigned) {
        sparkProperties.put("opensearch.net.ssl.cert.allow.self.signed", "true");
      }
    }

    // local[*] master lets us run & test the job locally without setting a Spark cluster
    String sparkMaster = Utils.getEnv("SPARK_MASTER", "local[*]");
    // needed when not in local mode
    String[] jars;

    // By default the job only works on traces whose first timestamp is today
    ZonedDateTime day = ZonedDateTime.of(LocalDate.now().atStartOfDay(),
ZoneOffset.UTC);

    /** When set, this indicates which jars to distribute to the cluster. */
    public Builder jars(String... jars) {
      this.jars = jars;
      return this;
    }

    /**
     * opensearch.nodes separated by ','. Calling this also switches
     * {@code nodesWanOnly} to true.
     */
    public Builder nodes(String hosts) {
      // Presumably rejects a null value; confirm in Utils.
      Utils.checkNoTNull(hosts, "nodes");
      this.hosts = hosts;
      this.nodesWanOnly = true;
      return this;
    }

    /** Username used for basic auth; the job sets it as opensearch.net.http.auth.user when non-null. */
    public Builder username(String username) {
      this.username = username;
      return this;
    }

    /** Password used for basic auth; the job sets it as opensearch.net.http.auth.pass when non-null. */
    public Builder password(String password) {
      this.password = password;
      return this;
    }

    /** Index prefix for Jaeger indices. Initial value comes from OS_INDEX_PREFIX with a null fallback. */
    public Builder indexPrefix(String indexPrefix) {
      this.indexPrefix = indexPrefix;
      return this;
    }

    /** Index date pattern for Jaeger indices. The initial value is built by datePattern from the "-" separator. */
    public Builder indexDatePattern(String indexDatePattern) {
      this.indexDatePattern = indexDatePattern;
      return this;
    }

    /** Span range for Jaeger indices. By default 24h. */
    public Builder spanRange(String spanRange) {
      this.spanRange = spanRange;
      return this;
    }

    /** Day to process dependencies for, stored as the start of that day in UTC. Defaults to today. */
    public Builder day(LocalDate day) {
      this.day = day.atStartOfDay(ZoneOffset.UTC);
      return this;
    }

    /** Whether the connector is used against an OpenSearch instance in a cloud/restricted
     * environment over the WAN, such as Amazon Web Services. In this mode, the
     * connector disables discovery and only connects through the declared opensearch.nodes during all operations,
     * including reads and writes. Note that in this mode, performance is highly affected.
*/
    public Builder nodesWanOnly(boolean wanOnly) {
      this.nodesWanOnly = wanOnly;
      return this;
    }

    /** Logs a warning when the host list contains no {@code :<digits>} part. */
    private static void logIfNoPort(String hosts) {
      boolean portGiven = PORT_PATTERN.matcher(hosts).find();
      if (portGiven) {
        return;
      }
      log.warn("Port is not specified, default port 9200 will be used");
    }

    /**
     * Creates the job from the current settings. When OS_NODES is set in the
     * environment and OS_NODES_WAN_ONLY is not, WAN-only mode is switched on.
     */
    public OpenSearchDependenciesJob build() {
      String envNodes = System.getenv("OS_NODES");
      String envWanOnly = System.getenv("OS_NODES_WAN_ONLY");
      // Optimize user configuration - nodes specified but wan only not
      if (envNodes != null && envWanOnly == null) {
        this.nodesWanOnly = true;
      }
      logIfNoPort(this.hosts);
      return new OpenSearchDependenciesJob(this);
    }
  }

  /** Returns the system property prefixed with {@code file:}, or the empty value unchanged. */
  private static String getSystemPropertyAsFileResource(String key) {
    String prop = System.getProperty(key, "");
    if (prop == null || prop.isEmpty()) {
      return prop;
    }
    return "file:" + prop;
  }

  private final ZonedDateTime day;
  private final SparkConf conf;
  private final String indexPrefix;
  private final String indexDatePattern;
  private final String spanRange;
  private final Boolean useAliases;

  /** Copies the builder settings and translates them into a {@link SparkConf}. */
  OpenSearchDependenciesJob(Builder builder) {
    this.day = builder.day;
    this.indexPrefix = builder.indexPrefix;
    this.indexDatePattern = builder.indexDatePattern;
    this.spanRange = builder.spanRange;
    this.useAliases = builder.useAliases;

    this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
    if (builder.jars != null) {
      conf.setJars(builder.jars);
    }
    // Basic-auth credentials are optional.
    if (builder.username != null) {
      conf.set("opensearch.net.http.auth.user", builder.username);
    }
    if (builder.password != null) {
      conf.set("opensearch.net.http.auth.pass", builder.password);
    }
    conf.set("opensearch.nodes", builder.hosts);
    // Any occurrence of "https" in the host list enables SSL.
    if (builder.hosts.contains("https")) {
      conf.set("opensearch.net.ssl", "true");
    }
    if (builder.nodesWanOnly) {
      conf.set("opensearch.nodes.wan.only", "true");
    }
    if (builder.clientNodeOnly) {
      conf.set("opensearch.nodes.discovery", "0");
      conf.set("opensearch.nodes.client.only", "1");
    }
    for (Map.Entry entry : builder.sparkProperties.entrySet()) {
      conf.set(entry.getKey(), entry.getValue());
    }
  }
/** * https://github.com/jaegertracing/jaeger/blob/master/CHANGELOG.md#190-2019-01-21 */ private static String prefixBefore19(String prefix) { return prefix != null ? String.format("%s:", prefix) : ""; } private static String prefix(String prefix) { return prefix != null ? String.format("%s-", prefix) : ""; } private static String datePattern(String separator) { if (separator.equals("")) { return "yyyyMMdd"; } // ' is escape character in date format, we should double it here. if (separator.contains("'")) { separator = separator.replace("'", "''"); } return String.format("yyyy'%s'MM'%s'dd", separator, separator); } public void run(String peerServiceTag) { String[] readIndices; String[] writeIndex; // use alias indices common when using index rollover if (this.useAliases) { readIndices = new String[]{prefix(indexPrefix) + "jaeger-span-read", prefixBefore19(indexPrefix) + "jaeger-span-read"}; writeIndex = new String[] {prefix(indexPrefix) + "jaeger-dependencies-write", prefixBefore19(indexPrefix) + "jaeger-dependencies-write"}; } else { readIndices = indexDate("jaeger-span"); writeIndex = indexDate("jaeger-dependencies"); } run(readIndices, writeIndex, peerServiceTag); } String[] indexDate(String index) { String date = day.toLocalDate().format(DateTimeFormatter.ofPattern(indexDatePattern)); if (indexPrefix != null && indexPrefix.length() > 0) { return new String[]{String.format("%s%s-%s", prefix(indexPrefix), index, date), String.format("%s%s-%s", prefixBefore19(indexPrefix), index, date)}; } // if there is no prefix we read and write only to one index return new String[]{String.format("%s-%s", index, date)}; } void run(String[] spanIndices, String[] depIndices,String peerServiceTag) { JavaSparkContext sc = new JavaSparkContext(conf); try { for (int i = 0; i < spanIndices.length; i++) { String spanIndex = spanIndices[i]; String depIndex = depIndices[i]; log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, spanIndex, depIndex); 
// Send raw query to OS to select only the docs / spans we want to consider for this job // This doesn't change the default behavior as the daily indexes only contain up to 24h of data String osQuery = String.format("{\"range\": {\"startTimeMillis\": { \"gte\": \"now-%s\" }}}", spanRange); JavaPairRDD> traces = JavaOpenSearchSpark.opensearchRDD(sc, spanIndex, osQuery) .map(new OpenSearchTupleToSpan()) .groupBy(Span::getTraceId); List dependencyLinks = DependenciesSparkHelper.derive(traces,peerServiceTag); // No version check needed for OpenSearch as we don't support types in indexes store(sc, dependencyLinks, depIndex); log.info("Done, {} dependency objects created", dependencyLinks.size()); if (dependencyLinks.size() > 0) { // we do not derive dependencies for old prefix "prefix:" if new prefix "prefix-" contains data break; } } } finally { sc.stop(); } } private void store(JavaSparkContext javaSparkContext, List dependencyLinks, String resource) { if (dependencyLinks.isEmpty()) { return; } String json; try { ObjectMapper objectMapper = new ObjectMapper(); json = objectMapper.writeValueAsString(new OpenSearchDependencies(dependencyLinks, day)); } catch (JsonProcessingException e) { throw new IllegalStateException("Could not serialize dependencies", e); } JavaOpenSearchSpark.saveJsonToOpenSearch(javaSparkContext.parallelize(Collections.singletonList(json)), resource); } /** * Helper class used to serialize dependencies to JSON. */ public static final class OpenSearchDependencies { private List dependencies; private ZonedDateTime ts; public OpenSearchDependencies(List dependencies, ZonedDateTime ts) { this.dependencies = dependencies; this.ts = ts; } public List getDependencies() { return dependencies; } public String getTimestamp() { // Jaeger OS dependency storage uses RFC3339Nano for timestamp return ts.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX")); } } /** * Entry point for running OpenSearchDependenciesJob directly. 
*/ public static void main(String[] args) throws java.io.UnsupportedEncodingException { LocalDate date = LocalDate.now(); if (args.length == 1) { date = LocalDate.parse(args[0]); } else if (System.getenv("DATE") != null) { date = LocalDate.parse(System.getenv("DATE")); } String peerServiceTag = System.getenv("PEER_SERVICE_TAG"); if (peerServiceTag == null) { peerServiceTag = "peer.service"; } String jarPath = Utils.pathToUberJar(OpenSearchDependenciesJob.class); OpenSearchDependenciesJob.builder() .jars(jarPath) .day(date) .build() .run(peerServiceTag); } } ================================================ FILE: jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchTupleToSpan.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.opensearch; import com.fasterxml.jackson.databind.ObjectMapper; import io.jaegertracing.spark.dependencies.json.JsonHelper; import io.jaegertracing.spark.dependencies.model.Span; import java.util.Map; import org.apache.spark.api.java.function.Function; import scala.Tuple2; /** * @author Pavol Loffay * @author Danish Siddiqui */ public class OpenSearchTupleToSpan implements Function>, Span> { private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper()); @Override public Span call(Tuple2> tuple) throws Exception { Span span = objectMapper.convertValue(tuple._2(), Span.class); String originalTraceId = span.getTraceId(); span.setTraceId(normalizeTraceId(originalTraceId)); if (span.getTags() != null) { span.getTags().sort((o1, o2) -> o1.getKey().compareTo(o2.getKey())); } if (span.getRefs() != null) { span.getRefs().sort((o1, o2) -> o1.getSpanId().compareTo(o2.getSpanId())); } return span; } private String normalizeTraceId(String traceId) { if (traceId != null && traceId.length() < 32) { return String.format("%32s", traceId).replace(' ', '0'); } return 
traceId; } } ================================================ FILE: jaeger-spark-dependencies-opensearch/src/main/resources/log4j.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Set everything to be logged to the console log4j.rootCategory=WARN, console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err log4j.appender.console.layout=org.apache.log4j.PatternLayout log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n # Settings to quiet third party logs that are too verbose log4j.logger.org.spark-project.jetty=WARN log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO log4j.logger.io.jaegertracing.spark=INFO # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR ================================================ FILE: jaeger-spark-dependencies-opensearch/src/main/resources/log4j2.component.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Disable Log4j status logger console output log4j2.StatusLogger.level = OFF ================================================ FILE: jaeger-spark-dependencies-opensearch/src/main/resources/log4j2.properties ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # # Set root logger level to WARN and use console appender rootLogger.level = WARN rootLogger.appenderRef.console.ref = console # Console appender configuration appender.console.type = Console appender.console.name 
= console appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n # Settings to quiet third party logs that are too verbose logger.jetty.name = org.spark-project.jetty logger.jetty.level = WARN logger.jettyLifecycle.name = org.spark-project.jetty.util.component.AbstractLifeCycle logger.jettyLifecycle.level = ERROR logger.sparkReplTyper.name = org.apache.spark.repl.SparkIMain$exprTyper logger.sparkReplTyper.level = INFO logger.sparkReplInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter logger.sparkReplInterpreter.level = INFO logger.jaegertracing.name = io.jaegertracing.spark logger.jaegertracing.level = INFO # SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support logger.hiveMetastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler logger.hiveMetastore.level = FATAL logger.hiveFunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry logger.hiveFunctionRegistry.level = ERROR ================================================ FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/JaegerOpenSearchEnvironment.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.opensearch; import java.io.IOException; import java.util.Map; import java.util.Optional; import okhttp3.MediaType; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.RequestBody; import okhttp3.Response; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.Network; /** * @author Pavol Loffay * @author Danish Siddiqui */ public class JaegerOpenSearchEnvironment { private OkHttpClient okHttpClient = new OkHttpClient(); Network network; GenericContainer opensearch; GenericContainer jaegerAll; 
/** * Set these in subclasses */ private String queryUrl; private String collectorUrl; public static String opensearchVersion() { String version = System.getProperty("opensearch.version", System.getenv("OPENSEARCH_VERSION")); return version != null ? version : "2.11.1"; } public void start(Map jaegerEnvs, String jaegerVersion, String opensearchVersion) { network = Network.newNetwork(); opensearch = new GenericContainer<>(String.format("opensearchproject/opensearch:%s", opensearchVersion)) .withNetwork(network) .withNetworkAliases("opensearch") .waitingFor(new BoundPortHttpWaitStrategy(9200).forStatusCode(200)) .withExposedPorts(9200) .withEnv("DISABLE_SECURITY_PLUGIN", "true") .withEnv("discovery.type", "single-node") .withEnv("network.bind_host", "opensearch") .withEnv("network.host", "0.0.0.0"); opensearch.start(); jaegerAll = new GenericContainer<>("jaegertracing/jaeger:" + jaegerVersion) .withNetwork(network) .withClasspathResourceMapping("jaeger-v2-config-opensearch.yaml", "/etc/jaeger/config.yaml", org.testcontainers.containers.BindMode.READ_ONLY) .withCommand("--config", "/etc/jaeger/config.yaml") .withEnv(jaegerEnvs) .waitingFor(new BoundPortHttpWaitStrategy(16687) .forStatusCodeMatching(statusCode -> statusCode >= 200 && statusCode < 300)) .withExposedPorts(16687, 16686, 4317, 4318, 14268, 9411); jaegerAll.start(); collectorUrl = String.format("http://%s:%d", jaegerAll.getHost(), jaegerAll.getMappedPort(4317)); queryUrl = String.format("http://%s:%d", jaegerAll.getHost(), jaegerAll.getMappedPort(16686)); } public void cleanUp(String[] spanIndex, String[] dependenciesIndex) throws IOException { String matchAllQuery = "{\"query\": {\"match_all\":{} }}"; Request request = new Request.Builder() .url(String.format("http://%s:%d/%s,%s/_delete_by_query?conflicts=proceed", opensearch.getHost(), opensearch.getMappedPort(9200), // we don't use index prefix spanIndex[0], dependenciesIndex[0])) .post( RequestBody.create(MediaType.parse("application/json; 
charset=utf-8"), matchAllQuery)) .build(); try (Response response = okHttpClient.newCall(request).execute()) { if (!response.isSuccessful()) { String body = response.body().string(); throw new IllegalStateException(String.format("Could not remove data from OS: %s, %s", response, body)); } } } /** * In OpenSearch, the _refresh endpoint is used to make recently indexed, * updated, or deleted documents visible to search, as otherwise they might * be still sitting in a memory buffer. */ public void refresh() throws IOException { Request request = new Request.Builder() .url(String.format("http://%s:%d/_refresh", opensearch.getHost(), opensearch.getMappedPort(9200))) .post(RequestBody.create(MediaType.parse("application/json; charset=utf-8"), "")) .build(); try (Response response = okHttpClient.newCall(request).execute()) { if (!response.isSuccessful()) { String body = response.body().string(); throw new IllegalStateException(String.format("Could not refresh OS: %s, %s", response, body)); } } } public void stop() { Optional.of(jaegerAll).ifPresent(GenericContainer::close); Optional.of(opensearch).ifPresent(GenericContainer::close); Optional.of(network).ifPresent(network1 -> { try { network1.close(); } catch (Exception e) { e.printStackTrace(); } }); } public String getQueryUrl() { return queryUrl; } public String getCollectorUrl() { return collectorUrl; } public String getOpenSearchIPPort() { return String.format("%s:%d", opensearch.getHost(), opensearch.getMappedPort(9200)); } public static class BoundPortHttpWaitStrategy extends org.testcontainers.containers.wait.strategy.HttpWaitStrategy { private final int port; public BoundPortHttpWaitStrategy(int port) { this.port = port; } @Override protected java.util.Set getLivenessCheckPorts() { int mapptedPort = this.waitStrategyTarget.getMappedPort(port); return java.util.Collections.singleton(mapptedPort); } } } ================================================ FILE: 
jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesDockerJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.opensearch;

import io.jaegertracing.spark.dependencies.LogToConsolePrinter;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.utility.DockerImageName;

import java.util.concurrent.TimeUnit;

import static org.awaitility.Awaitility.await;

/**
 * Runs the dependencies job from its Docker image instead of in-process.
 *
 * @author Danish Siddiqui
 */
public class OpenSearchDependenciesDockerJobTest extends OpenSearchDependenciesJobTest {

  /** Reads the image tag from SPARK_DEPENDENCIES_JOB_IMAGE_TAG; fails when it is missing or blank. */
  private static String dependenciesJobTag() {
    String tag = System.getenv("SPARK_DEPENDENCIES_JOB_IMAGE_TAG");
    String trimmed = tag == null ? "" : tag.trim();
    if (trimmed.isEmpty()) {
      throw new IllegalStateException(
          "SPARK_DEPENDENCIES_JOB_IMAGE_TAG environment variable is required but not set. "
              + "This variable must be set to ensure tests use the locally built Docker image.");
    }
    return trimmed;
  }

  /**
   * Refreshes OpenSearch, runs the job container on the environment's network
   * until it exits, fails on a non-zero exit code, and refreshes again.
   */
  @Override
  protected void deriveDependencies() {
    // Create the dependenciesJob instance so that after() method can call
    // indexDate() on it
    String nodes = "http://" + jaegerOpenSearchEnvironment.getOpenSearchIPPort();
    dependenciesJob = OpenSearchDependenciesJob.builder()
        .nodes(nodes)
        .day(java.time.LocalDate.now())
        .build();

    try {
      jaegerOpenSearchEnvironment.refresh();
      // Wait a bit to ensure all spans are fully indexed and visible
      Thread.sleep(2000);
    } catch (java.io.IOException e) {
      throw new RuntimeException("Could not refresh OpenSearch", e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted while waiting", e);
    }

    // Use the same date as the test - format it as ISO-8601 date string for the
    // DATE env var
    String jobDate = java.time.LocalDate.now().toString();

    System.out.println(
        "Running Docker spark-dependencies job with DATE=" + jobDate + ", OS_NODES=http://opensearch:9200");
    System.out.println("::group::🚧 🚧 🚧 OpenSearchDependenciesDockerJob logs");
    try (GenericContainer jobContainer = new GenericContainer<>(
        DockerImageName.parse("ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:" + dependenciesJobTag()))
        .withNetwork(jaegerOpenSearchEnvironment.network)
        .withLogConsumer(new LogToConsolePrinter("[spark-dependencies] "))
        .withEnv("STORAGE", "opensearch")
        .withEnv("OS_NODES", "http://opensearch:9200")
        .withEnv("DATE", jobDate)
        .dependsOn(jaegerOpenSearchEnvironment.opensearch, jaegerOpenSearchEnvironment.jaegerAll)) {
      jobContainer.start();
      await("spark-dependencies-job execution")
          .atMost(3, TimeUnit.MINUTES)
          .until(() -> !jobContainer.isRunning());

      Long exitCode = jobContainer.getCurrentContainerInfo()
          .getState()
          .getExitCodeLong();
      boolean jobFailed = exitCode != null && exitCode != 0;
      if (jobFailed) {
        throw new RuntimeException("Spark dependencies job failed with exit code: " + exitCode);
      }
    } finally {
      System.out.println("::endgroup::");
    }

    try {
      jaegerOpenSearchEnvironment.refresh();
    } catch (java.io.IOException e) {
      throw new RuntimeException("Could not refresh OpenSearch", e);
    }
  }
}

================================================
FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.opensearch;

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.jaegertracing.spark.dependencies.test.DependenciesTest;
import io.jaegertracing.spark.dependencies.test.TracersGenerator;
import java.io.IOException;
import java.time.LocalDate;
import java.util.Collections;
import java.util.HashMap;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.junit.After;
import
org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; /** * @author Pavol Loffay * @author Danish Siddiqui */ public class OpenSearchDependenciesJobTest extends DependenciesTest { protected OpenSearchDependenciesJob dependenciesJob; static JaegerOpenSearchEnvironment jaegerOpenSearchEnvironment; @BeforeClass public static void beforeClass() { jaegerOpenSearchEnvironment = new JaegerOpenSearchEnvironment(); jaegerOpenSearchEnvironment.start(new HashMap<>(), jaegerVersion(), JaegerOpenSearchEnvironment.opensearchVersion()); collectorUrl = jaegerOpenSearchEnvironment.getCollectorUrl(); queryUrl = jaegerOpenSearchEnvironment.getQueryUrl(); } @Before public void before() throws Exception { String serviceName = UUID.randomUUID().toString(); String operationName = UUID.randomUUID().toString(); TracersGenerator.Tuple tuple = TracersGenerator.createJaeger(serviceName, collectorUrl); Tracer initStorageTracer = tuple.getA(); Span span = initStorageTracer.spanBuilder(operationName).startSpan(); span.setAttribute("foo", "bar"); span.end(); tuple.getB().flush(); try { // Give extra time for spans to be exported and indexed TimeUnit.SECONDS.sleep(2); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } waitJaegerQueryContains(serviceName, "foo"); } @After public void after() throws IOException { if (dependenciesJob != null) { jaegerOpenSearchEnvironment.cleanUp(dependenciesJob.indexDate("jaeger-span"), dependenciesJob.indexDate("jaeger-dependencies")); } } @AfterClass public static void afterClass() { jaegerOpenSearchEnvironment.stop(); } @Override protected void deriveDependencies() { dependenciesJob = OpenSearchDependenciesJob.builder() .nodes("http://" + jaegerOpenSearchEnvironment.getOpenSearchIPPort()) .day(LocalDate.now()) .build(); try { jaegerOpenSearchEnvironment.refresh(); } catch (IOException e) { throw new RuntimeException("Could not refresh OpenSearch", e); 
} dependenciesJob.run("peer.service"); try { jaegerOpenSearchEnvironment.refresh(); } catch (IOException e) { throw new RuntimeException("Could not refresh OpenSearch", e); } } @Override protected void waitBetweenTraces() throws InterruptedException { try { jaegerOpenSearchEnvironment.refresh(); } catch (IOException e) { throw new RuntimeException("Could not refresh OpenSearch", e); } } public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy { private final int port; public BoundPortHttpWaitStrategy(int port) { this.port = port; } @Override protected Set getLivenessCheckPorts() { int mapptedPort = this.waitStrategyTarget.getMappedPort(port); return Collections.singleton(mapptedPort); } } } ================================================ FILE: jaeger-spark-dependencies-opensearch/src/test/resources/jaeger-v2-config-opensearch.yaml ================================================ # # Copyright (c) The Jaeger Authors # SPDX-License-Identifier: Apache-2.0 # service: extensions: [jaeger_storage, jaeger_query, healthcheckv2] pipelines: traces: receivers: [otlp, jaeger, zipkin] processors: [filter/jaeger, batch] exporters: [jaeger_storage_exporter] telemetry: resource: service.name: jaeger-backend metrics: level: detailed readers: - pull: exporter: prometheus: host: 0.0.0.0 port: 8888 logs: level: info traces: level: none extensions: healthcheckv2: use_v2: true http: endpoint: "0.0.0.0:16687" status: enabled: true path: "/" jaeger_query: storage: traces: some_storage jaeger_storage: backends: some_storage: elasticsearch: server_urls: - http://opensearch:9200 service_cache_ttl: 1s receivers: otlp: protocols: grpc: endpoint: "0.0.0.0:4317" http: endpoint: "0.0.0.0:4318" jaeger: protocols: grpc: thrift_binary: thrift_compact: thrift_http: endpoint: "0.0.0.0:14268" zipkin: endpoint: "0.0.0.0:9411" processors: filter/jaeger: error_mode: ignore traces: span: - 'resource.attributes["service.name"] == "jaeger"' batch: exporters: jaeger_storage_exporter: 
trace_storage: some_storage ================================================ FILE: jaeger-spark-dependencies-test/pom.xml ================================================ 4.0.0 jaeger-spark-dependencies-parent io.jaegertracing.dependencies 0.0.1-SNAPSHOT jaeger-spark-dependencies-test junit junit ${version.junit} org.testcontainers testcontainers io.opentelemetry opentelemetry-api ${version.io.opentelemetry} io.opentelemetry opentelemetry-sdk ${version.io.opentelemetry} io.opentelemetry opentelemetry-exporter-otlp ${version.io.opentelemetry} io.opentelemetry.semconv opentelemetry-semconv 1.37.0 io.opentracing opentracing-mock ${version.io.opentracing} test com.squareup.okhttp3 okhttp ${version.com.squareup.okhttp3-okhttp} org.awaitility awaitility ${version.org.awaitility-awaitility} ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/LogToConsolePrinter.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies; import org.testcontainers.containers.output.OutputFrame; import java.util.function.Consumer; public final class LogToConsolePrinter implements Consumer { private final String prefix; public LogToConsolePrinter(String prefix) { this.prefix = prefix; } @Override public void accept(OutputFrame outputFrame) { String message = outputFrame.getUtf8String(); if (message != null && !message.isEmpty()) { System.out.print(prefix); System.out.print(message); } } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/DependenciesTest.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test; import static org.awaitility.Awaitility.await; import static 
org.junit.Assert.assertEquals; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import io.opentelemetry.api.trace.Span; import io.opentelemetry.api.trace.Tracer; import io.jaegertracing.spark.dependencies.test.TracersGenerator.Flushable; import io.jaegertracing.spark.dependencies.test.TracersGenerator.Tuple; import io.jaegertracing.spark.dependencies.test.rest.DependencyLink; import io.jaegertracing.spark.dependencies.test.rest.JsonHelper; import io.jaegertracing.spark.dependencies.test.rest.RestResult; import io.jaegertracing.spark.dependencies.test.tree.Node; import io.jaegertracing.spark.dependencies.test.tree.TracingWrapper.OpenTelemetryWrapper; import io.jaegertracing.spark.dependencies.test.tree.Traversals; import io.jaegertracing.spark.dependencies.test.tree.TreeGenerator; import java.io.IOException; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import java.util.concurrent.TimeUnit; import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; import org.junit.Test; /** * @author Pavol Loffay */ public abstract class DependenciesTest { protected OkHttpClient okHttpClient = new OkHttpClient.Builder().build(); protected ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper()); /** * Set these in subclasses */ protected static String queryUrl; protected static String collectorUrl; public static String jaegerVersion() { String jaegerVersion = System.getProperty("jaeger.version", System.getenv("JAEGER_VERSION")); return jaegerVersion != null ? 
jaegerVersion : "latest"; } /** * Override this and run spark job */ protected abstract void deriveDependencies() throws Exception; /** * Wait between submitting different traces */ protected abstract void waitBetweenTraces() throws InterruptedException; @Test public void testJaegerOneTrace() throws Exception { System.out.println("=== Starting testJaegerOneTrace ==="); System.out.println("Generating Jaeger trace tree with 5 tracers, 50 nodes, depth 3..."); TreeGenerator treeGenerator = new TreeGenerator( TracersGenerator.generateJaeger(5, collectorUrl)); Node root = treeGenerator.generateTree(50, 3); System.out.println("Trace tree generated. Root service: " + root.getServiceName() + ", operation: " + root.getTracingWrapper().operationName()); System.out.println("Finishing spans..."); Traversals.postOrder(root, (node, parent) -> node.getTracingWrapper().get().getSpan().end()); waitBetweenTraces(); System.out.println("Flushing and closing tracers..."); treeGenerator.getTracers().forEach(tracer -> { tracer.flushable().flush(); }); // Give extra time for spans to be exported and indexed TimeUnit.SECONDS.sleep(2); System.out.println("Waiting for traces to appear in Jaeger Query..."); waitJaegerQueryContains(root.getServiceName(), root.getTracingWrapper().operationName()); System.out.println("Traces found in Jaeger Query"); System.out.println("Deriving dependencies..."); deriveDependencies(); System.out.println("Dependencies derived, asserting results..."); assertDependencies(DependencyLinkDerivator.serviceDependencies(root)); System.out.println("=== testJaegerOneTrace completed successfully ==="); } @Test public void testJaegerMultipleTraces() throws Exception { System.out.println("=== Starting testJaegerMultipleTraces ==="); System.out.println("Generating 20 Jaeger trace trees with 50 tracers each..."); TreeGenerator treeGenerator = new TreeGenerator( TracersGenerator.generateJaeger(50, collectorUrl)); Map> expectedDependencies = new LinkedHashMap<>(); for (int i = 0; 
i < 20; i++) { System.out.println("Generating trace " + (i + 1) + "/20..."); Node root = treeGenerator.generateTree(50, 15); DependencyLinkDerivator.serviceDependencies(root, expectedDependencies); Traversals.postOrder(root, (node, parent) -> node.getTracingWrapper().get().getSpan().end()); waitBetweenTraces(); waitJaegerQueryContains(root.getServiceName(), root.getTracingWrapper().operationName()); } System.out.println("All 20 traces generated and verified"); System.out.println("Flushing and closing tracers..."); // flush and wait for reported data treeGenerator.getTracers().forEach(tracer -> tracer.flushable().flush()); // Give extra time for spans to be exported and indexed TimeUnit.SECONDS.sleep(2); System.out.println("Deriving dependencies..."); deriveDependencies(); System.out.println("Dependencies derived, asserting results..."); assertDependencies(expectedDependencies); System.out.println("=== testJaegerMultipleTraces completed successfully ==="); } @Test public void testMultipleReferences() throws Exception { System.out.println("=== Starting testMultipleReferences ==="); System.out.println("Creating tracers for services S1, S2, S3..."); Tuple s1Tuple = TracersGenerator.createJaeger("S1", collectorUrl); Tuple s2Tuple = TracersGenerator.createJaeger("S2", collectorUrl); Tuple s3Tuple = TracersGenerator.createJaeger("S3", collectorUrl); System.out.println("Creating spans with multiple references..."); // Note: OpenTelemetry doesn't support FOLLOWS_FROM references like OpenTracing did. // In OpenTelemetry, a span can only have one parent. Both s2Span and s3Span will have // s1Span as their parent, creating S1->S2 and S1->S3 dependencies. 
Span s1Span = s1Tuple.getA().spanBuilder("foo").startSpan(); Span s2Span = s2Tuple.getA().spanBuilder("bar") .setParent(io.opentelemetry.context.Context.current().with(s1Span)) .startSpan(); Span s3Span = s3Tuple.getA().spanBuilder("baz") .setParent(io.opentelemetry.context.Context.current().with(s1Span)) .startSpan(); System.out.println("Finishing and flushing spans..."); s1Span.end(); s2Span.end(); s3Span.end(); s1Tuple.getB().flush(); s2Tuple.getB().flush(); s3Tuple.getB().flush(); // Give extra time for spans to be exported and indexed TimeUnit.SECONDS.sleep(2); System.out.println("Waiting for traces to appear in Jaeger Query..."); waitJaegerQueryContains("S1", "foo"); waitJaegerQueryContains("S2", "bar"); waitJaegerQueryContains("S3", "baz"); System.out.println("All traces found in Jaeger Query"); System.out.println("Deriving dependencies..."); deriveDependencies(); Map> expectedDependencies = new HashMap<>(); Map s1Descendants = new HashMap<>(); s1Descendants.put("S2", 1L); s1Descendants.put("S3", 1L); expectedDependencies.put("S1", s1Descendants); System.out.println("Dependencies derived, asserting results..."); assertDependencies(expectedDependencies); System.out.println("=== testMultipleReferences completed successfully ==="); } protected void assertDependencies(Map> expectedDependencies) throws IOException { Request request = new Request.Builder() .url(queryUrl + "/api/dependencies?endTs=" + System.currentTimeMillis()) .get() .build(); try (Response response = okHttpClient.newCall(request).execute()) { assertEquals(200, response.code()); RestResult restResult = objectMapper.readValue(response.body().string(), new TypeReference>() {}); assertEquals(null, restResult.getErrors()); assertEquals(expectedDependencies, DependencyLinkDerivator.serviceDependencies(restResult.getData())); } } protected void waitJaegerQueryContains(String service, String spanContainsThis) { String url = String.format("%s/api/traces?service=%s", queryUrl, service); 
System.out.println("Waiting for trace in Jaeger Query. Service: " + service + ", looking for: " + spanContainsThis); System.out.println("Query URL: " + url); Request request = new Request.Builder() .url(url) .get() .build(); await() .pollInterval(1, TimeUnit.SECONDS) .atMost(30, TimeUnit.SECONDS) .until(() -> { try(Response response = okHttpClient.newCall(request).execute()) { String responseBody = response.body().string(); int statusCode = response.code(); boolean contains = responseBody.contains(spanContainsThis); if (!contains) { // Log the response when condition is not met to help with debugging System.out.println("Trace not found yet. Status code: " + statusCode); System.out.println("Response body preview (first 500 chars): " + (responseBody.length() > 500 ? responseBody.substring(0, 500) + "..." : responseBody)); } return contains; } }); System.out.println("Trace found for service: " + service); } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/DependencyLinkDerivator.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test; import io.jaegertracing.spark.dependencies.test.rest.DependencyLink; import io.jaegertracing.spark.dependencies.test.tree.Node; import io.jaegertracing.spark.dependencies.test.tree.TracingWrapper; import io.jaegertracing.spark.dependencies.test.tree.Traversals; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; /** * @author Pavol Loffay */ public class DependencyLinkDerivator { public static Map> serviceDependencies(Node root) { return serviceDependencies(root, new LinkedHashMap<>()); } public static Map> serviceDependencies(Node root, Map> dependenciesMap) { Traversals.postOrder(root, (Node child, Node parent) -> { if (parent != null) { Map childMap = 
dependenciesMap.get(parent.getServiceName()); if (childMap == null) { childMap = new LinkedHashMap<>(); dependenciesMap.put(parent.getServiceName(), childMap); } Long callCount = childMap.get(child.getServiceName()); if (callCount == null) { callCount = 0L; } childMap.put(child.getServiceName(), ++callCount); } }); return dependenciesMap; } public static Map> serviceDependencies(List dependencyLinks) { Map> parentDependencyMap = new LinkedHashMap<>(); dependencyLinks.forEach(dependencyLink -> { Map childCallCountMap = parentDependencyMap.get(dependencyLink.getParent()); if (childCallCountMap == null) { childCallCountMap = new LinkedHashMap<>(); parentDependencyMap.put(dependencyLink.getParent(), childCallCountMap); } childCallCountMap.put(dependencyLink.getChild(), dependencyLink.getCallCount()); }); return parentDependencyMap; } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/TracersGenerator.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.Tracer; import io.opentelemetry.exporter.otlp.trace.OtlpGrpcSpanExporter; import io.opentelemetry.sdk.OpenTelemetrySdk; import io.opentelemetry.sdk.resources.Resource; import io.opentelemetry.sdk.trace.SdkTracerProvider; import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; import io.opentelemetry.semconv.ServiceAttributes; import io.jaegertracing.spark.dependencies.test.tree.TracingWrapper; import io.jaegertracing.spark.dependencies.test.tree.TracingWrapper.OpenTelemetryWrapper; import java.util.ArrayList; import java.util.List; import java.util.UUID; import java.util.concurrent.TimeUnit; /** * @author Pavol Loffay */ public class TracersGenerator { public static class Tuple { private final A a; private final B b; 
Tuple(A a, B b) { this.a = a; this.b = b; } public A getA() { return a; } public B getB() { return b; } } public interface Flushable { void flush(); } public static class TracerHolder { private T tracer; private String serviceName; private Flushable flushable; TracerHolder(T tracer, String serviceName, Flushable flushable) { this.tracer = tracer; this.serviceName = serviceName; this.flushable = flushable; } public T getTracer() { return tracer; } public TracingWrapper tracingWrapper() { return new OpenTelemetryWrapper((Tracer)tracer, serviceName); } public Flushable flushable() { return flushable; } } public static List> generateJaeger(int number, String collectorUrl) { List> tracers = new ArrayList<>(number); for (int i = 0; i < number; i++) { String serviceName = serviceName(); Tuple jaegerTracer = createJaeger(serviceName, collectorUrl); tracers.add(new TracerHolder<>(jaegerTracer.getA(), serviceName, jaegerTracer.getB())); } return tracers; } public static Tuple createJaeger(String serviceName, String collectorUrl) { // Parse collectorUrl to extract host and port for OTLP gRPC // collectorUrl is in format "http://host:port" String host = "localhost"; int port = 4317; // default try { // Parse the URL to extract host and port String urlStr = collectorUrl; // Remove scheme if (urlStr.startsWith("http://")) { urlStr = urlStr.substring(7); } else if (urlStr.startsWith("https://")) { urlStr = urlStr.substring(8); } // Remove path if present int slashIndex = urlStr.indexOf('/'); if (slashIndex > 0) { urlStr = urlStr.substring(0, slashIndex); } // Extract host and port int colonIndex = urlStr.lastIndexOf(':'); if (colonIndex > 0) { host = urlStr.substring(0, colonIndex); port = Integer.parseInt(urlStr.substring(colonIndex + 1)); } else { host = urlStr; } } catch (Exception e) { System.err.println("[ERROR TracersGenerator] Failed to parse collectorUrl: " + collectorUrl + ", error: " + e.getMessage()); } // Reconstruct endpoint in the format expected by gRPC exporter 
String otlpEndpoint = "http://" + host + ":" + port; Resource resource = Resource.getDefault() .merge(Resource.builder() .put(ServiceAttributes.SERVICE_NAME, serviceName) .build()); // For gRPC, the endpoint should include the scheme (http:// or https://) OtlpGrpcSpanExporter spanExporter = OtlpGrpcSpanExporter.builder() .setEndpoint(otlpEndpoint) .setTimeout(10, TimeUnit.SECONDS) .build(); SdkTracerProvider sdkTracerProvider = SdkTracerProvider.builder() .addSpanProcessor(BatchSpanProcessor.builder(spanExporter) .setMaxQueueSize(100000) .setScheduleDelay(1, TimeUnit.MILLISECONDS) .build()) .setResource(resource) .build(); OpenTelemetry openTelemetry = OpenTelemetrySdk.builder() .setTracerProvider(sdkTracerProvider) .build(); Tracer tracer = openTelemetry.getTracer(serviceName); return new Tuple<>(tracer, () -> { try { // Force flush to ensure all spans are exported sdkTracerProvider.forceFlush().join(10, TimeUnit.SECONDS); // Shutdown to ensure proper cleanup sdkTracerProvider.shutdown().join(10, TimeUnit.SECONDS); } catch (Exception ex) { throw new IllegalStateException("Failed to flush and shutdown tracer provider", ex); } }); } private static String serviceName() { return UUID.randomUUID().toString().replace("-", ""); } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/DependencyLink.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.rest; /** * @author Pavol Loffay */ public class DependencyLink { private String parent; private String child; private long callCount; // for jackson public DependencyLink() {} public DependencyLink(String parent, String child, long callCount) { this.parent = parent; this.child = child; this.callCount = callCount; } public String getParent() { return parent; } public void setParent(String parent) { 
this.parent = parent; } public String getChild() { return child; } public void setChild(String child) { this.child = child; } public long getCallCount() { return callCount; } public void setCallCount(long callCount) { this.callCount = callCount; } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/JsonHelper.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.rest; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; /** * @author Pavol Loffay */ public class JsonHelper { private JsonHelper() {} public static ObjectMapper configure(ObjectMapper objectMapper) { objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); return objectMapper; } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/RestResult.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.rest; import java.util.List; /** * @author Pavol Loffay */ public class RestResult { private List data; private List errors; public List getData() { return data; } public void setData(List data) { this.data = data; } public List getErrors() { return errors; } public void setErrors(List errors) { this.errors = errors; } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/Node.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.tree; import java.util.ArrayList; 
import java.util.Collections; import java.util.List; /** * Node in tree with N descendants. Node encapsulates * {@link TracingWrapper} which holds tracing information e.g span/tracer. * * @author Pavol Loffay */ public class Node { private List> descendants = new ArrayList<>(); private TracingWrapper tracingWrapper; public Node(TracingWrapper tracingWrapper, Node parent) { this.tracingWrapper = tracingWrapper; if (parent != null) { tracingWrapper.createChildSpan(parent.getTracingWrapper()); parent.addDescendant(this); } else { tracingWrapper.createChildSpan(null); } } public TracingWrapper getTracingWrapper() { return tracingWrapper; } private void addDescendant(Node descendant) { this.descendants.add(descendant); } public List getDescendants() { return Collections.unmodifiableList(descendants); } public String getServiceName() { return tracingWrapper.serviceName(); } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/TracingWrapper.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.tree; import io.opentelemetry.api.trace.Span; import io.opentelemetry.api.trace.Tracer; import io.opentelemetry.context.Context; import java.util.UUID; /** * Encapsulates tracing information about one node(service) in the graph. * It allows to create one span for this node. Caller is responsible to * call {@link #createChildSpan(TracingWrapper)} and finish the span. The * parent parameter in createChildSpan should be of the same type as the * implementing wrapper. 
* * @author Pavol Loffay */ public interface TracingWrapper { T get(); String serviceName(); String operationName(); void createChildSpan(TracingWrapper parent); class OpenTelemetryWrapper implements TracingWrapper { private final Tracer tracer; private final String serviceName; private Span span; private String operationName; public OpenTelemetryWrapper(Tracer tracer, String serviceName) { this.tracer = tracer; this.serviceName = serviceName; } @Override public OpenTelemetryWrapper get() { return this; } @Override public String serviceName() { return serviceName; } @Override public String operationName() { return operationName; } @Override public void createChildSpan(TracingWrapper parent) { operationName = UUID.randomUUID().toString().replace("-", ""); if (parent != null && parent.get().span != null) { Context parentContext = Context.current().with(parent.get().span); span = tracer.spanBuilder(operationName) .setParent(parentContext) .startSpan(); } else { span = tracer.spanBuilder(operationName) .setNoParent() .startSpan(); } } public Span getSpan() { return span; } } } ================================================ FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/Traversals.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.tree; import java.util.function.BiConsumer; /** * @author Pavol Loffay */ public class Traversals { /** * Traverse tree postOrder * * @param root root node * @param fce */ public static void postOrder(Node root, BiConsumer, Node> fce) { postOrder(null, root, fce); } /** * @param node node * @param fce */ private static void postOrder(Node parent, Node node, BiConsumer, Node> fce) { for (Node descendant : node.getDescendants()) { postOrder(node, descendant, fce); } fce.accept(node, parent); } } ================================================ FILE: 
jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/TreeGenerator.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test.tree; import io.jaegertracing.spark.dependencies.test.TracersGenerator.TracerHolder; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Queue; import java.util.Random; /** * @author Pavol Loffay */ public class TreeGenerator { private Random tracersRandom = new Random(); private List> tracers; public TreeGenerator(List> tracers) { if (tracers == null || tracers.isEmpty()) { throw new IllegalArgumentException(); } this.tracers = new ArrayList<>(tracers); } public Node generateTree(int numOfNodes, int maxNumberOfDescendants) { if (numOfNodes <= 0 || maxNumberOfDescendants == 0) { throw new IllegalArgumentException(); } Node root = new Node(tracers.get(0).tracingWrapper(), null); generateDescendants(new LinkedList<>(Collections.singletonList(root)), numOfNodes - 1, maxNumberOfDescendants); return root; } private void generateDescendants(Queue queue, int numOfNodes, final int maxNumberOfDescendants) { if (numOfNodes <= 0) { return; } Node parent = queue.poll(); if (parent == null) { return; } for (int i = 0; i < maxNumberOfDescendants; i++) { Node descendant = new Node(tracers.get(tracersRandom.nextInt(tracers.size())).tracingWrapper(), parent); queue.add(descendant); if (--numOfNodes <= 0) { return; } } generateDescendants(queue, numOfNodes, maxNumberOfDescendants); } public List> getTracers() { return Collections.unmodifiableList(tracers); } } ================================================ FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/DependencyLinksDerivatorTest.java ================================================ /** * Copyright (c) The Jaeger Authors * 
SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test; import static org.junit.Assert.assertEquals; import io.jaegertracing.spark.dependencies.test.rest.DependencyLink; import io.jaegertracing.spark.dependencies.test.tree.Node; import io.opentracing.mock.MockTracer; import java.util.Arrays; import java.util.Map; import org.junit.Test; /** * @author Pavol Loffay */ public class DependencyLinksDerivatorTest { @Test public void testRootToMap() { Node root = new Node<>(new MockTracingWrapper(new MockTracer(), "foo"), null); new Node<>(new MockTracingWrapper(new MockTracer(), "child1"), root); new Node<>(new MockTracingWrapper(new MockTracer(), "child1"), root); new Node<>(new MockTracingWrapper(new MockTracer(), "child2"), root); Node child3 = new Node<>(new MockTracingWrapper(new MockTracer(), "child3"), root); Node child33 = new Node<>(new MockTracingWrapper(new MockTracer(), "child33"), child3); new Node<>(new MockTracingWrapper(new MockTracer(), "child333"), child33); Map> depLinks = DependencyLinkDerivator.serviceDependencies(root); // 3 parents assertEquals(3, depLinks.size()); assertEquals(3, depLinks.get("foo").size()); assertEquals(1, depLinks.get("child3").size()); assertEquals(1, depLinks.get("child33").size()); assertEquals(Long.valueOf(2), depLinks.get("foo").get("child1")); assertEquals(Long.valueOf(1), depLinks.get("foo").get("child2")); assertEquals(Long.valueOf(1), depLinks.get("foo").get("child3")); assertEquals(Long.valueOf(1), depLinks.get("child3").get("child33")); assertEquals(Long.valueOf(1), depLinks.get("child33").get("child333")); } @Test public void testDepLinkToMap() { DependencyLink rootChild = new DependencyLink("root", "child", 3); DependencyLink childRoot = new DependencyLink("child", "root", 2); DependencyLink childChild2 = new DependencyLink("child", "child2", 6); Map> depLinks = DependencyLinkDerivator.serviceDependencies( Arrays.asList(rootChild, childRoot, childChild2)); assertEquals(2, 
depLinks.size()); assertEquals(1, depLinks.get("root").size()); assertEquals(2, depLinks.get("child").size()); assertEquals(Long.valueOf(3), depLinks.get("root").get("child")); assertEquals(Long.valueOf(2), depLinks.get("child").get("root")); assertEquals(Long.valueOf(6), depLinks.get("child").get("child2")); } } ================================================ FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/MockTracingWrapper.java ================================================ /** * Copyright (c) The Jaeger Authors * SPDX-License-Identifier: Apache-2.0 */ package io.jaegertracing.spark.dependencies.test; import io.jaegertracing.spark.dependencies.test.tree.TracingWrapper; import io.opentracing.mock.MockSpan; import io.opentracing.mock.MockTracer; import io.opentracing.mock.MockTracer.SpanBuilder; /** * @author Pavol Loffay */ public class MockTracingWrapper implements TracingWrapper { private String serviceName; private MockTracer tracer; private MockSpan span; public MockTracingWrapper(MockTracer mockTracer, String serviceName) { this.serviceName = serviceName; this.tracer = mockTracer; } @Override public MockTracingWrapper get() { return this; } @Override public String serviceName() { return serviceName; } @Override public String operationName() { return span != null ? span.operationName() : null; } @Override public void createChildSpan(TracingWrapper parent) { SpanBuilder spanBuilder = tracer.buildSpan(parent == null ? 
"|" : parent.get().operationName() + "->");
    if (parent != null) {
      spanBuilder.asChildOf(parent.get().span);
    }
    span = spanBuilder.start();
  }
}

================================================
FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/rest/DeserializationTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.test.rest;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import org.junit.Assert;
import org.junit.Test;

/**
 * Checks that a REST response envelope carrying dependency links can be read with the
 * {@link ObjectMapper} configured by {@code JsonHelper.configure}.
 *
 * @author Pavol Loffay
 */
public class DeserializationTest {
  private final ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper());

  /** Parses a one-element response and verifies every field of the decoded link. */
  @Test
  public void testDependencyLinkDeserialization() throws IOException {
    String json = "{\"data\":[{\"parent\":\"service1\",\"child\":\"service2\",\"callCount\":1}],\"total\":0,"
        + "\"limit\":0,\"offset\":0,\"errors\":null}";
    // The anonymous TypeReference subclass keeps the element type available to Jackson at runtime.
    RestResult<DependencyLink> restResult =
        objectMapper.readValue(json, new TypeReference<RestResult<DependencyLink>>() {});
    Assert.assertNull(restResult.getErrors());
    Assert.assertEquals(1, restResult.getData().size());
    Assert.assertEquals("service1", restResult.getData().get(0).getParent());
    Assert.assertEquals("service2", restResult.getData().get(0).getChild());
    Assert.assertEquals(1L, restResult.getData().get(0).getCallCount());
  }
}

================================================
FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/tree/TraversalsTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.test.tree;

import static org.junit.Assert.assertEquals;

import io.jaegertracing.spark.dependencies.test.MockTracingWrapper;
import io.opentracing.mock.MockTracer;
import java.util.ArrayList;
import
java.util.Arrays;
import java.util.List;
import org.junit.Test;

/**
 * Verifies the visiting order of {@code Traversals.postOrder} and the parent passed
 * to the callback for each visited node.
 *
 * @author Pavol Loffay
 */
public class TraversalsTest {

  /**
   * NOTE: despite the method name, the traversal under test is {@code postOrder}:
   * children are expected before their parent and the root comes last.
   */
  @Test
  public void testInorder() {
    Node<MockTracingWrapper> root = new Node<>(new MockTracingWrapper(new MockTracer(), "foo"), null);
    Node<MockTracingWrapper> child1 = new Node<>(new MockTracingWrapper(new MockTracer(), "child1"), root);
    Node<MockTracingWrapper> child2 = new Node<>(new MockTracingWrapper(new MockTracer(), "child2"), root);
    Node<MockTracingWrapper> child3 = new Node<>(new MockTracingWrapper(new MockTracer(), "child3"), root);

    // One-level tree: every non-null parent handed to the callback must be the root.
    List<Node<MockTracingWrapper>> nodes = new ArrayList<>();
    Traversals.postOrder(root, (node, parent) -> {
      if (parent != null) {
        assertEquals(root, parent);
      } else {
        assertEquals(null, parent);
      }
      nodes.add(node);
    });
    assertEquals(new ArrayList<>(Arrays.asList(child1, child2, child3, root)), nodes);

    // Deepen the tree under child3 and record both the visited nodes and their parents.
    Node<MockTracingWrapper> child33 = new Node<>(new MockTracingWrapper(new MockTracer(), "child33"), child3);
    Node<MockTracingWrapper> child333 = new Node<>(new MockTracingWrapper(new MockTracer(), "child333"), child33);

    List<Node<MockTracingWrapper>> nodes2 = new ArrayList<>();
    List<Node<MockTracingWrapper>> parents2 = new ArrayList<>();
    Traversals.postOrder(root, (node, parent) -> {
      nodes2.add(node);
      parents2.add(parent);
    });
    assertEquals(new ArrayList<>(Arrays.asList(child1, child2, child333, child33, child3, root)), nodes2);
    assertEquals(new ArrayList<>(Arrays.asList(root, root, child33, child3, root, null)), parents2);
  }
}

================================================
FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/tree/TreeGeneratorTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.test.tree;

import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

import io.jaegertracing.spark.dependencies.test.TracersGenerator;
import io.jaegertracing.spark.dependencies.test.tree.TracingWrapper.OpenTelemetryWrapper;
import java.util.ArrayList;
import
java.util.List;
import org.junit.Test;

/**
 * Tests for {@code TreeGenerator.generateTree}.
 *
 * @author Pavol Loffay
 */
public class TreeGeneratorTest {

  /** A tree generated with a node count of 1 has a root with a service name, span and operation name, and no descendants. */
  @Test
  public void testGenerateOne() {
    Node<OpenTelemetryWrapper> root = new TreeGenerator(TracersGenerator.generateJaeger(1, "http://localhost"))
        .generateTree(1, 3);
    assertEquals(0, root.getDescendants().size());
    assertNotNull(root.getServiceName());
    assertNotNull(root.getTracingWrapper().get().getSpan());
    assertNotNull(root.getTracingWrapper().get().operationName());
  }

  /**
   * Generates a 16-node tree and checks that no node has more than 3 descendants.
   * NOTE(review): the second generateTree argument is presumably the maximum number of
   * descendants per node (3 here, not 1 as the method name suggests) - confirm against TreeGenerator.
   */
  @Test
  public void testBranchingFactorOne() {
    Node<OpenTelemetryWrapper> root = new TreeGenerator(TracersGenerator.generateJaeger(1, "http://localhost"))
        .generateTree(16, 3);
    List<Node<OpenTelemetryWrapper>> nodes = new ArrayList<>();
    Traversals.postOrder(root, (jaegerWrapperNode, jaegerWrapperNode2) -> {
      assertTrue(jaegerWrapperNode.getDescendants().size() <= 3);
      nodes.add(jaegerWrapperNode);
    });
    assertEquals(16, nodes.size());
  }
}

================================================
FILE: mvnw
================================================
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------

# ----------------------------------------------------------------------------
# Maven2 Start Up Batch script
#
# Required ENV vars:
# ------------------
# JAVA_HOME - location of a JDK home dir
#
# Optional ENV vars
# -----------------
# M2_HOME - location of maven2's installed home dir
# MAVEN_OPTS - parameters passed to the Java VM when running Maven
# e.g. to debug Maven itself, use
# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
# ----------------------------------------------------------------------------

# Source the system-wide and per-user rc files unless MAVEN_SKIP_RC is set.
if [ -z "$MAVEN_SKIP_RC" ] ; then

  if [ -f /etc/mavenrc ] ; then
    . /etc/mavenrc
  fi

  if [ -f "$HOME/.mavenrc" ] ; then
    . "$HOME/.mavenrc"
  fi

fi

# OS specific support. $var _must_ be set to either true or false.
cygwin=false;
darwin=false;
mingw=false
case "`uname`" in
  CYGWIN*) cygwin=true ;;
  MINGW*) mingw=true;;
  Darwin*) darwin=true
    #
    # Look for the Apple JDKs first to preserve the existing behaviour, and then look
    # for the new JDKs provided by Oracle.
    #
    if [ -z "$JAVA_HOME" ] && [ -L /System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK ] ; then
      #
      # Apple JDKs
      #
      export JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/CurrentJDK/Home
    fi

    if [ -z "$JAVA_HOME" ] && [ -L /System/Library/Java/JavaVirtualMachines/CurrentJDK ] ; then
      #
      # Apple JDKs
      #
      export JAVA_HOME=/System/Library/Java/JavaVirtualMachines/CurrentJDK/Contents/Home
    fi

    if [ -z "$JAVA_HOME" ] && [ -L "/Library/Java/JavaVirtualMachines/CurrentJDK" ] ; then
      #
      # Oracle JDKs
      #
      export JAVA_HOME=/Library/Java/JavaVirtualMachines/CurrentJDK/Contents/Home
    fi

    if [ -z "$JAVA_HOME" ] && [ -x "/usr/libexec/java_home" ]; then
      #
      # Apple JDKs
      #
      export JAVA_HOME=`/usr/libexec/java_home`
    fi
    ;;
esac

if [ -z "$JAVA_HOME" ] ; then
  if [ -r /etc/gentoo-release ] ; then
    JAVA_HOME=`java-config --jre-home`
  fi
fi

if [ -z "$M2_HOME" ] ; then
  ## resolve links - $0 may be a link to maven's home
  PRG="$0"

  # need this for relative symlinks
  while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
      PRG="$link"
    else
      PRG="`dirname "$PRG"`/$link"
    fi
  done

  saveddir=`pwd`

  M2_HOME=`dirname "$PRG"`/..

  # make it fully qualified
  M2_HOME=`cd "$M2_HOME" && pwd`

  cd "$saveddir"
  # echo Using m2 at $M2_HOME
fi

# For Cygwin, ensure paths are in UNIX format before anything is touched
if $cygwin ; then
  [ -n "$M2_HOME" ] && M2_HOME=`cygpath --unix "$M2_HOME"`
  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
fi

# For Mingw, ensure paths are in UNIX format before anything is touched
if $mingw ; then
  [ -n "$M2_HOME" ] && M2_HOME="`(cd "$M2_HOME"; pwd)`"
  [ -n "$JAVA_HOME" ] && JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
  # TODO classpath?
fi

# With no JAVA_HOME, derive it from the location of javac found on PATH.
if [ -z "$JAVA_HOME" ]; then
  javaExecutable="`which javac`"
  if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
    # readlink(1) is not available as standard on Solaris 10.
    readLink=`which readlink`
    if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
      if $darwin ; then
        javaHome="`dirname \"$javaExecutable\"`"
        javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
      else
        javaExecutable="`readlink -f \"$javaExecutable\"`"
      fi
      javaHome="`dirname \"$javaExecutable\"`"
      javaHome=`expr "$javaHome" : '\(.*\)/bin'`
      JAVA_HOME="$javaHome"
      export JAVA_HOME
    fi
  fi
fi

if [ -z "$JAVACMD" ] ; then
  if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
      # IBM's JDK on AIX uses strange locations for the executables
      JAVACMD="$JAVA_HOME/jre/sh/java"
    else
      JAVACMD="$JAVA_HOME/bin/java"
    fi
  else
    JAVACMD="`which java`"
  fi
fi

if [ ! -x "$JAVACMD" ] ; then
  echo "Error: JAVA_HOME is not defined correctly." >&2
  echo " We cannot execute $JAVACMD" >&2
  exit 1
fi

if [ -z "$JAVA_HOME" ] ; then
  echo "Warning: JAVA_HOME environment variable is not set."
fi

CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher

# traverses directory structure from process work directory to filesystem root
# first directory with .mvn subdirectory is considered project base directory
find_maven_basedir() {
  local basedir=$(pwd)
  local wdir=$(pwd)
  while [ "$wdir" != '/' ] ; do
    if [ -d "$wdir"/.mvn ] ; then
      basedir=$wdir
      break
    fi
    wdir=$(cd "$wdir/.."; pwd)
  done
  echo "${basedir}"
}

# concatenates all lines of a file
concat_lines() {
  if [ -f "$1" ]; then
    echo "$(tr -s '\n' ' ' < "$1")"
  fi
}

export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-$(find_maven_basedir)}
MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"

# For Cygwin, switch paths to Windows format before running java
if $cygwin; then
  [ -n "$M2_HOME" ] && M2_HOME=`cygpath --path --windows "$M2_HOME"`
  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
  [ -n "$MAVEN_PROJECTBASEDIR" ] && MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
fi

# Provide a "standardized" way to
retrieve the CLI args that will # work with both Windows and non-Windows executions. MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@" export MAVEN_CMD_LINE_ARGS WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain # avoid using MAVEN_CMD_LINE_ARGS below since that would loose parameter escaping in $@ exec "$JAVACMD" \ $MAVEN_OPTS \ -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" ================================================ FILE: mvnw.cmd ================================================ @REM ---------------------------------------------------------------------------- @REM Licensed to the Apache Software Foundation (ASF) under one @REM or more contributor license agreements. See the NOTICE file @REM distributed with this work for additional information @REM regarding copyright ownership. The ASF licenses this file @REM to you under the Apache License, Version 2.0 (the @REM "License"); you may not use this file except in compliance @REM with the License. You may obtain a copy of the License at @REM @REM http://www.apache.org/licenses/LICENSE-2.0 @REM @REM Unless required by applicable law or agreed to in writing, @REM software distributed under the License is distributed on an @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @REM KIND, either express or implied. See the License for the @REM specific language governing permissions and limitations @REM under the License. 
@REM ----------------------------------------------------------------------------

@REM ----------------------------------------------------------------------------
@REM Maven2 Start Up Batch script
@REM
@REM Required ENV vars:
@REM JAVA_HOME - location of a JDK home dir
@REM
@REM Optional ENV vars
@REM M2_HOME - location of maven2's installed home dir
@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending
@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
@REM e.g. to debug Maven itself, use
@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
@REM ----------------------------------------------------------------------------

@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
@echo off
@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%

@REM set %HOME% to equivalent of $HOME
if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")

@REM Execute a user defined script before this one
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
@REM check for pre script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
:skipRcPre

@setlocal

set ERROR_CODE=0

@REM To isolate internal variables from possible post scripts, we use another setlocal
@setlocal

@REM ==== START VALIDATION ====
if not "%JAVA_HOME%" == "" goto OkJHome

echo.
echo Error: JAVA_HOME not found in your environment. >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error

:OkJHome
if exist "%JAVA_HOME%\bin\java.exe" goto init

echo.
echo Error: JAVA_HOME is set to an invalid directory. >&2
echo JAVA_HOME = "%JAVA_HOME%" >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error

@REM ==== END VALIDATION ====

:init

set MAVEN_CMD_LINE_ARGS=%MAVEN_CONFIG% %*

@REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
@REM Fallback to current working directory if not found.

set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir

set EXEC_DIR=%CD%
set WDIR=%EXEC_DIR%
:findBaseDir
IF EXIST "%WDIR%"\.mvn goto baseDirFound
cd ..
IF "%WDIR%"=="%CD%" goto baseDirNotFound
set WDIR=%CD%
goto findBaseDir

:baseDirFound
set MAVEN_PROJECTBASEDIR=%WDIR%
cd "%EXEC_DIR%"
goto endDetectBaseDir

:baseDirNotFound
set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
cd "%EXEC_DIR%"

:endDetectBaseDir

IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig

@REM Append every line of .mvn\jvm.config to JVM_CONFIG_MAVEN_PROPS, then carry the value out of the setlocal scope
@setlocal EnableExtensions EnableDelayedExpansion
for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%

:endReadAdditionalConfig

SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"

set WRAPPER_JAR=""%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar""
set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain

@REM avoid using MAVEN_CMD_LINE_ARGS below since that would lose parameter escaping in %*
%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
if ERRORLEVEL 1 goto error
goto end

:error
set ERROR_CODE=1

:end
@endlocal & set ERROR_CODE=%ERROR_CODE%

if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
@REM check for post script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
:skipRcPost

@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
if "%MAVEN_BATCH_PAUSE%" == "on" pause

if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%

exit /B %ERROR_CODE%

================================================
FILE: pom.xml
================================================
4.0.0 io.jaegertracing.dependencies jaeger-spark-dependencies-parent 0.0.1-SNAPSHOT pom jaeger-spark-dependencies-test jaeger-spark-dependencies-common jaeger-spark-dependencies-cassandra jaeger-spark-dependencies-elasticsearch jaeger-spark-dependencies-opensearch jaeger-spark-dependencies 21 21 UTF-8 UTF-8 2.12 3.5.8 0.35.5 0.33.0 1.57.0 2.10.0 4.13.2 3.27.6 1.21.4 4.12.0 4.3.0 3.0 3.11.0 3.1.1 3.2.1 0.3.4 3.6.2 2.21.1 3.3.6 9.1.3 2017 The Apache Software License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt repo jaegertracing Jaegertracing Gitter https://gitter.im/jaegertracing/Lobby ${project.groupId} jaeger-spark-dependencies-common ${project.version} ${project.groupId} jaeger-spark-dependencies-test ${project.version} ${project.groupId}
jaeger-spark-dependencies-cassandra ${project.version} ${project.groupId} jaeger-spark-dependencies-elasticsearch ${project.version} ${project.groupId} jaeger-spark-dependencies-opensearch ${project.version} org.apache.spark spark-core_${version.scala.binary} ${version.org.apache.spark} commons-codec commons-codec org.apache.commons commons-compress org.apache.zookeeper zookeeper com.fasterxml.jackson.module jackson-module-scala_${version.scala.binary} org.apache.hadoop hadoop-client-api org.apache.hadoop hadoop-client-runtime org.testcontainers testcontainers ${version.org.testcontainers} org.testcontainers cassandra ${version.org.testcontainers} com.datastax.cassandra cassandra-driver-core com.squareup.okio okio 3.16.4 com.squareup.okio okio-jvm 3.16.4 com.google.guava guava 32.1.3-jre commons-collections commons-collections 3.2.2 commons-beanutils commons-beanutils 1.11.0 org.scala-lang scala-library ${version.scala.binary}.18 org.apache.spark spark-core_${version.scala.binary} org.apache.spark spark-sql_${version.scala.binary} ${version.org.apache.spark} org.apache.avro avro com.fasterxml.jackson.module jackson-module-scala_${version.scala.binary} com.google.protobuf protobuf-java 3.25.8 org.apache.zookeeper zookeeper 3.9.5 com.fasterxml.jackson.core jackson-core ${version.jackson} com.fasterxml.jackson.core jackson-databind ${version.jackson} com.fasterxml.jackson.module jackson-module-scala_${version.scala.binary} ${version.jackson} com.fasterxml.jackson.jaxrs jackson-jaxrs-base ${version.jackson} com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider ${version.jackson} org.apache.hadoop hadoop-client ${version.hadoop.client} org.apache.hadoop.thirdparty hadoop-shaded-protobuf_3_7 io.netty netty-all 4.2.9.Final org.apache.avro avro 1.12.1 junit junit ${version.junit} test org.assertj assertj-core ${version.org.assertj} test com.mycila license-maven-plugin ${version.maven-license-plugin} io.takari maven ${version.maven-plugi} org.apache.maven.plugins 
maven-surefire-plugin 2.22.2 com.mycila license-maven-plugin
header.txt
true true SCRIPT_STYLE SCRIPT_STYLE LICENSE mvnw mvnw.cmd .mvn/wrapper/maven-wrapper.properties **swp .github/java-upgrade/** **/ElasticsearchDependenciesJob.java **/OpenSearchDependenciesJob.java **/CassandraDependenciesJob.java
check compile
java17-plus [17,) org.apache.maven.plugins maven-surefire-plugin --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false
================================================ FILE: renovate.json ================================================ { "$schema": "https://docs.renovatebot.com/renovate-schema.json", "extends": [ "config:best-practices", ":gitSignOff" ], "schedule": [ "on the first day of the month" ], "dependencyDashboard": true, "labels": [ "changelog:dependencies" ], "packageRules": [ { "matchManagers": ["github-actions"], "matchUpdateTypes": ["major", "minor"], "groupName": "GitHub Actions" }, { "matchManagers": ["github-actions"], "matchUpdateTypes": ["patch"], "enabled": false } ] }