main 225c99c2f602 cached
85 files
246.4 KB
64.3k tokens
346 symbols
1 requests
Download .txt
Showing preview only (278K chars total). Download the full file or copy to clipboard to get everything.
Repository: jaegertracing/spark-dependencies
Branch: main
Commit: 225c99c2f602
Files: 85
Total size: 246.4 KB

Directory structure:
gitextract_0_ynlnqz/

├── .github/
│   └── workflows/
│       ├── ci-cd.yml
│       └── stale.yml
├── .gitignore
├── .mvn/
│   └── wrapper/
│       ├── maven-wrapper.jar
│       └── maven-wrapper.properties
├── AGENTS.md
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── RELEASES.md
├── entrypoint.sh
├── header.txt
├── jaeger-spark-dependencies/
│   ├── pom.xml
│   └── src/
│       └── main/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── DependenciesSparkJob.java
│           └── resources/
│               └── log4j.properties
├── jaeger-spark-dependencies-cassandra/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── io/
│       │   │       └── jaegertracing/
│       │   │           └── spark/
│       │   │               └── dependencies/
│       │   │                   └── cassandra/
│       │   │                       ├── CassandraDependenciesJob.java
│       │   │                       └── CassandraSpan.java
│       │   └── resources/
│       │       ├── log4j2.component.properties
│       │       └── log4j2.properties
│       └── test/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── cassandra/
│           │                       ├── CassandraDependenciesDockerJobTest.java
│           │                       ├── CassandraDependenciesJobTest.java
│           │                       └── JaegerTestDriverContainer.java
│           └── resources/
│               ├── jaeger-v2-config-cassandra.yaml
│               └── log4j.properties
├── jaeger-spark-dependencies-common/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── io/
│       │           └── jaegertracing/
│       │               └── spark/
│       │                   └── dependencies/
│       │                       ├── DependenciesSparkHelper.java
│       │                       ├── SpansToDependencyLinks.java
│       │                       ├── Utils.java
│       │                       ├── json/
│       │                       │   ├── JsonHelper.java
│       │                       │   ├── KeyValueDeserializer.java
│       │                       │   ├── KeyValueMixin.java
│       │                       │   ├── ReferenceDeserializer.java
│       │                       │   ├── ReferenceMixin.java
│       │                       │   ├── SpanDeserializer.java
│       │                       │   └── SpanMixin.java
│       │                       └── model/
│       │                           ├── Dependency.java
│       │                           ├── KeyValue.java
│       │                           ├── Process.java
│       │                           ├── Reference.java
│       │                           └── Span.java
│       └── test/
│           └── java/
│               └── io/
│                   └── jaegertracing/
│                       └── spark/
│                           └── dependencies/
│                               ├── SpansToDependencyLinksTest.java
│                               └── model/
│                                   └── SpanTest.java
├── jaeger-spark-dependencies-elasticsearch/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── io/
│       │   │       └── jaegertracing/
│       │   │           └── spark/
│       │   │               └── dependencies/
│       │   │                   └── elastic/
│       │   │                       ├── ElasticTupleToSpan.java
│       │   │                       └── ElasticsearchDependenciesJob.java
│       │   └── resources/
│       │       ├── log4j2.component.properties
│       │       └── log4j2.properties
│       └── test/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── elastic/
│           │                       ├── ElasticsearchDependenciesDockerJobTest.java
│           │                       ├── ElasticsearchDependenciesJobTest.java
│           │                       ├── ElasticsearchDependenciesTagFieldsJobTest.java
│           │                       └── JaegerElasticsearchEnvironment.java
│           └── resources/
│               └── jaeger-v2-config-elasticsearch.yaml
├── jaeger-spark-dependencies-opensearch/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── io/
│       │   │       └── jaegertracing/
│       │   │           └── spark/
│       │   │               └── dependencies/
│       │   │                   └── opensearch/
│       │   │                       ├── OpenSearchDependenciesJob.java
│       │   │                       └── OpenSearchTupleToSpan.java
│       │   └── resources/
│       │       ├── log4j.properties
│       │       ├── log4j2.component.properties
│       │       └── log4j2.properties
│       └── test/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── opensearch/
│           │                       ├── JaegerOpenSearchEnvironment.java
│           │                       ├── OpenSearchDependenciesDockerJobTest.java
│           │                       └── OpenSearchDependenciesJobTest.java
│           └── resources/
│               └── jaeger-v2-config-opensearch.yaml
├── jaeger-spark-dependencies-test/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── io/
│       │           └── jaegertracing/
│       │               └── spark/
│       │                   └── dependencies/
│       │                       ├── LogToConsolePrinter.java
│       │                       └── test/
│       │                           ├── DependenciesTest.java
│       │                           ├── DependencyLinkDerivator.java
│       │                           ├── TracersGenerator.java
│       │                           ├── rest/
│       │                           │   ├── DependencyLink.java
│       │                           │   ├── JsonHelper.java
│       │                           │   └── RestResult.java
│       │                           └── tree/
│       │                               ├── Node.java
│       │                               ├── TracingWrapper.java
│       │                               ├── Traversals.java
│       │                               └── TreeGenerator.java
│       └── test/
│           └── java/
│               └── io/
│                   └── jaegertracing/
│                       └── spark/
│                           └── dependencies/
│                               └── test/
│                                   ├── DependencyLinksDerivatorTest.java
│                                   ├── MockTracingWrapper.java
│                                   ├── rest/
│                                   │   └── DeserializationTest.java
│                                   └── tree/
│                                       ├── TraversalsTest.java
│                                       └── TreeGeneratorTest.java
├── mvnw
├── mvnw.cmd
├── pom.xml
└── renovate.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/ci-cd.yml
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

name: "CI/CD Pipeline"

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  release:
    types: [ published ]
  workflow_dispatch:
    inputs:
      tag:
        description: "Tag prefix to publish; each image is tagged `<tag>-<variant>`"
        required: true
        type: string

jobs:
  # Define the matrix once for all jobs
  setup:
    runs-on: ubuntu-24.04
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      
      - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
        with:
          distribution: "temurin"
          java-version: "21"
          cache: "maven"

      - name: Resolve all dependencies
        # go-offline ensures the .m2 folder is full before the matrix starts
        run: |
          mvn dependency:go-offline -B --no-transfer-progress \
            -Dmaven.wagon.http.retryHandler.count=3 \
            -Dmaven.wagon.http.connectionTimeout=120000 \
            -Dmaven.wagon.http.readTimeout=120000 \
            -Dhttp.retryHandler.count=5

      - id: set-matrix
        # Emits the build matrix shared by the build-jars, e2e-tests and publish jobs
        # as a single-line JSON string in the `matrix` step output.
        run: |
          # Define configuration for all storage variants
          STRATEGY='{
            "include": [
              {
                "variant": "cassandra",
                "storage": "cassandra",
                "es_version": "",
                "image_tag": "test-cassandra",
                "es_test_ver": ""
              },
              {
                "variant": "elasticsearch7",
                "storage": "elasticsearch",
                "es_version": "7.17.29",
                "image_tag": "test-es7",
                "es_test_ver": "7.3.0"
              },
              {
                "variant": "elasticsearch8",
                "storage": "elasticsearch",
                "es_version": "8.13.4",
                "image_tag": "test-es8",
                "es_test_ver": "8.13.4"
              },
              {
                "variant": "elasticsearch9",
                "storage": "elasticsearch",
                "es_version": "9.1.3",
                "image_tag": "test-es9",
                "es_test_ver": "9.1.3"
              },
              {
                "variant": "opensearch",
                "storage": "opensearch",
                "es_version": "",
                "image_tag": "test-opensearch",
                "es_test_ver": "2.14.0"
              }
            ]
          }'
          # Compact the JSON to a single line and publish it as the step output.
          # Expansions are quoted so the JSON reaches jq verbatim (no word splitting
          # or glob expansion) and the output file path is handled safely.
          echo "matrix=$(jq -c . <<< "$STRATEGY")" >> "$GITHUB_OUTPUT"

  build-jars:
    name: Build JAR - ${{ matrix.variant }}
    runs-on: ubuntu-24.04
    needs: setup
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Set up JDK 21
        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
        with:
          distribution: "temurin"
          java-version: "21"
          cache: "maven"

      - name: Build JAR
        env:
          MAVEN_ES_PROP: ${{ matrix.es_version != '' && format('-Dversion.elasticsearch.spark={0}', matrix.es_version) || '' }}
        run: |
          ./mvnw clean package --batch-mode --no-transfer-progress -Dlicense.skip=true -DskipTests \
            -pl jaeger-spark-dependencies-${{ matrix.storage }} -am \
            $MAVEN_ES_PROP

      - name: Prepare artifact
        run: |
          mkdir -p artifact-target
          cp jaeger-spark-dependencies-${{ matrix.storage }}/target/jaeger-spark-dependencies-${{ matrix.storage }}-0.0.1-SNAPSHOT.jar \
            artifact-target/

      - name: Upload JAR artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
        with:
          name: jar-${{ matrix.variant }}
          path: artifact-target/*.jar
          retention-days: 1

  e2e-tests:
    name: E2E Tests - ${{ matrix.variant }}
    runs-on: ubuntu-24.04
    needs: [setup, build-jars]
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Set up JDK 21
        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
        with:
          distribution: "temurin"
          java-version: "21"
          cache: "maven"

      - name: Download JAR artifact
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
        with:
          name: jar-${{ matrix.variant }}
          path: artifact-target/

      - name: Build local Docker image
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
        with:
          context: .
          load: true
          push: false
          tags: ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:${{ matrix.image_tag }}
          build-args: |
            VARIANT=${{ matrix.variant }}

      - name: Run integration tests
        env:
          SPARK_DEPENDENCIES_JOB_IMAGE_TAG: ${{ matrix.image_tag }}
          ELASTICSEARCH_VERSION: ${{ matrix.es_test_ver }}
          OPENSEARCH_VERSION: ${{ matrix.es_test_ver }}
          # The es_spark version is only needed when testing Elasticsearch variants
          MAVEN_ES_PROP: ${{ matrix.es_version != '' && format('-Dversion.elasticsearch.spark={0}', matrix.es_version) || '' }}
        run: |
          for attempt in 1 2 3; do
            echo "Integration test attempt $attempt of 3"
            ./mvnw --batch-mode --no-transfer-progress test -am \
              -pl jaeger-spark-dependencies-${{ matrix.storage }} \
              $MAVEN_ES_PROP && break
            if [ $attempt -lt 3 ]; then
              echo "Attempt $attempt failed, retrying after 15 seconds..."
              sleep 15
            else
              echo "All attempts failed"
              exit 1
            fi
          done

  publish:
    name: Publish - ${{ matrix.variant }}
    runs-on: ubuntu-24.04
    needs: [setup, e2e-tests]
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(needs.setup.outputs.matrix) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Download JAR artifact
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
        with:
          name: jar-${{ matrix.variant }}
          path: artifact-target/

      - name: Set up QEMU
        uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
        with:
          install: true
          platforms: |
            linux/amd64
            linux/arm64

      - name: Compute Tags
        id: compute-tags
        # Computes the comma-separated image tag list consumed by the build-and-push step.
        # Workflow context values are handed to the script through environment variables
        # instead of being interpolated into it, so a crafted tag name or dispatch input
        # cannot be executed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          GIT_REF: ${{ github.ref }}
          INPUT_TAG: ${{ inputs.tag }}
          VARIANT: ${{ matrix.variant }}
        run: |
          prefix="ghcr.io/jaegertracing/spark-dependencies/spark-dependencies"
          variant="$VARIANT"

          if [[ "$EVENT_NAME" == "release" && "$GIT_REF" == refs/tags/* ]]; then
            # Published release: tag as <release>-<variant>
            release="${GIT_REF#refs/tags/}"
            tags="$prefix:$release-$variant"

            # elasticsearch9 gets the 'latest' tag as it supports the newest ES version
            if [[ "$variant" == "elasticsearch9" ]]; then
              tags="$tags,$prefix:latest"
            fi
          elif [[ -n "$INPUT_TAG" ]]; then
            # Manual dispatch with a tag input: tag as <input tag>-<variant>
            tags="$prefix:$INPUT_TAG-$variant"
          else
            # For main branch builds, use main-variant
            tags="$prefix:main-$variant"
          fi

          echo "Computed tags for publication ($variant): $tags"
          echo "tags=$tags" >> "$GITHUB_OUTPUT"

      - name: Login to GitHub Package Registry
        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
        if: github.event_name != 'pull_request'
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push multi-arch images
        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.compute-tags.outputs.tags }}
          platforms: |
            linux/amd64
            linux/arm64
          build-args: |
            VARIANT=${{ matrix.variant }}


================================================
FILE: .github/workflows/stale.yml
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

name: 'Close stale issues and PRs'

on:
  schedule:
    # Run every Monday at 1:30 AM UTC
    - cron: '30 1 * * 1'
  workflow_dispatch:

permissions:
  issues: write
  pull-requests: write

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@e3f713f2d8f53843e71c69a996d56f51aa9adfb9
        with:
          egress-policy: audit

      - uses: actions/stale@dcd2b9469d2220b7e8d08aedc00c105d277fd46b
        with:
          # Issues configuration
          days-before-issue-stale: 90
          days-before-issue-close: 14
          stale-issue-message: >
            This issue has been automatically marked as stale because it has not had
            recent activity. It will be closed if no further activity occurs.
            To keep it open either add a comment or the label `do-not-expire`.
          close-issue-message: >
            This issue has been automatically closed due to inactivity.
          stale-issue-label: 'stale'
          exempt-issue-labels: 'do-not-expire,help-wanted'
          only-issue-labels: 'question'
          
          # Pull requests configuration  
          days-before-pr-stale: 60
          days-before-pr-close: 14
          stale-pr-message: >
            This pull request has been automatically marked as stale because it has not had
            recent activity. It will be closed if no further activity occurs. You may re-open
            it if you need more time.
          close-pr-message: >
            This pull request has been automatically closed due to inactivity. You may re-open
            it if you need more time. We really appreciate your contribution and we are sorry
            that this has not been completed.
          stale-pr-label: 'stale'
          exempt-pr-labels: 'do-not-expire'
          
          # General configuration
          operations-per-run: 100
          remove-stale-when-updated: true


================================================
FILE: .gitignore
================================================
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.*.swp
*.iml
.idea

# Avoid ignoring Maven wrapper jar file (.jar files are usually ignored)
!/.mvn/wrapper/maven-wrapper.jar

.testcontainers-tmp-*
.vscode/
artifact-target/


================================================
FILE: .mvn/wrapper/maven-wrapper.properties
================================================
distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.9.12/apache-maven-3.9.12-bin.zip


================================================
FILE: AGENTS.md
================================================
# Integration Tests Guide

This guide provides instructions for running integration tests for the Jaeger Spark Dependencies project.

For detailed information about integration tests, including prerequisites, troubleshooting, and environment variables, see the [Running Integration Tests](README.md#running-integration-tests) section in the README.

## Quick Start

The project includes make targets for running integration tests against different storage backends:

```bash
make e2e-cassandra  # Run Cassandra 4.x integration tests
make e2e-es7        # Run Elasticsearch 7 integration tests
make e2e-es8        # Run Elasticsearch 8 integration tests
make e2e-es9        # Run Elasticsearch 9 integration tests
```

Each target builds the appropriate Docker image and runs the corresponding integration test suite.

For more details, see the [Running Integration Tests](README.md#running-integration-tests) section in the README.


================================================
FILE: Dockerfile
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Simple runtime image that receives a pre-built JAR from the host
FROM eclipse-temurin:21.0.9_10-jre@sha256:b0f6befb3f2af49704998c4425cb6313c1da505648a8e78cee731531996f735d
LABEL org.opencontainers.image.authors="The Jaeger Authors <cncf-jaeger-maintainers@lists.cncf.io>"

# Build argument to specify the variant type
# Supported values: cassandra, elasticsearch7, elasticsearch8, elasticsearch9, opensearch
ARG VARIANT=elasticsearch9

ENV APP_HOME=/app/
ENV VARIANT_TYPE=${VARIANT}

# The JAR is provided by the GHA runner into the artifact-target folder
COPY artifact-target/jaeger-spark-dependencies*.jar $APP_HOME/app.jar

WORKDIR $APP_HOME

COPY entrypoint.sh /

RUN chgrp root /etc/passwd && chmod g+rw /etc/passwd
USER 185

ENTRYPOINT ["/entrypoint.sh"]


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright {yyyy} {name of copyright owner}

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: Makefile
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

.PHONY: e2e-cassandra e2e-es7 e2e-es8 e2e-es9 help

# Print a short summary of the integration-test targets defined in this Makefile.
help:
	@printf '%s\n' \
	  "Available targets:" \
	  "  e2e-cassandra  - Run Cassandra integration tests" \
	  "  e2e-es7        - Run Elasticsearch 7 integration tests" \
	  "  e2e-es8        - Run Elasticsearch 8 integration tests" \
	  "  e2e-es9        - Run Elasticsearch 9 integration tests"

# Build the Cassandra-variant image tagged test-cassandra, then run the Maven tests of the
# Cassandra module (and the modules it depends on) with that tag exported to the tests.
e2e-cassandra:
	@echo "Building Docker image for Cassandra variant..."
	docker build --build-arg VARIANT=cassandra \
	  -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-cassandra .
	@echo "Running Cassandra integration tests..."
	SPARK_DEPENDENCIES_JOB_TAG=test-cassandra ./mvnw --batch-mode clean test \
	  -am -pl jaeger-spark-dependencies-cassandra

# Build the elasticsearch7-variant image (connector 7.17.10) tagged test-es7, then run the
# Elasticsearch module's Maven tests with ELASTICSEARCH_VERSION=7.3.0.
e2e-es7:
	@echo "Building Docker image for ES7 variant..."
	docker build --build-arg VARIANT=elasticsearch7 --build-arg ELASTICSEARCH_SPARK_VERSION=7.17.10 \
	  -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-es7 .
	@echo "Running ES7 integration tests..."
	SPARK_DEPENDENCIES_JOB_TAG=test-es7 ELASTICSEARCH_VERSION=7.3.0 \
	  ./mvnw --batch-mode clean test -am -pl jaeger-spark-dependencies-elasticsearch \
	  -Dversion.elasticsearch.spark=7.17.10

# Build the elasticsearch8-variant image (connector 8.13.4) tagged test-es8, then run the
# Elasticsearch module's Maven tests with ELASTICSEARCH_VERSION=8.3.1.
e2e-es8:
	@echo "Building Docker image for ES8 variant..."
	docker build --build-arg VARIANT=elasticsearch8 --build-arg ELASTICSEARCH_SPARK_VERSION=8.13.4 \
	  -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-es8 .
	@echo "Running ES8 integration tests..."
	SPARK_DEPENDENCIES_JOB_TAG=test-es8 ELASTICSEARCH_VERSION=8.3.1 \
	  ./mvnw --batch-mode clean test -am -pl jaeger-spark-dependencies-elasticsearch \
	  -Dversion.elasticsearch.spark=8.13.4

# Build the image with VARIANT=unified (connector 9.1.3) tagged test-es9, then run the
# Elasticsearch module's Maven tests with ELASTICSEARCH_VERSION=9.1.3.
e2e-es9:
	@echo "Building Docker image for ES9 variant (unified/mega-jar)..."
	docker build --build-arg VARIANT=unified --build-arg ELASTICSEARCH_SPARK_VERSION=9.1.3 \
	  -t ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:test-es9 .
	@echo "Running ES9 integration tests..."
	SPARK_DEPENDENCIES_JOB_TAG=test-es9 ELASTICSEARCH_VERSION=9.1.3 \
	  ./mvnw --batch-mode clean test -am -pl jaeger-spark-dependencies-elasticsearch \
	  -Dversion.elasticsearch.spark=9.1.3


================================================
FILE: README.md
================================================
[![Latest image](https://ghcr-badge.egpl.dev/jaegertracing/spark-dependencies/spark-dependencies/latest_tag?trim=major&label=latest)](https://github.com/jaegertracing/spark-dependencies/pkgs/container/spark-dependencies%2Fspark-dependencies)

# Jaeger Spark dependencies
This is a Spark job that collects spans from storage, analyzes links between services,
and stores them for later presentation in the UI. Note that it is needed for production deployments;
the `all-in-one` distribution does not need this job.

This job parses all traces on a given day, based on UTC. By default, it processes the current day,
but other days can be explicitly specified.

## Quick-start
The Spark job can be run as a Docker container or as a Java executable:

### Container Image Variants

Starting with version 0.6.x, Docker images are published with variant-specific tags. **Each variant automatically uses the appropriate storage backend, so the `STORAGE` environment variable is no longer needed.**

The images are named `ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:{VERSION}-{VARIANT}`:

- **`VERSION-cassandra`**: For Cassandra storage (uses CassandraDependenciesJob directly)
- **`VERSION-elasticsearch7`**: For Elasticsearch 7.12-7.16 (uses ElasticsearchDependenciesJob with ES connector 7.17.29)
- **`VERSION-elasticsearch8`**: For Elasticsearch 7.17+ and 8.x (uses ElasticsearchDependenciesJob with ES connector 8.13.4)
- **`VERSION-elasticsearch9`**: For Elasticsearch 9.x (uses ElasticsearchDependenciesJob with ES connector 9.1.3) - also tagged as `:latest`
- **`VERSION-opensearch`**: For OpenSearch 2.x and 3.x (uses OpenSearchDependenciesJob with OpenSearch Java client)

Example for Cassandra:
```bash
$ docker run \
  --env CASSANDRA_CONTACT_POINTS=host1,host2 \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-cassandra
```

Example for Elasticsearch 8.x:
```bash
$ docker run \
  --env ES_NODES=http://elasticsearch:9200 \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-elasticsearch8
```

Example for OpenSearch:
```bash
$ docker run \
  --env OS_NODES=http://opensearch:9200 \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch
```

#### Advanced Configuration

Use `--env JAVA_OPTS` to pass additional Java options such as memory settings, SSL trust store, or other JVM properties:

```bash
# Example: Configure SSL trust store
$ docker run \
  --env ES_NODES=https://elasticsearch:9200 \
  --env JAVA_OPTS="-Djavax.net.ssl.trustStore=/path/to/truststore -Djavax.net.ssl.trustStorePassword=changeit" \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-elasticsearch8

# Example: Increase JVM heap size
$ docker run \
  --env OS_NODES=http://opensearch:9200 \
  --env JAVA_OPTS="-Xmx2g -Xms1g" \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch
```

Use `--env LOG4J_STATUS_LOGGER_LEVEL` to control Log4j2 internal status messages (defaults to `OFF`):

```bash
# Example: Enable Log4j2 debug logging for troubleshooting
$ docker run \
  --env OS_NODES=http://opensearch:9200 \
  --env LOG4J_STATUS_LOGGER_LEVEL=DEBUG \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch
```

Note: the latest versions are hosted on `ghcr.io`, not on Docker Hub.

As jar file:
```bash
STORAGE=cassandra java -jar jaeger-spark-dependencies.jar
```

## Usage
By default, this job parses all traces since midnight UTC. You can parse traces for a different day
via an argument in YYYY-mm-dd format, like 2016-07-16, or specify the date via the `DATE` environment variable.

```bash
# ex to run the job to process yesterday's traces on OS/X
$ STORAGE=cassandra java -jar jaeger-spark-dependencies.jar `date -uv-1d +%F`
# or on Linux
$ STORAGE=cassandra java -jar jaeger-spark-dependencies.jar `date -u -d '1 day ago' +%F`
```

### Configuration
`jaeger-spark-dependencies` applies configuration parameters through environment variables.

The following variables are common to all storage layers:

* `SPARK_MASTER`: Spark master to submit the job to; Defaults to `local[*]`
* `DATE`: Date in YYYY-mm-dd format. Denotes a day for which dependency links will be created.
* `PEER_SERVICE_TAG`: Tag name used to identify peer service in spans. Defaults to `peer.service`
* `JAVA_OPTS`: Additional Java options to pass to the JVM. Use this to configure memory, SSL properties, or other JVM settings. Example: `JAVA_OPTS="-Xmx2g -Djavax.net.ssl.trustStore=/path/to/truststore"`. Note: The required `--add-opens` flags for Spark on Java 21+ are already included in the container image.
* `LOG4J_STATUS_LOGGER_LEVEL`: Log4j2 StatusLogger level. Defaults to `OFF` to suppress internal Log4j2 status messages. Set to `TRACE`, `DEBUG`, `INFO`, `WARN`, `ERROR`, or `FATAL` if you need to debug logging configuration issues.

### Cassandra
Cassandra is used when `STORAGE=cassandra`.

* `CASSANDRA_KEYSPACE`: The keyspace to use. Defaults to "jaeger_v1_dc1".
* `CASSANDRA_CONTACT_POINTS`: Comma separated list of hosts / ip addresses part of Cassandra cluster.
  Defaults to localhost
* `CASSANDRA_LOCAL_DC`: The local DC to connect to (other nodes will be ignored)
* `CASSANDRA_USERNAME` and `CASSANDRA_PASSWORD`: Cassandra authentication. Will throw an exception
  on startup if authentication fails
* `CASSANDRA_USE_SSL`: Requires `javax.net.ssl.trustStore` and `javax.net.ssl.trustStorePassword`,
  Defaults to false.
* `CASSANDRA_CLIENT_AUTH_ENABLED`: If set enables client authentication on SSL connections.
  Requires `javax.net.ssl.keyStore` and `javax.net.ssl.keyStorePassword`, defaults to false.

Example usage:

```bash
$ STORAGE=cassandra CASSANDRA_CONTACT_POINTS=localhost:9042 java -jar jaeger-spark-dependencies.jar
```
### Elasticsearch
Elasticsearch is used when `STORAGE=elasticsearch`.

**Important**: Use the appropriate Docker image variant for your Elasticsearch version:
- ES 7.12-7.16: Use `:VERSION-elasticsearch7` tag
- ES 7.17-8.x: Use `:VERSION-elasticsearch8` tag  
- ES 9.x: Use `:VERSION-elasticsearch9` tag (or `:latest`)

#### Configuration

* `ES_NODES`: A comma separated list of elasticsearch hosts advertising http. Defaults to
  127.0.0.1. Add port section if not listening on port 9200. Only one of these hosts
  needs to be available to fetch the remaining nodes in the cluster. It is
  recommended to set this to all the master nodes of the cluster. Use url format for
  SSL. For example, "https://yourhost:8888"
* `ES_NODES_WAN_ONLY`: Set to true to only use the values set in ES_NODES, for example if your
  elasticsearch cluster is in Docker. If you're using a cloud provider
  such as AWS Elasticsearch, set this to true. Defaults to false
* `ES_USERNAME` and `ES_PASSWORD`: Elasticsearch basic authentication. Use when X-Pack security
  (formerly Shield) is in place. By default no username or password is provided to elasticsearch.
* `ES_CLIENT_NODE_ONLY`: Set to true to disable elasticsearch cluster nodes.discovery and enable nodes.client.only.
  If your elasticsearch cluster's data nodes only listen on loopback ip, set this to true.
  Defaults to false.
* `ES_INDEX_PREFIX`: index prefix of Jaeger indices. By default unset.
* `ES_INDEX_DATE_SEPARATOR`: index date separator of Jaeger indices. The default value is `-`.
  For example `.` will find index "jaeger-span-2020.11.25".
* `ES_TIME_RANGE`: How far in the past the job should look for spans; the maximum and default is `24h`.
  Any value accepted by [date-math](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math) can be used here, but the anchor is always `now`.
* `ES_USE_ALIASES`: Set to true to use index alias names to read from and write to.
  Usually required when using rollover indices.

Example usage:

```bash
$ STORAGE=elasticsearch ES_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies.jar
```

### OpenSearch
OpenSearch is used when `STORAGE=opensearch`.

**Important**: Use the `:VERSION-opensearch` Docker image variant.

#### Configuration

* `OS_NODES`: A comma separated list of OpenSearch hosts advertising http. Defaults to
  127.0.0.1. Add port section if not listening on port 9200. Only one of these hosts
  needs to be available to fetch the remaining nodes in the cluster. It is
  recommended to set this to all the master nodes of the cluster. Use url format for
  SSL. For example, "https://yourhost:8888"
* `OS_NODES_WAN_ONLY`: Set to true to only use the values set in OS_NODES, for example if your
  OpenSearch cluster is in Docker. If you're using a cloud provider
  such as AWS OpenSearch, set this to true. Defaults to false.
* `OS_USERNAME` and `OS_PASSWORD`: OpenSearch basic authentication. By default no username or password is provided.
* `OS_INDEX_PREFIX`: index prefix of Jaeger indices. By default unset.
* `OS_INDEX_DATE_SEPARATOR`: index date separator of Jaeger indices. The default value is `-`.
  For example `.` will find index "jaeger-span-2020.11.25".
* `OS_TIME_RANGE`: How far in the past the job should look for spans; the maximum and default is `24h`.
  Any value accepted by [date-math](https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math) can be used here, but the anchor is always `now`.

Example usage:

```bash
$ docker run \
  --env OS_NODES=http://opensearch:9200 \
  ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:v0.5.3-opensearch
```

## Design

At a high-level, this job does the following:

* read lots of spans from a time period
* group them by traceId
* construct a graph using parent-child relationships expressed in span references
* for each edge `(parent span, child span)` output `(parent service, child service, count)`
* write the results to the database (e.g. `dependencies_v2` table in [Cassandra](https://github.com/jaegertracing/jaeger/blob/12e44faabf10cdd866391b78933eec5d6ac50fa9/plugin/storage/cassandra/schema/v004.cql.tmpl#L186))

## Building locally
To build the job locally and run tests:
```bash
./mvnw clean install # if this fails, prefix the command with SPARK_LOCAL_IP=127.0.0.1
```
To run the unified jar (includes all):
```bash
STORAGE=cassandra java -jar jaeger-spark-dependencies/target/jaeger-spark-dependencies-0.0.1-SNAPSHOT.jar
#or
STORAGE=elasticsearch ES_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies/target/jaeger-spark-dependencies-0.0.1-SNAPSHOT.jar
#or
STORAGE=opensearch OS_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies/target/jaeger-spark-dependencies-0.0.1-SNAPSHOT.jar
```

To run storage-specific jars directly (without STORAGE variable):
```bash
# Cassandra
java -jar jaeger-spark-dependencies-cassandra/target/jaeger-spark-dependencies-cassandra-0.0.1-SNAPSHOT.jar
# Elasticsearch
ES_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies-elasticsearch/target/jaeger-spark-dependencies-elasticsearch-0.0.1-SNAPSHOT.jar
# OpenSearch
OS_NODES=http://localhost:9200 java -jar jaeger-spark-dependencies-opensearch/target/jaeger-spark-dependencies-opensearch-0.0.1-SNAPSHOT.jar
```

To build Docker image:

**Note:** The Dockerfile now requires a pre-built JAR. First build the JAR using Maven, then build the Docker image.

For Cassandra:
```bash
./mvnw clean package --batch-mode -Dlicense.skip=true -DskipTests -pl jaeger-spark-dependencies-cassandra -am
mkdir -p artifact-target
cp jaeger-spark-dependencies-cassandra/target/jaeger-spark-dependencies-cassandra-0.0.1-SNAPSHOT.jar artifact-target/
docker build --build-arg VARIANT=cassandra -t jaegertracing/spark-dependencies:cassandra .
```

For Elasticsearch 9:
```bash
./mvnw clean package --batch-mode -Dlicense.skip=true -DskipTests -Dversion.elasticsearch.spark=9.1.3 -pl jaeger-spark-dependencies-elasticsearch -am
mkdir -p artifact-target
cp jaeger-spark-dependencies-elasticsearch/target/jaeger-spark-dependencies-elasticsearch-0.0.1-SNAPSHOT.jar artifact-target/
docker build --build-arg VARIANT=elasticsearch9 -t jaegertracing/spark-dependencies:elasticsearch9 .
```

In tests it's possible to specify version of Jaeger images by env variable `JAEGER_VERSION`
or system property `jaeger.version`. By default tests are using latest images.

## Running Integration Tests

The integration tests validate the Spark dependencies job against different storage backends:
- Cassandra 4.x
- Elasticsearch 7
- Elasticsearch 8
- Elasticsearch 9

### Prerequisites

Before running integration tests, ensure you have the following installed:

- **Java 21** (Temurin distribution recommended)
- **Docker** (for building images and running testcontainers)
- **Maven** (included via `./mvnw` wrapper)

### Quick Start

Use the following make targets to run integration tests:

```bash
make e2e-cassandra  # Run Cassandra integration tests
make e2e-es7        # Run Elasticsearch 7 integration tests
make e2e-es8        # Run Elasticsearch 8 integration tests
make e2e-es9        # Run Elasticsearch 9 integration tests
```

### What Each Target Does

Each test suite performs two steps:
1. Builds a Docker image with the appropriate storage variant
2. Runs tests using testcontainers against that variant

### Environment Variables

The following environment variables are used in integration tests:

- `SPARK_DEPENDENCIES_JOB_TAG`: Specifies the Docker image tag to use in tests (e.g., `test-cassandra`, `test-es7`, `test-es8`, `test-es9`)
- `ELASTICSEARCH_VERSION`: Specifies the Elasticsearch version for testcontainers to use
- `JAEGER_VERSION`: (Optional) Specifies the version of Jaeger images to use in tests. Defaults to latest.

You can also set this as a system property:
```bash
./mvnw test -Djaeger.version=2.14.0
```

### Troubleshooting

#### Docker Permission Issues
If you encounter Docker permission issues, ensure your user is in the `docker` group:
```bash
sudo usermod -aG docker $USER
```
Then log out and log back in.

#### Testcontainers Issues
If testcontainers fail to start, ensure:
1. Docker is running and accessible
2. The Ryuk image is pulled: `docker pull testcontainersofficial/ryuk:latest`
3. You have sufficient disk space for Docker images

#### Build Failures
If you encounter build failures:
1. Ensure you have Java 21 installed
2. Clean the previous build output: `./mvnw clean`
3. Try running with the `-U` flag to force update dependencies: `./mvnw -U clean install`

#### Port Conflicts
If tests fail due to port conflicts, ensure no other services are running on the ports used by testcontainers (typically ephemeral ports, but sometimes standard ports like 9042 for Cassandra or 9200 for Elasticsearch).

## CI/CD Pipeline

The project uses a unified CI/CD pipeline (`.github/workflows/ci-cd.yml`) that implements a **Host-Build Matrix Pattern**:

1. **Setup & Dependency Download** - Downloads all Maven dependencies once and warms the cache for subsequent jobs
2. **Build JARs** - Builds storage-specific JARs on the GitHub runner (parallel for all variants)
3. **E2E Tests** - Tests each variant using Docker containers with pre-built JARs
4. **Publish** - Publishes multi-arch Docker images (linux/amd64, linux/arm64) to GitHub Container Registry

The pipeline supports four variants:
- `cassandra` - For Cassandra storage
- `elasticsearch7` - For Elasticsearch 7.12-7.16 (ES connector 7.17.29)
- `elasticsearch8` - For Elasticsearch 7.17+ and 8.x (ES connector 8.13.4)
- `elasticsearch9` - For Elasticsearch 9.x (ES connector 9.1.3)

This approach eliminates Maven downloads inside Docker builds and parallelizes builds across all storage variants.

## License

[Apache 2.0 License](./LICENSE).


================================================
FILE: RELEASES.md
================================================
# Release process

1. Create a new GitHub release with a new tag
2. Use "generate release notes" button
3. Once the release is published, a `publish release` workflow will build and push container images


================================================
FILE: entrypoint.sh
================================================
#!/bin/sh
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#


# Taken from https://github.com/radanalyticsio/openshift-spark/blob/2.4/modules/common/added/scripts/entrypoint#L50
# OpenShift passes random UID and spark requires it to be present in /etc/passwd
patch_uid() {
    # Look up the passwd entry (if any) for the UID/GID the container is running as
    myuid=$(id -u)
    mygid=$(id -g)
    uidentry=$(getent passwd $myuid)

    # The UID already resolves to a passwd entry: nothing to patch
    [ -n "$uidentry" ] && return 0

    # No entry and /etc/passwd is read-only: report it and carry on (best effort)
    if [ ! -w /etc/passwd ] ; then
        echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID"
        return
    fi

    # Append a synthetic entry whose home directory is the current working directory
    echo "$myuid:x:$myuid:$mygid:anonymous uid:${PWD}:/bin/false" >> /etc/passwd
}

patch_uid

# Location of the single application JAR under APP_HOME
JAR_PATH="$APP_HOME/app.jar"

# Select the job entry point from VARIANT_TYPE
case "$VARIANT_TYPE" in
    cassandra)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.cassandra.CassandraDependenciesJob"
        ;;
    elasticsearch*)
        # Any value that starts with "elasticsearch" (e.g. elasticsearch8)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJob"
        ;;
    opensearch)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.opensearch.OpenSearchDependenciesJob"
        ;;
    *)
        # Unset or unrecognised value: fall back to the unified JAR entry point
        # (for backward compatibility or local builds)
        MAIN_CLASS="io.jaegertracing.spark.dependencies.DependenciesSparkJob"
        ;;
esac

# Set default Log4j2 StatusLogger level if not already set
# This suppresses Log4j2 StatusLogger errors triggered by OpenSearch's programmatic logging configuration
# Users can override this by setting the LOG4J_STATUS_LOGGER_LEVEL environment variable
LOG4J_STATUS_LOGGER_LEVEL="${LOG4J_STATUS_LOGGER_LEVEL:-OFF}"

# Required Java module options for Spark to work with Java 21+
# These --add-opens flags are necessary for Spark to access internal Java APIs
SPARK_JAVA_OPTS="--add-opens=java.base/java.lang=ALL-UNNAMED \
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
--add-opens=java.base/java.io=ALL-UNNAMED \
--add-opens=java.base/java.net=ALL-UNNAMED \
--add-opens=java.base/java.nio=ALL-UNNAMED \
--add-opens=java.base/java.util=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
--add-opens=java.base/sun.security.action=ALL-UNNAMED \
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
-Djdk.reflect.useDirectMethodHandle=false"

# Execute the job with the determined main class
# SPARK_JAVA_OPTS come first (required for Spark), then JAVA_OPTS (user customizations), then Log4j config
#
# SPARK_JAVA_OPTS and JAVA_OPTS are intentionally left unquoted: each holds several
# space-separated options that must be split into separate arguments.
# The StatusLogger property is a single argument, so it is quoted to keep a value
# containing whitespace or glob characters from being split or expanded.
# shellcheck disable=SC2086
exec java ${SPARK_JAVA_OPTS} ${JAVA_OPTS} \
    "-Dorg.apache.logging.log4j.simplelog.StatusLogger.level=${LOG4J_STATUS_LOGGER_LEVEL}" \
    -cp "$JAR_PATH" "$MAIN_CLASS" "$@"


================================================
FILE: header.txt
================================================
Copyright (c) The Jaeger Authors
SPDX-License-Identifier: Apache-2.0


================================================
FILE: jaeger-spark-dependencies/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--

    Copyright (c) The Jaeger Authors
    SPDX-License-Identifier: Apache-2.0

-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>io.jaegertracing.dependencies</groupId>
    <artifactId>jaeger-spark-dependencies-parent</artifactId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>

  <artifactId>jaeger-spark-dependencies</artifactId>

  <!-- Storage-specific job modules bundled into this jar. No <version> is declared here,
       so the versions are presumably managed by the parent POM (TODO confirm). -->
  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-cassandra</artifactId>
    </dependency>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-elasticsearch</artifactId>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Runs the shade goal in the package phase to produce a single jar that bundles the dependencies -->
      <plugin>
        <artifactId>maven-shade-plugin</artifactId>
        <version>${version.maven-shade-plugin}</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <!-- Protect against http://stackoverflow.com/questions/31011243/no-configuration-setting-found-for-key-akka-version -->
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>reference.conf</resource>
                </transformer>
                <!-- Records DependenciesSparkJob as the main class in the jar manifest -->
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>io.jaegertracing.spark.dependencies.DependenciesSparkJob</mainClass>
                </transformer>
              </transformers>
              <!-- spark + scala + ... = lots and lots of classes! -->
              <minimizeJar>true</minimizeJar>
              <!-- The filters below list artifacts whose contents are explicitly included, so that
                   minimizeJar does not drop classes it cannot see being referenced -->
              <filters>
                <!-- Prevent minification from excluding classes looked up by name -->
                <filter>
                  <artifact>org.apache.hadoop:hadoop-common</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep classes from Cassandra Java Driver -->
                  <artifact>com.datastax.oss:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- NOTE(review): only LoggingEvent.class is listed for log4j 1.x; confirm this narrow include is intentional -->
                  <artifact>log4j:log4j</artifact>
                  <includes>
                    <include>org/apache/log4j/spi/LoggingEvent.class</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep everything from the Log4j 2 artifacts -->
                  <artifact>org.apache.logging.log4j:log4j-*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep everything from Netty -->
                  <artifact>io.netty:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep everything from SLF4J -->
                  <artifact>org.slf4j:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep everything from the Scala artifacts -->
                  <artifact>org.scala-lang:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep everything from Spark -->
                  <artifact>org.apache.spark:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep everything from lz4 -->
                  <artifact>org.lz4:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- org.elasticsearch.spark.sql.SparkSQLCompatibilityLevel -->
                  <artifact>org.elasticsearch:elasticsearch-spark-*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- elasticsearch -->
                  <artifact>commons-httpclient:commons-httpclient</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- org.apache.xerces.jaxp.DocumentBuilderFactoryImpl -->
                  <artifact>xerces:xercesImpl</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep all okhttp3 classes to avoid NoSuchMethodError -->
                  <artifact>com.squareup.okhttp3:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep all okio classes to avoid NoSuchMethodError -->
                  <artifact>com.squareup.okio:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <!-- Prevent Invalid signature file digest for Manifest main attributes -->
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <!-- Do not generate a dependency-reduced POM next to the module's pom.xml -->
              <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: jaeger-spark-dependencies/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkJob.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies;

import io.jaegertracing.spark.dependencies.cassandra.CassandraDependenciesJob;
import io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJob;
import java.io.UnsupportedEncodingException;
import java.time.LocalDate;

/**
 * Command-line entry point that dispatches to the storage-specific dependencies job.
 *
 * <p>Environment variables read here: {@code STORAGE} (required), {@code DATE} (optional) and
 * {@code PEER_SERVICE_TAG} (optional, falls back to {@code peer.service}).
 */
public final class DependenciesSparkJob {

  /**
   * Resolves the storage backend and the day to process, then runs the matching job.
   *
   * @param args when exactly one argument is given it is parsed as the date to process
   * @throws IllegalArgumentException if {@code STORAGE} is unset or names an unsupported backend
   */
  public static void main(String[] args) throws UnsupportedEncodingException {
    final String backend = System.getenv("STORAGE");
    if (backend == null) {
      throw new IllegalArgumentException("Missing environmental variable STORAGE");
    }
    run(backend, resolveDate(args));
  }

  /**
   * Picks the date to process: the single CLI argument wins, then the {@code DATE}
   * environment variable, and finally the current date.
   */
  private static LocalDate resolveDate(String[] args) {
    if (args.length == 1) {
      return parseZonedDateTime(args[0]);
    }
    final String dateFromEnv = System.getenv("DATE");
    return dateFromEnv != null ? parseZonedDateTime(dateFromEnv) : LocalDate.now();
  }

  /**
   * Builds and runs the job for the given backend.
   *
   * @param storage backend name, compared case-insensitively against "elasticsearch" and "cassandra"
   * @param localDate day handed to the job builder
   */
  private static void run(String storage, LocalDate localDate) throws UnsupportedEncodingException {
    final String tagFromEnv = System.getenv("PEER_SERVICE_TAG");
    final String peerServiceTag = tagFromEnv != null ? tagFromEnv : "peer.service";
    // Resolved before the backend is inspected, exactly as the dispatch has always done.
    final String uberJar = Utils.pathToUberJar(DependenciesSparkJob.class);

    final boolean useElasticsearch = "elasticsearch".equalsIgnoreCase(storage);
    final boolean useCassandra = !useElasticsearch && "cassandra".equalsIgnoreCase(storage);
    if (!useElasticsearch && !useCassandra) {
      throw new IllegalArgumentException("Unsupported storage: " + storage);
    }

    if (useElasticsearch) {
      ElasticsearchDependenciesJob.builder()
          .jars(uberJar)
          .day(localDate)
          .build()
          .run(peerServiceTag);
      return;
    }

    CassandraDependenciesJob.builder()
        .jars(uberJar)
        .day(localDate)
        .build()
        .run(peerServiceTag);
  }

  /**
   * Parses a date string with {@link LocalDate#parse(CharSequence)}.
   * Despite the method name, the result is a plain {@link LocalDate}.
   */
  static LocalDate parseZonedDateTime(String date) {
    final LocalDate parsed = LocalDate.parse(date);
    return parsed;
  }
}


================================================
FILE: jaeger-spark-dependencies/src/main/resources/log4j.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.io.jaegertracing.spark=INFO

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR


================================================
FILE: jaeger-spark-dependencies-cassandra/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--

    Copyright (c) The Jaeger Authors
    SPDX-License-Identifier: Apache-2.0

-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>io.jaegertracing.dependencies</groupId>
    <artifactId>jaeger-spark-dependencies-parent</artifactId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>

  <artifactId>jaeger-spark-dependencies-cassandra</artifactId>

  <properties>
    <spark-cassandra-connector.version>3.4.1</spark-cassandra-connector.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-common</artifactId>
    </dependency>

    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${version.scala.binary}</artifactId>
      <exclusions>
        <exclusion>
          <groupId>com.fasterxml.jackson.core</groupId>
          <artifactId>jackson-annotations</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>com.datastax.spark</groupId>
      <artifactId>spark-cassandra-connector_${version.scala.binary}</artifactId>
      <version>${spark-cassandra-connector.version}</version>
    </dependency>
    
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-compress</artifactId>
      <version>1.26.0</version>
    </dependency>
    <dependency>
      <groupId>org.eclipse.jetty</groupId>
      <artifactId>jetty-xml</artifactId>
      <version>10.0.26</version>
    </dependency>

    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-test</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.testcontainers</groupId>
      <artifactId>testcontainers</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.testcontainers</groupId>
      <artifactId>cassandra</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
    </dependency>
    <dependency>
      <groupId>commons-lang</groupId>
      <artifactId>commons-lang</artifactId>
      <version>2.6</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-shade-plugin</artifactId>
        <version>${version.maven-shade-plugin}</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>reference.conf</resource>
                </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>io.jaegertracing.spark.dependencies.cassandra.CassandraDependenciesJob</mainClass>
                </transformer>
              </transformers>
              <minimizeJar>true</minimizeJar>
              <filters>
                <filter>
                  <artifact>org.apache.hadoop:hadoop-common</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>com.datastax.oss:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>com.datastax.spark:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>log4j:log4j</artifact>
                  <includes>
                    <include>org/apache/log4j/spi/LoggingEvent.class</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.apache.logging.log4j:log4j-*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>io.netty:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.slf4j:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.scala-lang:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.apache.spark:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.lz4:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep every xercesImpl class even though minimizeJar is enabled. -->
                  <!-- NOTE(review): presumably needed for org.apache.xerces.jaxp.DocumentBuilderFactoryImpl, confirm -->
                  <artifact>xerces:xercesImpl</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep all okhttp3 classes to avoid NoSuchMethodError -->
                  <artifact>com.squareup.okhttp3:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep all okio classes to avoid NoSuchMethodError -->
                  <artifact>com.squareup.okio:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Drop signature files (SF/DSA/RSA) from every shaded artifact. -->
                  <!-- Prevent Invalid signature file digest for Manifest main attributes -->
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJob.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * Copyright 2016-2017 The OpenZipkin Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.cassandra;

import static com.datastax.spark.connector.japi.CassandraJavaUtil.javaFunctions;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapRowTo;
import static com.datastax.spark.connector.japi.CassandraJavaUtil.mapToRow;

import com.google.common.base.Joiner;
import com.google.common.net.HostAndPort;
import io.jaegertracing.spark.dependencies.DependenciesSparkHelper;
import io.jaegertracing.spark.dependencies.Utils;
import io.jaegertracing.spark.dependencies.model.Dependency;
import io.jaegertracing.spark.dependencies.model.Span;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.Period;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.ChronoUnit;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * @author OpenZipkin authors
 * @author Pavol Loffay
 */
public final class CassandraDependenciesJob {
  private static final Logger log = LoggerFactory.getLogger(CassandraDependenciesJob.class);

  /** Creates a fresh {@link Builder} with its default settings. */
  public static Builder builder() {
    final Builder freshBuilder = new Builder();
    return freshBuilder;
  }

  /**
   * Fluent builder for {@link CassandraDependenciesJob}.
   *
   * <p>Field initializers take their defaults from {@code Utils.getEnv} lookups of the
   * {@code CASSANDRA_*} and {@code SPARK_MASTER} environment variables; the setters override them.
   */
  public static final class Builder {
    String keyspace = Utils.getEnv("CASSANDRA_KEYSPACE", "jaeger_v1_dc1");
    String contactPoints = Utils.getEnv("CASSANDRA_CONTACT_POINTS", "localhost");
    // Optional: the job only applies the local DC when this is non-null
    String localDc = Utils.getEnv("CASSANDRA_LOCAL_DC", null);
    // local[*] master lets us run & test the job locally without setting a Spark cluster
    String sparkMaster = Utils.getEnv("SPARK_MASTER", "local[*]");
    String username = Utils.getEnv("CASSANDRA_USERNAME", "");
    String password = Utils.getEnv("CASSANDRA_PASSWORD", "");
    // needed when not in local mode
    String[] jars;

    // By default the job only works on traces whose first timestamp is today
    ZonedDateTime day = ZonedDateTime.of(LocalDate.now().atStartOfDay(), ZoneOffset.UTC);

    // Extra Spark properties; insertion order is preserved when they are copied into the SparkConf
    final Map<String, String> sparkProperties = new LinkedHashMap<>();

    /**
     * Seeds the Spark properties: disables the Spark UI and maps the TLS settings from the
     * {@code CASSANDRA_USE_SSL} / {@code CASSANDRA_CLIENT_AUTH_ENABLED} environment variables and
     * the {@code javax.net.ssl.*} system properties onto the connector's {@code ssl.*} options.
     */
    Builder() {
      sparkProperties.put("spark.ui.enabled", "false");
      sparkProperties.put("spark.cassandra.connection.ssl.enabled",
          Utils.getEnv("CASSANDRA_USE_SSL", "false"));
      sparkProperties.put("spark.cassandra.connection.ssl.trustStore.password",
          System.getProperty("javax.net.ssl.trustStorePassword", ""));
      sparkProperties.put("spark.cassandra.connection.ssl.trustStore.path",
          System.getProperty("javax.net.ssl.trustStore", ""));
      sparkProperties.put("spark.cassandra.connection.ssl.clientAuth.enabled",
          Utils.getEnv("CASSANDRA_CLIENT_AUTH_ENABLED", "false"));
      sparkProperties.put("spark.cassandra.connection.ssl.keyStore.path",
          System.getProperty("javax.net.ssl.keyStore", ""));
      sparkProperties.put("spark.cassandra.connection.ssl.keyStore.password",
          System.getProperty("javax.net.ssl.keyStorePassword", ""));
    }

    /** When set, this indicates which jars to distribute to the cluster. */
    public Builder jars(String... jars) {
      this.jars = jars;
      return this;
    }

    /** Keyspace to store dependency rowsToLinks. Defaults to "jaeger_v1_dc1" */
    public Builder keyspace(String keyspace) {
      Utils.checkNoTNull("keyspace", keyspace);
      this.keyspace = keyspace;
      return this;
    }

    /** Cassandra username. */
    public Builder username(String username) {
      Utils.checkNoTNull("username", username);
      this.username = username;
      return this;
    }

    /** Cassandra password. */
    public Builder password(String password) {
      Utils.checkNoTNull("password", password);
      this.password = password;
      return this;
    }

    /** Day to process dependencies for. Defaults to today. */
    public Builder day(LocalDate day) {
      this.day = day.atStartOfDay(ZoneOffset.UTC);
      return this;
    }

    /** Comma separated list of hosts / IPs part of Cassandra cluster. Defaults to localhost */
    public Builder contactPoints(String contactPoints) {
      // Fail here, like the other mandatory setters, instead of later while parsing hosts/ports
      Utils.checkNoTNull("contactPoints", contactPoints);
      this.contactPoints = contactPoints;
      return this;
    }

    /** The local DC to connect to (other nodes will be ignored). May be null to leave it unset. */
    public Builder localDc(String localDc) {
      this.localDc = localDc;
      return this;
    }

    /** Creates the job from the current builder state. */
    public CassandraDependenciesJob build() {
      return new CassandraDependenciesJob(this);
    }

  }

  private final String keyspace;
  private final ZonedDateTime day;
  private final SparkConf conf;

  /**
   * Captures the keyspace and day from the builder and assembles the {@link SparkConf}.
   *
   * <p>Connection host/port are derived from the builder's contact points. The builder's
   * {@code sparkProperties} are applied last, so they override anything set earlier here.
   *
   * @param builder source of all job settings
   */
  CassandraDependenciesJob(Builder builder) {
    this.keyspace = builder.keyspace;
    this.day = builder.day;
    this.conf = new SparkConf(true)
        .setMaster(builder.sparkMaster)
        .setAppName(getClass().getName());
    conf.set("spark.cassandra.connection.host", parseHosts(builder.contactPoints));
    conf.set("spark.cassandra.connection.port", parsePort(builder.contactPoints));
    conf.set("spark.cassandra.auth.username", builder.username);
    conf.set("spark.cassandra.auth.password", builder.password);
    if (builder.localDc != null) {
      // The connector reads its options under the "spark.cassandra." prefix; the previous
      // un-prefixed "connection.local_dc" key was never picked up.
      conf.set("spark.cassandra.connection.localDC", builder.localDc);
    }
    if (builder.jars != null) {
      conf.setJars(builder.jars);
    }
    for (Map.Entry<String, String> entry : builder.sparkProperties.entrySet()) {
      conf.set(entry.getKey(), entry.getValue());
    }
  }

  /**
   * Reads the spans of the configured day from the {@code traces} table, derives the
   * dependency links and stores them.
   *
   * <p>The day window is expressed in microseconds since the epoch and is inclusive on both
   * ends: from the first microsecond of the day to the last one before the next day starts.
   * The Spark context is always stopped, even when the job fails.
   *
   * @param peerServiceTag tag name passed through to {@code DependenciesSparkHelper.derive}
   */
  public void run(String peerServiceTag) {
    long microsLower = day.toInstant().toEpochMilli() * 1000;
    // Last microsecond that still belongs to the day
    long microsUpper = day.plus(Period.ofDays(1)).toInstant().toEpochMilli() * 1000 - 1;

    log.info("Running Dependencies job for {}: {} ≤ Span.timestamp ≤ {}", day, microsLower, microsUpper);
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      JavaPairRDD<String, Iterable<Span>> traces = javaFunctions(sc)
          .cassandraTable(keyspace, "traces", mapRowTo(CassandraSpan.class))
          // Both bounds are inclusive; strict comparisons would drop spans that start exactly
          // on the first or the last microsecond of the day.
          .where("start_time >= ? AND start_time <= ?", microsLower, microsUpper)
          .mapToPair(span -> new Tuple2<>(span.getTraceId(), span))
          .mapValues(span -> (Span) span)
          .groupByKey();

      List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces, peerServiceTag);
      store(sc, dependencyLinks);
      log.info("Done, {} dependency objects created", dependencyLinks.size());
    } finally {
      sc.stop();
    }
  }

  /**
   * Writes the derived links as a single row into whichever dependencies table
   * {@code dependenciesTable(sc)} selects.
   *
   * @param sc active Spark context
   * @param links dependency links to persist
   */
  private void store(JavaSparkContext sc, List<Dependency> links) {
    final String targetTable = dependenciesTable(sc);
    log.info("Storing dependencies into {}", targetTable);

    if (!"dependencies_v2".equals(targetTable)) {
      // Any other table name is written with the original row layout
      CassandraDependencies legacyRow = new CassandraDependencies(links, day);
      javaFunctions(sc.parallelize(Collections.singletonList(legacyRow)))
          .writerBuilder(keyspace, targetTable, mapToRow(CassandraDependencies.class))
          .saveToCassandra();
      return;
    }

    CassandraDependenciesV2 v2Row = new CassandraDependenciesV2(links, day);
    javaFunctions(sc.parallelize(Collections.singletonList(v2Row)))
        .writerBuilder(keyspace, targetTable, mapToRow(CassandraDependenciesV2.class))
        .saveToCassandra();
  }

  /**
   * Extracts the host part of every comma separated contact point.
   *
   * @param contactPoints comma separated {@code host[:port]} entries
   * @return the hosts, joined with commas in their original order
   */
  static String parseHosts(String contactPoints) {
    final String[] entries = contactPoints.split(",");
    final List<String> hosts = new LinkedList<>();
    for (int i = 0; i < entries.length; i++) {
      hosts.add(HostAndPort.fromString(entries[i]).getHost());
    }
    return String.join(",", hosts);
  }

  /**
   * Returns the one port shared by all contact points, or 9042 when they disagree.
   * A contact point without an explicit port counts as 9042.
   */
  static String parsePort(String contactPoints) {
    final Set<Integer> distinctPorts = new HashSet<>();
    for (String entry : contactPoints.split(",")) {
      distinctPorts.add(HostAndPort.fromString(entry).getPortOrDefault(9042));
    }
    if (distinctPorts.size() != 1) {
      return "9042";
    }
    return String.valueOf(distinctPorts.iterator().next());
  }

  /**
   * Chooses the table to write to by probing {@code dependencies_v2} with a one-row read.
   * Any exception raised by the probe selects the {@code dependencies} table instead.
   */
  private String dependenciesTable(JavaSparkContext sc) {
    String selected = "dependencies_v2";
    try {
      javaFunctions(sc).cassandraTable(keyspace, "dependencies_v2").limit(1L).collect();
    } catch (Exception probeFailure) {
      selected = "dependencies";
    }
    return selected;
  }

  /**
   * DTO object used to store dependencies to Cassandra, see {@link com.datastax.spark.connector.mapper.JavaBeanColumnMapper}
   *
   * <p>Written when the target table is not {@code dependencies_v2}. The getters expose the
   * link list plus the job's day as epoch milliseconds.
   */
  public final static class CassandraDependencies implements Serializable {
    private static final long serialVersionUID = 0L;

    private List<Dependency> dependencies;
    private ZonedDateTime zonedDateTime;

    /**
     * @param dependencies links to store
     * @param ts point in time the row is stamped with
     */
    public CassandraDependencies(List<Dependency> dependencies, ZonedDateTime ts) {
      this.dependencies = dependencies;
      this.zonedDateTime = ts;
    }

    /** Returns the links handed to the constructor. */
    public List<Dependency> getDependencies() {
      return dependencies;
    }

    /** Returns the timestamp as milliseconds since the epoch. */
    public Long getTs() {
      return zonedDateTime.toInstant().toEpochMilli();
    }

    /** Returns the same epoch-millisecond value as {@link #getTs()}. */
    public Long getTsIndex() {
      return zonedDateTime.toInstant().toEpochMilli();
    }
  }

  /**
   * DTO object used to store dependencies to Cassandra, see {@link com.datastax.spark.connector.mapper.JavaBeanColumnMapper}
   *
   * <p>Written when the target table is {@code dependencies_v2}. Besides the link list it exposes
   * the timestamp and that timestamp truncated to whole days, both as epoch milliseconds.
   */
  public final static class CassandraDependenciesV2 implements Serializable {
    private static final long serialVersionUID = 0L;

    private List<Dependency> dependencies;
    private ZonedDateTime zonedDateTime;

    /**
     * @param dependencies links to store
     * @param ts point in time the row is stamped with
     */
    public CassandraDependenciesV2(List<Dependency> dependencies, ZonedDateTime ts) {
      this.dependencies = dependencies;
      this.zonedDateTime = ts;
    }

    /** Returns the links handed to the constructor. */
    public List<Dependency> getDependencies() {
      return dependencies;
    }

    /** Returns the timestamp as milliseconds since the epoch. */
    public Long getTs() {
      return zonedDateTime.toInstant().toEpochMilli();
    }

    /** Returns the timestamp truncated to {@link ChronoUnit#DAYS}, as milliseconds since the epoch. */
    public Long getTsBucket() {
      return zonedDateTime.toInstant().truncatedTo(ChronoUnit.DAYS).toEpochMilli();
    }
  }

  /**
   * Entry point for running CassandraDependenciesJob directly.
   * This is used when the Docker image variant is cassandra-specific.
   *
   * <p>The day comes from the single CLI argument if present, otherwise from the {@code DATE}
   * environment variable, otherwise it is the current date. {@code PEER_SERVICE_TAG} falls back
   * to {@code peer.service}.
   */
  public static void main(String[] args) throws java.io.UnsupportedEncodingException {
    final LocalDate dayToProcess;
    if (args.length == 1) {
      dayToProcess = LocalDate.parse(args[0]);
    } else {
      final String dateFromEnv = System.getenv("DATE");
      dayToProcess = dateFromEnv != null ? LocalDate.parse(dateFromEnv) : LocalDate.now();
    }

    final String tagFromEnv = System.getenv("PEER_SERVICE_TAG");
    final String peerServiceTag = tagFromEnv == null ? "peer.service" : tagFromEnv;

    final String uberJar = Utils.pathToUberJar(CassandraDependenciesJob.class);
    final CassandraDependenciesJob job = CassandraDependenciesJob.builder()
        .jars(uberJar)
        .day(dayToProcess)
        .build();
    job.run(peerServiceTag);
  }
}



================================================
FILE: jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraSpan.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.cassandra;

import io.jaegertracing.spark.dependencies.model.Reference;
import io.jaegertracing.spark.dependencies.model.Span;
import java.util.ArrayList;
import java.util.List;

/**
 * Span variant that also carries the legacy {@code parentId} value.
 *
 * <p>Jaeger > 1.5 does not store parentId; all references are stored in references table.
 * This class keeps older Jaeger deployments working by surfacing the parent id as one more
 * reference.
 *
 * @author Pavol Loffay
 */
public class CassandraSpan extends Span {

  private Long parentId;

  /** Returns the legacy parent span id. */
  public Long getParentId() {
    return this.parentId;
  }

  /** Sets the legacy parent span id. */
  public void setParentId(Long parentId) {
    this.parentId = parentId;
  }

  /**
   * Returns a new list holding the inherited references followed by one extra reference
   * whose span id is {@code parentId}.
   */
  @Override
  public List<Reference> getRefs() {
    final List<Reference> inherited = super.getRefs();
    final ArrayList<Reference> combined = new ArrayList<>(inherited);

    final Reference parentRef = new Reference();
    parentRef.setSpanId(this.parentId);
    combined.add(parentRef);

    return combined;
  }
}


================================================
FILE: jaeger-spark-dependencies-cassandra/src/main/resources/log4j2.component.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Disable Log4j status logger console output
log4j2.StatusLogger.level = OFF


================================================
FILE: jaeger-spark-dependencies-cassandra/src/main/resources/log4j2.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Set root logger level to WARN and use console appender
rootLogger.level = WARN
rootLogger.appenderRef.console.ref = console

# Console appender configuration
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
logger.jetty.name = org.spark-project.jetty
logger.jetty.level = WARN

logger.jettyLifecycle.name = org.spark-project.jetty.util.component.AbstractLifeCycle
logger.jettyLifecycle.level = ERROR

logger.sparkReplTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.sparkReplTyper.level = INFO

logger.sparkReplInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.sparkReplInterpreter.level = INFO

logger.jaegertracing.name = io.jaegertracing.spark
logger.jaegertracing.level = INFO

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.hiveMetastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.hiveMetastore.level = FATAL

logger.hiveFunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.hiveFunctionRegistry.level = ERROR


================================================
FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesDockerJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.cassandra;

import io.jaegertracing.spark.dependencies.LogToConsolePrinter;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.utility.DockerImageName;

import java.util.concurrent.TimeUnit;

import static org.awaitility.Awaitility.await;

/**
 * Runs the inherited dependency tests against the spark-dependencies Docker image instead of
 * the in-process job.
 */
public class CassandraDependenciesDockerJobTest extends CassandraDependenciesJobTest {

  /**
   * Reads the image tag from {@code SPARK_DEPENDENCIES_JOB_IMAGE_TAG}.
   *
   * @return the trimmed tag
   * @throws IllegalStateException if the variable is unset, empty or only whitespace
   */
  private static String dependenciesJobTag() {
    String raw = System.getenv("SPARK_DEPENDENCIES_JOB_IMAGE_TAG");
    // Trim before validating so a whitespace-only value is rejected instead of producing an
    // image reference with an empty tag.
    String tag = raw == null ? "" : raw.trim();
    if (tag.isEmpty()) {
      throw new IllegalStateException(
          "SPARK_DEPENDENCIES_JOB_IMAGE_TAG environment variable is required but not set. " +
              "This variable must be set to ensure tests use the locally built Docker image.");
    }
    return tag;
  }

  /**
   * Starts the job container on the shared test network and waits (up to three minutes) for it
   * to stop running. Output is wrapped in {@code ::group::} / {@code ::endgroup::} markers.
   */
  @Override
  protected void deriveDependencies() {
    System.out.println("::group::🚧 🚧 🚧 CassandraDependenciesDockerJob logs");
    try (GenericContainer<?> sparkDependenciesJob = new GenericContainer<>(
        DockerImageName.parse("ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:" + dependenciesJobTag()))
        .withNetwork(network)
        .withLogConsumer(new LogToConsolePrinter("[spark-dependencies] "))
        .withEnv("CASSANDRA_KEYSPACE", "jaeger_v1_dc1")
        .withEnv("CASSANDRA_CONTACT_POINTS", "cassandra") // This should be an address within the docker network
        .withEnv("CASSANDRA_LOCAL_DC", cassandra.getLocalDatacenter())
        .withEnv("CASSANDRA_USERNAME", cassandra.getUsername())
        .withEnv("CASSANDRA_PASSWORD", cassandra.getPassword())
        .dependsOn(cassandra, jaegerCassandraSchema)) {
      sparkDependenciesJob.start();
      await("spark-dependencies-job execution")
          .atMost(3, TimeUnit.MINUTES)
          .until(() -> !sparkDependenciesJob.isRunning());
    } finally {
      System.out.println("::endgroup::");
    }
  }
}


================================================
FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.cassandra;

import static org.awaitility.Awaitility.await;

import com.datastax.oss.driver.api.core.CqlSession;
import io.jaegertracing.spark.dependencies.LogToConsolePrinter;
import io.jaegertracing.spark.dependencies.test.DependenciesTest;
import java.time.LocalDate;
import java.util.Collections;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.testcontainers.cassandra.CassandraContainer;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.Network;
import org.testcontainers.containers.wait.strategy.HttpWaitStrategy;

/**
 * @author Pavol Loffay
 */
public class CassandraDependenciesJobTest extends DependenciesTest {

  protected static Network network;
  protected static CassandraContainer cassandra;
  protected static GenericContainer jaegerAll;
  protected static GenericContainer jaegerCassandraSchema;
  private static int cassandraPort;

  /**
   * Starts the shared test environment, in this order: a Docker network, a Cassandra container,
   * the Jaeger Cassandra schema container (awaited until it stops running), and finally the
   * Jaeger container. The query and collector URLs are then derived from the Jaeger container's
   * mapped ports.
   */
  @BeforeClass
  public static void beforeClass() {
    System.out.println("=== Starting CassandraDependenciesJobTest setup ===");

    network = Network.newNetwork();
    System.out.println("Created network: " + network.getId());

    System.out.println("Starting Cassandra container (cassandra:4.1)...");
    // Reachable from other containers on the network under the alias "cassandra"
    cassandra = new CassandraContainer("cassandra:4.1")
        .withNetwork(network)
        .withNetworkAliases("cassandra")
        .withExposedPorts(9042);
    cassandra.start();
    // Host-side port, used by the in-process job to reach Cassandra
    cassandraPort = cassandra.getMappedPort(9042);
    System.out.println("Cassandra started. Mapped port: " + cassandraPort);

    System.out.println("Starting Jaeger Cassandra schema container (jaegertracing/jaeger-cassandra-schema:"
        + jaegerVersion() + ")...");
    jaegerCassandraSchema = new GenericContainer<>("jaegertracing/jaeger-cassandra-schema:" + jaegerVersion())
        .withLogConsumer(new LogToConsolePrinter("[jaeger-cassandra-schema] "))
        .withNetwork(network);
    jaegerCassandraSchema.start();
    System.out.println("Jaeger Cassandra schema container started, waiting for schema creation...");
    // Wait until schema is created: the schema container is considered done once it is no
    // longer running.
    await().until(() -> !jaegerCassandraSchema.isRunning());
    System.out.println("Jaeger Cassandra schema creation completed");

    System.out.println("Starting Jaeger v2 unified container (jaegertracing/jaeger:" + jaegerVersion() + ")...");
    // Readiness is an HTTP 2xx response on container port 16687
    jaegerAll = new GenericContainer<>("jaegertracing/jaeger:" + jaegerVersion())
        .withNetwork(network)
        .withClasspathResourceMapping("jaeger-v2-config-cassandra.yaml", "/etc/jaeger/config.yaml",
            org.testcontainers.containers.BindMode.READ_ONLY)
        .withCommand("--config", "/etc/jaeger/config.yaml")
        .waitingFor(new BoundPortHttpWaitStrategy(16687)
            .forStatusCodeMatching(statusCode -> statusCode >= 200 && statusCode < 300))
        .withExposedPorts(16687, 16686, 4317, 4318, 14268, 9411);
    jaegerAll.start();
    System.out.println("Jaeger v2 container started");

    // Query URL uses container port 16686, collector URL uses container port 4317
    queryUrl = String.format("http://127.0.0.1:%d", jaegerAll.getMappedPort(16686));
    collectorUrl = String.format("http://127.0.0.1:%d", jaegerAll.getMappedPort(4317));

    System.out.println("=== Container setup complete ===");
    System.out.println("Query URL: " + queryUrl);
    System.out.println("Collector URL: " + collectorUrl);
    System.out.println("Health check port: " + jaegerAll.getMappedPort(16687));
  }

  /**
   * Closes the containers started in {@code beforeClass}.
   *
   * <p>A container field is still null when setup failed before reaching it, so each one is
   * wrapped with {@link Optional#ofNullable(Object)}; {@code Optional.of} would throw a
   * {@link NullPointerException} here and mask the original setup failure.
   */
  @AfterClass
  public static void afterClass() {
    Optional.ofNullable(cassandra).ifPresent(GenericContainer::close);
    Optional.ofNullable(jaegerAll).ifPresent(GenericContainer::close);
    Optional.ofNullable(jaegerCassandraSchema).ifPresent(GenericContainer::close);
  }

  /**
   * Empties the {@code traces} table and the active dependencies table after each test,
   * using a short-lived session against the Cassandra container.
   */
  @After
  public void after() {
    try (CqlSession cql = CqlSession.builder()
        .addContactPoint(cassandra.getContactPoint())
        .withLocalDatacenter(cassandra.getLocalDatacenter())
        .build()) {
      cql.execute("TRUNCATE jaeger_v1_dc1.traces");
      // Probed only after the traces table has been truncated
      final String depsTable = dependenciesTable(cql);
      final String truncateDeps = String.format("TRUNCATE jaeger_v1_dc1.%s", depsTable);
      cql.execute(truncateDeps);
    }
  }

  /**
   * Returns {@code dependencies_v2} when a one-row select against it succeeds, and
   * {@code dependencies} when that select throws.
   */
  private String dependenciesTable(CqlSession session) {
    String tableName = "dependencies_v2";
    try {
      session.execute("SELECT ts from jaeger_v1_dc1.dependencies_v2 limit 1;");
    } catch (Exception probeFailure) {
      tableName = "dependencies";
    }
    return tableName;
  }

  /**
   * Runs the job in-process against the Cassandra container's mapped port for the current
   * date. Output is wrapped in {@code ::group::} / {@code ::endgroup::} markers.
   */
  @Override
  protected void deriveDependencies() {
    System.out.println("::group::🚧 🚧 🚧 CassandraDependenciesJob logs");
    try {
      final CassandraDependenciesJob.Builder jobBuilder = CassandraDependenciesJob.builder();
      jobBuilder.contactPoints("localhost:" + cassandraPort);
      jobBuilder.day(LocalDate.now());
      jobBuilder.keyspace("jaeger_v1_dc1");
      jobBuilder.username(cassandra.getUsername());
      jobBuilder.password(cassandra.getPassword());

      final CassandraDependenciesJob job = jobBuilder.build();
      job.run("peer.service");
    } finally {
      System.out.println("::endgroup::");
    }
  }

  /** Pauses for one second between traces. */
  @Override
  protected void waitBetweenTraces() throws InterruptedException {
    // TODO otherwise it sometimes fails
    final long pauseSeconds = 1;
    TimeUnit.SECONDS.sleep(pauseSeconds);
  }

  /**
   * HTTP wait strategy whose liveness check uses only the mapped port of one fixed
   * container port.
   */
  public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy {
    private final int port;

    /**
     * @param port container port whose mapped port is used for the liveness check
     */
    public BoundPortHttpWaitStrategy(int port) {
      this.port = port;
    }

    /** Returns a singleton set holding the mapped port for the configured container port. */
    @Override
    protected Set<Integer> getLivenessCheckPorts() {
      final Integer hostPort = this.waitStrategyTarget.getMappedPort(this.port);
      return Collections.singleton(hostPort);
    }
  }
}


================================================
FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/JaegerTestDriverContainer.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.cassandra;

import java.net.ConnectException;
import java.time.Duration;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.rnorth.ducttape.unreliables.Unreliables;
import org.testcontainers.containers.ContainerLaunchException;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.traits.LinkableContainer;

/**
 * Container that counts as started once an HTTP {@code HEAD} request to the host port mapped
 * to container port 8080 answers with status 200.
 *
 * @author Pavol Loffay
 */
public class JaegerTestDriverContainer extends GenericContainer<JaegerTestDriverContainer>
    implements LinkableContainer {
  protected final OkHttpClient okHttpClient = new OkHttpClient.Builder().build();
  protected final Duration waitUntilReady;

  /**
   * Creates a container that waits up to one minute for readiness.
   *
   * @param dockerImageName image to run
   */
  public JaegerTestDriverContainer(String dockerImageName) {
    this(dockerImageName, Duration.ofMinutes(1));
  }

  /**
   * @param dockerImageName image to run
   * @param waitUntilReady maximum time to keep polling the status URL
   */
  public JaegerTestDriverContainer(String dockerImageName, Duration waitUntilReady) {
    super(dockerImageName);
    this.waitUntilReady = waitUntilReady;
  }

  /**
   * Polls the status URL until {@link #containerStartedCondition(String)} yields {@code true}
   * or {@link #waitUntilReady} has elapsed.
   */
  @Override
  protected void waitUntilContainerStarted() {
    String statusUrl = String.format("http://localhost:%d/", this.getMappedPort(8080));
    int timeoutMillis = (int) waitUntilReady.toMillis();
    Unreliables.retryUntilTrue(timeoutMillis, TimeUnit.MILLISECONDS, containerStartedCondition(statusUrl));
  }

  /**
   * Builds the readiness probe.
   *
   * @param statusUrl URL that receives the {@code HEAD} request
   * @return callable that throws {@link ContainerLaunchException} when the container is not
   *     running, returns {@code false} when the connection is refused, and otherwise reports
   *     whether the response status was 200
   */
  protected Callable<Boolean> containerStartedCondition(String statusUrl) {
    return () -> {
      boolean running = isRunning();
      if (!running) {
        throw new ContainerLaunchException("Container failed to start");
      }

      Request headRequest = new Request.Builder().url(statusUrl).head().build();
      try (Response response = okHttpClient.newCall(headRequest).execute()) {
        return response.code() == 200;
      } catch (ConnectException notListeningYet) {
        return false;
      }
    };
  }
}


================================================
FILE: jaeger-spark-dependencies-cassandra/src/test/resources/jaeger-v2-config-cassandra.yaml
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Service wiring: enabled extensions and the single traces pipeline
# (receivers -> processors -> exporter), all of which are defined further below.
service:
  extensions: [jaeger_storage, jaeger_query, healthcheckv2]
  pipelines:
    traces:
      receivers: [otlp, jaeger, zipkin]
      processors: [filter/jaeger, batch]
      exporters: [jaeger_storage_exporter]
  # Telemetry settings for this service instance.
  telemetry:
    resource:
      service.name: jaeger-backend
    metrics:
      level: detailed
      readers:
        # Pull-based Prometheus metrics reader on 0.0.0.0:8888.
        - pull:
            exporter:
              prometheus:
                host: 0.0.0.0
                port: 8888
    logs:
      level: info
    traces:
      level: none

extensions:
  # HTTP health/status endpoint served at path "/" on port 16687.
  healthcheckv2:
    use_v2: true
    http:
      endpoint: "0.0.0.0:16687"
      status:
        enabled: true
        path: "/"

  # Query extension reads traces from the backend named "some_storage".
  jaeger_query:
    storage:
      traces: some_storage

  # Storage backends; "some_storage" is Cassandra at cassandra:9042 with keyspace jaeger_v1_dc1.
  jaeger_storage:
    backends:
      some_storage:
        cassandra:
          schema:
            keyspace: "jaeger_v1_dc1"
          connection:
            servers: ["cassandra:9042"]
            # NOTE(review): presumably disables TLS for the Cassandra connection - confirm.
            tls:
              insecure: true

receivers:
  # OTLP over gRPC (4317) and HTTP (4318).
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:4318"

  # Jaeger protocols; only thrift_http has an explicit endpoint here, the others carry no settings.
  jaeger:
    protocols:
      grpc:
      thrift_binary:
      thrift_compact:
      thrift_http:
        endpoint: "0.0.0.0:14268"

  zipkin:
    endpoint: "0.0.0.0:9411"

processors:
  # NOTE(review): presumably drops spans whose resource service.name is "jaeger"
  # (matching spans are filtered out) - confirm against the filter processor docs.
  filter/jaeger:
    error_mode: ignore
    traces:
      span:
        - 'resource.attributes["service.name"] == "jaeger"'
  
  # Batch processor without explicit settings.
  batch:

exporters:
  # Writes traces to the "some_storage" backend declared under extensions.jaeger_storage.
  jaeger_storage_exporter:
    trace_storage: some_storage


================================================
FILE: jaeger-spark-dependencies-cassandra/src/test/resources/log4j.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Set everything to be logged to the console
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR

# Log levels for project code and test-infrastructure libraries
# (Testcontainers, Apache HTTP client, docker-java, zt-exec)
log4j.logger.io.jaegertracing=WARN
log4j.logger.org.testcontainers=INFO
log4j.logger.org.apache.http=WARN
log4j.logger.com.github.dockerjava=WARN
log4j.logger.org.zeroturnaround.exec=WARN


================================================
FILE: jaeger-spark-dependencies-common/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--

    Copyright (c) The Jaeger Authors
    SPDX-License-Identifier: Apache-2.0

-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <artifactId>jaeger-spark-dependencies-parent</artifactId>
    <groupId>io.jaegertracing.dependencies</groupId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>

  <artifactId>jaeger-spark-dependencies-common</artifactId>

  <dependencies>
    <!-- OpenTracing API. The version comes from the version.io.opentracing property, which is
         not defined in this POM (presumably inherited from the parent POM - confirm). -->
    <dependency>
      <groupId>io.opentracing</groupId>
      <artifactId>opentracing-api</artifactId>
      <version>${version.io.opentracing}</version>
    </dependency>
  </dependencies>
</project>


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkHelper.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies;

import io.jaegertracing.spark.dependencies.model.Dependency;
import io.jaegertracing.spark.dependencies.model.Span;
import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import scala.Tuple2;

/**
 * Static helper that aggregates per-trace dependency links into one list.
 *
 * @author Pavol Loffay
 */
public class DependenciesSparkHelper {
  private DependenciesSparkHelper() {}

  /**
   * Derives dependency links from the supplied spans (e.g. multiple traces). A link A-&gt;B that
   * occurs several times, in one or in many traces, is returned as a single {@link Dependency}
   * whose call count is the sum of all occurrences.
   * Note that the RDD is grouped by trace id, so a span that references spans from a different
   * trace does not produce a correct result.
   *
   * @param traceIdSpans {@code <traceId, trace>} pairs: a trace id and the collection of spans
   *                     with that trace id.
   * @param peerServiceTag tag key handed to {@link SpansToDependencyLinks}; its value names the
   *                       called service for spans that have no children.
   * @return Aggregated dependency links for all traces.
   */
  public static List<Dependency> derive(JavaPairRDD<String, Iterable<Span>> traceIdSpans,String peerServiceTag) {
    // One entry per derived link, still keyed by trace id and containing duplicates.
    JavaPairRDD<String, Dependency> linksByTrace =
        traceIdSpans.flatMapValues(new SpansToDependencyLinks(peerServiceTag));

    // Re-key every link by its (parent, child) pair so equal edges meet in the reduce step.
    JavaPairRDD<Tuple2<String, String>, Dependency> linksByEdge = linksByTrace
        .values()
        .mapToPair(link -> new Tuple2<>(new Tuple2<>(link.getParent(), link.getChild()), link));

    return linksByEdge
        .reduceByKey((left, right) ->
            new Dependency(left.getParent(), left.getChild(), left.getCallCount() + right.getCallCount()))
        .values()
        .collect();
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinks.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies;

import io.jaegertracing.spark.dependencies.model.Dependency;
import io.jaegertracing.spark.dependencies.model.KeyValue;
import io.jaegertracing.spark.dependencies.model.Reference;
import io.jaegertracing.spark.dependencies.model.Span;
import io.opentracing.tag.Tags;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import org.apache.spark.api.java.function.FlatMapFunction;

/**
 * Spark function that turns all spans of one trace into service dependency links.
 *
 * @author Pavol Loffay
 */
public class SpansToDependencyLinks implements FlatMapFunction<Iterable<Span>, Dependency>{

    /**
     * Key of the span tag whose value names the service called by a span that has no children.
     * It is used to add links to remote components that are not instrumented.
     */
    public String peerServiceTag = "";

    public SpansToDependencyLinks(String peerServiceTag){
        this.peerServiceTag = peerServiceTag;
    }

    /**
     * Derives dependency links based on supplied spans.
     *
     * @param trace spans of one trace
     * @return iterator over the dependency links, note that it contains duplicates
     */
    @Override
    public java.util.Iterator<Dependency> call(Iterable<Span> trace) {
        Set<Span> uniqueSpans = new LinkedHashSet<>();
        for (Span span : trace) {
            uniqueSpans.add(span);
        }

        // spanMap: span id -> spans carrying that id (more than one means a zipkin shared span)
        // spanChildrenMap: span id -> spans that reference it
        Map<Long, Set<Span>> spanMap = new LinkedHashMap<>();
        Map<Long, Set<Span>> spanChildrenMap = new LinkedHashMap<>();
        for (Span span : uniqueSpans) {
            // Map of children; refs may be null (the loop below already allows for that)
            if (span.getRefs() != null) {
                for (Reference ref : span.getRefs()) {
                    spanChildrenMap.computeIfAbsent(ref.getSpanId(), id -> new LinkedHashSet<>()).add(span);
                }
            }
            // Map of parents
            spanMap.computeIfAbsent(span.getSpanId(), id -> new LinkedHashSet<>()).add(span);
        }

        // Let's start with zipkin shared spans
        List<Dependency> result = sharedSpanDependencies(spanMap);

        for (Span span : uniqueSpans) {
            if (span.getRefs() == null || span.getRefs().isEmpty() || !hasServiceName(span)) {
                continue;
            }

            // if the current span is shared and not a client span we skip it
            // because the link from this span to parent should be from client span
            if (spanMap.get(span.getSpanId()).size() > 1 && !isClientSpan(span)) {
                continue;
            }

            String childService = span.getProcess().getServiceName();
            for (Reference reference: span.getRefs()) {
                Set<Span> parents = spanMap.get(reference.getSpanId());
                if (parents == null) {
                    continue;
                }
                if (parents.size() > 1) {
                    // shared parent: link from its server side, provided it names a service
                    serverSpan(parents)
                        .filter(SpansToDependencyLinks::hasServiceName)
                        .ifPresent(parent ->
                            result.add(new Dependency(parent.getProcess().getServiceName(), childService))
                        );
                } else {
                    // this is jaeger span or zipkin native (not shared!)
                    Span parent = parents.iterator().next();
                    if (hasServiceName(parent)) {
                        result.add(new Dependency(parent.getProcess().getServiceName(), childService));
                    }
                }
            }
            // We are on a leaf so we try to add a dependency for calls to remote components that are not instrumented
            if (spanChildrenMap.get(span.getSpanId()) == null) {
                String targetName = tagValue(span, peerServiceTag);
                if (targetName != null) {
                    result.add(new Dependency(childService, targetName));
                }
            }
        }
        return result.iterator();
    }

    /**
     * Returns the first span of the set that is tagged as a server span, if any.
     */
    static Optional<Span> serverSpan(Set<Span> sharedSpans) {
        for (Span span: sharedSpans) {
            if (isServerSpan(span)) {
                return Optional.of(span);
            }
        }

        return Optional.empty();
    }

    static boolean isClientSpan(Span span) {
        return Tags.SPAN_KIND_CLIENT.equals(tagValue(span, Tags.SPAN_KIND.getKey()));
    }

    static boolean isServerSpan(Span span) {
        return Tags.SPAN_KIND_SERVER.equals(tagValue(span, Tags.SPAN_KIND.getKey()));
    }

    /** Tells whether the span has a process with a non-null service name. */
    private static boolean hasServiceName(Span span) {
        return span.getProcess() != null && span.getProcess().getServiceName() != null;
    }

    /** Looks up a tag value, treating a span without a tag list as having no tags. */
    private static String tagValue(Span span, String key) {
        return span.getTags() == null ? null : span.getTag(key);
    }

    private List<Dependency> sharedSpanDependencies(Map<Long, Set<Span>> spanMap) {
        List<Dependency> dependencies = new ArrayList<>();
        // create links between shared spans
        for (Set<Span> sharedSpans: spanMap.values()) {
            sharedSpanDependency(sharedSpans)
                .ifPresent(dependencies::add);
        }
        return dependencies;
    }

    /**
     * Builds the client -> server link for a group of spans sharing one span id.
     *
     * @param sharedSpans spans that carry the same span id
     * @return the link once both a client/producer side and a server/consumer side with a
     *     service name have been seen, otherwise empty
     */
    protected Optional<Dependency> sharedSpanDependency(Set<Span> sharedSpans) {
        String clientService = null;
        String serverService = null;
        for (Span span: sharedSpans) {
            // a span without tags or process cannot contribute either side of the link
            if (span.getTags() == null || span.getProcess() == null) {
                continue;
            }
            for (KeyValue tag: span.getTags()) {
                // Only the tag value is inspected here, not the tag key.
                if (Tags.SPAN_KIND_CLIENT.equals(tag.getValueString()) || Tags.SPAN_KIND_PRODUCER.equals(tag.getValueString())) {
                    clientService = span.getProcess().getServiceName();
                } else if (Tags.SPAN_KIND_SERVER.equals(tag.getValueString()) || Tags.SPAN_KIND_CONSUMER.equals(tag.getValueString())) {
                    serverService = span.getProcess().getServiceName();
                }

                if (clientService != null && serverService != null) {
                    return Optional.of(new Dependency(clientService, serverService));
                }
            }
        }
        return Optional.empty();
    }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/Utils.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies;

import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;

/**
 * Small static helpers shared by the dependency jobs.
 *
 * @author Pavol Loffay
 */
public class Utils {
  private Utils() {}

  /**
   * Reads an environment variable.
   *
   * @param key name of the environment variable
   * @param defaultValue value returned when the variable is not set
   * @return the variable's value, or {@code defaultValue} when it is not set
   */
  public static String getEnv(String key, String defaultValue) {
    String value = System.getenv(key);
    if (value == null) {
      return defaultValue;
    }
    return value;
  }

  /**
   * Fails when {@code object} is null.
   *
   * @param msg name used in the exception message
   * @param object value to check
   * @throws NullPointerException with message {@code "<msg> is null"} when {@code object} is null
   */
  public static void checkNoTNull(String msg, Object object) {
    if (object != null) {
      return;
    }
    throw new NullPointerException(String.format("%s is null", msg));
  }

  /**
   * Returns the path to the uber jar containing the given class, i.e. the URL-decoded path
   * of the class's code source location.
   * This is used to distribute the jar to Spark workers.
   */
  public static String pathToUberJar(Class<?> clazz) throws UnsupportedEncodingException {
    String encodedPath = clazz.getProtectionDomain().getCodeSource().getLocation().getPath();
    return URLDecoder.decode(encodedPath, "UTF-8");
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/JsonHelper.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.jaegertracing.spark.dependencies.model.KeyValue;
import io.jaegertracing.spark.dependencies.model.Reference;
import io.jaegertracing.spark.dependencies.model.Span;

/**
 * Central place for the Jackson configuration needed to read the span model.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class JsonHelper {

  private JsonHelper() {
  }

  /**
   * Registers the mix-ins for {@link Span}, {@link KeyValue} and {@link Reference} on the given
   * mapper and switches off failing on unknown properties.
   *
   * @param objectMapper mapper to configure; it is modified in place
   * @return the same mapper instance
   */
  public static ObjectMapper configure(ObjectMapper objectMapper) {
    objectMapper
        .addMixIn(Span.class, SpanMixin.class)
        .addMixIn(KeyValue.class, KeyValueMixin.class)
        .addMixIn(Reference.class, ReferenceMixin.class)
        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    return objectMapper;
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueDeserializer.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import io.jaegertracing.spark.dependencies.model.KeyValue;
import java.io.IOException;

/**
 * Jackson deserializer that builds a {@link KeyValue} from a JSON object with the fields
 * {@code key}, {@code type} and (optionally) {@code value}.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class KeyValueDeserializer extends StdDeserializer<KeyValue> {

  // TODO Spark incorrectly serializes object mapper, therefore reinitializing
  // here
  private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper());

  public KeyValueDeserializer() {
    super(KeyValue.class);
  }

  /**
   * Reads one tag object. The value is copied only when the type is {@code string}
   * (case-insensitive) and a {@code value} field is present.
   *
   * @return key/value with key and value type always set, value string possibly null
   */
  @Override
  public KeyValue deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
    JsonNode node = objectMapper.getFactory().setCodec(objectMapper).getCodec().readTree(jp);

    String tagKey = node.get("key").asText();
    String tagType = node.get("type").asText();

    KeyValue result = new KeyValue();
    result.setKey(tagKey);
    result.setValueType(tagType);

    // TODO: KeyValue model only supports string value for now, other types are ignored
    JsonNode valueNode = "string".equalsIgnoreCase(tagType) ? node.get("value") : null;
    if (valueNode != null) {
      result.setValueString(valueNode.asText());
    }
    return result;
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueMixin.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.databind.annotation.JsonDeserialize;

/**
 * Jackson mix-in that selects {@link KeyValueDeserializer} as the deserializer; it carries
 * no members of its own.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
@JsonDeserialize(using = KeyValueDeserializer.class)
public class KeyValueMixin {
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceDeserializer.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import io.jaegertracing.spark.dependencies.model.Reference;
import java.io.IOException;
import java.math.BigInteger;

/**
 * Jackson deserializer that builds a {@link Reference} from a JSON object with a hexadecimal
 * {@code spanID} field.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class ReferenceDeserializer extends StdDeserializer<Reference> {

  private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper());

  protected ReferenceDeserializer() {
    super(Reference.class);
  }

  /**
   * Parses the {@code spanID} field as a base-16 number and stores its low 64 bits as the
   * referenced span id.
   */
  @Override
  public Reference deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
    JsonNode node = objectMapper.getFactory().setCodec(objectMapper).getCodec().readTree(jp);

    long referencedSpanId = new BigInteger(node.get("spanID").asText(), 16).longValue();

    Reference result = new Reference();
    result.setSpanId(referencedSpanId);
    return result;
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceMixin.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.databind.annotation.JsonDeserialize;

/**
 * Jackson mix-in that selects {@link ReferenceDeserializer} as the deserializer; it carries
 * no members of its own.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
@JsonDeserialize(using = ReferenceDeserializer.class)
public class ReferenceMixin {
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanDeserializer.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import io.jaegertracing.spark.dependencies.model.KeyValue;
import io.jaegertracing.spark.dependencies.model.Process;
import io.jaegertracing.spark.dependencies.model.Reference;
import io.jaegertracing.spark.dependencies.model.Span;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Jackson deserializer that builds a {@link Span} from its JSON document.
 *
 * <p>Fields read: {@code spanID} and {@code traceID} (required), {@code startTime},
 * {@code process}, {@code tags} (array of key/value objects), {@code tag} (object whose
 * entries are appended as additional tags), {@code parentSpanID} and {@code references}.
 * Optional fields may be absent or JSON {@code null}.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class SpanDeserializer extends StdDeserializer<Span> {

  // TODO Spark incorrectly serializes object mapper, therefore reinitializing
  // here
  private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper());

  public SpanDeserializer() {
    super(Span.class);
  }

  /**
   * @throws JsonProcessingException when {@code spanID} or {@code traceID} is missing or null
   * @throws NumberFormatException when an id is not valid hexadecimal or {@code startTime} is
   *     not a valid long
   */
  @Override
  public Span deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
    JsonNode node = objectMapper.getFactory().setCodec(objectMapper).getCodec().readTree(jp);

    JsonNode spanIdNode = node.get("spanID");
    JsonNode traceIdNode = node.get("traceID");
    JsonNode startTimeNode = node.get("startTime");

    if (!isPresent(spanIdNode) || !isPresent(traceIdNode)) {
      // JsonProcessingException only has protected constructors, hence the anonymous subclass
      throw new JsonProcessingException("Missing required fields: spanID or traceID") {
      };
    }

    String spanIdHex = spanIdNode.asText();
    String traceIdHex = traceIdNode.asText();
    String startTimeStr = isPresent(startTimeNode) ? startTimeNode.asText() : null;

    JsonNode processNode = node.get("process");
    Process process = objectMapper.treeToValue(processNode, Process.class);

    // A span without a "tags" array simply has no tags
    List<KeyValue> tags = new ArrayList<>();
    JsonNode tagsNode = node.get("tags");
    if (isPresent(tagsNode)) {
      tags.addAll(Arrays.asList(objectMapper.treeToValue(tagsNode, KeyValue[].class)));
    }

    JsonNode tagFieldsNode = node.get("tag");
    if (isPresent(tagFieldsNode)) {
      @SuppressWarnings("unchecked")
      Map<String, Object> tagFields = objectMapper.treeToValue(tagFieldsNode, Map.class);
      tags = addTagFields(tags, tagFields);
    }

    Span span = new Span();
    span.setSpanId(new BigInteger(spanIdHex, 16).longValue());
    span.setTraceId(traceIdHex);
    span.setRefs(deserializeReferences(node));
    span.setStartTime(startTimeStr != null ? Long.parseLong(startTimeStr) : null);
    span.setProcess(process);
    span.setTags(tags);
    return span;
  }

  /** Tells whether a field exists and is not JSON {@code null}. */
  private static boolean isPresent(JsonNode node) {
    return node != null && !node.isNull();
  }

  /**
   * Returns a new list holding {@code tags} followed by one string tag per {@code tagFields}
   * entry. A null entry value yields a tag with a null value string.
   */
  private List<KeyValue> addTagFields(List<KeyValue> tags, Map<String, Object> tagFields) {
    List<KeyValue> result = new ArrayList<>(tags.size() + tagFields.size());
    result.addAll(tags);
    for (Map.Entry<String, Object> field : tagFields.entrySet()) {
      Object value = field.getValue();
      KeyValue kv = new KeyValue();
      kv.setKey(field.getKey());
      kv.setValueString(value != null ? value.toString() : null);
      result.add(kv);
    }
    return result;
  }

  /**
   * Collects the references of a span: the {@code parentSpanID} field first (when present),
   * followed by the entries of the {@code references} array (when present).
   */
  private List<Reference> deserializeReferences(JsonNode node) throws JsonProcessingException {
    List<Reference> references = new ArrayList<>();
    JsonNode parentSpanID = node.get("parentSpanID");
    if (isPresent(parentSpanID)) {
      Reference reference = new Reference();
      reference.setSpanId(new BigInteger(parentSpanID.asText(), 16).longValue());
      references.add(reference);
    }

    // node.get() returns Java null for an absent field, so check that before isNull()
    JsonNode referencesNode = node.get("references");
    if (isPresent(referencesNode)) {
      Reference[] referencesArr = objectMapper.treeToValue(referencesNode, Reference[].class);
      references.addAll(Arrays.asList(referencesArr));
    }

    return references;
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanMixin.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.json;

import com.fasterxml.jackson.databind.annotation.JsonDeserialize;

/**
 * Jackson mix-in that selects {@link SpanDeserializer} as the deserializer; it carries
 * no members of its own.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
@JsonDeserialize(using = SpanDeserializer.class)
public class SpanMixin {
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Dependency.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.model;

import java.io.Serializable;

/**
 * Immutable dependency link between a parent (calling) and a child (called) service together
 * with the number of calls observed.
 *
 * @author Pavol Loffay
 */
public class Dependency implements Serializable {
  private static final long serialVersionUID = 0L;

  private final String parent;
  private final String child;
  private final long callCount;

  /**
   * Creates a link with a call count of 1.
   */
  public Dependency(String parent, String child) {
    this(parent, child, 1);
  }

  /**
   * @param parent name of the calling service
   * @param child name of the called service
   * @param callCount number of calls from parent to child
   */
  public Dependency(String parent, String child, long callCount) {
    this.parent = parent;
    this.child = child;
    this.callCount = callCount;
  }

  public String getParent() {
    return parent;
  }

  public String getChild() {
    return child;
  }

  public long getCallCount() {
    return callCount;
  }

  /**
   * Two dependencies are equal when parent, child and call count all match.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof Dependency)) {
      return false;
    }

    Dependency that = (Dependency) o;
    return this.parent.equals(that.parent)
        && this.child.equals(that.child)
        && this.callCount == that.callCount;
  }

  /**
   * @return always {@code "jaeger"}
   */
  public String getSource() {
    return "jaeger";
  }

  /**
   * Intentionally ignores its argument; the source is fixed.
   */
  public void setSource(String source) {
  }

  /**
   * Combines parent, child and call count. The call count is folded in on its own: the
   * previous form {@code h ^= (int) (h ^ x)} reduced to {@code h = (int) x} and thereby
   * discarded the parent and child contributions.
   */
  @Override
  public int hashCode() {
    int h = 1;
    h *= 1000003;
    h ^= parent.hashCode();
    h *= 1000003;
    h ^= child.hashCode();
    h *= 1000003;
    h ^= Long.hashCode(callCount);
    h *= 1000003;
    return h;
  }

  @Override
  public String toString() {
    return "Dependency{" +
            "parent='" + parent + '\'' +
            ", child='" + child + '\'' +
            ", callCount=" + callCount +
            '}';
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/KeyValue.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.model;

import java.io.Serializable;

/**
 * @author Pavol Loffay
 */
public class KeyValue implements Serializable {
  private static final long serialVersionUID = 0L;

  private String key;
  private String valueType;

  // TODO there are more types: double, long, binary, not needed at the moment
  private String valueString;

  public String getKey() {
    return key;
  }

  public void setKey(String key) {
    this.key = key;
  }

  public String getValueString() {
    return valueString;
  }

  public void setValueString(String valueString) {
    this.valueString = valueString;
  }

  public String getValueType() {
    return valueType;
  }

  public void setValueType(String valueType) {
    this.valueType = valueType;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    KeyValue keyValue = (KeyValue) o;

    if (key != null ? !key.equals(keyValue.key) : keyValue.key != null) {
      return false;
    }
    if (valueType != null ? !valueType.equals(keyValue.valueType) : keyValue.valueType != null) {
      return false;
    }
    return valueString != null ? valueString.equals(keyValue.valueString) : keyValue.valueString == null;
  }

  @Override
  public int hashCode() {
    int result = key != null ? key.hashCode() : 0;
    result = 31 * result + (valueType != null ? valueType.hashCode() : 0);
    result = 31 * result + (valueString != null ? valueString.hashCode() : 0);
    return result;
  }
}

================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Process.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.model;

import java.io.Serializable;

/**
 * Process that emitted a span; only its service name is modelled.
 *
 * @author Pavol Loffay
 */
public class Process implements Serializable {
  private static final long serialVersionUID = 0L;

  private String serviceName;

  public String getServiceName() {
    return serviceName;
  }

  public void setServiceName(String serviceName) {
    this.serviceName = serviceName;
  }

  /**
   * Equal to any {@code Process} (including subclasses) with the same service name; two null
   * service names compare equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof Process)) {
      return false;
    }
    return java.util.Objects.equals(serviceName, ((Process) o).serviceName);
  }

  /**
   * Hash of the service name, or 0 when it is null.
   */
  @Override
  public int hashCode() {
    return java.util.Objects.hashCode(serviceName);
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Reference.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.model;

import java.io.Serializable;

/**
 * @author Pavol Loffay
 */
public class Reference implements Serializable {
  private static final long serialVersionUID = 0L;

  private Long spanId;

  public Long getSpanId() {
    return spanId;
  }

  public void setSpanId(Long spanId) {
    this.spanId = spanId;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    Reference reference = (Reference) o;

    return spanId != null ? spanId.equals(reference.spanId) : reference.spanId == null;
  }

  @Override
  public int hashCode() {
    return spanId != null ? spanId.hashCode() : 0;
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Span.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.model;

import java.io.Serializable;
import java.util.List;

/**
 * Serializable span model holding a trace id, span id, start time, process,
 * tags and references. Equality and hash code take all six fields into account.
 *
 * @author Pavol Loffay
 */
public class Span implements Serializable {

  private static final long serialVersionUID = 0L;

  private String traceId;
  private Long spanId;

  private Long startTime;
  private Process process;
  private List<KeyValue> tags;
  private List<Reference> refs;

  /**
   * @return the trace id, or {@code null} if none has been set
   */
  public String getTraceId() {
    return traceId;
  }

  public void setTraceId(String traceId) {
    this.traceId = traceId;
  }

  /**
   * @return the span id, or {@code null} if none has been set
   */
  public Long getSpanId() {
    return spanId;
  }

  public void setSpanId(Long spanId) {
    this.spanId = spanId;
  }

  /**
   * Returns the start time as a primitive.
   *
   * <p>The value is stored boxed and unboxed here, so calling this method
   * before a start time has been set throws {@link NullPointerException}.
   *
   * @return the start time
   */
  public long getStartTime() {
    return startTime;
  }

  public void setStartTime(Long startTime) {
    this.startTime = startTime;
  }

  /**
   * @return the process, or {@code null} if none has been set
   */
  public Process getProcess() {
    return process;
  }

  public void setProcess(Process process) {
    this.process = process;
  }

  /**
   * @return the tag list, or {@code null} if none has been set
   */
  public List<KeyValue> getTags() {
    return tags;
  }

  /**
   * Looks up the string value of the first tag whose key equals {@code key}.
   *
   * <p>The lookup is null-safe: a span without tags, a {@code null} entry in
   * the tag list, or a tag without a key simply does not match.
   *
   * @param key the tag key to search for
   * @return the string value of the first matching tag, or {@code null} when
   *         there is no match (or the matching tag has no string value)
   */
  public String getTag(String key){
    // Spans can be created without any tags, in which case the list stays null.
    if (tags == null) {
      return null;
    }
    for (KeyValue kv : tags){
      if (kv != null && kv.getKey() != null && kv.getKey().equals(key)){
        return kv.getValueString();
      }
    }
    return null;
  }

  public void setTags(List<KeyValue> tags) {
    this.tags = tags;
  }

  /**
   * @return the reference list, or {@code null} if none has been set
   */
  public List<Reference> getRefs() {
    return refs;
  }

  public void setRefs(List<Reference> refs) {
    this.refs = refs;
  }

  /**
   * Compares by exact class and by every field; {@code null} fields are equal
   * only to {@code null} fields.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    Span span = (Span) o;

    if (traceId != null ? !traceId.equals(span.traceId) : span.traceId != null) {
      return false;
    }
    if (spanId != null ? !spanId.equals(span.spanId) : span.spanId != null) {
      return false;
    }
    if (startTime != null ? !startTime.equals(span.startTime) : span.startTime != null) {
      return false;
    }
    if (process != null ? !process.equals(span.process) : span.process != null) {
      return false;
    }
    if (tags != null ? !tags.equals(span.tags) : span.tags != null) {
      return false;
    }
    return refs != null ? refs.equals(span.refs) : span.refs == null;
  }

  /**
   * Combines the hashes of all fields (a {@code null} field contributes 0),
   * consistent with {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    int result = traceId != null ? traceId.hashCode() : 0;
    result = 31 * result + (spanId != null ? spanId.hashCode() : 0);
    result = 31 * result + (startTime != null ? startTime.hashCode() : 0);
    result = 31 * result + (process != null ? process.hashCode() : 0);
    result = 31 * result + (tags != null ? tags.hashCode() : 0);
    result = 31 * result + (refs != null ? refs.hashCode() : 0);
    return result;
  }
}


================================================
FILE: jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinksTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import io.jaegertracing.spark.dependencies.model.Dependency;
import io.jaegertracing.spark.dependencies.model.KeyValue;
import io.jaegertracing.spark.dependencies.model.Process;
import io.jaegertracing.spark.dependencies.model.Span;
import io.opentracing.tag.Tags;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import org.junit.Test;

/**
 * Tests for {@code SpansToDependencyLinks.sharedSpanDependency} using pairs of
 * spans that differ in service name and {@code span.kind} tag.
 */
public class SpansToDependencyLinksTest {

    @Test
    public void shouldReturnDependencyWithClientAndServerSpans() {
        Optional<Dependency> link = sharedSpanDependencyOf(
                spanWithKind("clientName", Tags.SPAN_KIND_CLIENT),
                spanWithKind("serverName", Tags.SPAN_KIND_SERVER));

        assertTrue(link.isPresent());
        assertEquals(new Dependency("clientName", "serverName"), link.get());
    }

    @Test
    public void shouldReturnDependencyWithConsumerAndProducer() {
        Optional<Dependency> link = sharedSpanDependencyOf(
                spanWithKind("consumerName", Tags.SPAN_KIND_CONSUMER),
                spanWithKind("producerName", Tags.SPAN_KIND_PRODUCER));

        assertTrue(link.isPresent());
        assertEquals(new Dependency("producerName", "consumerName"), link.get());
    }

    @Test
    public void shouldReturnEmptyDependencyForSpansWithoutSpanKindDefinition() {
        Optional<Dependency> link = sharedSpanDependencyOf(
                spanWithKind("consumerName", "tag"),
                spanWithKind("producerName", "tag"));

        assertFalse(link.isPresent());
    }

    /** Puts both spans into one set and derives the shared-span dependency from it. */
    private static Optional<Dependency> sharedSpanDependencyOf(Span first, Span second) {
        Set<Span> sharedSpans = new HashSet<>();
        sharedSpans.add(first);
        sharedSpans.add(second);
        return new SpansToDependencyLinks("").sharedSpanDependency(sharedSpans);
    }

    /** Builds a span for the given service carrying a single {@code span.kind} tag. */
    private static Span spanWithKind(String serviceName, String kind) {
        KeyValue spanKind = new KeyValue();
        spanKind.setKey("span.kind");
        spanKind.setValueString(kind);

        List<KeyValue> tags = new ArrayList<>();
        tags.add(spanKind);

        Process process = new Process();
        process.setServiceName(serviceName);

        Span span = new Span();
        span.setProcess(process);
        span.setTags(tags);
        return span;
    }
}


================================================
FILE: jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/model/SpanTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.model;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;

import java.util.ArrayList;
import java.util.List;
import org.junit.Test;

/**
 * Verifies that {@code Span.equals} reacts to differences in process and tags.
 */
public class SpanTest {

    @Test
    public void testEquals() {
        Span first = newSpan("trace1", 1L, "service1");
        Span second = newSpan("trace1", 1L, "service1");

        assertEquals(first, second);

        // Same ids, different service name
        Span otherService = newSpan("trace1", 1L, "service2");

        assertNotEquals(first, otherService);

        // Identical tag lists keep the spans equal
        first.setTags(singleTag("key", "value1"));
        List<KeyValue> secondTags = singleTag("key", "value1");
        second.setTags(secondTags);

        assertEquals(first, second);

        // Changing one tag value breaks equality
        secondTags.get(0).setValueString("value2");
        assertNotEquals(first, second);
    }

    /** Creates a span with the given ids whose process carries the given service name. */
    private static Span newSpan(String traceId, Long spanId, String serviceName) {
        Process process = new Process();
        process.setServiceName(serviceName);

        Span span = new Span();
        span.setTraceId(traceId);
        span.setSpanId(spanId);
        span.setProcess(process);
        return span;
    }

    /** Creates a mutable one-element tag list. */
    private static List<KeyValue> singleTag(String key, String value) {
        KeyValue tag = new KeyValue();
        tag.setKey(key);
        tag.setValueString(value);

        List<KeyValue> tags = new ArrayList<>();
        tags.add(tag);
        return tags;
    }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--

    Copyright (c) The Jaeger Authors
    SPDX-License-Identifier: Apache-2.0

-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <artifactId>jaeger-spark-dependencies-parent</artifactId>
    <groupId>io.jaegertracing.dependencies</groupId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>

  <artifactId>jaeger-spark-dependencies-elasticsearch</artifactId>

  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-common</artifactId>
      <exclusions>
        <exclusion>
          <groupId>com.fasterxml.jackson.core</groupId>
          <artifactId>jackson-annotations</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.elasticsearch</groupId>
      <artifactId>elasticsearch-spark-30_${version.scala.binary}</artifactId>
      <version>${version.elasticsearch.spark}</version>
    </dependency>

    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-test</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.testcontainers</groupId>
      <artifactId>testcontainers</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.awaitility</groupId>
      <artifactId>awaitility</artifactId>
      <version>${version.org.awaitility-awaitility}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>io.opentelemetry</groupId>
      <artifactId>opentelemetry-api</artifactId>
      <version>${version.io.opentelemetry}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-shade-plugin</artifactId>
        <version>${version.maven-shade-plugin}</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>reference.conf</resource>
                </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJob</mainClass>
                </transformer>
              </transformers>
              <!--
                minimizeJar removes classes the shade plugin considers unused.
                Per the Maven Shade Plugin documentation, classes explicitly
                included by a filter are kept, which is what the include
                filters below rely on.
              -->
              <minimizeJar>true</minimizeJar>
              <filters>
                <filter>
                  <artifact>org.apache.hadoop:hadoop-common</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>log4j:log4j</artifact>
                  <includes>
                    <include>org/apache/log4j/spi/LoggingEvent.class</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.apache.logging.log4j:log4j-*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>io.netty:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.slf4j:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.scala-lang:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.apache.spark:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.lz4:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>org.elasticsearch:elasticsearch-spark-*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>commons-httpclient:commons-httpclient</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>xerces:xercesImpl</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep all okhttp3 classes to avoid NoSuchMethodError -->
                  <artifact>com.squareup.okhttp3:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <!-- Keep all okio classes to avoid NoSuchMethodError -->
                  <artifact>com.squareup.okio:*</artifact>
                  <includes>
                    <include>**</include>
                  </includes>
                </filter>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticTupleToSpan.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.elastic;

import com.fasterxml.jackson.databind.ObjectMapper;
import io.jaegertracing.spark.dependencies.json.JsonHelper;
import io.jaegertracing.spark.dependencies.model.Span;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;

/**
 * Spark function that turns an Elasticsearch tuple into a {@link Span}.
 *
 * <p>The second tuple element is parsed as the span JSON. The trace id is
 * normalized to at least 32 characters, and tags and references are sorted
 * (by key and by span id respectively).
 *
 * @author Pavol Loffay
 */
public class ElasticTupleToSpan implements Function<Tuple2<String, String>, Span> {

  private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper());

  /**
   * @param tuple tuple whose second element is the span JSON document
   * @return the parsed span with normalized trace id and sorted tags/refs
   * @throws Exception if the JSON cannot be read into a {@link Span}
   */
  @Override
  public Span call(Tuple2<String, String> tuple) throws Exception {
    String json = tuple._2();
    Span parsed = objectMapper.readValue(json, Span.class);

    parsed.setTraceId(normalizeTraceId(parsed.getTraceId()));

    if (parsed.getTags() != null) {
      parsed.getTags().sort((left, right) -> left.getKey().compareTo(right.getKey()));
    }
    if (parsed.getRefs() != null) {
      parsed.getRefs().sort((left, right) -> left.getSpanId().compareTo(right.getSpanId()));
    }

    return parsed;
  }

  /**
   * Pads trace ids shorter than 32 characters on the left. The padding is
   * produced with spaces and every space in the padded string is then
   * replaced by {@code '0'}. {@code null} and ids of 32 or more characters
   * are returned unchanged.
   */
  private String normalizeTraceId(String traceId) {
    if (traceId == null || traceId.length() >= 32) {
      return traceId;
    }
    return String.format("%32s", traceId).replace(' ', '0');
  }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJob.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * Copyright 2016-2017 The OpenZipkin Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.elastic;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.jaegertracing.spark.dependencies.DependenciesSparkHelper;
import io.jaegertracing.spark.dependencies.Utils;
import io.jaegertracing.spark.dependencies.model.Dependency;
import io.jaegertracing.spark.dependencies.model.Span;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.elasticsearch.hadoop.rest.RestClient;
import org.elasticsearch.hadoop.util.EsMajorVersion;
import org.elasticsearch.spark.cfg.SparkSettings;
import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author OpenZipkin authors
 * @author Pavol Loffay
 */
public class ElasticsearchDependenciesJob {
  private static final Logger log = LoggerFactory.getLogger(ElasticsearchDependenciesJob.class);
  private static final Pattern PORT_PATTERN = Pattern.compile(":\\d+");

  /**
   * Creates a new {@link Builder}; its defaults are read from environment
   * variables and system properties when the builder is instantiated.
   *
   * @return a fresh builder
   */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * Collects the settings for an {@link ElasticsearchDependenciesJob}.
   * Field defaults come from {@code ES_*} / {@code SPARK_MASTER} environment
   * variables; SSL store settings come from the {@code javax.net.ssl.*}
   * system properties.
   */
  public static final class Builder {

    String hosts = Utils.getEnv("ES_NODES", "127.0.0.1");
    String username = Utils.getEnv("ES_USERNAME", null);
    String password = Utils.getEnv("ES_PASSWORD", null);
    Boolean clientNodeOnly = Boolean.parseBoolean(Utils.getEnv("ES_CLIENT_NODE_ONLY", "false"));
    Boolean nodesWanOnly = Boolean.parseBoolean(Utils.getEnv("ES_NODES_WAN_ONLY", "false"));
    String indexPrefix = Utils.getEnv("ES_INDEX_PREFIX", null);
    String indexDatePattern = datePattern(Utils.getEnv("ES_INDEX_DATE_SEPARATOR", "-"));
    String spanRange = Utils.getEnv("ES_TIME_RANGE", "24h");
    Boolean useAliases = Boolean.parseBoolean(Utils.getEnv("ES_USE_ALIASES", "false"));

    final Map<String, String> sparkProperties = new LinkedHashMap<>();

    // local[*] master lets us run & test the job locally without setting a Spark cluster
    String sparkMaster = Utils.getEnv("SPARK_MASTER", "local[*]");
    // needed when not in local mode
    String[] jars;

    // By default the job only works on traces whose first timestamp is today
    ZonedDateTime day = LocalDate.now().atStartOfDay(ZoneOffset.UTC);

    /** Pre-populates the Spark/ES properties that are always applied to the job configuration. */
    Builder() {
      sparkProperties.put("spark.ui.enabled", "false");
      // don't die if there are no spans
      sparkProperties.put("es.index.read.missing.as.empty", "true");

      String keyStore = getSystemPropertyAsFileResource("javax.net.ssl.keyStore");
      String keyStorePassword = System.getProperty("javax.net.ssl.keyStorePassword", "");
      sparkProperties.put("es.net.ssl.keystore.location", keyStore);
      sparkProperties.put("es.net.ssl.keystore.pass", keyStorePassword);

      String trustStore = getSystemPropertyAsFileResource("javax.net.ssl.trustStore");
      String trustStorePassword = System.getProperty("javax.net.ssl.trustStorePassword", "");
      sparkProperties.put("es.net.ssl.truststore.location", trustStore);
      sparkProperties.put("es.net.ssl.truststore.pass", trustStorePassword);
    }

    /** When set, this indicates which jars to distribute to the cluster. */
    public Builder jars(String... jars) {
      this.jars = jars;
      return this;
    }

    /**
     * es.nodes separated by ','. Setting nodes explicitly also switches
     * {@code nodesWanOnly} on.
     */
    public Builder nodes(String hosts) {
      Utils.checkNoTNull(hosts, "nodes");
      this.nodesWanOnly = true;
      this.hosts = hosts;
      return this;
    }

    /** username used for basic auth. Needed when Shield or X-Pack security is enabled */
    public Builder username(String username) {
      this.username = username;
      return this;
    }

    /** password used for basic auth. Needed when Shield or X-Pack security is enabled */
    public Builder password(String password) {
      this.password = password;
      return this;
    }

    /** index prefix for Jaeger indices. By default empty */
    public Builder indexPrefix(String indexPrefix) {
      this.indexPrefix = indexPrefix;
      return this;
    }

    /** index date pattern for Jaeger indices. By default yyyy-MM-dd */
    public Builder indexDatePattern(String indexDatePattern) {
      this.indexDatePattern = indexDatePattern;
      return this;
    }

    /** span range for Jaeger indices. By default 24h */
    public Builder spanRange(String spanRange) {
      this.spanRange = spanRange;
      return this;
    }

    /** Day to process dependencies for. Defaults to today. */
    public Builder day(LocalDate day) {
      this.day = day.atStartOfDay(ZoneOffset.UTC);
      return this;
    }

    /** Whether the connector is used against an Elasticsearch instance in a cloud/restricted
     *  environment over the WAN, such as Amazon Web Services. In this mode, the
     *  connector disables discovery and only connects through the declared es.nodes during all operations,
     *  including reads and writes. Note that in this mode, performance is highly affected. */
    public Builder nodesWanOnly(boolean wanOnly) {
      this.nodesWanOnly = wanOnly;
      return this;
    }

    /** Logs a warning when the hosts string contains no {@code :<digits>} port part. */
    private static void logIfNoPort(String hosts) {
      boolean portPresent = PORT_PATTERN.matcher(hosts).find();
      if (portPresent) {
        return;
      }
      log.warn("Port is not specified, default port 9200 will be used");
    }

    /**
     * Builds the job from the collected settings.
     *
     * <p>If the {@code ES_NODES} environment variable is set while
     * {@code ES_NODES_WAN_ONLY} is not, {@code nodesWanOnly} is switched on.
     *
     * @return the configured job
     */
    public ElasticsearchDependenciesJob build() {
      String envNodes = System.getenv("ES_NODES");
      String envWanOnly = System.getenv("ES_NODES_WAN_ONLY");
      // Optimize user configuration - nodes specified but wan only not
      boolean nodesWithoutWanOnly = envNodes != null && envWanOnly == null;
      if (nodesWithoutWanOnly) {
        this.nodesWanOnly = true;
      }
      logIfNoPort(this.hosts);
      return new ElasticsearchDependenciesJob(this);
    }
  }

  /**
   * Reads a system property and, when it has a non-empty value, prefixes it
   * with {@code file:}.
   *
   * @param key the system property name
   * @return {@code "file:" + value} for a non-empty value, otherwise the
   *         (empty) value itself
   */
  private static String getSystemPropertyAsFileResource(String key) {
    String value = System.getProperty(key, "");
    if (value == null || value.isEmpty()) {
      return value;
    }
    return "file:" + value;
  }

  private final ZonedDateTime day;
  private final SparkConf conf;
  private final String indexPrefix;
  private final String indexDatePattern;
  private final String spanRange;
  private final Boolean useAliases;

  /**
   * Copies the builder settings and assembles the Spark configuration.
   * The builder's {@code sparkProperties} are applied last, so they override
   * any value set earlier for the same key.
   *
   * @param builder the populated builder
   */
  ElasticsearchDependenciesJob(Builder builder) {
    this.day = builder.day;
    this.indexPrefix = builder.indexPrefix;
    this.indexDatePattern = builder.indexDatePattern;
    this.spanRange = builder.spanRange;
    this.useAliases = builder.useAliases;

    this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
    if (builder.jars != null) {
      conf.setJars(builder.jars);
    }

    // Basic auth credentials are optional
    if (builder.username != null) {
      conf.set("es.net.http.auth.user", builder.username);
    }
    if (builder.password != null) {
      conf.set("es.net.http.auth.pass", builder.password);
    }

    conf.set("es.nodes", builder.hosts);
    // SSL is switched on as soon as the hosts string mentions "https" anywhere
    if (builder.hosts.contains("https")) {
      conf.set("es.net.ssl", "true");
    }
    if (builder.nodesWanOnly) {
      conf.set("es.nodes.wan.only", "true");
    }
    if (builder.clientNodeOnly) {
      conf.set("es.nodes.discovery", "0");
      conf.set("es.nodes.client.only", "1");
    }

    for (Map.Entry<String, String> property : builder.sparkProperties.entrySet()) {
      conf.set(property.getKey(), property.getValue());
    }
  }

  /**
   * Builds the index prefix with the {@code ':'} separator.
   * See https://github.com/jaegertracing/jaeger/blob/master/CHANGELOG.md#190-2019-01-21
   *
   * @param prefix the configured index prefix, may be {@code null}
   * @return {@code prefix + ":"}, or an empty string when {@code prefix} is {@code null}
   */
  private static String prefixBefore19(String prefix) {
    if (prefix == null) {
      return "";
    }
    return prefix + ":";
  }

  /**
   * Builds the index prefix with the {@code '-'} separator.
   *
   * @param prefix the configured index prefix, may be {@code null}
   * @return {@code prefix + "-"}, or an empty string when {@code prefix} is {@code null}
   */
  private static String prefix(String prefix) {
    if (prefix == null) {
      return "";
    }
    return prefix + "-";
  }

  /**
   * Builds the date pattern used in index names from the given separator.
   *
   * @param separator text placed between year, month and day; an empty
   *                  string produces {@code yyyyMMdd}
   * @return the pattern with the separator quoted as a literal
   */
  private static String datePattern(String separator) {
    if (separator.isEmpty()) {
      return "yyyyMMdd";
    }
    // ' is escape character in date format, we should double it here.
    String quoted = separator.replace("'", "''");
    return "yyyy'" + quoted + "'MM'" + quoted + "'dd";
  }

  /**
   * Derives dependencies for the configured day and stores them.
   *
   * <p>With aliases enabled the job reads from the {@code jaeger-span-read}
   * alias and writes to {@code jaeger-dependencies-write}; otherwise it uses
   * the dated {@code jaeger-span} / {@code jaeger-dependencies} indices.
   * When an index prefix is configured, two candidates are produced for each
   * index (prefix followed by {@code '-'} and by {@code ':'}). Without a
   * prefix there is exactly one candidate.
   *
   * @param peerServiceTag tag name passed on to the dependency derivation
   */
  public void run(String peerServiceTag) {

    String[] readIndices;
    String[] writeIndex;

    // use alias indices common when using index rollover
    if (this.useAliases) {
      if (indexPrefix != null && indexPrefix.length() > 0) {
        readIndices = new String[]{prefix(indexPrefix) + "jaeger-span-read", prefixBefore19(indexPrefix) + "jaeger-span-read"};
        writeIndex = new String[] {prefix(indexPrefix) + "jaeger-dependencies-write", prefixBefore19(indexPrefix) + "jaeger-dependencies-write"};
      } else {
        // Same rule as indexDate(): without a prefix there is only one index.
        // This avoids processing the identical alias twice (null prefix) and
        // avoids bogus "-jaeger-..." / ":jaeger-..." names (empty prefix).
        readIndices = new String[]{"jaeger-span-read"};
        writeIndex = new String[]{"jaeger-dependencies-write"};
      }
    }
    else {
      readIndices = indexDate("jaeger-span");
      writeIndex = indexDate("jaeger-dependencies");
    }

    run(readIndices, writeIndex, peerServiceTag);
  }

  /**
   * Builds the dated index name(s) for the configured day.
   *
   * @param index base index name, e.g. {@code jaeger-span}
   * @return one name when no index prefix is configured, otherwise two names
   *         (prefix followed by {@code '-'} and by {@code ':'})
   */
  String[] indexDate(String index) {
    DateTimeFormatter formatter = DateTimeFormatter.ofPattern(indexDatePattern);
    String date = day.toLocalDate().format(formatter);

    // if there is no prefix we read and write only to one index
    if (indexPrefix == null || indexPrefix.isEmpty()) {
      return new String[]{index + "-" + date};
    }

    String current = prefix(indexPrefix) + index + "-" + date;
    String legacy = prefixBefore19(indexPrefix) + index + "-" + date;
    return new String[]{current, legacy};
  }

  /**
   * Runs the job against each span index in order, storing the result in the
   * dependency index at the same position. Processing stops after the first
   * index that yields at least one dependency link. The Spark context is
   * always stopped at the end.
   *
   * @param spanIndices    indices to read spans from
   * @param depIndices     indices to write dependencies to, matched by position
   * @param peerServiceTag tag name passed on to the dependency derivation
   */
  void run(String[] spanIndices, String[] depIndices,String peerServiceTag) {
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      for (int position = 0; position < spanIndices.length; position++) {
        final String source = spanIndices[position];
        final String target = depIndices[position];
        log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, source, target);

        // Send raw query to ES to select only the docs / spans we want to consider for this job
        // This doesn't change the default behavior as the daily indexes only contain up to 24h of data
        String rangeQuery = String.format("{\"range\": {\"startTimeMillis\": { \"gte\": \"now-%s\" }}}", spanRange);
        JavaPairRDD<String, Iterable<Span>> traces = JavaEsSpark.esJsonRDD(sc, source, rangeQuery)
            .map(new ElasticTupleToSpan())
            .groupBy(Span::getTraceId);
        List<Dependency> links = DependenciesSparkHelper.derive(traces,peerServiceTag);

        // Add type for ES < 7
        // WARN log is produced for older ES versions, however it's produced by spark-es library and not ES itself, it cannot be disabled
        //  WARN Resource: Detected type name in resource [jaeger-dependencies-2019-08-14/dependencies]. Type names are deprecated and will be removed in a later release.
        boolean needsTypeName = getEsVersion().before(EsMajorVersion.V_7_X);
        String resource = needsTypeName ? target + "/dependencies" : target;

        store(sc, links, resource);
        log.info("Done, {} dependency objects created", links.size());

        // we do not derive dependencies for old prefix "prefix:" if new prefix "prefix-" contains data
        if (!links.isEmpty()) {
          break;
        }
      }
    } finally {
      sc.stop();
    }
  }

  /**
   * Asks the cluster described by the job configuration for its major
   * version. The REST client is closed before returning.
   *
   * @return the major version reported by the cluster
   */
  private EsMajorVersion getEsVersion() {
    SparkSettings settings = new SparkSettings(conf);
    RestClient restClient = new RestClient(settings);
    try {
      return restClient.mainInfo().getMajorVersion();
    } finally {
      restClient.close();
    }
  }

  /**
   * Serializes the dependency links together with the job's day into one
   * JSON document and saves it to the given Elasticsearch resource. Nothing
   * is written when the list is empty.
   *
   * @param javaSparkContext context used to create the single-element RDD
   * @param dependencyLinks  links to store
   * @param resource         target Elasticsearch resource
   * @throws IllegalStateException if the links cannot be serialized to JSON
   */
  private void store(JavaSparkContext javaSparkContext, List<Dependency> dependencyLinks, String resource) {
    if (dependencyLinks.isEmpty()) {
      return;
    }

    final String document;
    try {
      ElasticsearchDependencies payload = new ElasticsearchDependencies(dependencyLinks, day);
      document = new ObjectMapper().writeValueAsString(payload);
    } catch (JsonProcessingException e) {
      throw new IllegalStateException("Could not serialize dependencies", e);
    }

    JavaEsSpark.saveJsonToEs(javaSparkContext.parallelize(Collections.singletonList(document)), resource);
  }

  /**
   * Helper class used to serialize dependencies to JSON. It exposes the
   * dependency list and a formatted timestamp through its getters.
   */
  public static final class ElasticsearchDependencies {
    private static final DateTimeFormatter TIMESTAMP_FORMAT =
        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX");

    private final List<Dependency> dependencies;
    private final ZonedDateTime ts;

    /**
     * @param dependencies the dependency links to serialize
     * @param ts           the point in time the links belong to
     */
    public ElasticsearchDependencies(List<Dependency> dependencies, ZonedDateTime ts) {
      this.ts = ts;
      this.dependencies = dependencies;
    }

    /**
     * @return the dependency links passed to the constructor
     */
    public List<Dependency> getDependencies() {
      return this.dependencies;
    }

    /**
     * @return the timestamp formatted as {@code yyyy-MM-dd'T'HH:mm:ssXXX}
     */
    public String getTimestamp() {
      // Jaeger ES dependency storage uses RFC3339Nano for timestamp
      return TIMESTAMP_FORMAT.format(ts);
    }
  }

  /**
   * Entry point for running ElasticsearchDependenciesJob directly.
   * This is used when the Docker image variant is elasticsearch-specific.
   *
   * <p>The day to process is taken from the single command-line argument if
   * exactly one is given, otherwise from the {@code DATE} environment
   * variable, otherwise it is today. The peer service tag comes from
   * {@code PEER_SERVICE_TAG} and defaults to {@code peer.service}.
   *
   * @param args optionally one ISO-8601 date
   */
  public static void main(String[] args) throws java.io.UnsupportedEncodingException {
    String envDate = System.getenv("DATE");
    final LocalDate date;
    if (args.length == 1) {
      date = LocalDate.parse(args[0]);
    } else if (envDate != null) {
      date = LocalDate.parse(envDate);
    } else {
      date = LocalDate.now();
    }

    String configuredTag = System.getenv("PEER_SERVICE_TAG");
    String peerServiceTag = configuredTag != null ? configuredTag : "peer.service";

    String jarPath = Utils.pathToUberJar(ElasticsearchDependenciesJob.class);
    ElasticsearchDependenciesJob job = ElasticsearchDependenciesJob.builder()
        .jars(jarPath)
        .day(date)
        .build();
    job.run(peerServiceTag);
  }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/main/resources/log4j2.component.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Disable Log4j status logger console output
log4j2.StatusLogger.level = OFF


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/main/resources/log4j2.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Set root logger level to WARN and use console appender
rootLogger.level = WARN
rootLogger.appenderRef.console.ref = console

# Console appender configuration
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
logger.jetty.name = org.spark-project.jetty
logger.jetty.level = WARN

logger.jettyLifecycle.name = org.spark-project.jetty.util.component.AbstractLifeCycle
logger.jettyLifecycle.level = ERROR

logger.sparkReplTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.sparkReplTyper.level = INFO

logger.sparkReplInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.sparkReplInterpreter.level = INFO

logger.jaegertracing.name = io.jaegertracing.spark
logger.jaegertracing.level = INFO

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.hiveMetastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.hiveMetastore.level = FATAL

logger.hiveFunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.hiveFunctionRegistry.level = ERROR


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesDockerJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.elastic;

import io.jaegertracing.spark.dependencies.LogToConsolePrinter;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.utility.DockerImageName;

import java.util.concurrent.TimeUnit;

import static org.awaitility.Awaitility.await;

/**
 * Variant of {@link ElasticsearchDependenciesJobTest} that derives the dependencies by running the
 * spark-dependencies Docker image instead of invoking the job in-process.
 *
 * <p>The image tag is taken from the {@code SPARK_DEPENDENCIES_JOB_IMAGE_TAG} environment variable.
 */
public class ElasticsearchDependenciesDockerJobTest extends ElasticsearchDependenciesJobTest {
  /** Elasticsearch URL handed to the job container through the {@code ES_NODES} variable. */
  private static final String ES_NODES = "http://elasticsearch:9200";

  /**
   * Reads the image tag of the spark-dependencies job from the environment.
   *
   * @return the trimmed tag, never empty
   * @throws IllegalStateException when the variable is unset, empty or contains only whitespace
   */
  private static String dependenciesJobTag() {
    String rawTag = System.getenv("SPARK_DEPENDENCIES_JOB_IMAGE_TAG");
    // Trim before the emptiness check so a whitespace-only value is rejected here instead of
    // producing an image name with an empty tag further down.
    String tag = rawTag == null ? "" : rawTag.trim();
    if (tag.isEmpty()) {
      throw new IllegalStateException(
          "SPARK_DEPENDENCIES_JOB_IMAGE_TAG environment variable is required but not set. " +
              "This variable must be set to ensure tests use the locally built Docker image.");
    }
    return tag;
  }

  /**
   * Refreshes Elasticsearch, runs the job container until it exits (at most three minutes) and
   * refreshes Elasticsearch again so the written documents are searchable.
   */
  @Override
  protected void deriveDependencies() {
    // Resolve "today" exactly once: the same date drives the index names used for cleanup and
    // the DATE passed to the container, so both agree even if the test runs across midnight.
    java.time.LocalDate today = java.time.LocalDate.now();

    // Create the dependenciesJob instance so that after() method can call
    // indexDate() on it
    dependenciesJob = ElasticsearchDependenciesJob.builder()
        .nodes("http://" + jaegerElasticsearchEnvironment.getElasticsearchIPPort())
        .day(today)
        .build();

    try {
      jaegerElasticsearchEnvironment.refresh();
      // Wait a bit to ensure all spans are fully indexed and visible
      Thread.sleep(2000);
    } catch (java.io.IOException e) {
      throw new RuntimeException("Could not refresh Elasticsearch", e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted while waiting", e);
    }

    // Format the date as ISO-8601 date string for the DATE env var
    String dateStr = today.toString();

    System.out
        .println("Running Docker spark-dependencies job with DATE=" + dateStr + ", ES_NODES=" + ES_NODES);
    System.out.println("::group::🚧 🚧 🚧 ElasticsearchDependenciesDockerJob logs");
    try (GenericContainer<?> sparkDependenciesJob = new GenericContainer<>(
        DockerImageName.parse("ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:" + dependenciesJobTag()))
        .withNetwork(jaegerElasticsearchEnvironment.network)
        .withLogConsumer(new LogToConsolePrinter("[spark-dependencies] "))
        .withEnv("STORAGE", "elasticsearch")
        .withEnv("ES_NODES", ES_NODES)
        .withEnv("DATE", dateStr)
        .dependsOn(jaegerElasticsearchEnvironment.elasticsearch, jaegerElasticsearchEnvironment.jaegerAll)) {
      sparkDependenciesJob.start();
      // The job is a one-shot process: it is done once the container is no longer running.
      await("spark-dependencies-job execution")
          .atMost(3, TimeUnit.MINUTES)
          .until(() -> !sparkDependenciesJob.isRunning());
    } finally {
      // Always close the log group, also when the container fails to start or times out.
      System.out.println("::endgroup::");
    }

    try {
      jaegerElasticsearchEnvironment.refresh();
    } catch (java.io.IOException e) {
      throw new RuntimeException("Could not refresh Elasticsearch", e);
    }
  }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.elastic;


import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.jaegertracing.spark.dependencies.test.DependenciesTest;
import io.jaegertracing.spark.dependencies.test.TracersGenerator;
import java.io.IOException;
import java.time.LocalDate;
import java.util.Collections;
import java.util.HashMap;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.testcontainers.containers.wait.strategy.HttpWaitStrategy;

/**
 * Runs the shared {@link DependenciesTest} scenarios against Jaeger backed by Elasticsearch,
 * deriving the dependencies with an in-process {@link ElasticsearchDependenciesJob}.
 *
 * @author Pavol Loffay
 */
public class ElasticsearchDependenciesJobTest extends DependenciesTest {

  static JaegerElasticsearchEnvironment jaegerElasticsearchEnvironment;
  protected ElasticsearchDependenciesJob dependenciesJob;

  /** Starts the Elasticsearch and Jaeger containers and publishes their URLs to the base test. */
  @BeforeClass
  public static void beforeClass() {
    JaegerElasticsearchEnvironment environment = new JaegerElasticsearchEnvironment();
    // Publish the environment before starting it so afterClass() can still reach it.
    jaegerElasticsearchEnvironment = environment;
    environment.start(new HashMap<>(), jaegerVersion(), JaegerElasticsearchEnvironment.elasticsearchVersion());
    collectorUrl = environment.getCollectorUrl();
    queryUrl = environment.getQueryUrl();
  }

  /**
   * Reports one span under a random service name and waits until Jaeger query returns it.
   */
  @Before
  public void before() {
    String serviceName = UUID.randomUUID().toString();
    String operationName = UUID.randomUUID().toString();
    TracersGenerator.Tuple<Tracer, TracersGenerator.Flushable> tracerAndFlusher =
        TracersGenerator.createJaeger(serviceName, collectorUrl);

    Span warmUpSpan = tracerAndFlusher.getA().spanBuilder(operationName).startSpan();
    warmUpSpan.setAttribute("foo", "bar");
    warmUpSpan.end();
    tracerAndFlusher.getB().flush();

    try {
      // Give extra time for spans to be exported and indexed
      Thread.sleep(TimeUnit.SECONDS.toMillis(2));
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
    waitJaegerQueryContains(serviceName, "foo");
  }

  /** Deletes the span and dependency documents of the day the last job instance worked on. */
  @After
  public void after() throws IOException {
    if (dependenciesJob == null) {
      return;
    }
    String[] spanIndices = dependenciesJob.indexDate("jaeger-span");
    String[] dependencyIndices = dependenciesJob.indexDate("jaeger-dependencies");
    jaegerElasticsearchEnvironment.cleanUp(spanIndices, dependencyIndices);
  }

  @AfterClass
  public static void afterClass() {
    jaegerElasticsearchEnvironment.stop();
  }

  /** Runs the job for today, refreshing Elasticsearch before and after. */
  @Override
  protected void deriveDependencies() {
    String nodes = "http://" + jaegerElasticsearchEnvironment.getElasticsearchIPPort();
    dependenciesJob = ElasticsearchDependenciesJob.builder()
        .nodes(nodes)
        .day(LocalDate.now())
        .build();
    refreshElasticsearch();
    dependenciesJob.run("peer.service");
    refreshElasticsearch();
  }

  @Override
  protected void waitBetweenTraces() throws InterruptedException {
    refreshElasticsearch();
  }

  /** Refreshes Elasticsearch, converting an I/O failure into a {@link RuntimeException}. */
  private static void refreshElasticsearch() {
    try {
      jaegerElasticsearchEnvironment.refresh();
    } catch (IOException e) {
      throw new RuntimeException("Could not refresh Elasticsearch", e);
    }
  }

  /**
   * HTTP wait strategy whose liveness check only looks at the mapped port of one given
   * container port.
   */
  public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy {
    private final int port;

    public BoundPortHttpWaitStrategy(int port) {
      this.port = port;
    }

    @Override
    protected Set<Integer> getLivenessCheckPorts() {
      return Collections.singleton(this.waitStrategyTarget.getMappedPort(port));
    }
  }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesTagFieldsJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.elastic;

import java.util.HashMap;
import org.junit.BeforeClass;

/**
 * Repeats the Elasticsearch dependency tests with Jaeger started with the
 * {@code ES_TAGS__AS_FIELDS_ALL=true} environment setting.
 *
 * @author Pavol Loffay
 */
public class ElasticsearchDependenciesTagFieldsJobTest extends ElasticsearchDependenciesJobTest {

  /** Starts the containers with the extra Jaeger environment variable and publishes their URLs. */
  @BeforeClass
  public static void beforeClass() {
    HashMap<String, String> tagsAsFieldsEnv = new HashMap<>();
    tagsAsFieldsEnv.put("ES_TAGS__AS_FIELDS_ALL", "true");

    JaegerElasticsearchEnvironment environment = new JaegerElasticsearchEnvironment();
    // Publish the environment before starting it so the inherited teardown can still reach it.
    jaegerElasticsearchEnvironment = environment;
    environment.start(tagsAsFieldsEnv, jaegerVersion(), JaegerElasticsearchEnvironment.elasticsearchVersion());
    queryUrl = environment.getQueryUrl();
    collectorUrl = environment.getCollectorUrl();
  }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/JaegerElasticsearchEnvironment.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.elastic;

import static io.jaegertracing.spark.dependencies.test.DependenciesTest.jaegerVersion;

import io.jaegertracing.spark.dependencies.elastic.ElasticsearchDependenciesJobTest.BoundPortHttpWaitStrategy;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.Network;

/**
 * Test fixture that starts an Elasticsearch container and a Jaeger container on a shared Docker
 * network and offers helpers to refresh and clean up the Elasticsearch data.
 *
 * @author Pavol Loffay
 */
public class JaegerElasticsearchEnvironment {

  private OkHttpClient okHttpClient = new OkHttpClient();
  // The three fields below stay null until start() assigns them.
  Network network;
  GenericContainer elasticsearch;
  GenericContainer jaegerAll;

  /**
   * Assigned at the end of {@link #start}; null before that.
   */
  private String queryUrl;
  private String collectorUrl;

  /**
   * Elasticsearch version to test against: the {@code elasticsearch.version} system property,
   * else the {@code ELASTICSEARCH_VERSION} environment variable, else {@code 7.17.10}.
   */
  public static String elasticsearchVersion() {
    String version = System.getProperty("elasticsearch.version", System.getenv("ELASTICSEARCH_VERSION"));
    return version != null ? version : "7.17.10";
  }

  /**
   * Creates the network, starts Elasticsearch, then starts Jaeger configured from the
   * {@code jaeger-v2-config-elasticsearch.yaml} classpath resource.
   *
   * @param jaegerEnvs extra environment variables for the Jaeger container
   * @param jaegerVersion tag of the {@code jaegertracing/jaeger} image
   * @param elasticsearchVersion tag of the Elasticsearch image
   */
  public void start(Map<String, String> jaegerEnvs, String jaegerVersion, String elasticsearchVersion) {
    network = Network.newNetwork();
    elasticsearch = new GenericContainer<>(String.format("docker.elastic.co/elasticsearch/elasticsearch:%s", elasticsearchVersion))
        .withNetwork(network)
        .withNetworkAliases("elasticsearch")
        .waitingFor(new BoundPortHttpWaitStrategy(9200).forStatusCode(200))
        .withExposedPorts(9200, 9300)
        .withEnv("xpack.security.enabled", "false")
        .withEnv("discovery.type", "single-node")
        .withEnv("network.bind_host", "elasticsearch")
        .withEnv("network.host", "_site_")
        .withEnv("network.publish_host", "_local_");
    elasticsearch.start();

    jaegerAll = new GenericContainer<>("jaegertracing/jaeger:" + jaegerVersion)
        .withNetwork(network)
        .withClasspathResourceMapping("jaeger-v2-config-elasticsearch.yaml", "/etc/jaeger/config.yaml", org.testcontainers.containers.BindMode.READ_ONLY)
        .withCommand("--config", "/etc/jaeger/config.yaml")
        .withEnv(jaegerEnvs)
        .waitingFor(new BoundPortHttpWaitStrategy(16687).forStatusCodeMatching(statusCode -> statusCode >= 200 && statusCode < 300))
        .withExposedPorts(16687, 16686, 4317, 4318, 14268, 9411);
    jaegerAll.start();

    collectorUrl = String.format("http://%s:%d", jaegerAll.getContainerIpAddress(), jaegerAll.getMappedPort(4317));
    queryUrl = String.format("http://%s:%d", jaegerAll.getContainerIpAddress(), jaegerAll.getMappedPort(16686));
  }

  /**
   * Deletes every document from the first span index and the first dependencies index.
   *
   * @param spanIndex span index names; only element 0 is used
   * @param dependenciesIndex dependencies index names; only element 0 is used
   * @throws IOException when the HTTP call fails
   * @throws IllegalStateException when Elasticsearch answers with a non-successful status
   */
  public void cleanUp(String[] spanIndex, String[] dependenciesIndex) throws IOException {
    String matchAllQuery = "{\"query\": {\"match_all\":{} }}";
    Request request = new Request.Builder()
        .url(String.format("http://%s:%d/%s,%s/_delete_by_query?conflicts=proceed",
            elasticsearch.getContainerIpAddress(),
            elasticsearch.getMappedPort(9200),
            // we don't use index prefix
            spanIndex[0],
            dependenciesIndex[0]))
        .post(
            RequestBody.create(MediaType.parse("application/json; charset=utf-8"), matchAllQuery))
        .build();

    try (Response response = okHttpClient.newCall(request).execute()) {
      if (!response.isSuccessful()) {
        String body = response.body().string();
        throw new IllegalStateException(String.format("Could not remove data from ES: %s, %s", response, body));
      }
    }
  }

  /**
   * In Elasticsearch, the _refresh endpoint is used to make recently indexed,
   * updated, or deleted documents visible to search, as otherwise they might
   * be still sitting in a memory buffer.
   *
   * @throws IOException when the HTTP call fails
   * @throws IllegalStateException when Elasticsearch answers with a non-successful status
   */
  public void refresh() throws IOException {
    Request request = new Request.Builder()
        .url(String.format("http://%s:%d/_refresh",
            elasticsearch.getContainerIpAddress(),
            elasticsearch.getMappedPort(9200)))
        .post(RequestBody.create(MediaType.parse("application/json; charset=utf-8"), ""))
        .build();

    try (Response response = okHttpClient.newCall(request).execute()) {
      if (!response.isSuccessful()) {
        String body = response.body().string();
        throw new IllegalStateException(String.format("Could not refresh ES: %s, %s", response, body));
      }
    }
  }

  /**
   * Closes the Jaeger container, the Elasticsearch container and the network, in that order.
   *
   * <p>Each resource is skipped when it is still null, so this is safe to call after
   * {@link #start} failed part-way or was never called.
   */
  public void stop() {
    // ofNullable rather than of: Optional.of throws NullPointerException on a null field,
    // which would abort the teardown before the remaining resources are closed.
    Optional.ofNullable(jaegerAll).ifPresent(GenericContainer::close);
    Optional.ofNullable(elasticsearch).ifPresent(GenericContainer::close);
    Optional.ofNullable(network).ifPresent(network1 -> {
      try {
        network1.close();
      } catch (Exception e) {
        // Best effort: a failure to remove the network must not fail the test run.
        e.printStackTrace();
      }
    });
  }

  public String getQueryUrl() {
    return queryUrl;
  }

  public String getCollectorUrl() {
    return collectorUrl;
  }

  /** Returns the host-reachable {@code host:port} of the Elasticsearch HTTP port (9200). */
  public String getElasticsearchIPPort() {
    return String.format("%s:%d", elasticsearch.getContainerIpAddress(), elasticsearch.getMappedPort(9200));
  }
}


================================================
FILE: jaeger-spark-dependencies-elasticsearch/src/test/resources/jaeger-v2-config-elasticsearch.yaml
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

service:
  extensions: [jaeger_storage, jaeger_query, healthcheckv2]
  pipelines:
    traces:
      receivers: [otlp, jaeger, zipkin]
      processors: [filter/jaeger, batch]
      exporters: [jaeger_storage_exporter]
  telemetry:
    resource:
      service.name: jaeger-backend
    metrics:
      level: detailed
      readers:
        - pull:
            exporter:
              prometheus:
                host: 0.0.0.0
                port: 8888
    logs:
      level: info
    traces:
      level: none

extensions:
  healthcheckv2:
    use_v2: true
    http:
      endpoint: "0.0.0.0:16687"
      status:
        enabled: true
        path: "/"

  jaeger_query:
    storage:
      traces: some_storage

  jaeger_storage:
    backends:
      some_storage:
        elasticsearch:
          server_urls:
            - http://elasticsearch:9200
          service_cache_ttl: 1s

receivers:
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:4318"

  jaeger:
    protocols:
      grpc:
      thrift_binary:
      thrift_compact:
      thrift_http:
        endpoint: "0.0.0.0:14268"

  zipkin:
    endpoint: "0.0.0.0:9411"

processors:
  filter/jaeger:
    error_mode: ignore
    traces:
      span:
        - 'resource.attributes["service.name"] == "jaeger"'
  
  batch:

exporters:
  jaeger_storage_exporter:
    trace_storage: some_storage


================================================
FILE: jaeger-spark-dependencies-opensearch/pom.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!--

    Copyright (c) The Jaeger Authors
    SPDX-License-Identifier: Apache-2.0

-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <artifactId>jaeger-spark-dependencies-parent</artifactId>
    <groupId>io.jaegertracing.dependencies</groupId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>

  <artifactId>jaeger-spark-dependencies-opensearch</artifactId>

  <dependencies>
    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-common</artifactId>
      <exclusions>
        <exclusion>
          <groupId>com.fasterxml.jackson.core</groupId>
          <artifactId>jackson-annotations</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.opensearch.client</groupId>
      <artifactId>opensearch-spark-30_${version.scala.binary}</artifactId>
      <version>1.3.0</version>
      <exclusions>
        <!--
          The excluded Spark artifacts use the same Scala binary version property as the
          connector artifact above, so the exclusions keep matching if that property changes.
        -->
        <exclusion>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-core_${version.scala.binary}</artifactId>
        </exclusion>
        <exclusion>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-sql_${version.scala.binary}</artifactId>
        </exclusion>
      </exclusions>
    </dependency>

    <dependency>
      <groupId>org.opensearch.client</groupId>
      <artifactId>opensearch-rest-high-level-client</artifactId>
      <version>2.18.0</version>
    </dependency>

    <dependency>
      <groupId>${project.groupId}</groupId>
      <artifactId>jaeger-spark-dependencies-test</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.testcontainers</groupId>
      <artifactId>testcontainers</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.opensearch</groupId>
        <artifactId>opensearch-testcontainers</artifactId>
        <version>2.1.4</version>
        <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.awaitility</groupId>
      <artifactId>awaitility</artifactId>
      <version>${version.org.awaitility-awaitility}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>io.opentelemetry</groupId>
      <artifactId>opentelemetry-api</artifactId>
      <version>${version.io.opentelemetry}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>io.jaegertracing</groupId>
      <artifactId>jaeger-client</artifactId>
      <version>${version.io.jaegertracing}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <artifactId>maven-shade-plugin</artifactId>
        <version>${version.maven-shade-plugin}</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>reference.conf</resource>
                </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>io.jaegertracing.spark.dependencies.opensearch.OpenSearchDependenciesJob</mainClass>
                </transformer>
              </transformers>
              <minimizeJar>false</minimizeJar>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>


================================================
FILE: jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJob.java
================================================
/**
 * Copyright 2017 The Jaeger Authors
 * Copyright 2016-2017 The OpenZipkin Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package io.jaegertracing.spark.dependencies.opensearch;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.jaegertracing.spark.dependencies.DependenciesSparkHelper;
import io.jaegertracing.spark.dependencies.Utils;
import io.jaegertracing.spark.dependencies.model.Dependency;
import io.jaegertracing.spark.dependencies.model.Span;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.opensearch.spark.rdd.api.java.JavaOpenSearchSpark;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Spark job that reads Jaeger spans from OpenSearch, derives the dependency links between
 * services and stores them in the Jaeger dependencies index.
 *
 * @author OpenZipkin authors
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class OpenSearchDependenciesJob {
  private static final Logger log = LoggerFactory.getLogger(OpenSearchDependenciesJob.class);
  /** Matches a {@code :<digits>} port suffix anywhere in the configured node list. */
  private static final Pattern PORT_PATTERN = Pattern.compile(":\\d+");

  public static Builder builder() {
    return new Builder();
  }

  /**
   * Collects the job configuration. Defaults are read from {@code OS_*} environment variables
   * when the builder is created and can be overridden through the setter methods.
   */
  public static final class Builder {

    String hosts = Utils.getEnv("OS_NODES", "127.0.0.1");
    String username = Utils.getEnv("OS_USERNAME", null);
    String password = Utils.getEnv("OS_PASSWORD", null);
    Boolean clientNodeOnly = Boolean.parseBoolean(Utils.getEnv("OS_CLIENT_NODE_ONLY", "false"));
    Boolean nodesWanOnly = Boolean.parseBoolean(Utils.getEnv("OS_NODES_WAN_ONLY", "false"));
    String indexPrefix = Utils.getEnv("OS_INDEX_PREFIX", null);
    String indexDatePattern = datePattern(Utils.getEnv("OS_INDEX_DATE_SEPARATOR", "-"));
    String spanRange = Utils.getEnv("OS_TIME_RANGE", "24h");
    Boolean useAliases = Boolean.parseBoolean(Utils.getEnv("OS_USE_ALIASES", "false"));
    Boolean allowSelfSigned = Boolean.parseBoolean(Utils.getEnv("OS_SSL_CERT_ALLOW_SELF_SIGNED", "false"));

    final Map<String, String> sparkProperties = new LinkedHashMap<>();

    Builder() {
      sparkProperties.put("spark.ui.enabled", "false");
      // don't die if there are no spans
      sparkProperties.put("opensearch.index.read.missing.as.empty", "true");
      // Key/trust store settings are taken from the standard javax.net.ssl system properties.
      sparkProperties.put("opensearch.net.ssl.keystore.location",
          getSystemPropertyAsFileResource("javax.net.ssl.keyStore"));
      sparkProperties.put("opensearch.net.ssl.keystore.pass",
          System.getProperty("javax.net.ssl.keyStorePassword", ""));
      sparkProperties.put("opensearch.net.ssl.truststore.location",
          getSystemPropertyAsFileResource("javax.net.ssl.trustStore"));
      sparkProperties.put("opensearch.net.ssl.truststore.pass",
          System.getProperty("javax.net.ssl.trustStorePassword", ""));
      if (allowSelfSigned) {
        sparkProperties.put("opensearch.net.ssl.cert.allow.self.signed", "true");
      }
    }

    // local[*] master lets us run & test the job locally without setting a Spark cluster
    String sparkMaster = Utils.getEnv("SPARK_MASTER", "local[*]");
    // needed when not in local mode
    String[] jars;

    // By default the job only works on traces whose first timestamp is today
    ZonedDateTime day = ZonedDateTime.of(LocalDate.now().atStartOfDay(), ZoneOffset.UTC);

    /** When set, this indicates which jars to distribute to the cluster. */
    public Builder jars(String... jars) {
      this.jars = jars;
      return this;
    }

    /**
     * opensearch.nodes separated by ','. Setting the nodes explicitly also switches
     * {@code nodesWanOnly} on; call {@link #nodesWanOnly(boolean)} afterwards to override that.
     */
    public Builder nodes(String hosts) {
      Utils.checkNoTNull(hosts, "nodes");
      this.hosts = hosts;
      this.nodesWanOnly = true;
      return this;
    }

    /** username used for HTTP basic auth (sets {@code opensearch.net.http.auth.user}) */
    public Builder username(String username) {
      this.username = username;
      return this;
    }

    /** password used for HTTP basic auth (sets {@code opensearch.net.http.auth.pass}) */
    public Builder password(String password) {
      this.password = password;
      return this;
    }

    /** index prefix for Jaeger indices. By default empty */
    public Builder indexPrefix(String indexPrefix) {
      this.indexPrefix = indexPrefix;
      return this;
    }

    /** index date pattern for Jaeger indices. By default yyyy-MM-dd */
    public Builder indexDatePattern(String indexDatePattern) {
      this.indexDatePattern = indexDatePattern;
      return this;
    }

    /** span range for Jaeger indices. By default 24h */
    public Builder spanRange(String spanRange) {
      this.spanRange = spanRange;
      return this;
    }

    /** Day to process dependencies for. Defaults to today. */
    public Builder day(LocalDate day) {
      this.day = day.atStartOfDay(ZoneOffset.UTC);
      return this;
    }

    /** Whether the connector is used against an OpenSearch instance in a cloud/restricted
     *  environment over the WAN, such as Amazon Web Services. In this mode, the
     *  connector disables discovery and only connects through the declared opensearch.nodes during all operations,
     *  including reads and writes. Note that in this mode, performance is highly affected. */
    public Builder nodesWanOnly(boolean wanOnly) {
      this.nodesWanOnly = wanOnly;
      return this;
    }

    /** Warns when the node list contains no explicit port. */
    private static void logIfNoPort(String hosts) {
      if (!PORT_PATTERN.matcher(hosts).find()) {
        log.warn("Port is not specified, default port 9200 will be used");
      }
    }

    /**
     * Builds the job. When {@code OS_NODES} is set in the environment and
     * {@code OS_NODES_WAN_ONLY} is not, WAN-only mode is switched on.
     */
    public OpenSearchDependenciesJob build() {
      // Distinct local names: the original locals shadowed the builder's own "hosts" field.
      String envNodes = System.getenv("OS_NODES");
      String envWanOnly = System.getenv("OS_NODES_WAN_ONLY");
      // Optimize user configuration - nodes specified but wan only not
      if (envNodes != null && envWanOnly == null) {
        this.nodesWanOnly = true;
      }
      logIfNoPort(this.hosts);
      return new OpenSearchDependenciesJob(this);
    }
  }

  /** Returns the system property prefixed with {@code file:}, or an empty string when unset. */
  private static String getSystemPropertyAsFileResource(String key) {
    String prop = System.getProperty(key, "");
    return prop != null && !prop.isEmpty() ? "file:" + prop : prop;
  }

  private final ZonedDateTime day;
  private final SparkConf conf;
  private final String indexPrefix;
  private final String indexDatePattern;
  private final String spanRange;
  private final Boolean useAliases;

  OpenSearchDependenciesJob(Builder builder) {
    this.day = builder.day;
    this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
    if (builder.jars != null) {
      conf.setJars(builder.jars);
    }
    if (builder.username != null) {
      conf.set("opensearch.net.http.auth.user", builder.username);
    }
    if (builder.password != null) {
      conf.set("opensearch.net.http.auth.pass", builder.password);
    }
    conf.set("opensearch.nodes", builder.hosts);
    // SSL is switched on as soon as any configured node mentions https.
    if (builder.hosts.contains("https")) {
      conf.set("opensearch.net.ssl", "true");
    }
    if (builder.nodesWanOnly) {
      conf.set("opensearch.nodes.wan.only", "true");
    }
    if (builder.clientNodeOnly) {
      conf.set("opensearch.nodes.discovery", "0");
      conf.set("opensearch.nodes.client.only", "1");
    }
    for (Map.Entry<String, String> entry : builder.sparkProperties.entrySet()) {
      conf.set(entry.getKey(), entry.getValue());
    }
    this.indexPrefix = builder.indexPrefix;
    this.indexDatePattern = builder.indexDatePattern;
    this.spanRange = builder.spanRange;
    this.useAliases = builder.useAliases;
  }

  /**
   * https://github.com/jaegertracing/jaeger/blob/master/CHANGELOG.md#190-2019-01-21
   */
  private static String prefixBefore19(String prefix) {
    return prefix != null ? String.format("%s:", prefix) : "";
  }

  private static String prefix(String prefix) {
    return prefix != null ? String.format("%s-", prefix) : "";
  }

  /** Builds the index date pattern {@code yyyy<sep>MM<sep>dd} for the given separator. */
  private static String datePattern(String separator) {
    if (separator.isEmpty()) {
      return "yyyyMMdd";
    }
    // ' is escape character in date format, we should double it here.
    if (separator.contains("'")) {
      separator = separator.replace("'", "''");
    }
    return String.format("yyyy'%s'MM'%s'dd", separator, separator);
  }

  /** True when a non-empty index prefix is configured. */
  private boolean hasIndexPrefix() {
    return indexPrefix != null && !indexPrefix.isEmpty();
  }

  /**
   * Derives the dependencies and stores them, reading either from the rollover aliases or from
   * the daily indices depending on the configuration.
   *
   * @param peerServiceTag name of the span tag passed on to the dependency derivation
   */
  public void run(String peerServiceTag) {

    String[] readIndices;
    String[] writeIndex;

    // use alias indices common when using index rollover
    if (this.useAliases) {
      readIndices = aliasIndices("jaeger-span-read");
      writeIndex = aliasIndices("jaeger-dependencies-write");
    }
    else {
      readIndices = indexDate("jaeger-span");
      writeIndex = indexDate("jaeger-dependencies");
    }

    run(readIndices, writeIndex, peerServiceTag);
  }

  /**
   * Resolves the alias names to use. With a prefix both the "prefix-" and the legacy "prefix:"
   * form are returned; without one there is exactly one name, mirroring {@link #indexDate}, so
   * the same alias is never processed twice.
   */
  private String[] aliasIndices(String alias) {
    if (hasIndexPrefix()) {
      return new String[]{prefix(indexPrefix) + alias, prefixBefore19(indexPrefix) + alias};
    }
    return new String[]{alias};
  }

  /**
   * Resolves the daily index names for the configured day.
   *
   * @param index base index name, e.g. {@code jaeger-span}
   * @return two names ("prefix-" and legacy "prefix:" form) when a prefix is set, else one
   */
  String[] indexDate(String index) {
    String date = day.toLocalDate().format(DateTimeFormatter.ofPattern(indexDatePattern));
    if (hasIndexPrefix()) {
      return new String[]{String.format("%s%s-%s", prefix(indexPrefix), index, date), String.format("%s%s-%s", prefixBefore19(indexPrefix), index, date)};
    }
    // if there is no prefix we read and write only to one index
    return new String[]{String.format("%s-%s", index, date)};
  }

  /**
   * Processes the span indices in order, writing to the dependency index at the same position,
   * and stops after the first index that yields at least one dependency link.
   */
  void run(String[] spanIndices, String[] depIndices, String peerServiceTag) {
    JavaSparkContext sc = new JavaSparkContext(conf);
    try {
      // Send raw query to OS to select only the docs / spans we want to consider for this job
      // This doesn't change the default behavior as the daily indexes only contain up to 24h of data
      String osQuery = String.format("{\"range\": {\"startTimeMillis\": { \"gte\": \"now-%s\" }}}", spanRange);
      for (int i = 0; i < spanIndices.length; i++) {
        String spanIndex = spanIndices[i];
        String depIndex = depIndices[i];
        log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, spanIndex, depIndex);
        JavaPairRDD<String, Iterable<Span>> traces = JavaOpenSearchSpark.opensearchRDD(sc, spanIndex, osQuery)
            .map(new OpenSearchTupleToSpan())
            .groupBy(Span::getTraceId);
        List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces, peerServiceTag);

        // No version check needed for OpenSearch as we don't support types in indexes
        store(sc, dependencyLinks, depIndex);
        log.info("Done, {} dependency objects created", dependencyLinks.size());
        if (!dependencyLinks.isEmpty()) {
          // we do not derive dependencies for old prefix "prefix:" if new prefix "prefix-" contains data
          break;
        }
      }
    } finally {
      sc.stop();
    }
  }

  /** Serializes the links into one JSON document and saves it; does nothing for an empty list. */
  private void store(JavaSparkContext javaSparkContext, List<Dependency> dependencyLinks, String resource) {
    if (dependencyLinks.isEmpty()) {
      return;
    }

    String json;
    try {
      ObjectMapper objectMapper = new ObjectMapper();
      json = objectMapper.writeValueAsString(new OpenSearchDependencies(dependencyLinks, day));
    } catch (JsonProcessingException e) {
      throw new IllegalStateException("Could not serialize dependencies", e);
    }

    JavaOpenSearchSpark.saveJsonToOpenSearch(javaSparkContext.parallelize(Collections.singletonList(json)), resource);
  }

  /**
   * Helper class used to serialize dependencies to JSON.
   */
  public static final class OpenSearchDependencies {
    /** Second-precision timestamp with zone offset, e.g. {@code 2024-01-31T00:00:00Z}. */
    private static final DateTimeFormatter TIMESTAMP_FORMAT =
        DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX");

    private List<Dependency> dependencies;
    private ZonedDateTime ts;

    public OpenSearchDependencies(List<Dependency> dependencies, ZonedDateTime ts) {
      this.dependencies = dependencies;
      this.ts = ts;
    }

    public List<Dependency> getDependencies() {
      return dependencies;
    }

    public String getTimestamp() {
      // NOTE(review): the original comment said Jaeger dependency storage uses RFC3339Nano;
      // this pattern emits whole seconds only - confirm that is what Jaeger expects.
      return ts.format(TIMESTAMP_FORMAT);
    }
  }

  /**
   * Entry point for running OpenSearchDependenciesJob directly.
   *
   * <p>The day is taken from the single command-line argument, else from the {@code DATE}
   * environment variable, else today. The peer service tag comes from {@code PEER_SERVICE_TAG}
   * and defaults to {@code peer.service}.
   */
  public static void main(String[] args) throws java.io.UnsupportedEncodingException {
    LocalDate date = LocalDate.now();
    if (args.length == 1) {
      date = LocalDate.parse(args[0]);
    } else if (System.getenv("DATE") != null) {
      date = LocalDate.parse(System.getenv("DATE"));
    }

    String peerServiceTag = System.getenv("PEER_SERVICE_TAG");
    if (peerServiceTag == null) {
      peerServiceTag = "peer.service";
    }

    String jarPath = Utils.pathToUberJar(OpenSearchDependenciesJob.class);
    OpenSearchDependenciesJob.builder()
        .jars(jarPath)
        .day(date)
        .build()
        .run(peerServiceTag);
  }
}


================================================
FILE: jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchTupleToSpan.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.opensearch;

import com.fasterxml.jackson.databind.ObjectMapper;
import io.jaegertracing.spark.dependencies.json.JsonHelper;
import io.jaegertracing.spark.dependencies.model.Span;
import java.util.Map;
import org.apache.spark.api.java.function.Function;
import scala.Tuple2;

/**
 * Spark function that converts one (key, field-map) tuple into a {@link Span}.
 *
 * <p>Only the tuple's second element is used. After conversion the trace id is
 * left-padded with zeros to 32 characters, tags are sorted by key and references
 * are sorted by span id.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class OpenSearchTupleToSpan implements Function<Tuple2<String, Map<String, Object>>, Span> {

  private ObjectMapper objectMapper = JsonHelper.configure(new ObjectMapper());

  /**
   * @param tuple pair whose second element holds the span fields
   * @return the converted span with normalized trace id and sorted tags/references
   */
  @Override
  public Span call(Tuple2<String, Map<String, Object>> tuple) throws Exception {
    Span converted = objectMapper.convertValue(tuple._2(), Span.class);
    converted.setTraceId(normalizeTraceId(converted.getTraceId()));

    if (converted.getTags() != null) {
      converted.getTags().sort((left, right) -> left.getKey().compareTo(right.getKey()));
    }
    if (converted.getRefs() != null) {
      converted.getRefs().sort((left, right) -> left.getSpanId().compareTo(right.getSpanId()));
    }
    return converted;
  }

  /**
   * Left-pads a trace id shorter than 32 characters with {@code '0'}; a null id or one
   * that is already 32 characters or longer is returned unchanged.
   */
  private String normalizeTraceId(String traceId) {
    if (traceId == null || traceId.length() >= 32) {
      return traceId;
    }
    // Pad with spaces to width 32, then turn the padding into zeros.
    String padded = String.format("%32s", traceId);
    return padded.replace(' ', '0');
  }
}


================================================
FILE: jaeger-spark-dependencies-opensearch/src/main/resources/log4j.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Log messages at WARN level and above to the console (stderr)
log4j.rootCategory=WARN, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.io.jaegertracing.spark=INFO

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR


================================================
FILE: jaeger-spark-dependencies-opensearch/src/main/resources/log4j2.component.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Disable Log4j status logger console output
log4j2.StatusLogger.level = OFF


================================================
FILE: jaeger-spark-dependencies-opensearch/src/main/resources/log4j2.properties
================================================
#
# Copyright (c) The Jaeger Authors
# SPDX-License-Identifier: Apache-2.0
#

# Set root logger level to WARN and use console appender
rootLogger.level = WARN
rootLogger.appenderRef.console.ref = console

# Console appender configuration
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
logger.jetty.name = org.spark-project.jetty
logger.jetty.level = WARN

logger.jettyLifecycle.name = org.spark-project.jetty.util.component.AbstractLifeCycle
logger.jettyLifecycle.level = ERROR

logger.sparkReplTyper.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.sparkReplTyper.level = INFO

logger.sparkReplInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.sparkReplInterpreter.level = INFO

logger.jaegertracing.name = io.jaegertracing.spark
logger.jaegertracing.level = INFO

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
logger.hiveMetastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.hiveMetastore.level = FATAL

logger.hiveFunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.hiveFunctionRegistry.level = ERROR


================================================
FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/JaegerOpenSearchEnvironment.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.opensearch;

import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.containers.Network;

/**
 * Test fixture that starts an OpenSearch container and a Jaeger container on a shared
 * Docker network and offers helpers to clean, refresh and stop them.
 *
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class JaegerOpenSearchEnvironment {

  private OkHttpClient okHttpClient = new OkHttpClient();
  Network network;
  GenericContainer<?> opensearch;
  GenericContainer<?> jaegerAll;

  /**
   * Populated by {@link #start(Map, String, String)}; null until then.
   */
  private String queryUrl;
  private String collectorUrl;

  /**
   * Resolves the OpenSearch image version: the {@code opensearch.version} system
   * property, else the {@code OPENSEARCH_VERSION} environment variable, else
   * {@code 2.11.1}.
   */
  public static String opensearchVersion() {
    String version = System.getProperty("opensearch.version", System.getenv("OPENSEARCH_VERSION"));
    return version != null ? version : "2.11.1";
  }

  /**
   * Starts OpenSearch first and then Jaeger, blocking until each answers its HTTP
   * readiness check, and records the collector and query URLs.
   *
   * @param jaegerEnvs environment variables passed to the Jaeger container
   * @param jaegerVersion tag of the {@code jaegertracing/jaeger} image
   * @param opensearchVersion tag of the {@code opensearchproject/opensearch} image
   */
  public void start(Map<String, String> jaegerEnvs, String jaegerVersion, String opensearchVersion) {
    network = Network.newNetwork();
    opensearch = new GenericContainer<>(String.format("opensearchproject/opensearch:%s", opensearchVersion))
        .withNetwork(network)
        .withNetworkAliases("opensearch")
        .waitingFor(new BoundPortHttpWaitStrategy(9200).forStatusCode(200))
        .withExposedPorts(9200)
        .withEnv("DISABLE_SECURITY_PLUGIN", "true")
        .withEnv("discovery.type", "single-node")
        .withEnv("network.bind_host", "opensearch")
        .withEnv("network.host", "0.0.0.0");
    opensearch.start();

    jaegerAll = new GenericContainer<>("jaegertracing/jaeger:" + jaegerVersion)
        .withNetwork(network)
        .withClasspathResourceMapping("jaeger-v2-config-opensearch.yaml", "/etc/jaeger/config.yaml",
            org.testcontainers.containers.BindMode.READ_ONLY)
        .withCommand("--config", "/etc/jaeger/config.yaml")
        .withEnv(jaegerEnvs)
        .waitingFor(new BoundPortHttpWaitStrategy(16687)
            .forStatusCodeMatching(statusCode -> statusCode >= 200 && statusCode < 300))
        .withExposedPorts(16687, 16686, 4317, 4318, 14268, 9411);
    jaegerAll.start();

    collectorUrl = String.format("http://%s:%d", jaegerAll.getHost(), jaegerAll.getMappedPort(4317));
    queryUrl = String.format("http://%s:%d", jaegerAll.getHost(), jaegerAll.getMappedPort(16686));
  }

  /**
   * Deletes every document from the first span index and the first dependencies index
   * via {@code _delete_by_query}.
   *
   * @param spanIndex span index names; only element 0 is used
   * @param dependenciesIndex dependencies index names; only element 0 is used
   * @throws IOException if the HTTP call fails
   * @throws IllegalStateException if OpenSearch answers with a non-successful status
   */
  public void cleanUp(String[] spanIndex, String[] dependenciesIndex) throws IOException {
    String matchAllQuery = "{\"query\": {\"match_all\":{} }}";
    Request request = new Request.Builder()
        .url(String.format("http://%s:%d/%s,%s/_delete_by_query?conflicts=proceed",
            opensearch.getHost(),
            opensearch.getMappedPort(9200),
            // we don't use index prefix
            spanIndex[0],
            dependenciesIndex[0]))
        .post(
            RequestBody.create(MediaType.parse("application/json; charset=utf-8"), matchAllQuery))
        .build();

    try (Response response = okHttpClient.newCall(request).execute()) {
      if (!response.isSuccessful()) {
        String body = response.body().string();
        throw new IllegalStateException(String.format("Could not remove data from OS: %s, %s", response, body));
      }
    }
  }

  /**
   * In OpenSearch, the _refresh endpoint is used to make recently indexed,
   * updated, or deleted documents visible to search, as otherwise they might
   * be still sitting in a memory buffer.
   *
   * @throws IOException if the HTTP call fails
   * @throws IllegalStateException if OpenSearch answers with a non-successful status
   */
  public void refresh() throws IOException {
    Request request = new Request.Builder()
        .url(String.format("http://%s:%d/_refresh",
            opensearch.getHost(),
            opensearch.getMappedPort(9200)))
        .post(RequestBody.create(MediaType.parse("application/json; charset=utf-8"), ""))
        .build();

    try (Response response = okHttpClient.newCall(request).execute()) {
      if (!response.isSuccessful()) {
        String body = response.body().string();
        throw new IllegalStateException(String.format("Could not refresh OS: %s, %s", response, body));
      }
    }
  }

  /**
   * Closes the Jaeger container, the OpenSearch container and the network, in that order.
   *
   * <p>Each resource is skipped when it was never created (for example when
   * {@link #start(Map, String, String)} failed part-way), so this method is safe to call
   * from a teardown hook regardless of how far start-up got.
   */
  public void stop() {
    // ofNullable, not of: the fields stay null if start() did not reach them.
    Optional.ofNullable(jaegerAll).ifPresent(GenericContainer::close);
    Optional.ofNullable(opensearch).ifPresent(GenericContainer::close);
    Optional.ofNullable(network).ifPresent(network1 -> {
      try {
        network1.close();
      } catch (Exception e) {
        e.printStackTrace();
      }
    });
  }

  /** @return base URL built from the Jaeger container's mapped port 16686; null before start */
  public String getQueryUrl() {
    return queryUrl;
  }

  /** @return base URL built from the Jaeger container's mapped port 4317; null before start */
  public String getCollectorUrl() {
    return collectorUrl;
  }

  /** @return {@code host:port} of the OpenSearch container's mapped port 9200 */
  public String getOpenSearchIPPort() {
    return String.format("%s:%d", opensearch.getHost(), opensearch.getMappedPort(9200));
  }

  /**
   * HTTP wait strategy whose liveness check looks only at the host port mapped to one
   * given container port.
   */
  public static class BoundPortHttpWaitStrategy extends org.testcontainers.containers.wait.strategy.HttpWaitStrategy {
    private final int port;

    /**
     * @param port container-side port whose mapped host port is checked
     */
    public BoundPortHttpWaitStrategy(int port) {
      this.port = port;
    }

    @Override
    protected java.util.Set<Integer> getLivenessCheckPorts() {
      int mappedPort = this.waitStrategyTarget.getMappedPort(port);
      return java.util.Collections.singleton(mappedPort);
    }
  }
}


================================================
FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesDockerJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.opensearch;

import io.jaegertracing.spark.dependencies.LogToConsolePrinter;
import org.testcontainers.containers.GenericContainer;

/**
 * @author Danish Siddiqui
 */
import org.testcontainers.utility.DockerImageName;

import java.util.concurrent.TimeUnit;

import static org.awaitility.Awaitility.await;

/**
 * Variant of {@link OpenSearchDependenciesJobTest} that derives dependencies by running
 * the spark-dependencies Docker image instead of invoking the job in-process.
 */
public class OpenSearchDependenciesDockerJobTest extends OpenSearchDependenciesJobTest {
  /**
   * Reads the image tag under test from {@code SPARK_DEPENDENCIES_JOB_IMAGE_TAG}.
   *
   * @return the trimmed tag
   * @throws IllegalStateException if the variable is unset or blank
   */
  private static String dependenciesJobTag() {
    String tag = System.getenv("SPARK_DEPENDENCIES_JOB_IMAGE_TAG");
    if (tag == null || tag.trim().isEmpty()) {
      throw new IllegalStateException(
          "SPARK_DEPENDENCIES_JOB_IMAGE_TAG environment variable is required but not set. " +
              "This variable must be set to ensure tests use the locally built Docker image.");
    }
    return tag.trim();
  }

  /**
   * Runs the dockerized job against the shared OpenSearch container and waits up to
   * three minutes for it to exit.
   *
   * @throws RuntimeException if OpenSearch cannot be refreshed, the wait is interrupted,
   *     or the container exits with a non-zero code
   */
  @Override
  protected void deriveDependencies() {
    // Take the date once so the in-process job object (used by after() for cleanup)
    // and the DATE passed to the container cannot disagree if midnight passes in between.
    java.time.LocalDate day = java.time.LocalDate.now();

    // Create the dependenciesJob instance so that after() method can call
    // indexDate() on it
    dependenciesJob = OpenSearchDependenciesJob.builder()
        .nodes("http://" + jaegerOpenSearchEnvironment.getOpenSearchIPPort())
        .day(day)
        .build();

    try {
      jaegerOpenSearchEnvironment.refresh();
      // Wait a bit to ensure all spans are fully indexed and visible
      Thread.sleep(2000);
    } catch (java.io.IOException e) {
      throw new RuntimeException("Could not refresh OpenSearch", e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted while waiting", e);
    }

    // ISO-8601 date string for the DATE env var
    String dateStr = day.toString();

    System.out
        .println("Running Docker spark-dependencies job with DATE=" + dateStr + ", OS_NODES=http://opensearch:9200");
    System.out.println("::group::🚧 🚧 🚧 OpenSearchDependenciesDockerJob logs");
    try (GenericContainer<?> sparkDependenciesJob = new GenericContainer<>(
        DockerImageName.parse("ghcr.io/jaegertracing/spark-dependencies/spark-dependencies:" + dependenciesJobTag()))
        .withNetwork(jaegerOpenSearchEnvironment.network)
        .withLogConsumer(new LogToConsolePrinter("[spark-dependencies] "))
        .withEnv("STORAGE", "opensearch")
        .withEnv("OS_NODES", "http://opensearch:9200")
        .withEnv("DATE", dateStr)
        .dependsOn(jaegerOpenSearchEnvironment.opensearch, jaegerOpenSearchEnvironment.jaegerAll)) {
      sparkDependenciesJob.start();
      await("spark-dependencies-job execution")
          .atMost(3, TimeUnit.MINUTES)
          .until(() -> !sparkDependenciesJob.isRunning());

      Long exitCode = sparkDependenciesJob.getCurrentContainerInfo()
          .getState()
          .getExitCodeLong();

      if (exitCode != null && exitCode != 0) {
        throw new RuntimeException("Spark dependencies job failed with exit code: " + exitCode);
      }
    } finally {
      // Always close the log group, even when the job fails.
      System.out.println("::endgroup::");
    }

    try {
      // Make the documents written by the job visible to search.
      jaegerOpenSearchEnvironment.refresh();
    } catch (java.io.IOException e) {
      throw new RuntimeException("Could not refresh OpenSearch", e);
    }
  }
}


================================================
FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJobTest.java
================================================
/**
 * Copyright (c) The Jaeger Authors
 * SPDX-License-Identifier: Apache-2.0
 */
package io.jaegertracing.spark.dependencies.opensearch;

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.Tracer;
import io.jaegertracing.spark.dependencies.test.DependenciesTest;
import io.jaegertracing.spark.dependencies.test.TracersGenerator;
import java.io.IOException;
import java.time.LocalDate;
import java.util.Collections;
import java.util.HashMap;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.testcontainers.containers.wait.strategy.HttpWaitStrategy;

/**
 * @author Pavol Loffay
 * @author Danish Siddiqui
 */
public class OpenSearchDependenciesJobTest extends DependenciesTest {

  protected OpenSearchDependenciesJob dependenciesJob;
  static JaegerOpenSearchEnvironment jaegerOpenSearchEnvironment;

  /**
   * Starts the shared OpenSearch/Jaeger environment once per class and publishes its
   * collector and query URLs.
   */
  @BeforeClass
  public static void beforeClass() {
    JaegerOpenSearchEnvironment environment = new JaegerOpenSearchEnvironment();
    // Publish the instance before starting it so afterClass() can reach it.
    jaegerOpenSearchEnvironment = environment;

    String openSearchVersion = JaegerOpenSearchEnvironment.opensearchVersion();
    environment.start(new HashMap<>(), jaegerVersion(), openSearchVersion);

    queryUrl = environment.getQueryUrl();
    collectorUrl = environment.getCollectorUrl();
  }

  /**
   * Sends one span with a random service and operation name and the attribute
   * {@code foo=bar}, flushes it, pauses two seconds, then calls
   * {@code waitJaegerQueryContains} for that service.
   * NOTE(review): this looks like a storage warm-up before each test; confirm against
   * the base class.
   */
  @Before
  public void before() throws Exception {
    final String service = UUID.randomUUID().toString();
    final String operation = UUID.randomUUID().toString();
    TracersGenerator.Tuple<Tracer, TracersGenerator.Flushable> tracerAndFlusher =
        TracersGenerator.createJaeger(service, collectorUrl);

    Span warmUpSpan = tracerAndFlusher.getA().spanBuilder(operation).startSpan();
    warmUpSpan.setAttribute("foo", "bar");
    warmUpSpan.end();
    tracerAndFlusher.getB().flush();

    try {
      // Give extra time for spans to be exported and indexed
      TimeUnit.SECONDS.sleep(2);
    } catch (InterruptedException interrupted) {
      // Keep the interrupt flag set and carry on with the check below.
      Thread.currentThread().interrupt();
    }
    waitJaegerQueryContains(service, "foo");
  }

  /**
   * Removes the span and dependency documents for the job's day; does nothing when no
   * job was created by the test.
   *
   * @throws IOException if the cleanup request fails
   */
  @After
  public void after() throws IOException {
    if (dependenciesJob == null) {
      return;
    }
    String[] spanIndices = dependenciesJob.indexDate("jaeger-span");
    String[] dependencyIndices = dependenciesJob.indexDate("jaeger-dependencies");
    jaegerOpenSearchEnvironment.cleanUp(spanIndices, dependencyIndices);
  }

  /**
   * Stops the shared OpenSearch/Jaeger environment after all tests in the class ran.
   */
  @AfterClass
  public static void afterClass() {
    JaegerOpenSearchEnvironment environment = jaegerOpenSearchEnvironment;
    environment.stop();
  }

  /**
   * Builds a job for today against the test OpenSearch node and runs it in-process with
   * the {@code peer.service} tag, refreshing OpenSearch before and after the run.
   *
   * @throws RuntimeException if either refresh fails with an I/O error
   */
  @Override
  protected void deriveDependencies() {
    String nodes = "http://" + jaegerOpenSearchEnvironment.getOpenSearchIPPort();
    dependenciesJob = OpenSearchDependenciesJob.builder()
        .nodes(nodes)
        .day(LocalDate.now())
        .build();

    try {
      // Refresh first so the job sees every span, then again so its output is searchable.
      jaegerOpenSearchEnvironment.refresh();
      dependenciesJob.run("peer.service");
      jaegerOpenSearchEnvironment.refresh();
    } catch (IOException refreshFailure) {
      throw new RuntimeException("Could not refresh OpenSearch", refreshFailure);
    }
  }

  @Override
  protected void waitBetweenTraces() throws InterruptedException {
    try {
      jaegerOpenSearchEnvironment.refresh();
    } catch (IOException e) {
      throw new RuntimeException
Download .txt
gitextract_0_ynlnqz/

├── .github/
│   └── workflows/
│       ├── ci-cd.yml
│       └── stale.yml
├── .gitignore
├── .mvn/
│   └── wrapper/
│       ├── maven-wrapper.jar
│       └── maven-wrapper.properties
├── AGENTS.md
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── RELEASES.md
├── entrypoint.sh
├── header.txt
├── jaeger-spark-dependencies/
│   ├── pom.xml
│   └── src/
│       └── main/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── DependenciesSparkJob.java
│           └── resources/
│               └── log4j.properties
├── jaeger-spark-dependencies-cassandra/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── io/
│       │   │       └── jaegertracing/
│       │   │           └── spark/
│       │   │               └── dependencies/
│       │   │                   └── cassandra/
│       │   │                       ├── CassandraDependenciesJob.java
│       │   │                       └── CassandraSpan.java
│       │   └── resources/
│       │       ├── log4j2.component.properties
│       │       └── log4j2.properties
│       └── test/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── cassandra/
│           │                       ├── CassandraDependenciesDockerJobTest.java
│           │                       ├── CassandraDependenciesJobTest.java
│           │                       └── JaegerTestDriverContainer.java
│           └── resources/
│               ├── jaeger-v2-config-cassandra.yaml
│               └── log4j.properties
├── jaeger-spark-dependencies-common/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── io/
│       │           └── jaegertracing/
│       │               └── spark/
│       │                   └── dependencies/
│       │                       ├── DependenciesSparkHelper.java
│       │                       ├── SpansToDependencyLinks.java
│       │                       ├── Utils.java
│       │                       ├── json/
│       │                       │   ├── JsonHelper.java
│       │                       │   ├── KeyValueDeserializer.java
│       │                       │   ├── KeyValueMixin.java
│       │                       │   ├── ReferenceDeserializer.java
│       │                       │   ├── ReferenceMixin.java
│       │                       │   ├── SpanDeserializer.java
│       │                       │   └── SpanMixin.java
│       │                       └── model/
│       │                           ├── Dependency.java
│       │                           ├── KeyValue.java
│       │                           ├── Process.java
│       │                           ├── Reference.java
│       │                           └── Span.java
│       └── test/
│           └── java/
│               └── io/
│                   └── jaegertracing/
│                       └── spark/
│                           └── dependencies/
│                               ├── SpansToDependencyLinksTest.java
│                               └── model/
│                                   └── SpanTest.java
├── jaeger-spark-dependencies-elasticsearch/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── io/
│       │   │       └── jaegertracing/
│       │   │           └── spark/
│       │   │               └── dependencies/
│       │   │                   └── elastic/
│       │   │                       ├── ElasticTupleToSpan.java
│       │   │                       └── ElasticsearchDependenciesJob.java
│       │   └── resources/
│       │       ├── log4j2.component.properties
│       │       └── log4j2.properties
│       └── test/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── elastic/
│           │                       ├── ElasticsearchDependenciesDockerJobTest.java
│           │                       ├── ElasticsearchDependenciesJobTest.java
│           │                       ├── ElasticsearchDependenciesTagFieldsJobTest.java
│           │                       └── JaegerElasticsearchEnvironment.java
│           └── resources/
│               └── jaeger-v2-config-elasticsearch.yaml
├── jaeger-spark-dependencies-opensearch/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   ├── java/
│       │   │   └── io/
│       │   │       └── jaegertracing/
│       │   │           └── spark/
│       │   │               └── dependencies/
│       │   │                   └── opensearch/
│       │   │                       ├── OpenSearchDependenciesJob.java
│       │   │                       └── OpenSearchTupleToSpan.java
│       │   └── resources/
│       │       ├── log4j.properties
│       │       ├── log4j2.component.properties
│       │       └── log4j2.properties
│       └── test/
│           ├── java/
│           │   └── io/
│           │       └── jaegertracing/
│           │           └── spark/
│           │               └── dependencies/
│           │                   └── opensearch/
│           │                       ├── JaegerOpenSearchEnvironment.java
│           │                       ├── OpenSearchDependenciesDockerJobTest.java
│           │                       └── OpenSearchDependenciesJobTest.java
│           └── resources/
│               └── jaeger-v2-config-opensearch.yaml
├── jaeger-spark-dependencies-test/
│   ├── pom.xml
│   └── src/
│       ├── main/
│       │   └── java/
│       │       └── io/
│       │           └── jaegertracing/
│       │               └── spark/
│       │                   └── dependencies/
│       │                       ├── LogToConsolePrinter.java
│       │                       └── test/
│       │                           ├── DependenciesTest.java
│       │                           ├── DependencyLinkDerivator.java
│       │                           ├── TracersGenerator.java
│       │                           ├── rest/
│       │                           │   ├── DependencyLink.java
│       │                           │   ├── JsonHelper.java
│       │                           │   └── RestResult.java
│       │                           └── tree/
│       │                               ├── Node.java
│       │                               ├── TracingWrapper.java
│       │                               ├── Traversals.java
│       │                               └── TreeGenerator.java
│       └── test/
│           └── java/
│               └── io/
│                   └── jaegertracing/
│                       └── spark/
│                           └── dependencies/
│                               └── test/
│                                   ├── DependencyLinksDerivatorTest.java
│                                   ├── MockTracingWrapper.java
│                                   ├── rest/
│                                   │   └── DeserializationTest.java
│                                   └── tree/
│                                       ├── TraversalsTest.java
│                                       └── TreeGeneratorTest.java
├── mvnw
├── mvnw.cmd
├── pom.xml
└── renovate.json
Download .txt
SYMBOL INDEX (346 symbols across 50 files)

FILE: jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJob.java
  class CassandraDependenciesJob (line 44) | public final class CassandraDependenciesJob {
    method builder (line 47) | public static Builder builder() {
    class Builder (line 51) | public static final class Builder {
      method Builder (line 67) | Builder() {
      method jars (line 84) | public Builder jars(String... jars) {
      method keyspace (line 90) | public Builder keyspace(String keyspace) {
      method username (line 97) | public Builder username(String username) {
      method password (line 104) | public Builder password(String password) {
      method day (line 111) | public Builder day(LocalDate day) {
      method contactPoints (line 117) | public Builder contactPoints(String contactPoints) {
      method localDc (line 123) | public Builder localDc(String localDc) {
      method build (line 128) | public CassandraDependenciesJob build() {
    method CassandraDependenciesJob (line 138) | CassandraDependenciesJob(Builder builder) {
    method run (line 161) | public void run(String peerServiceTag) {
    method store (line 183) | private void store(JavaSparkContext sc, List<Dependency> links) {
    method parseHosts (line 199) | static String parseHosts(String contactPoints) {
    method parsePort (line 209) | static String parsePort(String contactPoints) {
    method dependenciesTable (line 218) | private String dependenciesTable(JavaSparkContext sc) {
    class CassandraDependencies (line 232) | public final static class CassandraDependencies implements Serializable {
      method CassandraDependencies (line 238) | public CassandraDependencies(List<Dependency> dependencies, ZonedDat...
      method getDependencies (line 243) | public List<Dependency> getDependencies() {
      method getTs (line 247) | public Long getTs() {
      method getTsIndex (line 251) | public Long getTsIndex() {
    class CassandraDependenciesV2 (line 259) | public final static class CassandraDependenciesV2 implements Serializa...
      method CassandraDependenciesV2 (line 265) | public CassandraDependenciesV2(List<Dependency> dependencies, ZonedD...
      method getDependencies (line 270) | public List<Dependency> getDependencies() {
      method getTs (line 274) | public Long getTs() {
      method getTsBucket (line 278) | public Long getTsBucket() {
    method main (line 287) | public static void main(String[] args) throws java.io.UnsupportedEncod...

FILE: jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraSpan.java
  class CassandraSpan (line 18) | public class CassandraSpan extends Span {
    method getParentId (line 22) | public Long getParentId() {
    method setParentId (line 26) | public void setParentId(Long parentId) {
    method getRefs (line 30) | @Override

FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesDockerJobTest.java
  class CassandraDependenciesDockerJobTest (line 15) | public class CassandraDependenciesDockerJobTest extends CassandraDepende...
    method dependenciesJobTag (line 16) | private static String dependenciesJobTag() {
    method deriveDependencies (line 26) | @Override

FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJobTest.java
  class CassandraDependenciesJobTest (line 28) | public class CassandraDependenciesJobTest extends DependenciesTest {
    method beforeClass (line 36) | @BeforeClass
    method afterClass (line 86) | @AfterClass
    method after (line 93) | @After
    method dependenciesTable (line 104) | private String dependenciesTable(CqlSession session) {
    method deriveDependencies (line 113) | @Override
    method waitBetweenTraces (line 130) | @Override
    class BoundPortHttpWaitStrategy (line 136) | public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy {
      method BoundPortHttpWaitStrategy (line 139) | public BoundPortHttpWaitStrategy(int port) {
      method getLivenessCheckPorts (line 143) | @Override

FILE: jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/JaegerTestDriverContainer.java
  class JaegerTestDriverContainer (line 22) | public class JaegerTestDriverContainer extends GenericContainer<JaegerTe...
    method JaegerTestDriverContainer (line 27) | public JaegerTestDriverContainer(String dockerImageName) {
    method JaegerTestDriverContainer (line 31) | public JaegerTestDriverContainer(String dockerImageName, Duration wait...
    method waitUntilContainerStarted (line 36) | @Override
    method containerStartedCondition (line 42) | protected Callable<Boolean> containerStartedCondition(String statusUrl) {

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkHelper.java
  class DependenciesSparkHelper (line 16) | public class DependenciesSparkHelper {
    method DependenciesSparkHelper (line 17) | private DependenciesSparkHelper() {}
    method derive (line 29) | public static List<Dependency> derive(JavaPairRDD<String, Iterable<Spa...

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinks.java
  class SpansToDependencyLinks (line 25) | public class SpansToDependencyLinks implements FlatMapFunction<Iterable<...
    method SpansToDependencyLinks (line 37) | public SpansToDependencyLinks(String peerServiceTag){
    method call (line 41) | @Override
    method serverSpan (line 113) | static Optional<Span> serverSpan(Set<Span> sharedSpans) {
    method isClientSpan (line 123) | static boolean isClientSpan(Span span) {
    method isServerSpan (line 127) | static boolean isServerSpan(Span span) {
    method sharedSpanDependencies (line 131) | private List<Dependency> sharedSpanDependencies(Map<Long, Set<Span>> s...
    method sharedSpanDependency (line 141) | protected Optional<Dependency> sharedSpanDependency(Set<Span> sharedSp...

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/Utils.java
  class Utils (line 14) | public class Utils {
    method Utils (line 15) | private Utils() {}
    method getEnv (line 17) | public static String getEnv(String key, String defaultValue) {
    method checkNoTNull (line 22) | public static void checkNoTNull(String msg, Object object) {
    method pathToUberJar (line 32) | public static String pathToUberJar(Class<?> clazz) throws UnsupportedE...

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/JsonHelper.java
  class JsonHelper (line 17) | public class JsonHelper {
    method JsonHelper (line 19) | private JsonHelper() {
    method configure (line 22) | public static ObjectMapper configure(ObjectMapper objectMapper) {

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueDeserializer.java
  class KeyValueDeserializer (line 20) | public class KeyValueDeserializer extends StdDeserializer<KeyValue> {
    method KeyValueDeserializer (line 26) | public KeyValueDeserializer() {
    method deserialize (line 30) | @Override

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueMixin.java
  class KeyValueMixin (line 13) | @JsonDeserialize(using = KeyValueDeserializer.class)

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceDeserializer.java
  class ReferenceDeserializer (line 21) | public class ReferenceDeserializer extends StdDeserializer<Reference> {
    method ReferenceDeserializer (line 25) | protected ReferenceDeserializer() {
    method deserialize (line 29) | @Override

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceMixin.java
  class ReferenceMixin (line 13) | @JsonDeserialize(using = ReferenceDeserializer.class)

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanDeserializer.java
  class SpanDeserializer (line 29) | public class SpanDeserializer extends StdDeserializer<Span> {
    method SpanDeserializer (line 35) | public SpanDeserializer() {
    method deserialize (line 39) | @Override
    method addTagFields (line 78) | private List<KeyValue> addTagFields(List<KeyValue> tags, Map<String, O...
    method deserializeReferences (line 91) | private List<Reference> deserializeReferences(JsonNode node) throws Js...

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanMixin.java
  class SpanMixin (line 13) | @JsonDeserialize(using = SpanDeserializer.class)

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Dependency.java
  class Dependency (line 12) | public class Dependency implements Serializable {
    method Dependency (line 19) | public Dependency(String parent, String child) {
    method Dependency (line 23) | public Dependency(String parent, String child, long callCount) {
    method getParent (line 29) | public String getParent() {
    method getChild (line 33) | public String getChild() {
    method getCallCount (line 37) | public long getCallCount() {
    method equals (line 41) | @Override
    method getSource (line 60) | public String getSource() {
    method setSource (line 64) | public void setSource(String source) {
    method hashCode (line 67) | @Override
    method toString (line 80) | @Override

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/KeyValue.java
  class KeyValue (line 12) | public class KeyValue implements Serializable {
    method getKey (line 21) | public String getKey() {
    method setKey (line 25) | public void setKey(String key) {
    method getValueString (line 29) | public String getValueString() {
    method setValueString (line 33) | public void setValueString(String valueString) {
    method getValueType (line 37) | public String getValueType() {
    method setValueType (line 41) | public void setValueType(String valueType) {
    method equals (line 45) | @Override
    method hashCode (line 65) | @Override

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Process.java
  class Process (line 12) | public class Process implements Serializable {
    method getServiceName (line 17) | public String getServiceName() {
    method setServiceName (line 21) | public void setServiceName(String serviceName) {
    method equals (line 25) | @Override
    method hashCode (line 40) | @Override

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Reference.java
  class Reference (line 12) | public class Reference implements Serializable {
    method getSpanId (line 17) | public Long getSpanId() {
    method setSpanId (line 21) | public void setSpanId(Long spanId) {
    method equals (line 25) | @Override
    method hashCode (line 39) | @Override

FILE: jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Span.java
  class Span (line 13) | public class Span implements Serializable {
    method getTraceId (line 25) | public String getTraceId() {
    method setTraceId (line 29) | public void setTraceId(String traceId) {
    method getSpanId (line 33) | public Long getSpanId() {
    method setSpanId (line 37) | public void setSpanId(Long spanId) {
    method getStartTime (line 41) | public long getStartTime() {
    method setStartTime (line 45) | public void setStartTime(Long startTime) {
    method getProcess (line 49) | public Process getProcess() {
    method setProcess (line 53) | public void setProcess(Process process) {
    method getTags (line 57) | public List<KeyValue> getTags() {
    method getTag (line 61) | public String getTag(String key){
    method setTags (line 70) | public void setTags(List<KeyValue> tags) {
    method getRefs (line 74) | public List<Reference> getRefs() {
    method setRefs (line 78) | public void setRefs(List<Reference> refs) {
    method equals (line 82) | @Override
    method hashCode (line 111) | @Override

FILE: jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinksTest.java
  class SpansToDependencyLinksTest (line 23) | public class SpansToDependencyLinksTest {
    method shouldReturnDependencyWithClientAndServerSpans (line 25) | @Test
    method shouldReturnDependencyWithConsumerAndProducer (line 36) | @Test
    method shouldReturnEmptyDependencyForSpansWithoutSpanKindDefinition (line 47) | @Test
    method createSpan (line 57) | private Span createSpan(String serviceName, String tag) {

FILE: jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/model/SpanTest.java
  class SpanTest (line 14) | public class SpanTest {
    method testEquals (line 16) | @Test
    method createProcess (line 59) | private Process createProcess(String serviceName) {

FILE: jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticTupleToSpan.java
  class ElasticTupleToSpan (line 16) | public class ElasticTupleToSpan implements Function<Tuple2<String, Strin...
    method call (line 20) | @Override
    method normalizeTraceId (line 35) | private String normalizeTraceId(String traceId) {

FILE: jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJob.java
  class ElasticsearchDependenciesJob (line 37) | public class ElasticsearchDependenciesJob {
    method builder (line 41) | public static Builder builder() {
    class Builder (line 45) | public static final class Builder {
      method Builder (line 59) | Builder() {
      method jars (line 83) | public Builder jars(String... jars) {
      method nodes (line 89) | public Builder nodes(String hosts) {
      method username (line 97) | public Builder username(String username) {
      method password (line 103) | public Builder password(String password) {
      method indexPrefix (line 109) | public Builder indexPrefix(String indexPrefix) {
      method indexDatePattern (line 115) | public Builder indexDatePattern(String indexDatePattern) {
      method spanRange (line 121) | public Builder spanRange(String spanRange) {
      method day (line 127) | public Builder day(LocalDate day) {
      method nodesWanOnly (line 136) | public Builder nodesWanOnly(boolean wanOnly) {
      method logIfNoPort (line 141) | private static void logIfNoPort(String hosts) {
      method build (line 147) | public ElasticsearchDependenciesJob build() {
    method getSystemPropertyAsFileResource (line 159) | private static String getSystemPropertyAsFileResource(String key) {
    method ElasticsearchDependenciesJob (line 171) | ElasticsearchDependenciesJob(Builder builder) {
    method prefixBefore19 (line 206) | private static String prefixBefore19(String prefix) {
    method prefix (line 210) | private static String prefix(String prefix) {
    method datePattern (line 214) | private static String datePattern(String separator) {
    method run (line 225) | public void run(String peerServiceTag) {
    method indexDate (line 243) | String[] indexDate(String index) {
    method run (line 252) | void run(String[] spanIndices, String[] depIndices,String peerServiceT...
    method getEsVersion (line 285) | private EsMajorVersion getEsVersion() {
    method store (line 294) | private void store(JavaSparkContext javaSparkContext, List<Dependency>...
    class ElasticsearchDependencies (line 313) | public static final class ElasticsearchDependencies {
      method ElasticsearchDependencies (line 317) | public ElasticsearchDependencies(List<Dependency> dependencies, Zone...
      method getDependencies (line 322) | public List<Dependency> getDependencies() {
      method getTimestamp (line 326) | public String getTimestamp() {
    method main (line 336) | public static void main(String[] args) throws java.io.UnsupportedEncod...

FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesDockerJobTest.java
  class ElasticsearchDependenciesDockerJobTest (line 15) | public class ElasticsearchDependenciesDockerJobTest extends Elasticsearc...
    method dependenciesJobTag (line 16) | private static String dependenciesJobTag() {
    method deriveDependencies (line 26) | @Override

FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJobTest.java
  class ElasticsearchDependenciesJobTest (line 28) | public class ElasticsearchDependenciesJobTest extends DependenciesTest {
    method beforeClass (line 33) | @BeforeClass
    method before (line 41) | @Before
    method after (line 60) | @After
    method afterClass (line 67) | @AfterClass
    method deriveDependencies (line 72) | @Override
    method waitBetweenTraces (line 91) | @Override
    class BoundPortHttpWaitStrategy (line 100) | public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy {
      method BoundPortHttpWaitStrategy (line 103) | public BoundPortHttpWaitStrategy(int port) {
      method getLivenessCheckPorts (line 107) | @Override

FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesTagFieldsJobTest.java
  class ElasticsearchDependenciesTagFieldsJobTest (line 13) | public class ElasticsearchDependenciesTagFieldsJobTest extends Elasticse...
    method beforeClass (line 15) | @BeforeClass

FILE: jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/JaegerElasticsearchEnvironment.java
  class JaegerElasticsearchEnvironment (line 24) | public class JaegerElasticsearchEnvironment {
    method elasticsearchVersion (line 37) | public static String elasticsearchVersion() {
    method start (line 42) | public void start(Map<String, String> jaegerEnvs, String jaegerVersion...
    method cleanUp (line 69) | public void cleanUp(String[] spanIndex, String[] dependenciesIndex) th...
    method refresh (line 96) | public void refresh() throws IOException {
    method stop (line 112) | public void stop() {
    method getQueryUrl (line 124) | public String getQueryUrl() {
    method getCollectorUrl (line 128) | public String getCollectorUrl() {
    method getElasticsearchIPPort (line 132) | public String getElasticsearchIPPort() {

FILE: jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJob.java
  class OpenSearchDependenciesJob (line 44) | public class OpenSearchDependenciesJob {
    method builder (line 48) | public static Builder builder() {
    class Builder (line 52) | public static final class Builder {
      method Builder (line 67) | Builder() {
      method jars (line 94) | public Builder jars(String... jars) {
      method nodes (line 100) | public Builder nodes(String hosts) {
      method username (line 108) | public Builder username(String username) {
      method password (line 114) | public Builder password(String password) {
      method indexPrefix (line 120) | public Builder indexPrefix(String indexPrefix) {
      method indexDatePattern (line 126) | public Builder indexDatePattern(String indexDatePattern) {
      method spanRange (line 132) | public Builder spanRange(String spanRange) {
      method day (line 138) | public Builder day(LocalDate day) {
      method nodesWanOnly (line 147) | public Builder nodesWanOnly(boolean wanOnly) {
      method logIfNoPort (line 152) | private static void logIfNoPort(String hosts) {
      method build (line 158) | public OpenSearchDependenciesJob build() {
    method getSystemPropertyAsFileResource (line 170) | private static String getSystemPropertyAsFileResource(String key) {
    method OpenSearchDependenciesJob (line 182) | OpenSearchDependenciesJob(Builder builder) {
    method prefixBefore19 (line 217) | private static String prefixBefore19(String prefix) {
    method prefix (line 221) | private static String prefix(String prefix) {
    method datePattern (line 225) | private static String datePattern(String separator) {
    method run (line 236) | public void run(String peerServiceTag) {
    method indexDate (line 254) | String[] indexDate(String index) {
    method run (line 263) | void run(String[] spanIndices, String[] depIndices,String peerServiceT...
    method store (line 291) | private void store(JavaSparkContext javaSparkContext, List<Dependency>...
    class OpenSearchDependencies (line 310) | public static final class OpenSearchDependencies {
      method OpenSearchDependencies (line 314) | public OpenSearchDependencies(List<Dependency> dependencies, ZonedDa...
      method getDependencies (line 319) | public List<Dependency> getDependencies() {
      method getTimestamp (line 323) | public String getTimestamp() {
    method main (line 332) | public static void main(String[] args) throws java.io.UnsupportedEncod...

FILE: jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchTupleToSpan.java
  class OpenSearchTupleToSpan (line 18) | public class OpenSearchTupleToSpan implements Function<Tuple2<String, Ma...
    method call (line 22) | @Override
    method normalizeTraceId (line 37) | private String normalizeTraceId(String traceId) {

FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/JaegerOpenSearchEnvironment.java
  class JaegerOpenSearchEnvironment (line 22) | public class JaegerOpenSearchEnvironment {
    method opensearchVersion (line 35) | public static String opensearchVersion() {
    method start (line 40) | public void start(Map<String, String> jaegerEnvs, String jaegerVersion...
    method cleanUp (line 68) | public void cleanUp(String[] spanIndex, String[] dependenciesIndex) th...
    method refresh (line 94) | public void refresh() throws IOException {
    method stop (line 110) | public void stop() {
    method getQueryUrl (line 122) | public String getQueryUrl() {
    method getCollectorUrl (line 126) | public String getCollectorUrl() {
    method getOpenSearchIPPort (line 130) | public String getOpenSearchIPPort() {
    class BoundPortHttpWaitStrategy (line 134) | public static class BoundPortHttpWaitStrategy extends org.testcontaine...
      method BoundPortHttpWaitStrategy (line 137) | public BoundPortHttpWaitStrategy(int port) {
      method getLivenessCheckPorts (line 141) | @Override

FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesDockerJobTest.java
  class OpenSearchDependenciesDockerJobTest (line 19) | public class OpenSearchDependenciesDockerJobTest extends OpenSearchDepen...
    method dependenciesJobTag (line 20) | private static String dependenciesJobTag() {
    method deriveDependencies (line 30) | @Override

FILE: jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJobTest.java
  class OpenSearchDependenciesJobTest (line 28) | public class OpenSearchDependenciesJobTest extends DependenciesTest {
    method beforeClass (line 33) | @BeforeClass
    method before (line 42) | @Before
    method after (line 62) | @After
    method afterClass (line 70) | @AfterClass
    method deriveDependencies (line 75) | @Override
    method waitBetweenTraces (line 94) | @Override
    class BoundPortHttpWaitStrategy (line 103) | public static class BoundPortHttpWaitStrategy extends HttpWaitStrategy {
      method BoundPortHttpWaitStrategy (line 106) | public BoundPortHttpWaitStrategy(int port) {
      method getLivenessCheckPorts (line 110) | @Override

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/LogToConsolePrinter.java
  class LogToConsolePrinter (line 11) | public final class LogToConsolePrinter implements Consumer<OutputFrame> {
    method LogToConsolePrinter (line 14) | public LogToConsolePrinter(String prefix) {
    method accept (line 18) | @Override

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/DependenciesTest.java
  class DependenciesTest (line 36) | public abstract class DependenciesTest {
    method jaegerVersion (line 47) | public static String jaegerVersion() {
    method deriveDependencies (line 55) | protected abstract void deriveDependencies() throws Exception;
    method waitBetweenTraces (line 60) | protected abstract void waitBetweenTraces() throws InterruptedException;
    method testJaegerOneTrace (line 62) | @Test
    method testJaegerMultipleTraces (line 94) | @Test
    method testMultipleReferences (line 127) | @Test
    method assertDependencies (line 177) | protected void assertDependencies(Map<String, Map<String, Long>> expec...
    method waitJaegerQueryContains (line 190) | protected void waitJaegerQueryContains(String service, String spanCont...

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/DependencyLinkDerivator.java
  class DependencyLinkDerivator (line 18) | public class DependencyLinkDerivator {
    method serviceDependencies (line 20) | public static Map<String, Map<String, Long>> serviceDependencies(Node ...
    method serviceDependencies (line 24) | public static Map<String, Map<String, Long>> serviceDependencies(Node ...
    method serviceDependencies (line 45) | public static Map<String, Map<String, Long>> serviceDependencies(List<...

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/TracersGenerator.java
  class TracersGenerator (line 25) | public class TracersGenerator {
    class Tuple (line 27) | public static class Tuple<A, B> {
      method Tuple (line 31) | Tuple(A a, B b) {
      method getA (line 36) | public A getA() {
      method getB (line 40) | public B getB() {
    type Flushable (line 45) | public interface Flushable {
      method flush (line 46) | void flush();
    class TracerHolder (line 49) | public static class TracerHolder<T> {
      method TracerHolder (line 54) | TracerHolder(T tracer, String serviceName, Flushable flushable) {
      method getTracer (line 60) | public T getTracer() {
      method tracingWrapper (line 64) | public TracingWrapper tracingWrapper() {
      method flushable (line 68) | public Flushable flushable() {
    method generateJaeger (line 73) | public static List<TracerHolder<Tracer>> generateJaeger(int number, St...
    method createJaeger (line 83) | public static Tuple<Tracer, Flushable> createJaeger(String serviceName...
    method serviceName (line 156) | private static String serviceName() {

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/DependencyLink.java
  class DependencyLink (line 10) | public class DependencyLink {
    method DependencyLink (line 16) | public DependencyLink() {}
    method DependencyLink (line 18) | public DependencyLink(String parent, String child, long callCount) {
    method getParent (line 24) | public String getParent() {
    method setParent (line 28) | public void setParent(String parent) {
    method getChild (line 32) | public String getChild() {
    method setChild (line 36) | public void setChild(String child) {
    method getCallCount (line 40) | public long getCallCount() {
    method setCallCount (line 44) | public void setCallCount(long callCount) {

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/JsonHelper.java
  class JsonHelper (line 14) | public class JsonHelper {
    method JsonHelper (line 15) | private JsonHelper() {}
    method configure (line 17) | public static ObjectMapper configure(ObjectMapper objectMapper) {

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/RestResult.java
  class RestResult (line 12) | public class RestResult<T> {
    method getData (line 17) | public List<T> getData() {
    method setData (line 21) | public void setData(List<T> data) {
    method getErrors (line 25) | public List<Object> getErrors() {
    method setErrors (line 29) | public void setErrors(List<Object> errors) {

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/Node.java
  class Node (line 17) | public class Node<T extends TracingWrapper> {
    method Node (line 22) | public Node(TracingWrapper<T> tracingWrapper, Node parent) {
    method getTracingWrapper (line 33) | public TracingWrapper<T> getTracingWrapper() {
    method addDescendant (line 37) | private void addDescendant(Node descendant) {
    method getDescendants (line 41) | public List<Node> getDescendants() {
    method getServiceName (line 45) | public String getServiceName() {

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/TracingWrapper.java
  type TracingWrapper (line 21) | public interface TracingWrapper<T extends TracingWrapper> {
    method get (line 22) | T get();
    method serviceName (line 23) | String serviceName();
    method operationName (line 24) | String operationName();
    method createChildSpan (line 25) | void createChildSpan(TracingWrapper<T> parent);
    class OpenTelemetryWrapper (line 27) | class OpenTelemetryWrapper implements TracingWrapper<OpenTelemetryWrap...
      method OpenTelemetryWrapper (line 33) | public OpenTelemetryWrapper(Tracer tracer, String serviceName) {
      method get (line 38) | @Override
      method serviceName (line 43) | @Override
      method operationName (line 48) | @Override
      method createChildSpan (line 53) | @Override
      method getSpan (line 69) | public Span getSpan() {

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/Traversals.java
  class Traversals (line 12) | public class Traversals {
    method postOrder (line 20) | public static <T extends TracingWrapper> void postOrder(Node<T> root, ...
    method postOrder (line 28) | private static <T extends TracingWrapper> void postOrder(Node<T> paren...

FILE: jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/TreeGenerator.java
  class TreeGenerator (line 18) | public class TreeGenerator<Tracer> {
    method TreeGenerator (line 23) | public TreeGenerator(List<TracerHolder<Tracer>> tracers) {
    method generateTree (line 30) | public Node generateTree(int numOfNodes, int maxNumberOfDescendants) {
    method generateDescendants (line 40) | private void generateDescendants(Queue<Node> queue, int numOfNodes, fi...
    method getTracers (line 59) | public List<TracerHolder<Tracer>> getTracers() {

FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/DependencyLinksDerivatorTest.java
  class DependencyLinksDerivatorTest (line 19) | public class DependencyLinksDerivatorTest {
    method testRootToMap (line 21) | @Test
    method testDepLinkToMap (line 45) | @Test

FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/MockTracingWrapper.java
  class MockTracingWrapper (line 15) | public class MockTracingWrapper implements TracingWrapper<MockTracingWra...
    method MockTracingWrapper (line 21) | public MockTracingWrapper(MockTracer mockTracer, String serviceName) {
    method get (line 26) | @Override
    method serviceName (line 31) | @Override
    method operationName (line 36) | @Override
    method createChildSpan (line 41) | @Override

FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/rest/DeserializationTest.java
  class DeserializationTest (line 17) | public class DeserializationTest {
    method testDependencyLinkDeserialization (line 20) | @Test

FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/tree/TraversalsTest.java
  class TraversalsTest (line 19) | public class TraversalsTest {
    method testInorder (line 21) | @Test

FILE: jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/tree/TreeGeneratorTest.java
  class TreeGeneratorTest (line 20) | public class TreeGeneratorTest {
    method testGenerateOne (line 22) | @Test
    method testBranchingFactorOne (line 32) | @Test

FILE: jaeger-spark-dependencies/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkJob.java
  class DependenciesSparkJob (line 12) | public final class DependenciesSparkJob {
    method main (line 14) | public static void main(String[] args) throws UnsupportedEncodingExcep...
    method run (line 30) | private static void run(String storage, LocalDate localDate) throws Un...
    method parseZonedDateTime (line 53) | static LocalDate parseZonedDateTime(String date) {
Condensed preview — 85 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (271K chars).
[
  {
    "path": ".github/workflows/ci-cd.yml",
    "chars": 8746,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\nname: \"CI/CD Pipeline\"\n\non:\n  push:\n    br"
  },
  {
    "path": ".github/workflows/stale.yml",
    "chars": 2027,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\nname: 'Close stale issues and PRs'\n\non:\n  "
  },
  {
    "path": ".gitignore",
    "chars": 345,
    "preview": "target/\npom.xml.tag\npom.xml.releaseBackup\npom.xml.versionsBackup\npom.xml.next\nrelease.properties\ndependency-reduced-pom."
  },
  {
    "path": ".mvn/wrapper/maven-wrapper.properties",
    "chars": 112,
    "preview": "distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.9.12/apache-maven-3.9.12-bin.zip\n"
  },
  {
    "path": "AGENTS.md",
    "chars": 932,
    "preview": "# Integration Tests Guide\n\nThis guide provides instructions for running integration tests for the Jaeger Spark Dependenc"
  },
  {
    "path": "Dockerfile",
    "chars": 829,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Simple runtime image that receives a pre"
  },
  {
    "path": "LICENSE",
    "chars": 11357,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "Makefile",
    "chars": 2281,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n.PHONY: e2e-cassandra e2e-es7 e2e-es8 e2e-"
  },
  {
    "path": "README.md",
    "chars": 15554,
    "preview": "[![Latest image](https://ghcr-badge.egpl.dev/jaegertracing/spark-dependencies/spark-dependencies/latest_tag?trim=major&l"
  },
  {
    "path": "RELEASES.md",
    "chars": 203,
    "preview": "# Release process\n\n1. Create a new GitHub release with a new tag\n2. Use \"generate release notes\" button\n3. Once the rele"
  },
  {
    "path": "entrypoint.sh",
    "chars": 3060,
    "preview": "#!/bin/sh\n#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n\n# Taken from https://github.com"
  },
  {
    "path": "header.txt",
    "chars": 69,
    "preview": "Copyright (c) The Jaeger Authors\nSPDX-License-Identifier: Apache-2.0\n"
  },
  {
    "path": "jaeger-spark-dependencies/pom.xml",
    "chars": 6193,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "jaeger-spark-dependencies/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkJob.java",
    "chars": 1803,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies/src/main/resources/log4j.properties",
    "chars": 1003,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Set everything to be logged to the conso"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/pom.xml",
    "chars": 6900,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJob.java",
    "chars": 11054,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * Copyright 2016-2017 The OpenZipkin Authors\n * SPDX-License-Identifier: Apache"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/main/java/io/jaegertracing/spark/dependencies/cassandra/CassandraSpan.java",
    "chars": 979,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/main/resources/log4j2.component.properties",
    "chars": 155,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Disable Log4j status logger console outp"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/main/resources/log4j2.properties",
    "chars": 1388,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Set root logger level to WARN and use co"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesDockerJobTest.java",
    "chars": 2008,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/CassandraDependenciesJobTest.java",
    "chars": 5554,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/test/java/io/jaegertracing/spark/dependencies/cassandra/JaegerTestDriverContainer.java",
    "chars": 1933,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/test/resources/jaeger-v2-config-cassandra.yaml",
    "chars": 1542,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\nservice:\n  extensions: [jaeger_storage, ja"
  },
  {
    "path": "jaeger-spark-dependencies-cassandra/src/test/resources/log4j.properties",
    "chars": 1151,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Set everything to be logged to the conso"
  },
  {
    "path": "jaeger-spark-dependencies-common/pom.xml",
    "chars": 830,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/DependenciesSparkHelper.java",
    "chars": 1558,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinks.java",
    "chars": 6122,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/Utils.java",
    "chars": 1002,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/JsonHelper.java",
    "chars": 910,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueDeserializer.java",
    "chars": 1663,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/KeyValueMixin.java",
    "chars": 344,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceDeserializer.java",
    "chars": 1288,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/ReferenceMixin.java",
    "chars": 346,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanDeserializer.java",
    "chars": 4009,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/json/SpanMixin.java",
    "chars": 336,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Dependency.java",
    "chars": 1766,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/KeyValue.java",
    "chars": 1679,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Process.java",
    "chars": 897,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Reference.java",
    "chars": 846,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/main/java/io/jaegertracing/spark/dependencies/model/Span.java",
    "chars": 2721,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/SpansToDependencyLinksTest.java",
    "chars": 2861,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-common/src/test/java/io/jaegertracing/spark/dependencies/model/SpanTest.java",
    "chars": 1685,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/pom.xml",
    "chars": 6343,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticTupleToSpan.java",
    "chars": 1282,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/main/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJob.java",
    "chars": 13423,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * Copyright 2016-2017 The OpenZipkin Authors\n * SPDX-License-Identifier: Apache"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/main/resources/log4j2.component.properties",
    "chars": 155,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Disable Log4j status logger console outp"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/main/resources/log4j2.properties",
    "chars": 1388,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Set root logger level to WARN and use co"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesDockerJobTest.java",
    "chars": 3065,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesJobTest.java",
    "chars": 3663,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/ElasticsearchDependenciesTagFieldsJobTest.java",
    "chars": 844,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/test/java/io/jaegertracing/spark/dependencies/elastic/JaegerElasticsearchEnvironment.java",
    "chars": 5114,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-elasticsearch/src/test/resources/jaeger-v2-config-elasticsearch.yaml",
    "chars": 1477,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\nservice:\n  extensions: [jaeger_storage, ja"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/pom.xml",
    "chars": 4249,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJob.java",
    "chars": 13441,
    "preview": "/**\n * Copyright 2017 The Jaeger Authors\n * Copyright 2016-2017 The OpenZipkin Authors\n *\n * Licensed under the Apache L"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/main/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchTupleToSpan.java",
    "chars": 1366,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/main/resources/log4j.properties",
    "chars": 1003,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Set everything to be logged to the conso"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/main/resources/log4j2.component.properties",
    "chars": 155,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Disable Log4j status logger console outp"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/main/resources/log4j2.properties",
    "chars": 1388,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\n# Set root logger level to WARN and use co"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/JaegerOpenSearchEnvironment.java",
    "chars": 5176,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesDockerJobTest.java",
    "chars": 3335,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/test/java/io/jaegertracing/spark/dependencies/opensearch/OpenSearchDependenciesJobTest.java",
    "chars": 3669,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-opensearch/src/test/resources/jaeger-v2-config-opensearch.yaml",
    "chars": 1477,
    "preview": "#\n# Copyright (c) The Jaeger Authors\n# SPDX-License-Identifier: Apache-2.0\n#\n\nservice:\n  extensions: [jaeger_storage, ja"
  },
  {
    "path": "jaeger-spark-dependencies-test/pom.xml",
    "chars": 2204,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/LogToConsolePrinter.java",
    "chars": 683,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/DependenciesTest.java",
    "chars": 9393,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/DependencyLinkDerivator.java",
    "chars": 2068,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/TracersGenerator.java",
    "chars": 4955,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/DependencyLink.java",
    "chars": 869,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/JsonHelper.java",
    "chars": 525,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/rest/RestResult.java",
    "chars": 536,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/Node.java",
    "chars": 1186,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/TracingWrapper.java",
    "chars": 2012,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/Traversals.java",
    "chars": 810,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/main/java/io/jaegertracing/spark/dependencies/test/tree/TreeGenerator.java",
    "chars": 1804,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/DependencyLinksDerivatorTest.java",
    "chars": 2602,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/MockTracingWrapper.java",
    "chars": 1235,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/rest/DeserializationTest.java",
    "chars": 1189,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/tree/TraversalsTest.java",
    "chars": 1897,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "jaeger-spark-dependencies-test/src/test/java/io/jaegertracing/spark/dependencies/test/tree/TreeGeneratorTest.java",
    "chars": 1449,
    "preview": "/**\n * Copyright (c) The Jaeger Authors\n * SPDX-License-Identifier: Apache-2.0\n */\npackage io.jaegertracing.spark.depend"
  },
  {
    "path": "mvnw",
    "chars": 7272,
    "preview": "#!/bin/sh\n# ----------------------------------------------------------------------------\n# Licensed to the Apache Softwa"
  },
  {
    "path": "mvnw.cmd",
    "chars": 5271,
    "preview": "@REM ----------------------------------------------------------------------------\r\n@REM Licensed to the Apache Software "
  },
  {
    "path": "pom.xml",
    "chars": 13767,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!--\n\n    Copyright (c) The Jaeger Authors\n    SPDX-License-Identifier: Apache-2."
  },
  {
    "path": "renovate.json",
    "chars": 550,
    "preview": "{\n  \"$schema\": \"https://docs.renovatebot.com/renovate-schema.json\",\n  \"extends\": [\n    \"config:best-practices\",\n    \":gi"
  }
]

// ... and 1 more files (download for full content)

About this extraction

This page contains the full source code of the jaegertracing/spark-dependencies GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 85 files (246.4 KB), approximately 64.3k tokens, and a symbol index with 346 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!